diff --git a/.gitattributes b/.gitattributes index 637fa167e56685c01bc97f08a420ea76330cf6df..b04256600059c29fb2f343821acee9dab257d4ed 100644 --- a/.gitattributes +++ b/.gitattributes @@ -30,3 +30,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +*.nemo filter=lfs diff=lfs merge=lfs -text diff --git a/__pycache__/process_asr_text_tokenizer.cpython-39.pyc b/__pycache__/process_asr_text_tokenizer.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..cce9e3b4c0af7aba1f78f163da989fc577f3eb00 Binary files /dev/null and b/__pycache__/process_asr_text_tokenizer.cpython-39.pyc differ diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..d70b0a22f53a118f816fb8d9d21fadf6f44af557 --- /dev/null +++ b/all_results.json @@ -0,0 +1,21 @@ +{ + "epoch": 7.38, + "eval_loss": 8.706663131713867, + "eval_runtime": 970.2156, + "eval_samples": 13098, + "eval_samples_per_second": 13.5, + "eval_steps_per_second": 3.376, + "eval_wer": 0.20430683297635546, + "test_cer": 0.08093431359873023, + "test_loss": 5.917323112487793, + "test_runtime": 946.7263, + "test_samples": 12643, + "test_samples_per_second": 13.354, + "test_steps_per_second": 3.339, + "test_wer": 0.17709850666607363, + "train_loss": 10.025987887954182, + "train_runtime": 56856.134, + "train_samples": 108449, + "train_samples_per_second": 14.077, + "train_steps_per_second": 1.76 +} \ No newline at end of file diff --git a/check_bnb_install.py b/check_bnb_install.py new file mode 100644 index 0000000000000000000000000000000000000000..1feed00eb70348a9c4018e6f5b7cfc678dcc3891 --- /dev/null +++ b/check_bnb_install.py @@ -0,0 +1,19 @@ +import bitsandbytes as bnb +import torch + +p = torch.nn.Parameter(torch.rand(10, 10).cuda()) +a = torch.rand(10, 10).cuda() + +p1 = p.data.sum().item() + +adam = 
bnb.optim.Adam([p]) + +out = a * p +loss = out.sum() +loss.backward() +adam.step() + +p2 = p.data.sum().item() + +assert p1 != p2 +print('bnb: installed successfully!') diff --git a/checkpoint-100000/optimizer.pt b/checkpoint-100000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2372eb7875fc054eba826746c35214ae89df21c2 --- /dev/null +++ b/checkpoint-100000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75ca838bfd7e8d7e8ebc431190243148d186a5f1ed5cd674b751f6079710ab95 +size 5154565443 diff --git a/checkpoint-100000/rng_state.pth b/checkpoint-100000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..d6a0ae1a03db6a13be57849d9bb90669b4b23d08 --- /dev/null +++ b/checkpoint-100000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0714299d2503f04c887174fcb2c5995d31c2a8dd3d887f5907696d7a91cbcb1a +size 14503 diff --git a/checkpoint-100000/scheduler.pt b/checkpoint-100000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e089ed196cc8f8f755227f9636b98ddc6a5355ea --- /dev/null +++ b/checkpoint-100000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:591846b441d543caac3afc7202fecfc43bf20ba0c611a291457e9c81cc395399 +size 623 diff --git a/checkpoint-100000/stt_en_conformer_transducer_xlarge.nemo b/checkpoint-100000/stt_en_conformer_transducer_xlarge.nemo new file mode 100644 index 0000000000000000000000000000000000000000..fc2947e2b4f4453ef4aa3440e2f69b3d9824199d --- /dev/null +++ b/checkpoint-100000/stt_en_conformer_transducer_xlarge.nemo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4ddd41c1adabfce64125bbf639cadda2f044651386a1060440b2e49caea9f52 +size 2577971200 diff --git a/checkpoint-100000/trainer_state.json b/checkpoint-100000/trainer_state.json new file mode 100644 index 
0000000000000000000000000000000000000000..1e7587284cdedc499a5f9eb3a4b9f77e520d830e --- /dev/null +++ b/checkpoint-100000/trainer_state.json @@ -0,0 +1,12061 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.376263185070443, + "global_step": 100000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1e-05, + "loss": 178.9465, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 2e-05, + "loss": 164.9707, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 3e-05, + "loss": 142.2782, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4e-05, + "loss": 121.5122, + "step": 200 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 91.8622, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 6e-05, + "loss": 82.2062, + "step": 300 + }, + { + "epoch": 0.03, + "learning_rate": 7e-05, + "loss": 72.6893, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 8e-05, + "loss": 71.8709, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 9e-05, + "loss": 69.9995, + "step": 450 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001, + "loss": 70.6458, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 9.994977448744865e-05, + "loss": 73.9929, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 9.989954897489729e-05, + "loss": 66.52, + "step": 600 + }, + { + "epoch": 0.05, + "learning_rate": 9.984932346234594e-05, + "loss": 65.8947, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 9.979909794979458e-05, + "loss": 62.5809, + "step": 700 + }, + { + "epoch": 0.06, + "learning_rate": 9.974887243724323e-05, + "loss": 61.212, + "step": 750 + }, + { + "epoch": 0.06, + "learning_rate": 9.969864692469187e-05, + "loss": 68.2408, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 9.964842141214051e-05, + "loss": 61.5308, + "step": 850 + }, + { + "epoch": 0.07, + "learning_rate": 
9.959819589958916e-05, + "loss": 58.9116, + "step": 900 + }, + { + "epoch": 0.07, + "learning_rate": 9.95479703870378e-05, + "loss": 60.0702, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 9.949774487448646e-05, + "loss": 57.6135, + "step": 1000 + }, + { + "epoch": 0.08, + "learning_rate": 9.944751936193509e-05, + "loss": 50.9231, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 9.939729384938373e-05, + "loss": 51.187, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 9.934706833683238e-05, + "loss": 52.1127, + "step": 1150 + }, + { + "epoch": 0.09, + "learning_rate": 9.929684282428102e-05, + "loss": 47.4608, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 9.924661731172968e-05, + "loss": 51.6108, + "step": 1250 + }, + { + "epoch": 0.1, + "learning_rate": 9.919639179917831e-05, + "loss": 46.5874, + "step": 1300 + }, + { + "epoch": 0.1, + "learning_rate": 9.914616628662697e-05, + "loss": 41.4706, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 9.90959407740756e-05, + "loss": 43.7544, + "step": 1400 + }, + { + "epoch": 0.11, + "learning_rate": 9.904571526152426e-05, + "loss": 44.6039, + "step": 1450 + }, + { + "epoch": 0.11, + "learning_rate": 9.899548974897289e-05, + "loss": 41.4384, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 9.894526423642154e-05, + "loss": 42.8289, + "step": 1550 + }, + { + "epoch": 0.12, + "learning_rate": 9.889503872387019e-05, + "loss": 39.9726, + "step": 1600 + }, + { + "epoch": 0.12, + "learning_rate": 9.884481321131882e-05, + "loss": 43.9533, + "step": 1650 + }, + { + "epoch": 0.13, + "learning_rate": 9.879458769876748e-05, + "loss": 38.7605, + "step": 1700 + }, + { + "epoch": 0.13, + "learning_rate": 9.87443621862161e-05, + "loss": 39.5425, + "step": 1750 + }, + { + "epoch": 0.13, + "learning_rate": 9.869413667366476e-05, + "loss": 37.588, + "step": 1800 + }, + { + "epoch": 0.14, + "learning_rate": 9.86439111611134e-05, + "loss": 39.7744, + "step": 1850 + }, + { + 
"epoch": 0.14, + "learning_rate": 9.859368564856205e-05, + "loss": 38.2154, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 9.85434601360107e-05, + "loss": 35.0806, + "step": 1950 + }, + { + "epoch": 0.15, + "learning_rate": 9.849323462345934e-05, + "loss": 39.061, + "step": 2000 + }, + { + "epoch": 0.15, + "learning_rate": 9.844300911090798e-05, + "loss": 35.1544, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 9.839278359835663e-05, + "loss": 38.123, + "step": 2100 + }, + { + "epoch": 0.16, + "learning_rate": 9.834255808580527e-05, + "loss": 33.1144, + "step": 2150 + }, + { + "epoch": 0.16, + "learning_rate": 9.829233257325392e-05, + "loss": 34.3476, + "step": 2200 + }, + { + "epoch": 0.17, + "learning_rate": 9.824210706070256e-05, + "loss": 29.5665, + "step": 2250 + }, + { + "epoch": 0.17, + "learning_rate": 9.81918815481512e-05, + "loss": 35.8756, + "step": 2300 + }, + { + "epoch": 0.17, + "learning_rate": 9.814165603559985e-05, + "loss": 37.2579, + "step": 2350 + }, + { + "epoch": 0.18, + "learning_rate": 9.809143052304849e-05, + "loss": 33.6245, + "step": 2400 + }, + { + "epoch": 0.18, + "learning_rate": 9.804120501049714e-05, + "loss": 35.6543, + "step": 2450 + }, + { + "epoch": 0.18, + "learning_rate": 9.799097949794578e-05, + "loss": 36.7847, + "step": 2500 + }, + { + "epoch": 0.19, + "learning_rate": 9.794075398539442e-05, + "loss": 33.463, + "step": 2550 + }, + { + "epoch": 0.19, + "learning_rate": 9.789052847284307e-05, + "loss": 32.2215, + "step": 2600 + }, + { + "epoch": 0.2, + "learning_rate": 9.784030296029171e-05, + "loss": 33.4301, + "step": 2650 + }, + { + "epoch": 0.2, + "learning_rate": 9.779007744774036e-05, + "loss": 29.9579, + "step": 2700 + }, + { + "epoch": 0.2, + "learning_rate": 9.773985193518901e-05, + "loss": 31.9141, + "step": 2750 + }, + { + "epoch": 0.21, + "learning_rate": 9.768962642263764e-05, + "loss": 33.2049, + "step": 2800 + }, + { + "epoch": 0.21, + "learning_rate": 9.763940091008629e-05, + "loss": 
32.8774, + "step": 2850 + }, + { + "epoch": 0.21, + "learning_rate": 9.758917539753493e-05, + "loss": 29.0858, + "step": 2900 + }, + { + "epoch": 0.22, + "learning_rate": 9.753894988498358e-05, + "loss": 30.1145, + "step": 2950 + }, + { + "epoch": 0.22, + "learning_rate": 9.748872437243222e-05, + "loss": 27.6986, + "step": 3000 + }, + { + "epoch": 0.22, + "learning_rate": 9.743849885988087e-05, + "loss": 31.7807, + "step": 3050 + }, + { + "epoch": 0.23, + "learning_rate": 9.738827334732952e-05, + "loss": 30.5108, + "step": 3100 + }, + { + "epoch": 0.23, + "learning_rate": 9.733804783477815e-05, + "loss": 31.0909, + "step": 3150 + }, + { + "epoch": 0.24, + "learning_rate": 9.728782232222681e-05, + "loss": 27.9057, + "step": 3200 + }, + { + "epoch": 0.24, + "learning_rate": 9.723759680967544e-05, + "loss": 29.7323, + "step": 3250 + }, + { + "epoch": 0.24, + "learning_rate": 9.71873712971241e-05, + "loss": 29.7527, + "step": 3300 + }, + { + "epoch": 0.25, + "learning_rate": 9.713714578457273e-05, + "loss": 29.1442, + "step": 3350 + }, + { + "epoch": 0.25, + "learning_rate": 9.708692027202137e-05, + "loss": 30.8906, + "step": 3400 + }, + { + "epoch": 0.25, + "learning_rate": 9.703669475947003e-05, + "loss": 26.8419, + "step": 3450 + }, + { + "epoch": 0.26, + "learning_rate": 9.698646924691866e-05, + "loss": 29.2181, + "step": 3500 + }, + { + "epoch": 0.26, + "learning_rate": 9.693624373436732e-05, + "loss": 27.6549, + "step": 3550 + }, + { + "epoch": 0.27, + "learning_rate": 9.688601822181595e-05, + "loss": 34.0701, + "step": 3600 + }, + { + "epoch": 0.27, + "learning_rate": 9.683579270926461e-05, + "loss": 24.7487, + "step": 3650 + }, + { + "epoch": 0.27, + "learning_rate": 9.678556719671325e-05, + "loss": 30.0266, + "step": 3700 + }, + { + "epoch": 0.28, + "learning_rate": 9.67353416841619e-05, + "loss": 25.5011, + "step": 3750 + }, + { + "epoch": 0.28, + "learning_rate": 9.668511617161054e-05, + "loss": 26.1437, + "step": 3800 + }, + { + "epoch": 0.28, + 
"learning_rate": 9.663489065905918e-05, + "loss": 23.2303, + "step": 3850 + }, + { + "epoch": 0.29, + "learning_rate": 9.658466514650783e-05, + "loss": 26.357, + "step": 3900 + }, + { + "epoch": 0.29, + "learning_rate": 9.653443963395646e-05, + "loss": 27.2201, + "step": 3950 + }, + { + "epoch": 0.3, + "learning_rate": 9.648421412140512e-05, + "loss": 25.5695, + "step": 4000 + }, + { + "epoch": 0.3, + "learning_rate": 9.643398860885376e-05, + "loss": 24.8346, + "step": 4050 + }, + { + "epoch": 0.3, + "learning_rate": 9.63837630963024e-05, + "loss": 22.3957, + "step": 4100 + }, + { + "epoch": 0.31, + "learning_rate": 9.633353758375105e-05, + "loss": 24.9532, + "step": 4150 + }, + { + "epoch": 0.31, + "learning_rate": 9.628331207119969e-05, + "loss": 23.1574, + "step": 4200 + }, + { + "epoch": 0.31, + "learning_rate": 9.623308655864834e-05, + "loss": 23.7018, + "step": 4250 + }, + { + "epoch": 0.32, + "learning_rate": 9.618286104609698e-05, + "loss": 25.1433, + "step": 4300 + }, + { + "epoch": 0.32, + "learning_rate": 9.613263553354562e-05, + "loss": 25.0571, + "step": 4350 + }, + { + "epoch": 0.32, + "learning_rate": 9.608241002099427e-05, + "loss": 24.2231, + "step": 4400 + }, + { + "epoch": 0.33, + "learning_rate": 9.603218450844291e-05, + "loss": 23.0983, + "step": 4450 + }, + { + "epoch": 0.33, + "learning_rate": 9.598195899589156e-05, + "loss": 25.0078, + "step": 4500 + }, + { + "epoch": 0.34, + "learning_rate": 9.59317334833402e-05, + "loss": 20.6933, + "step": 4550 + }, + { + "epoch": 0.34, + "learning_rate": 9.588150797078884e-05, + "loss": 23.6196, + "step": 4600 + }, + { + "epoch": 0.34, + "learning_rate": 9.583128245823749e-05, + "loss": 25.2331, + "step": 4650 + }, + { + "epoch": 0.35, + "learning_rate": 9.578105694568613e-05, + "loss": 24.7932, + "step": 4700 + }, + { + "epoch": 0.35, + "learning_rate": 9.573083143313478e-05, + "loss": 24.3586, + "step": 4750 + }, + { + "epoch": 0.35, + "learning_rate": 9.568060592058342e-05, + "loss": 22.7161, + 
"step": 4800 + }, + { + "epoch": 0.36, + "learning_rate": 9.563038040803208e-05, + "loss": 22.4188, + "step": 4850 + }, + { + "epoch": 0.36, + "learning_rate": 9.558015489548071e-05, + "loss": 21.6516, + "step": 4900 + }, + { + "epoch": 0.37, + "learning_rate": 9.552992938292937e-05, + "loss": 21.78, + "step": 4950 + }, + { + "epoch": 0.37, + "learning_rate": 9.5479703870378e-05, + "loss": 21.0172, + "step": 5000 + }, + { + "epoch": 0.37, + "learning_rate": 9.542947835782665e-05, + "loss": 22.4624, + "step": 5050 + }, + { + "epoch": 0.38, + "learning_rate": 9.537925284527528e-05, + "loss": 23.6615, + "step": 5100 + }, + { + "epoch": 0.38, + "learning_rate": 9.532902733272393e-05, + "loss": 21.8091, + "step": 5150 + }, + { + "epoch": 0.38, + "learning_rate": 9.527880182017259e-05, + "loss": 21.4173, + "step": 5200 + }, + { + "epoch": 0.39, + "learning_rate": 9.522857630762122e-05, + "loss": 20.5415, + "step": 5250 + }, + { + "epoch": 0.39, + "learning_rate": 9.517835079506987e-05, + "loss": 21.0639, + "step": 5300 + }, + { + "epoch": 0.39, + "learning_rate": 9.51281252825185e-05, + "loss": 21.6078, + "step": 5350 + }, + { + "epoch": 0.4, + "learning_rate": 9.507789976996716e-05, + "loss": 19.4142, + "step": 5400 + }, + { + "epoch": 0.4, + "learning_rate": 9.50276742574158e-05, + "loss": 20.2504, + "step": 5450 + }, + { + "epoch": 0.41, + "learning_rate": 9.497744874486445e-05, + "loss": 23.8683, + "step": 5500 + }, + { + "epoch": 0.41, + "learning_rate": 9.49272232323131e-05, + "loss": 19.7559, + "step": 5550 + }, + { + "epoch": 0.41, + "learning_rate": 9.487699771976174e-05, + "loss": 21.1743, + "step": 5600 + }, + { + "epoch": 0.42, + "learning_rate": 9.482677220721038e-05, + "loss": 21.1908, + "step": 5650 + }, + { + "epoch": 0.42, + "learning_rate": 9.477654669465901e-05, + "loss": 20.9591, + "step": 5700 + }, + { + "epoch": 0.42, + "learning_rate": 9.472632118210767e-05, + "loss": 20.9036, + "step": 5750 + }, + { + "epoch": 0.43, + "learning_rate": 
9.46760956695563e-05, + "loss": 22.249, + "step": 5800 + }, + { + "epoch": 0.43, + "learning_rate": 9.462587015700496e-05, + "loss": 19.1093, + "step": 5850 + }, + { + "epoch": 0.44, + "learning_rate": 9.45756446444536e-05, + "loss": 21.2714, + "step": 5900 + }, + { + "epoch": 0.44, + "learning_rate": 9.452541913190225e-05, + "loss": 21.3794, + "step": 5950 + }, + { + "epoch": 0.44, + "learning_rate": 9.447519361935089e-05, + "loss": 20.0326, + "step": 6000 + }, + { + "epoch": 0.45, + "learning_rate": 9.442496810679954e-05, + "loss": 19.8004, + "step": 6050 + }, + { + "epoch": 0.45, + "learning_rate": 9.437474259424818e-05, + "loss": 19.0229, + "step": 6100 + }, + { + "epoch": 0.45, + "learning_rate": 9.432451708169682e-05, + "loss": 17.6587, + "step": 6150 + }, + { + "epoch": 0.46, + "learning_rate": 9.427429156914547e-05, + "loss": 21.9247, + "step": 6200 + }, + { + "epoch": 0.46, + "learning_rate": 9.422406605659411e-05, + "loss": 19.743, + "step": 6250 + }, + { + "epoch": 0.46, + "learning_rate": 9.417384054404276e-05, + "loss": 22.9746, + "step": 6300 + }, + { + "epoch": 0.47, + "learning_rate": 9.41236150314914e-05, + "loss": 19.6693, + "step": 6350 + }, + { + "epoch": 0.47, + "learning_rate": 9.407338951894004e-05, + "loss": 19.1141, + "step": 6400 + }, + { + "epoch": 0.48, + "learning_rate": 9.402316400638869e-05, + "loss": 18.3847, + "step": 6450 + }, + { + "epoch": 0.48, + "learning_rate": 9.397293849383733e-05, + "loss": 18.9357, + "step": 6500 + }, + { + "epoch": 0.48, + "learning_rate": 9.392271298128598e-05, + "loss": 18.9316, + "step": 6550 + }, + { + "epoch": 0.49, + "learning_rate": 9.387248746873462e-05, + "loss": 20.9141, + "step": 6600 + }, + { + "epoch": 0.49, + "learning_rate": 9.382226195618326e-05, + "loss": 18.7472, + "step": 6650 + }, + { + "epoch": 0.49, + "learning_rate": 9.377203644363192e-05, + "loss": 18.8577, + "step": 6700 + }, + { + "epoch": 0.5, + "learning_rate": 9.372181093108055e-05, + "loss": 17.8061, + "step": 6750 + }, + { + 
"epoch": 0.5, + "learning_rate": 9.36715854185292e-05, + "loss": 19.4687, + "step": 6800 + }, + { + "epoch": 0.51, + "learning_rate": 9.362135990597784e-05, + "loss": 19.5103, + "step": 6850 + }, + { + "epoch": 0.51, + "learning_rate": 9.357113439342648e-05, + "loss": 18.5319, + "step": 6900 + }, + { + "epoch": 0.51, + "learning_rate": 9.352090888087514e-05, + "loss": 20.16, + "step": 6950 + }, + { + "epoch": 0.52, + "learning_rate": 9.347068336832377e-05, + "loss": 18.1913, + "step": 7000 + }, + { + "epoch": 0.52, + "learning_rate": 9.342045785577243e-05, + "loss": 21.341, + "step": 7050 + }, + { + "epoch": 0.52, + "learning_rate": 9.337023234322106e-05, + "loss": 16.7701, + "step": 7100 + }, + { + "epoch": 0.53, + "learning_rate": 9.332000683066972e-05, + "loss": 18.045, + "step": 7150 + }, + { + "epoch": 0.53, + "learning_rate": 9.326978131811835e-05, + "loss": 16.0393, + "step": 7200 + }, + { + "epoch": 0.53, + "learning_rate": 9.3219555805567e-05, + "loss": 17.4833, + "step": 7250 + }, + { + "epoch": 0.54, + "learning_rate": 9.316933029301565e-05, + "loss": 17.3978, + "step": 7300 + }, + { + "epoch": 0.54, + "learning_rate": 9.31191047804643e-05, + "loss": 18.2649, + "step": 7350 + }, + { + "epoch": 0.55, + "learning_rate": 9.306887926791294e-05, + "loss": 16.3891, + "step": 7400 + }, + { + "epoch": 0.55, + "learning_rate": 9.301865375536157e-05, + "loss": 21.4399, + "step": 7450 + }, + { + "epoch": 0.55, + "learning_rate": 9.296842824281023e-05, + "loss": 16.3082, + "step": 7500 + }, + { + "epoch": 0.56, + "learning_rate": 9.291820273025886e-05, + "loss": 14.8713, + "step": 7550 + }, + { + "epoch": 0.56, + "learning_rate": 9.286797721770751e-05, + "loss": 16.3099, + "step": 7600 + }, + { + "epoch": 0.56, + "learning_rate": 9.281775170515616e-05, + "loss": 17.8771, + "step": 7650 + }, + { + "epoch": 0.57, + "learning_rate": 9.27675261926048e-05, + "loss": 17.1421, + "step": 7700 + }, + { + "epoch": 0.57, + "learning_rate": 9.271730068005345e-05, + "loss": 
16.6478, + "step": 7750 + }, + { + "epoch": 0.58, + "learning_rate": 9.266707516750209e-05, + "loss": 15.3247, + "step": 7800 + }, + { + "epoch": 0.58, + "learning_rate": 9.261684965495073e-05, + "loss": 17.6577, + "step": 7850 + }, + { + "epoch": 0.58, + "learning_rate": 9.256662414239938e-05, + "loss": 18.8549, + "step": 7900 + }, + { + "epoch": 0.59, + "learning_rate": 9.251639862984802e-05, + "loss": 17.4187, + "step": 7950 + }, + { + "epoch": 0.59, + "learning_rate": 9.246617311729667e-05, + "loss": 15.6643, + "step": 8000 + }, + { + "epoch": 0.59, + "learning_rate": 9.241594760474531e-05, + "loss": 17.1987, + "step": 8050 + }, + { + "epoch": 0.6, + "learning_rate": 9.236572209219396e-05, + "loss": 18.1712, + "step": 8100 + }, + { + "epoch": 0.6, + "learning_rate": 9.23154965796426e-05, + "loss": 15.8015, + "step": 8150 + }, + { + "epoch": 0.6, + "learning_rate": 9.226527106709124e-05, + "loss": 19.064, + "step": 8200 + }, + { + "epoch": 0.61, + "learning_rate": 9.221504555453989e-05, + "loss": 18.2748, + "step": 8250 + }, + { + "epoch": 0.61, + "learning_rate": 9.216482004198853e-05, + "loss": 15.0679, + "step": 8300 + }, + { + "epoch": 0.62, + "learning_rate": 9.211459452943718e-05, + "loss": 17.995, + "step": 8350 + }, + { + "epoch": 0.62, + "learning_rate": 9.206436901688582e-05, + "loss": 17.467, + "step": 8400 + }, + { + "epoch": 0.62, + "learning_rate": 9.201414350433448e-05, + "loss": 18.6665, + "step": 8450 + }, + { + "epoch": 0.63, + "learning_rate": 9.196391799178311e-05, + "loss": 17.2848, + "step": 8500 + }, + { + "epoch": 0.63, + "learning_rate": 9.191369247923175e-05, + "loss": 14.4767, + "step": 8550 + }, + { + "epoch": 0.63, + "learning_rate": 9.18634669666804e-05, + "loss": 17.5444, + "step": 8600 + }, + { + "epoch": 0.64, + "learning_rate": 9.181324145412904e-05, + "loss": 14.4661, + "step": 8650 + }, + { + "epoch": 0.64, + "learning_rate": 9.176301594157768e-05, + "loss": 16.3339, + "step": 8700 + }, + { + "epoch": 0.65, + "learning_rate": 
9.171279042902633e-05, + "loss": 17.5122, + "step": 8750 + }, + { + "epoch": 0.65, + "learning_rate": 9.166256491647499e-05, + "loss": 16.7631, + "step": 8800 + }, + { + "epoch": 0.65, + "learning_rate": 9.161233940392362e-05, + "loss": 16.5193, + "step": 8850 + }, + { + "epoch": 0.66, + "learning_rate": 9.156211389137227e-05, + "loss": 17.8364, + "step": 8900 + }, + { + "epoch": 0.66, + "learning_rate": 9.15118883788209e-05, + "loss": 16.2916, + "step": 8950 + }, + { + "epoch": 0.66, + "learning_rate": 9.146166286626956e-05, + "loss": 14.1719, + "step": 9000 + }, + { + "epoch": 0.67, + "learning_rate": 9.141143735371819e-05, + "loss": 18.2987, + "step": 9050 + }, + { + "epoch": 0.67, + "learning_rate": 9.136121184116684e-05, + "loss": 17.4248, + "step": 9100 + }, + { + "epoch": 0.67, + "learning_rate": 9.13109863286155e-05, + "loss": 16.1862, + "step": 9150 + }, + { + "epoch": 0.68, + "learning_rate": 9.126076081606412e-05, + "loss": 16.3134, + "step": 9200 + }, + { + "epoch": 0.68, + "learning_rate": 9.121053530351278e-05, + "loss": 14.9158, + "step": 9250 + }, + { + "epoch": 0.69, + "learning_rate": 9.116030979096141e-05, + "loss": 15.2504, + "step": 9300 + }, + { + "epoch": 0.69, + "learning_rate": 9.111008427841007e-05, + "loss": 14.1967, + "step": 9350 + }, + { + "epoch": 0.69, + "learning_rate": 9.105985876585871e-05, + "loss": 17.3165, + "step": 9400 + }, + { + "epoch": 0.7, + "learning_rate": 9.100963325330736e-05, + "loss": 14.5912, + "step": 9450 + }, + { + "epoch": 0.7, + "learning_rate": 9.0959407740756e-05, + "loss": 17.5593, + "step": 9500 + }, + { + "epoch": 0.7, + "learning_rate": 9.090918222820465e-05, + "loss": 16.3421, + "step": 9550 + }, + { + "epoch": 0.71, + "learning_rate": 9.085895671565329e-05, + "loss": 16.2821, + "step": 9600 + }, + { + "epoch": 0.71, + "learning_rate": 9.080873120310192e-05, + "loss": 16.4985, + "step": 9650 + }, + { + "epoch": 0.72, + "learning_rate": 9.075850569055058e-05, + "loss": 16.1138, + "step": 9700 + }, + { + 
"epoch": 0.72, + "learning_rate": 9.070828017799922e-05, + "loss": 16.3997, + "step": 9750 + }, + { + "epoch": 0.72, + "learning_rate": 9.065805466544787e-05, + "loss": 15.518, + "step": 9800 + }, + { + "epoch": 0.73, + "learning_rate": 9.060782915289651e-05, + "loss": 13.8424, + "step": 9850 + }, + { + "epoch": 0.73, + "learning_rate": 9.055760364034515e-05, + "loss": 15.0784, + "step": 9900 + }, + { + "epoch": 0.73, + "learning_rate": 9.05073781277938e-05, + "loss": 14.0163, + "step": 9950 + }, + { + "epoch": 0.74, + "learning_rate": 9.045715261524244e-05, + "loss": 16.7863, + "step": 10000 + }, + { + "epoch": 0.74, + "learning_rate": 9.040692710269109e-05, + "loss": 13.6715, + "step": 10050 + }, + { + "epoch": 0.75, + "learning_rate": 9.035670159013973e-05, + "loss": 15.1071, + "step": 10100 + }, + { + "epoch": 0.75, + "learning_rate": 9.030647607758837e-05, + "loss": 14.2658, + "step": 10150 + }, + { + "epoch": 0.75, + "learning_rate": 9.025625056503703e-05, + "loss": 15.1115, + "step": 10200 + }, + { + "epoch": 0.76, + "learning_rate": 9.020602505248566e-05, + "loss": 14.028, + "step": 10250 + }, + { + "epoch": 0.76, + "learning_rate": 9.015579953993431e-05, + "loss": 13.3066, + "step": 10300 + }, + { + "epoch": 0.76, + "learning_rate": 9.010557402738295e-05, + "loss": 14.1185, + "step": 10350 + }, + { + "epoch": 0.77, + "learning_rate": 9.00553485148316e-05, + "loss": 14.061, + "step": 10400 + }, + { + "epoch": 0.77, + "learning_rate": 9.000512300228024e-05, + "loss": 15.2439, + "step": 10450 + }, + { + "epoch": 0.77, + "learning_rate": 8.995489748972888e-05, + "loss": 13.3617, + "step": 10500 + }, + { + "epoch": 0.78, + "learning_rate": 8.990467197717754e-05, + "loss": 14.5514, + "step": 10550 + }, + { + "epoch": 0.78, + "learning_rate": 8.985444646462617e-05, + "loss": 15.2426, + "step": 10600 + }, + { + "epoch": 0.79, + "learning_rate": 8.980422095207483e-05, + "loss": 16.6418, + "step": 10650 + }, + { + "epoch": 0.79, + "learning_rate": 
8.975399543952346e-05, + "loss": 13.3146, + "step": 10700 + }, + { + "epoch": 0.79, + "learning_rate": 8.970376992697212e-05, + "loss": 14.9333, + "step": 10750 + }, + { + "epoch": 0.8, + "learning_rate": 8.965354441442075e-05, + "loss": 14.4502, + "step": 10800 + }, + { + "epoch": 0.8, + "learning_rate": 8.960331890186939e-05, + "loss": 14.7886, + "step": 10850 + }, + { + "epoch": 0.8, + "learning_rate": 8.955309338931805e-05, + "loss": 15.0266, + "step": 10900 + }, + { + "epoch": 0.81, + "learning_rate": 8.950286787676668e-05, + "loss": 14.543, + "step": 10950 + }, + { + "epoch": 0.81, + "learning_rate": 8.945264236421534e-05, + "loss": 15.8078, + "step": 11000 + }, + { + "epoch": 0.82, + "learning_rate": 8.940241685166397e-05, + "loss": 13.6052, + "step": 11050 + }, + { + "epoch": 0.82, + "learning_rate": 8.935219133911263e-05, + "loss": 14.2995, + "step": 11100 + }, + { + "epoch": 0.82, + "learning_rate": 8.930196582656126e-05, + "loss": 15.732, + "step": 11150 + }, + { + "epoch": 0.83, + "learning_rate": 8.925174031400991e-05, + "loss": 14.0573, + "step": 11200 + }, + { + "epoch": 0.83, + "learning_rate": 8.920151480145856e-05, + "loss": 17.5941, + "step": 11250 + }, + { + "epoch": 0.83, + "learning_rate": 8.91512892889072e-05, + "loss": 14.7829, + "step": 11300 + }, + { + "epoch": 0.84, + "learning_rate": 8.910106377635585e-05, + "loss": 14.6669, + "step": 11350 + }, + { + "epoch": 0.84, + "learning_rate": 8.905083826380448e-05, + "loss": 14.3315, + "step": 11400 + }, + { + "epoch": 0.84, + "learning_rate": 8.900061275125313e-05, + "loss": 14.2639, + "step": 11450 + }, + { + "epoch": 0.85, + "learning_rate": 8.895038723870176e-05, + "loss": 14.3226, + "step": 11500 + }, + { + "epoch": 0.85, + "learning_rate": 8.890016172615042e-05, + "loss": 14.4975, + "step": 11550 + }, + { + "epoch": 0.86, + "learning_rate": 8.884993621359907e-05, + "loss": 14.8436, + "step": 11600 + }, + { + "epoch": 0.86, + "learning_rate": 8.879971070104771e-05, + "loss": 13.8481, + 
"step": 11650 + }, + { + "epoch": 0.86, + "learning_rate": 8.874948518849635e-05, + "loss": 12.8151, + "step": 11700 + }, + { + "epoch": 0.87, + "learning_rate": 8.8699259675945e-05, + "loss": 13.1659, + "step": 11750 + }, + { + "epoch": 0.87, + "learning_rate": 8.864903416339364e-05, + "loss": 15.0919, + "step": 11800 + }, + { + "epoch": 0.87, + "learning_rate": 8.859880865084229e-05, + "loss": 14.4382, + "step": 11850 + }, + { + "epoch": 0.88, + "learning_rate": 8.854858313829093e-05, + "loss": 14.0989, + "step": 11900 + }, + { + "epoch": 0.88, + "learning_rate": 8.849835762573957e-05, + "loss": 14.5763, + "step": 11950 + }, + { + "epoch": 0.89, + "learning_rate": 8.844813211318822e-05, + "loss": 13.4144, + "step": 12000 + }, + { + "epoch": 0.89, + "learning_rate": 8.839790660063686e-05, + "loss": 15.6018, + "step": 12050 + }, + { + "epoch": 0.89, + "learning_rate": 8.83476810880855e-05, + "loss": 14.7849, + "step": 12100 + }, + { + "epoch": 0.9, + "learning_rate": 8.829745557553415e-05, + "loss": 14.441, + "step": 12150 + }, + { + "epoch": 0.9, + "learning_rate": 8.82472300629828e-05, + "loss": 14.2135, + "step": 12200 + }, + { + "epoch": 0.9, + "learning_rate": 8.819700455043144e-05, + "loss": 17.1245, + "step": 12250 + }, + { + "epoch": 0.91, + "learning_rate": 8.814677903788008e-05, + "loss": 14.6629, + "step": 12300 + }, + { + "epoch": 0.91, + "learning_rate": 8.809655352532873e-05, + "loss": 16.6715, + "step": 12350 + }, + { + "epoch": 0.91, + "learning_rate": 8.804632801277738e-05, + "loss": 13.0133, + "step": 12400 + }, + { + "epoch": 0.92, + "learning_rate": 8.799610250022601e-05, + "loss": 14.1551, + "step": 12450 + }, + { + "epoch": 0.92, + "learning_rate": 8.794587698767466e-05, + "loss": 14.019, + "step": 12500 + }, + { + "epoch": 0.93, + "learning_rate": 8.78956514751233e-05, + "loss": 14.4279, + "step": 12550 + }, + { + "epoch": 0.93, + "learning_rate": 8.784542596257195e-05, + "loss": 12.5293, + "step": 12600 + }, + { + "epoch": 0.93, + 
"learning_rate": 8.77952004500206e-05, + "loss": 15.0403, + "step": 12650 + }, + { + "epoch": 0.94, + "learning_rate": 8.774497493746924e-05, + "loss": 13.8193, + "step": 12700 + }, + { + "epoch": 0.94, + "learning_rate": 8.769474942491789e-05, + "loss": 13.1564, + "step": 12750 + }, + { + "epoch": 0.94, + "learning_rate": 8.764452391236652e-05, + "loss": 14.6415, + "step": 12800 + }, + { + "epoch": 0.95, + "learning_rate": 8.759429839981518e-05, + "loss": 12.2339, + "step": 12850 + }, + { + "epoch": 0.95, + "learning_rate": 8.754407288726381e-05, + "loss": 12.1604, + "step": 12900 + }, + { + "epoch": 0.96, + "learning_rate": 8.749384737471247e-05, + "loss": 15.4939, + "step": 12950 + }, + { + "epoch": 0.96, + "learning_rate": 8.744362186216111e-05, + "loss": 13.9713, + "step": 13000 + }, + { + "epoch": 0.96, + "learning_rate": 8.739339634960976e-05, + "loss": 14.0986, + "step": 13050 + }, + { + "epoch": 0.97, + "learning_rate": 8.73431708370584e-05, + "loss": 13.6334, + "step": 13100 + }, + { + "epoch": 0.97, + "learning_rate": 8.729294532450703e-05, + "loss": 13.5201, + "step": 13150 + }, + { + "epoch": 0.97, + "learning_rate": 8.724271981195569e-05, + "loss": 14.3793, + "step": 13200 + }, + { + "epoch": 0.98, + "learning_rate": 8.719249429940432e-05, + "loss": 13.1741, + "step": 13250 + }, + { + "epoch": 0.98, + "learning_rate": 8.714226878685298e-05, + "loss": 11.7782, + "step": 13300 + }, + { + "epoch": 0.98, + "learning_rate": 8.709204327430162e-05, + "loss": 12.2758, + "step": 13350 + }, + { + "epoch": 0.99, + "learning_rate": 8.704181776175027e-05, + "loss": 13.1723, + "step": 13400 + }, + { + "epoch": 0.99, + "learning_rate": 8.699159224919891e-05, + "loss": 14.0858, + "step": 13450 + }, + { + "epoch": 1.0, + "learning_rate": 8.694136673664755e-05, + "loss": 11.2836, + "step": 13500 + }, + { + "epoch": 1.0, + "learning_rate": 8.68911412240962e-05, + "loss": 15.7226, + "step": 13550 + }, + { + "epoch": 1.0, + "learning_rate": 8.684091571154484e-05, + 
"loss": 15.8889, + "step": 13600 + }, + { + "epoch": 1.01, + "learning_rate": 8.679069019899349e-05, + "loss": 12.2185, + "step": 13650 + }, + { + "epoch": 1.01, + "learning_rate": 8.674046468644213e-05, + "loss": 11.4647, + "step": 13700 + }, + { + "epoch": 1.01, + "learning_rate": 8.669023917389077e-05, + "loss": 13.1238, + "step": 13750 + }, + { + "epoch": 1.02, + "learning_rate": 8.664001366133942e-05, + "loss": 11.909, + "step": 13800 + }, + { + "epoch": 1.02, + "learning_rate": 8.658978814878806e-05, + "loss": 12.5478, + "step": 13850 + }, + { + "epoch": 1.03, + "learning_rate": 8.65395626362367e-05, + "loss": 13.017, + "step": 13900 + }, + { + "epoch": 1.03, + "learning_rate": 8.648933712368535e-05, + "loss": 12.9134, + "step": 13950 + }, + { + "epoch": 1.03, + "learning_rate": 8.6439111611134e-05, + "loss": 13.3485, + "step": 14000 + }, + { + "epoch": 1.04, + "learning_rate": 8.638888609858264e-05, + "loss": 11.4706, + "step": 14050 + }, + { + "epoch": 1.04, + "learning_rate": 8.633866058603128e-05, + "loss": 11.1063, + "step": 14100 + }, + { + "epoch": 1.04, + "learning_rate": 8.628843507347994e-05, + "loss": 12.7408, + "step": 14150 + }, + { + "epoch": 1.05, + "learning_rate": 8.623820956092857e-05, + "loss": 12.0689, + "step": 14200 + }, + { + "epoch": 1.05, + "learning_rate": 8.618798404837721e-05, + "loss": 11.0724, + "step": 14250 + }, + { + "epoch": 1.05, + "learning_rate": 8.613775853582586e-05, + "loss": 12.5685, + "step": 14300 + }, + { + "epoch": 1.06, + "learning_rate": 8.60875330232745e-05, + "loss": 12.7776, + "step": 14350 + }, + { + "epoch": 1.06, + "learning_rate": 8.603730751072315e-05, + "loss": 11.3066, + "step": 14400 + }, + { + "epoch": 1.07, + "learning_rate": 8.598708199817179e-05, + "loss": 13.06, + "step": 14450 + }, + { + "epoch": 1.07, + "learning_rate": 8.593685648562045e-05, + "loss": 15.6523, + "step": 14500 + }, + { + "epoch": 1.07, + "learning_rate": 8.588663097306908e-05, + "loss": 12.019, + "step": 14550 + }, + { + 
"epoch": 1.08, + "learning_rate": 8.583640546051774e-05, + "loss": 11.0941, + "step": 14600 + }, + { + "epoch": 1.08, + "learning_rate": 8.578617994796637e-05, + "loss": 12.4755, + "step": 14650 + }, + { + "epoch": 1.08, + "learning_rate": 8.573595443541502e-05, + "loss": 13.7012, + "step": 14700 + }, + { + "epoch": 1.09, + "learning_rate": 8.568572892286366e-05, + "loss": 12.2024, + "step": 14750 + }, + { + "epoch": 1.09, + "learning_rate": 8.56355034103123e-05, + "loss": 12.4744, + "step": 14800 + }, + { + "epoch": 1.1, + "learning_rate": 8.558527789776096e-05, + "loss": 12.3234, + "step": 14850 + }, + { + "epoch": 1.1, + "learning_rate": 8.553505238520959e-05, + "loss": 12.5616, + "step": 14900 + }, + { + "epoch": 1.1, + "learning_rate": 8.548482687265824e-05, + "loss": 11.9559, + "step": 14950 + }, + { + "epoch": 1.11, + "learning_rate": 8.543460136010688e-05, + "loss": 12.0734, + "step": 15000 + }, + { + "epoch": 1.11, + "learning_rate": 8.538437584755553e-05, + "loss": 13.0341, + "step": 15050 + }, + { + "epoch": 1.11, + "learning_rate": 8.533415033500418e-05, + "loss": 12.7406, + "step": 15100 + }, + { + "epoch": 1.12, + "learning_rate": 8.528392482245282e-05, + "loss": 11.7258, + "step": 15150 + }, + { + "epoch": 1.12, + "learning_rate": 8.523369930990147e-05, + "loss": 11.8709, + "step": 15200 + }, + { + "epoch": 1.12, + "learning_rate": 8.518347379735011e-05, + "loss": 11.7021, + "step": 15250 + }, + { + "epoch": 1.13, + "learning_rate": 8.513324828479875e-05, + "loss": 13.2674, + "step": 15300 + }, + { + "epoch": 1.13, + "learning_rate": 8.508302277224738e-05, + "loss": 11.9099, + "step": 15350 + }, + { + "epoch": 1.14, + "learning_rate": 8.503279725969604e-05, + "loss": 11.7841, + "step": 15400 + }, + { + "epoch": 1.14, + "learning_rate": 8.498257174714469e-05, + "loss": 11.9573, + "step": 15450 + }, + { + "epoch": 1.14, + "learning_rate": 8.493234623459333e-05, + "loss": 11.7211, + "step": 15500 + }, + { + "epoch": 1.15, + "learning_rate": 
8.488212072204197e-05, + "loss": 12.3513, + "step": 15550 + }, + { + "epoch": 1.15, + "learning_rate": 8.483189520949062e-05, + "loss": 11.0709, + "step": 15600 + }, + { + "epoch": 1.15, + "learning_rate": 8.478166969693926e-05, + "loss": 11.6544, + "step": 15650 + }, + { + "epoch": 1.16, + "learning_rate": 8.47314441843879e-05, + "loss": 11.8285, + "step": 15700 + }, + { + "epoch": 1.16, + "learning_rate": 8.468121867183655e-05, + "loss": 10.4208, + "step": 15750 + }, + { + "epoch": 1.17, + "learning_rate": 8.46309931592852e-05, + "loss": 10.7821, + "step": 15800 + }, + { + "epoch": 1.17, + "learning_rate": 8.458076764673384e-05, + "loss": 13.2724, + "step": 15850 + }, + { + "epoch": 1.17, + "learning_rate": 8.45305421341825e-05, + "loss": 10.9219, + "step": 15900 + }, + { + "epoch": 1.18, + "learning_rate": 8.448031662163113e-05, + "loss": 12.2532, + "step": 15950 + }, + { + "epoch": 1.18, + "learning_rate": 8.443009110907977e-05, + "loss": 11.0132, + "step": 16000 + }, + { + "epoch": 1.18, + "learning_rate": 8.437986559652841e-05, + "loss": 12.319, + "step": 16050 + }, + { + "epoch": 1.19, + "learning_rate": 8.432964008397706e-05, + "loss": 12.9871, + "step": 16100 + }, + { + "epoch": 1.19, + "learning_rate": 8.42794145714257e-05, + "loss": 12.0625, + "step": 16150 + }, + { + "epoch": 1.19, + "learning_rate": 8.422918905887435e-05, + "loss": 13.4629, + "step": 16200 + }, + { + "epoch": 1.2, + "learning_rate": 8.4178963546323e-05, + "loss": 10.9291, + "step": 16250 + }, + { + "epoch": 1.2, + "learning_rate": 8.412873803377163e-05, + "loss": 13.7719, + "step": 16300 + }, + { + "epoch": 1.21, + "learning_rate": 8.407851252122029e-05, + "loss": 11.3634, + "step": 16350 + }, + { + "epoch": 1.21, + "learning_rate": 8.402828700866892e-05, + "loss": 12.7941, + "step": 16400 + }, + { + "epoch": 1.21, + "learning_rate": 8.397806149611758e-05, + "loss": 11.8863, + "step": 16450 + }, + { + "epoch": 1.22, + "learning_rate": 8.392783598356621e-05, + "loss": 9.5225, + "step": 
16500 + }, + { + "epoch": 1.22, + "learning_rate": 8.387761047101485e-05, + "loss": 12.983, + "step": 16550 + }, + { + "epoch": 1.22, + "learning_rate": 8.382738495846351e-05, + "loss": 11.8489, + "step": 16600 + }, + { + "epoch": 1.23, + "learning_rate": 8.377715944591214e-05, + "loss": 11.8122, + "step": 16650 + }, + { + "epoch": 1.23, + "learning_rate": 8.37269339333608e-05, + "loss": 12.3387, + "step": 16700 + }, + { + "epoch": 1.24, + "learning_rate": 8.367670842080943e-05, + "loss": 13.4648, + "step": 16750 + }, + { + "epoch": 1.24, + "learning_rate": 8.362648290825809e-05, + "loss": 10.2301, + "step": 16800 + }, + { + "epoch": 1.24, + "learning_rate": 8.357625739570672e-05, + "loss": 11.492, + "step": 16850 + }, + { + "epoch": 1.25, + "learning_rate": 8.352603188315538e-05, + "loss": 12.5997, + "step": 16900 + }, + { + "epoch": 1.25, + "learning_rate": 8.347580637060402e-05, + "loss": 11.5588, + "step": 16950 + }, + { + "epoch": 1.25, + "learning_rate": 8.342558085805266e-05, + "loss": 11.8627, + "step": 17000 + }, + { + "epoch": 1.26, + "learning_rate": 8.337535534550131e-05, + "loss": 13.2469, + "step": 17050 + }, + { + "epoch": 1.26, + "learning_rate": 8.332512983294994e-05, + "loss": 10.4327, + "step": 17100 + }, + { + "epoch": 1.27, + "learning_rate": 8.32749043203986e-05, + "loss": 12.7566, + "step": 17150 + }, + { + "epoch": 1.27, + "learning_rate": 8.322467880784723e-05, + "loss": 11.0729, + "step": 17200 + }, + { + "epoch": 1.27, + "learning_rate": 8.317445329529588e-05, + "loss": 12.3484, + "step": 17250 + }, + { + "epoch": 1.28, + "learning_rate": 8.312422778274453e-05, + "loss": 10.5193, + "step": 17300 + }, + { + "epoch": 1.28, + "learning_rate": 8.307400227019317e-05, + "loss": 12.2369, + "step": 17350 + }, + { + "epoch": 1.28, + "learning_rate": 8.302377675764182e-05, + "loss": 12.2976, + "step": 17400 + }, + { + "epoch": 1.29, + "learning_rate": 8.297355124509046e-05, + "loss": 12.3852, + "step": 17450 + }, + { + "epoch": 1.29, + 
"learning_rate": 8.29233257325391e-05, + "loss": 11.2137, + "step": 17500 + }, + { + "epoch": 1.29, + "learning_rate": 8.287310021998775e-05, + "loss": 11.609, + "step": 17550 + }, + { + "epoch": 1.3, + "learning_rate": 8.282287470743639e-05, + "loss": 13.3339, + "step": 17600 + }, + { + "epoch": 1.3, + "learning_rate": 8.277264919488504e-05, + "loss": 11.4263, + "step": 17650 + }, + { + "epoch": 1.31, + "learning_rate": 8.272242368233368e-05, + "loss": 12.6949, + "step": 17700 + }, + { + "epoch": 1.31, + "learning_rate": 8.267219816978233e-05, + "loss": 11.4767, + "step": 17750 + }, + { + "epoch": 1.31, + "learning_rate": 8.262197265723097e-05, + "loss": 12.2225, + "step": 17800 + }, + { + "epoch": 1.32, + "learning_rate": 8.257174714467961e-05, + "loss": 11.0755, + "step": 17850 + }, + { + "epoch": 1.32, + "learning_rate": 8.252152163212826e-05, + "loss": 11.9677, + "step": 17900 + }, + { + "epoch": 1.32, + "learning_rate": 8.24712961195769e-05, + "loss": 11.098, + "step": 17950 + }, + { + "epoch": 1.33, + "learning_rate": 8.242107060702555e-05, + "loss": 11.1102, + "step": 18000 + }, + { + "epoch": 1.33, + "learning_rate": 8.237084509447419e-05, + "loss": 11.4985, + "step": 18050 + }, + { + "epoch": 1.34, + "learning_rate": 8.232061958192285e-05, + "loss": 11.7356, + "step": 18100 + }, + { + "epoch": 1.34, + "learning_rate": 8.227039406937148e-05, + "loss": 11.3336, + "step": 18150 + }, + { + "epoch": 1.34, + "learning_rate": 8.222016855682012e-05, + "loss": 11.0448, + "step": 18200 + }, + { + "epoch": 1.35, + "learning_rate": 8.216994304426877e-05, + "loss": 10.9986, + "step": 18250 + }, + { + "epoch": 1.35, + "learning_rate": 8.211971753171741e-05, + "loss": 10.768, + "step": 18300 + }, + { + "epoch": 1.35, + "learning_rate": 8.206949201916607e-05, + "loss": 11.6844, + "step": 18350 + }, + { + "epoch": 1.36, + "learning_rate": 8.20192665066147e-05, + "loss": 11.5615, + "step": 18400 + }, + { + "epoch": 1.36, + "learning_rate": 8.196904099406336e-05, + "loss": 
11.4019, + "step": 18450 + }, + { + "epoch": 1.36, + "learning_rate": 8.191881548151199e-05, + "loss": 12.1784, + "step": 18500 + }, + { + "epoch": 1.37, + "learning_rate": 8.186858996896064e-05, + "loss": 12.4565, + "step": 18550 + }, + { + "epoch": 1.37, + "learning_rate": 8.181836445640927e-05, + "loss": 11.0557, + "step": 18600 + }, + { + "epoch": 1.38, + "learning_rate": 8.176813894385793e-05, + "loss": 12.1892, + "step": 18650 + }, + { + "epoch": 1.38, + "learning_rate": 8.171791343130658e-05, + "loss": 12.0531, + "step": 18700 + }, + { + "epoch": 1.38, + "learning_rate": 8.166768791875522e-05, + "loss": 10.1791, + "step": 18750 + }, + { + "epoch": 1.39, + "learning_rate": 8.161746240620386e-05, + "loss": 11.2501, + "step": 18800 + }, + { + "epoch": 1.39, + "learning_rate": 8.15672368936525e-05, + "loss": 9.92, + "step": 18850 + }, + { + "epoch": 1.39, + "learning_rate": 8.151701138110115e-05, + "loss": 10.0603, + "step": 18900 + }, + { + "epoch": 1.4, + "learning_rate": 8.146678586854978e-05, + "loss": 10.9477, + "step": 18950 + }, + { + "epoch": 1.4, + "learning_rate": 8.141656035599844e-05, + "loss": 9.7579, + "step": 19000 + }, + { + "epoch": 1.41, + "learning_rate": 8.136633484344708e-05, + "loss": 11.243, + "step": 19050 + }, + { + "epoch": 1.41, + "learning_rate": 8.131610933089573e-05, + "loss": 11.0069, + "step": 19100 + }, + { + "epoch": 1.41, + "learning_rate": 8.126588381834437e-05, + "loss": 9.7387, + "step": 19150 + }, + { + "epoch": 1.42, + "learning_rate": 8.121565830579302e-05, + "loss": 11.4624, + "step": 19200 + }, + { + "epoch": 1.42, + "learning_rate": 8.116543279324166e-05, + "loss": 12.1299, + "step": 19250 + }, + { + "epoch": 1.42, + "learning_rate": 8.11152072806903e-05, + "loss": 12.2796, + "step": 19300 + }, + { + "epoch": 1.43, + "learning_rate": 8.106498176813895e-05, + "loss": 10.3295, + "step": 19350 + }, + { + "epoch": 1.43, + "learning_rate": 8.101475625558759e-05, + "loss": 10.0709, + "step": 19400 + }, + { + "epoch": 1.43, + 
"learning_rate": 8.096453074303624e-05, + "loss": 11.0725, + "step": 19450 + }, + { + "epoch": 1.44, + "learning_rate": 8.091430523048488e-05, + "loss": 10.7882, + "step": 19500 + }, + { + "epoch": 1.44, + "learning_rate": 8.086407971793352e-05, + "loss": 11.4124, + "step": 19550 + }, + { + "epoch": 1.45, + "learning_rate": 8.081385420538217e-05, + "loss": 10.4941, + "step": 19600 + }, + { + "epoch": 1.45, + "learning_rate": 8.076362869283081e-05, + "loss": 11.8687, + "step": 19650 + }, + { + "epoch": 1.45, + "learning_rate": 8.071340318027946e-05, + "loss": 11.3221, + "step": 19700 + }, + { + "epoch": 1.46, + "learning_rate": 8.06631776677281e-05, + "loss": 10.2167, + "step": 19750 + }, + { + "epoch": 1.46, + "learning_rate": 8.061295215517675e-05, + "loss": 10.5425, + "step": 19800 + }, + { + "epoch": 1.46, + "learning_rate": 8.05627266426254e-05, + "loss": 11.2982, + "step": 19850 + }, + { + "epoch": 1.47, + "learning_rate": 8.051250113007403e-05, + "loss": 12.0685, + "step": 19900 + }, + { + "epoch": 1.47, + "learning_rate": 8.046227561752268e-05, + "loss": 10.6613, + "step": 19950 + }, + { + "epoch": 1.48, + "learning_rate": 8.041205010497132e-05, + "loss": 10.8245, + "step": 20000 + }, + { + "epoch": 1.48, + "eval_loss": 10.409339904785156, + "eval_runtime": 890.9956, + "eval_samples_per_second": 14.7, + "eval_steps_per_second": 3.676, + "eval_wer": 0.2624627273109067, + "step": 20000 + }, + { + "epoch": 1.48, + "learning_rate": 8.036182459241997e-05, + "loss": 10.671, + "step": 20050 + }, + { + "epoch": 1.48, + "learning_rate": 8.031159907986861e-05, + "loss": 11.0263, + "step": 20100 + }, + { + "epoch": 1.49, + "learning_rate": 8.026137356731725e-05, + "loss": 11.0571, + "step": 20150 + }, + { + "epoch": 1.49, + "learning_rate": 8.021114805476591e-05, + "loss": 13.0778, + "step": 20200 + }, + { + "epoch": 1.49, + "learning_rate": 8.016092254221454e-05, + "loss": 11.0495, + "step": 20250 + }, + { + "epoch": 1.5, + "learning_rate": 8.01106970296632e-05, + 
"loss": 10.6039, + "step": 20300 + }, + { + "epoch": 1.5, + "learning_rate": 8.006047151711183e-05, + "loss": 11.4221, + "step": 20350 + }, + { + "epoch": 1.5, + "learning_rate": 8.001024600456049e-05, + "loss": 10.7975, + "step": 20400 + }, + { + "epoch": 1.51, + "learning_rate": 7.996002049200912e-05, + "loss": 10.1123, + "step": 20450 + }, + { + "epoch": 1.51, + "learning_rate": 7.990979497945776e-05, + "loss": 10.2241, + "step": 20500 + }, + { + "epoch": 1.52, + "learning_rate": 7.985956946690642e-05, + "loss": 10.0191, + "step": 20550 + }, + { + "epoch": 1.52, + "learning_rate": 7.980934395435505e-05, + "loss": 10.649, + "step": 20600 + }, + { + "epoch": 1.52, + "learning_rate": 7.975911844180371e-05, + "loss": 9.6091, + "step": 20650 + }, + { + "epoch": 1.53, + "learning_rate": 7.970889292925234e-05, + "loss": 9.9386, + "step": 20700 + }, + { + "epoch": 1.53, + "learning_rate": 7.9658667416701e-05, + "loss": 11.2646, + "step": 20750 + }, + { + "epoch": 1.53, + "learning_rate": 7.960844190414964e-05, + "loss": 10.0181, + "step": 20800 + }, + { + "epoch": 1.54, + "learning_rate": 7.955821639159828e-05, + "loss": 11.9437, + "step": 20850 + }, + { + "epoch": 1.54, + "learning_rate": 7.950799087904693e-05, + "loss": 10.9254, + "step": 20900 + }, + { + "epoch": 1.55, + "learning_rate": 7.945776536649557e-05, + "loss": 11.7954, + "step": 20950 + }, + { + "epoch": 1.55, + "learning_rate": 7.940753985394422e-05, + "loss": 9.6569, + "step": 21000 + }, + { + "epoch": 1.55, + "learning_rate": 7.935731434139286e-05, + "loss": 10.6546, + "step": 21050 + }, + { + "epoch": 1.56, + "learning_rate": 7.93070888288415e-05, + "loss": 10.2795, + "step": 21100 + }, + { + "epoch": 1.56, + "learning_rate": 7.925686331629015e-05, + "loss": 10.4595, + "step": 21150 + }, + { + "epoch": 1.56, + "learning_rate": 7.920663780373879e-05, + "loss": 9.2921, + "step": 21200 + }, + { + "epoch": 1.57, + "learning_rate": 7.915641229118744e-05, + "loss": 10.1245, + "step": 21250 + }, + { + "epoch": 
1.57, + "learning_rate": 7.910618677863608e-05, + "loss": 11.2896, + "step": 21300 + }, + { + "epoch": 1.57, + "learning_rate": 7.905596126608472e-05, + "loss": 11.3328, + "step": 21350 + }, + { + "epoch": 1.58, + "learning_rate": 7.900573575353337e-05, + "loss": 10.0718, + "step": 21400 + }, + { + "epoch": 1.58, + "learning_rate": 7.895551024098201e-05, + "loss": 10.8954, + "step": 21450 + }, + { + "epoch": 1.59, + "learning_rate": 7.890528472843066e-05, + "loss": 10.2921, + "step": 21500 + }, + { + "epoch": 1.59, + "learning_rate": 7.88550592158793e-05, + "loss": 9.4609, + "step": 21550 + }, + { + "epoch": 1.59, + "learning_rate": 7.880483370332796e-05, + "loss": 11.4751, + "step": 21600 + }, + { + "epoch": 1.6, + "learning_rate": 7.875460819077659e-05, + "loss": 10.1189, + "step": 21650 + }, + { + "epoch": 1.6, + "learning_rate": 7.870438267822523e-05, + "loss": 11.6478, + "step": 21700 + }, + { + "epoch": 1.6, + "learning_rate": 7.865415716567388e-05, + "loss": 11.2943, + "step": 21750 + }, + { + "epoch": 1.61, + "learning_rate": 7.860393165312252e-05, + "loss": 11.5788, + "step": 21800 + }, + { + "epoch": 1.61, + "learning_rate": 7.855370614057116e-05, + "loss": 10.638, + "step": 21850 + }, + { + "epoch": 1.62, + "learning_rate": 7.850348062801981e-05, + "loss": 9.2895, + "step": 21900 + }, + { + "epoch": 1.62, + "learning_rate": 7.845325511546847e-05, + "loss": 11.4984, + "step": 21950 + }, + { + "epoch": 1.62, + "learning_rate": 7.84030296029171e-05, + "loss": 10.3685, + "step": 22000 + }, + { + "epoch": 1.63, + "learning_rate": 7.835280409036575e-05, + "loss": 10.0115, + "step": 22050 + }, + { + "epoch": 1.63, + "learning_rate": 7.830257857781439e-05, + "loss": 10.2941, + "step": 22100 + }, + { + "epoch": 1.63, + "learning_rate": 7.825235306526304e-05, + "loss": 10.8751, + "step": 22150 + }, + { + "epoch": 1.64, + "learning_rate": 7.820212755271167e-05, + "loss": 10.7477, + "step": 22200 + }, + { + "epoch": 1.64, + "learning_rate": 7.815190204016032e-05, + 
"loss": 12.2573, + "step": 22250 + }, + { + "epoch": 1.64, + "learning_rate": 7.810167652760897e-05, + "loss": 10.1055, + "step": 22300 + }, + { + "epoch": 1.65, + "learning_rate": 7.80514510150576e-05, + "loss": 10.7913, + "step": 22350 + }, + { + "epoch": 1.65, + "learning_rate": 7.800122550250626e-05, + "loss": 9.4701, + "step": 22400 + }, + { + "epoch": 1.66, + "learning_rate": 7.79509999899549e-05, + "loss": 9.9434, + "step": 22450 + }, + { + "epoch": 1.66, + "learning_rate": 7.790077447740355e-05, + "loss": 10.9016, + "step": 22500 + }, + { + "epoch": 1.66, + "learning_rate": 7.785054896485218e-05, + "loss": 10.1733, + "step": 22550 + }, + { + "epoch": 1.67, + "learning_rate": 7.780032345230084e-05, + "loss": 11.0693, + "step": 22600 + }, + { + "epoch": 1.67, + "learning_rate": 7.775009793974948e-05, + "loss": 10.4538, + "step": 22650 + }, + { + "epoch": 1.67, + "learning_rate": 7.769987242719813e-05, + "loss": 10.5127, + "step": 22700 + }, + { + "epoch": 1.68, + "learning_rate": 7.764964691464677e-05, + "loss": 10.1074, + "step": 22750 + }, + { + "epoch": 1.68, + "learning_rate": 7.75994214020954e-05, + "loss": 11.2803, + "step": 22800 + }, + { + "epoch": 1.69, + "learning_rate": 7.754919588954406e-05, + "loss": 10.9954, + "step": 22850 + }, + { + "epoch": 1.69, + "learning_rate": 7.749897037699269e-05, + "loss": 10.1006, + "step": 22900 + }, + { + "epoch": 1.69, + "learning_rate": 7.744874486444135e-05, + "loss": 10.9978, + "step": 22950 + }, + { + "epoch": 1.7, + "learning_rate": 7.739851935188999e-05, + "loss": 10.5885, + "step": 23000 + }, + { + "epoch": 1.7, + "learning_rate": 7.734829383933864e-05, + "loss": 10.5676, + "step": 23050 + }, + { + "epoch": 1.7, + "learning_rate": 7.729806832678728e-05, + "loss": 11.3204, + "step": 23100 + }, + { + "epoch": 1.71, + "learning_rate": 7.724784281423592e-05, + "loss": 10.5388, + "step": 23150 + }, + { + "epoch": 1.71, + "learning_rate": 7.719761730168457e-05, + "loss": 10.7915, + "step": 23200 + }, + { + 
"epoch": 1.71, + "learning_rate": 7.714739178913321e-05, + "loss": 11.9486, + "step": 23250 + }, + { + "epoch": 1.72, + "learning_rate": 7.709716627658186e-05, + "loss": 11.6693, + "step": 23300 + }, + { + "epoch": 1.72, + "learning_rate": 7.70469407640305e-05, + "loss": 9.2664, + "step": 23350 + }, + { + "epoch": 1.73, + "learning_rate": 7.699671525147914e-05, + "loss": 12.1429, + "step": 23400 + }, + { + "epoch": 1.73, + "learning_rate": 7.694648973892779e-05, + "loss": 10.1155, + "step": 23450 + }, + { + "epoch": 1.73, + "learning_rate": 7.689626422637643e-05, + "loss": 10.1562, + "step": 23500 + }, + { + "epoch": 1.74, + "learning_rate": 7.684603871382508e-05, + "loss": 11.3484, + "step": 23550 + }, + { + "epoch": 1.74, + "learning_rate": 7.679581320127372e-05, + "loss": 9.5912, + "step": 23600 + }, + { + "epoch": 1.74, + "learning_rate": 7.674558768872236e-05, + "loss": 11.1067, + "step": 23650 + }, + { + "epoch": 1.75, + "learning_rate": 7.669536217617101e-05, + "loss": 11.7182, + "step": 23700 + }, + { + "epoch": 1.75, + "learning_rate": 7.664513666361965e-05, + "loss": 10.1444, + "step": 23750 + }, + { + "epoch": 1.76, + "learning_rate": 7.659491115106831e-05, + "loss": 11.2671, + "step": 23800 + }, + { + "epoch": 1.76, + "learning_rate": 7.654468563851694e-05, + "loss": 10.9027, + "step": 23850 + }, + { + "epoch": 1.76, + "learning_rate": 7.64944601259656e-05, + "loss": 10.9078, + "step": 23900 + }, + { + "epoch": 1.77, + "learning_rate": 7.644423461341423e-05, + "loss": 10.5441, + "step": 23950 + }, + { + "epoch": 1.77, + "learning_rate": 7.639400910086287e-05, + "loss": 9.8617, + "step": 24000 + }, + { + "epoch": 1.77, + "learning_rate": 7.634378358831153e-05, + "loss": 10.8022, + "step": 24050 + }, + { + "epoch": 1.78, + "learning_rate": 7.629355807576016e-05, + "loss": 10.3082, + "step": 24100 + }, + { + "epoch": 1.78, + "learning_rate": 7.624333256320882e-05, + "loss": 9.8398, + "step": 24150 + }, + { + "epoch": 1.79, + "learning_rate": 
7.619310705065745e-05, + "loss": 10.3631, + "step": 24200 + }, + { + "epoch": 1.79, + "learning_rate": 7.61428815381061e-05, + "loss": 10.6078, + "step": 24250 + }, + { + "epoch": 1.79, + "learning_rate": 7.609265602555474e-05, + "loss": 11.366, + "step": 24300 + }, + { + "epoch": 1.8, + "learning_rate": 7.60424305130034e-05, + "loss": 12.1154, + "step": 24350 + }, + { + "epoch": 1.8, + "learning_rate": 7.599220500045204e-05, + "loss": 11.3429, + "step": 24400 + }, + { + "epoch": 1.8, + "learning_rate": 7.594197948790068e-05, + "loss": 9.135, + "step": 24450 + }, + { + "epoch": 1.81, + "learning_rate": 7.589175397534933e-05, + "loss": 10.3796, + "step": 24500 + }, + { + "epoch": 1.81, + "learning_rate": 7.584152846279796e-05, + "loss": 10.6452, + "step": 24550 + }, + { + "epoch": 1.81, + "learning_rate": 7.579130295024661e-05, + "loss": 9.6237, + "step": 24600 + }, + { + "epoch": 1.82, + "learning_rate": 7.574107743769525e-05, + "loss": 10.7158, + "step": 24650 + }, + { + "epoch": 1.82, + "learning_rate": 7.56908519251439e-05, + "loss": 9.8296, + "step": 24700 + }, + { + "epoch": 1.83, + "learning_rate": 7.564062641259255e-05, + "loss": 10.1654, + "step": 24750 + }, + { + "epoch": 1.83, + "learning_rate": 7.559040090004119e-05, + "loss": 10.395, + "step": 24800 + }, + { + "epoch": 1.83, + "learning_rate": 7.554017538748984e-05, + "loss": 10.3067, + "step": 24850 + }, + { + "epoch": 1.84, + "learning_rate": 7.548994987493848e-05, + "loss": 10.7243, + "step": 24900 + }, + { + "epoch": 1.84, + "learning_rate": 7.543972436238712e-05, + "loss": 10.4022, + "step": 24950 + }, + { + "epoch": 1.84, + "learning_rate": 7.538949884983577e-05, + "loss": 10.5045, + "step": 25000 + }, + { + "epoch": 1.85, + "learning_rate": 7.533927333728441e-05, + "loss": 11.2205, + "step": 25050 + }, + { + "epoch": 1.85, + "learning_rate": 7.528904782473306e-05, + "loss": 10.5375, + "step": 25100 + }, + { + "epoch": 1.86, + "learning_rate": 7.52388223121817e-05, + "loss": 10.4876, + "step": 
25150 + }, + { + "epoch": 1.86, + "learning_rate": 7.518859679963034e-05, + "loss": 9.2096, + "step": 25200 + }, + { + "epoch": 1.86, + "learning_rate": 7.513837128707899e-05, + "loss": 10.0442, + "step": 25250 + }, + { + "epoch": 1.87, + "learning_rate": 7.508814577452763e-05, + "loss": 9.8174, + "step": 25300 + }, + { + "epoch": 1.87, + "learning_rate": 7.503792026197628e-05, + "loss": 10.8789, + "step": 25350 + }, + { + "epoch": 1.87, + "learning_rate": 7.498769474942492e-05, + "loss": 9.8789, + "step": 25400 + }, + { + "epoch": 1.88, + "learning_rate": 7.493746923687356e-05, + "loss": 11.1431, + "step": 25450 + }, + { + "epoch": 1.88, + "learning_rate": 7.488724372432221e-05, + "loss": 10.4659, + "step": 25500 + }, + { + "epoch": 1.88, + "learning_rate": 7.483701821177087e-05, + "loss": 10.7342, + "step": 25550 + }, + { + "epoch": 1.89, + "learning_rate": 7.47867926992195e-05, + "loss": 10.7841, + "step": 25600 + }, + { + "epoch": 1.89, + "learning_rate": 7.473656718666814e-05, + "loss": 9.6162, + "step": 25650 + }, + { + "epoch": 1.9, + "learning_rate": 7.468634167411678e-05, + "loss": 10.3568, + "step": 25700 + }, + { + "epoch": 1.9, + "learning_rate": 7.463611616156543e-05, + "loss": 9.6701, + "step": 25750 + }, + { + "epoch": 1.9, + "learning_rate": 7.458589064901407e-05, + "loss": 9.4003, + "step": 25800 + }, + { + "epoch": 1.91, + "learning_rate": 7.453566513646272e-05, + "loss": 9.6621, + "step": 25850 + }, + { + "epoch": 1.91, + "learning_rate": 7.448543962391137e-05, + "loss": 10.1086, + "step": 25900 + }, + { + "epoch": 1.91, + "learning_rate": 7.443521411136e-05, + "loss": 11.5655, + "step": 25950 + }, + { + "epoch": 1.92, + "learning_rate": 7.438498859880866e-05, + "loss": 8.9418, + "step": 26000 + }, + { + "epoch": 1.92, + "learning_rate": 7.433476308625729e-05, + "loss": 9.2415, + "step": 26050 + }, + { + "epoch": 1.93, + "learning_rate": 7.428453757370595e-05, + "loss": 9.4192, + "step": 26100 + }, + { + "epoch": 1.93, + "learning_rate": 
7.423431206115458e-05, + "loss": 9.1755, + "step": 26150 + }, + { + "epoch": 1.93, + "learning_rate": 7.418408654860322e-05, + "loss": 9.6327, + "step": 26200 + }, + { + "epoch": 1.94, + "learning_rate": 7.413386103605188e-05, + "loss": 10.3333, + "step": 26250 + }, + { + "epoch": 1.94, + "learning_rate": 7.408363552350051e-05, + "loss": 10.298, + "step": 26300 + }, + { + "epoch": 1.94, + "learning_rate": 7.403341001094917e-05, + "loss": 10.7038, + "step": 26350 + }, + { + "epoch": 1.95, + "learning_rate": 7.39831844983978e-05, + "loss": 10.5099, + "step": 26400 + }, + { + "epoch": 1.95, + "learning_rate": 7.393295898584646e-05, + "loss": 9.8063, + "step": 26450 + }, + { + "epoch": 1.95, + "learning_rate": 7.38827334732951e-05, + "loss": 9.5784, + "step": 26500 + }, + { + "epoch": 1.96, + "learning_rate": 7.383250796074375e-05, + "loss": 10.1958, + "step": 26550 + }, + { + "epoch": 1.96, + "learning_rate": 7.378228244819239e-05, + "loss": 9.6869, + "step": 26600 + }, + { + "epoch": 1.97, + "learning_rate": 7.373205693564103e-05, + "loss": 10.3761, + "step": 26650 + }, + { + "epoch": 1.97, + "learning_rate": 7.368183142308968e-05, + "loss": 11.6806, + "step": 26700 + }, + { + "epoch": 1.97, + "learning_rate": 7.363160591053832e-05, + "loss": 10.3183, + "step": 26750 + }, + { + "epoch": 1.98, + "learning_rate": 7.358138039798697e-05, + "loss": 11.041, + "step": 26800 + }, + { + "epoch": 1.98, + "learning_rate": 7.353115488543561e-05, + "loss": 9.6997, + "step": 26850 + }, + { + "epoch": 1.98, + "learning_rate": 7.348092937288425e-05, + "loss": 9.6029, + "step": 26900 + }, + { + "epoch": 1.99, + "learning_rate": 7.34307038603329e-05, + "loss": 10.3322, + "step": 26950 + }, + { + "epoch": 1.99, + "learning_rate": 7.338047834778154e-05, + "loss": 9.9009, + "step": 27000 + }, + { + "epoch": 2.0, + "learning_rate": 7.333025283523019e-05, + "loss": 10.4815, + "step": 27050 + }, + { + "epoch": 2.0, + "learning_rate": 7.328002732267883e-05, + "loss": 11.7049, + "step": 27100 
+ }, + { + "epoch": 2.0, + "learning_rate": 7.322980181012748e-05, + "loss": 10.7831, + "step": 27150 + }, + { + "epoch": 2.01, + "learning_rate": 7.317957629757612e-05, + "loss": 8.735, + "step": 27200 + }, + { + "epoch": 2.01, + "learning_rate": 7.312935078502476e-05, + "loss": 9.4056, + "step": 27250 + }, + { + "epoch": 2.01, + "learning_rate": 7.307912527247342e-05, + "loss": 10.7689, + "step": 27300 + }, + { + "epoch": 2.02, + "learning_rate": 7.302889975992205e-05, + "loss": 9.5266, + "step": 27350 + }, + { + "epoch": 2.02, + "learning_rate": 7.29786742473707e-05, + "loss": 8.2467, + "step": 27400 + }, + { + "epoch": 2.02, + "learning_rate": 7.292844873481934e-05, + "loss": 8.6572, + "step": 27450 + }, + { + "epoch": 2.03, + "learning_rate": 7.287822322226798e-05, + "loss": 8.4693, + "step": 27500 + }, + { + "epoch": 2.03, + "learning_rate": 7.282799770971663e-05, + "loss": 10.4867, + "step": 27550 + }, + { + "epoch": 2.04, + "learning_rate": 7.277777219716527e-05, + "loss": 8.9364, + "step": 27600 + }, + { + "epoch": 2.04, + "learning_rate": 7.272754668461393e-05, + "loss": 10.0109, + "step": 27650 + }, + { + "epoch": 2.04, + "learning_rate": 7.267732117206256e-05, + "loss": 9.5535, + "step": 27700 + }, + { + "epoch": 2.05, + "learning_rate": 7.262709565951122e-05, + "loss": 9.3029, + "step": 27750 + }, + { + "epoch": 2.05, + "learning_rate": 7.257687014695985e-05, + "loss": 9.854, + "step": 27800 + }, + { + "epoch": 2.05, + "learning_rate": 7.25266446344085e-05, + "loss": 9.5327, + "step": 27850 + }, + { + "epoch": 2.06, + "learning_rate": 7.247641912185714e-05, + "loss": 9.8255, + "step": 27900 + }, + { + "epoch": 2.06, + "learning_rate": 7.242619360930578e-05, + "loss": 9.9737, + "step": 27950 + }, + { + "epoch": 2.07, + "learning_rate": 7.237596809675444e-05, + "loss": 9.0471, + "step": 28000 + }, + { + "epoch": 2.07, + "learning_rate": 7.232574258420307e-05, + "loss": 10.0566, + "step": 28050 + }, + { + "epoch": 2.07, + "learning_rate": 
7.227551707165173e-05, + "loss": 9.4781, + "step": 28100 + }, + { + "epoch": 2.08, + "learning_rate": 7.222529155910036e-05, + "loss": 8.7599, + "step": 28150 + }, + { + "epoch": 2.08, + "learning_rate": 7.217506604654901e-05, + "loss": 8.7605, + "step": 28200 + }, + { + "epoch": 2.08, + "learning_rate": 7.212484053399764e-05, + "loss": 10.061, + "step": 28250 + }, + { + "epoch": 2.09, + "learning_rate": 7.20746150214463e-05, + "loss": 9.6124, + "step": 28300 + }, + { + "epoch": 2.09, + "learning_rate": 7.202438950889495e-05, + "loss": 10.4776, + "step": 28350 + }, + { + "epoch": 2.09, + "learning_rate": 7.197416399634359e-05, + "loss": 9.2169, + "step": 28400 + }, + { + "epoch": 2.1, + "learning_rate": 7.192393848379223e-05, + "loss": 9.3654, + "step": 28450 + }, + { + "epoch": 2.1, + "learning_rate": 7.187371297124086e-05, + "loss": 9.4445, + "step": 28500 + }, + { + "epoch": 2.11, + "learning_rate": 7.182348745868952e-05, + "loss": 8.3614, + "step": 28550 + }, + { + "epoch": 2.11, + "learning_rate": 7.177326194613815e-05, + "loss": 9.1661, + "step": 28600 + }, + { + "epoch": 2.11, + "learning_rate": 7.172303643358681e-05, + "loss": 9.4976, + "step": 28650 + }, + { + "epoch": 2.12, + "learning_rate": 7.167281092103545e-05, + "loss": 9.125, + "step": 28700 + }, + { + "epoch": 2.12, + "learning_rate": 7.16225854084841e-05, + "loss": 8.9051, + "step": 28750 + }, + { + "epoch": 2.12, + "learning_rate": 7.157235989593274e-05, + "loss": 8.9753, + "step": 28800 + }, + { + "epoch": 2.13, + "learning_rate": 7.152213438338139e-05, + "loss": 9.133, + "step": 28850 + }, + { + "epoch": 2.13, + "learning_rate": 7.147190887083003e-05, + "loss": 9.9677, + "step": 28900 + }, + { + "epoch": 2.14, + "learning_rate": 7.142168335827867e-05, + "loss": 8.725, + "step": 28950 + }, + { + "epoch": 2.14, + "learning_rate": 7.137145784572732e-05, + "loss": 8.831, + "step": 29000 + }, + { + "epoch": 2.14, + "learning_rate": 7.132123233317596e-05, + "loss": 7.8207, + "step": 29050 + }, + { + 
"epoch": 2.15, + "learning_rate": 7.127100682062461e-05, + "loss": 9.3707, + "step": 29100 + }, + { + "epoch": 2.15, + "learning_rate": 7.122078130807325e-05, + "loss": 10.4259, + "step": 29150 + }, + { + "epoch": 2.15, + "learning_rate": 7.11705557955219e-05, + "loss": 8.1836, + "step": 29200 + }, + { + "epoch": 2.16, + "learning_rate": 7.112033028297054e-05, + "loss": 9.0874, + "step": 29250 + }, + { + "epoch": 2.16, + "learning_rate": 7.107010477041918e-05, + "loss": 9.5957, + "step": 29300 + }, + { + "epoch": 2.16, + "learning_rate": 7.101987925786783e-05, + "loss": 8.7545, + "step": 29350 + }, + { + "epoch": 2.17, + "learning_rate": 7.096965374531647e-05, + "loss": 8.4478, + "step": 29400 + }, + { + "epoch": 2.17, + "learning_rate": 7.091942823276512e-05, + "loss": 8.601, + "step": 29450 + }, + { + "epoch": 2.18, + "learning_rate": 7.086920272021377e-05, + "loss": 9.6172, + "step": 29500 + }, + { + "epoch": 2.18, + "learning_rate": 7.08189772076624e-05, + "loss": 9.0805, + "step": 29550 + }, + { + "epoch": 2.18, + "learning_rate": 7.076875169511106e-05, + "loss": 9.6039, + "step": 29600 + }, + { + "epoch": 2.19, + "learning_rate": 7.071852618255969e-05, + "loss": 9.3622, + "step": 29650 + }, + { + "epoch": 2.19, + "learning_rate": 7.066830067000834e-05, + "loss": 8.8765, + "step": 29700 + }, + { + "epoch": 2.19, + "learning_rate": 7.061807515745699e-05, + "loss": 8.992, + "step": 29750 + }, + { + "epoch": 2.2, + "learning_rate": 7.056784964490562e-05, + "loss": 10.3564, + "step": 29800 + }, + { + "epoch": 2.2, + "learning_rate": 7.051762413235428e-05, + "loss": 8.8092, + "step": 29850 + }, + { + "epoch": 2.21, + "learning_rate": 7.046739861980291e-05, + "loss": 9.8373, + "step": 29900 + }, + { + "epoch": 2.21, + "learning_rate": 7.041717310725157e-05, + "loss": 8.004, + "step": 29950 + }, + { + "epoch": 2.21, + "learning_rate": 7.03669475947002e-05, + "loss": 9.4461, + "step": 30000 + }, + { + "epoch": 2.22, + "learning_rate": 7.031672208214886e-05, + "loss": 
8.4964, + "step": 30050 + }, + { + "epoch": 2.22, + "learning_rate": 7.02664965695975e-05, + "loss": 10.3181, + "step": 30100 + }, + { + "epoch": 2.22, + "learning_rate": 7.021627105704615e-05, + "loss": 8.6637, + "step": 30150 + }, + { + "epoch": 2.23, + "learning_rate": 7.016604554449479e-05, + "loss": 10.1703, + "step": 30200 + }, + { + "epoch": 2.23, + "learning_rate": 7.011582003194342e-05, + "loss": 9.2846, + "step": 30250 + }, + { + "epoch": 2.24, + "learning_rate": 7.006559451939208e-05, + "loss": 8.5913, + "step": 30300 + }, + { + "epoch": 2.24, + "learning_rate": 7.001536900684071e-05, + "loss": 9.1308, + "step": 30350 + }, + { + "epoch": 2.24, + "learning_rate": 6.996514349428937e-05, + "loss": 11.2229, + "step": 30400 + }, + { + "epoch": 2.25, + "learning_rate": 6.991491798173801e-05, + "loss": 8.5923, + "step": 30450 + }, + { + "epoch": 2.25, + "learning_rate": 6.986469246918665e-05, + "loss": 9.9826, + "step": 30500 + }, + { + "epoch": 2.25, + "learning_rate": 6.98144669566353e-05, + "loss": 8.4765, + "step": 30550 + }, + { + "epoch": 2.26, + "learning_rate": 6.976424144408394e-05, + "loss": 8.7624, + "step": 30600 + }, + { + "epoch": 2.26, + "learning_rate": 6.971401593153259e-05, + "loss": 9.238, + "step": 30650 + }, + { + "epoch": 2.26, + "learning_rate": 6.966379041898123e-05, + "loss": 8.4976, + "step": 30700 + }, + { + "epoch": 2.27, + "learning_rate": 6.961356490642987e-05, + "loss": 9.1886, + "step": 30750 + }, + { + "epoch": 2.27, + "learning_rate": 6.956333939387852e-05, + "loss": 8.4443, + "step": 30800 + }, + { + "epoch": 2.28, + "learning_rate": 6.951311388132716e-05, + "loss": 8.3648, + "step": 30850 + }, + { + "epoch": 2.28, + "learning_rate": 6.94628883687758e-05, + "loss": 9.2509, + "step": 30900 + }, + { + "epoch": 2.28, + "learning_rate": 6.941266285622445e-05, + "loss": 8.3765, + "step": 30950 + }, + { + "epoch": 2.29, + "learning_rate": 6.93624373436731e-05, + "loss": 9.6616, + "step": 31000 + }, + { + "epoch": 2.29, + 
"learning_rate": 6.931221183112174e-05, + "loss": 9.658, + "step": 31050 + }, + { + "epoch": 2.29, + "learning_rate": 6.926198631857038e-05, + "loss": 8.7527, + "step": 31100 + }, + { + "epoch": 2.3, + "learning_rate": 6.921176080601903e-05, + "loss": 8.7148, + "step": 31150 + }, + { + "epoch": 2.3, + "learning_rate": 6.916153529346767e-05, + "loss": 8.5962, + "step": 31200 + }, + { + "epoch": 2.31, + "learning_rate": 6.911130978091633e-05, + "loss": 9.2625, + "step": 31250 + }, + { + "epoch": 2.31, + "learning_rate": 6.906108426836496e-05, + "loss": 8.8352, + "step": 31300 + }, + { + "epoch": 2.31, + "learning_rate": 6.90108587558136e-05, + "loss": 7.3991, + "step": 31350 + }, + { + "epoch": 2.32, + "learning_rate": 6.896063324326225e-05, + "loss": 9.9391, + "step": 31400 + }, + { + "epoch": 2.32, + "learning_rate": 6.891040773071089e-05, + "loss": 8.9575, + "step": 31450 + }, + { + "epoch": 2.32, + "learning_rate": 6.886018221815954e-05, + "loss": 7.9103, + "step": 31500 + }, + { + "epoch": 2.33, + "learning_rate": 6.880995670560818e-05, + "loss": 8.5276, + "step": 31550 + }, + { + "epoch": 2.33, + "learning_rate": 6.875973119305684e-05, + "loss": 8.5427, + "step": 31600 + }, + { + "epoch": 2.33, + "learning_rate": 6.870950568050547e-05, + "loss": 8.4672, + "step": 31650 + }, + { + "epoch": 2.34, + "learning_rate": 6.865928016795412e-05, + "loss": 8.9638, + "step": 31700 + }, + { + "epoch": 2.34, + "learning_rate": 6.860905465540276e-05, + "loss": 8.3136, + "step": 31750 + }, + { + "epoch": 2.35, + "learning_rate": 6.855882914285141e-05, + "loss": 8.8076, + "step": 31800 + }, + { + "epoch": 2.35, + "learning_rate": 6.850860363030004e-05, + "loss": 8.6041, + "step": 31850 + }, + { + "epoch": 2.35, + "learning_rate": 6.845837811774869e-05, + "loss": 9.1751, + "step": 31900 + }, + { + "epoch": 2.36, + "learning_rate": 6.840815260519735e-05, + "loss": 8.5955, + "step": 31950 + }, + { + "epoch": 2.36, + "learning_rate": 6.835792709264598e-05, + "loss": 9.0927, + 
"step": 32000 + }, + { + "epoch": 2.36, + "learning_rate": 6.830770158009463e-05, + "loss": 7.9647, + "step": 32050 + }, + { + "epoch": 2.37, + "learning_rate": 6.825747606754326e-05, + "loss": 10.2647, + "step": 32100 + }, + { + "epoch": 2.37, + "learning_rate": 6.820725055499192e-05, + "loss": 8.3442, + "step": 32150 + }, + { + "epoch": 2.38, + "learning_rate": 6.815702504244057e-05, + "loss": 9.2019, + "step": 32200 + }, + { + "epoch": 2.38, + "learning_rate": 6.810679952988921e-05, + "loss": 8.345, + "step": 32250 + }, + { + "epoch": 2.38, + "learning_rate": 6.805657401733785e-05, + "loss": 9.1835, + "step": 32300 + }, + { + "epoch": 2.39, + "learning_rate": 6.80063485047865e-05, + "loss": 9.1846, + "step": 32350 + }, + { + "epoch": 2.39, + "learning_rate": 6.795612299223514e-05, + "loss": 9.0015, + "step": 32400 + }, + { + "epoch": 2.39, + "learning_rate": 6.790589747968379e-05, + "loss": 8.2404, + "step": 32450 + }, + { + "epoch": 2.4, + "learning_rate": 6.785567196713243e-05, + "loss": 8.8715, + "step": 32500 + }, + { + "epoch": 2.4, + "learning_rate": 6.780544645458107e-05, + "loss": 8.817, + "step": 32550 + }, + { + "epoch": 2.4, + "learning_rate": 6.775522094202972e-05, + "loss": 9.2154, + "step": 32600 + }, + { + "epoch": 2.41, + "learning_rate": 6.770499542947836e-05, + "loss": 9.1914, + "step": 32650 + }, + { + "epoch": 2.41, + "learning_rate": 6.7654769916927e-05, + "loss": 9.2804, + "step": 32700 + }, + { + "epoch": 2.42, + "learning_rate": 6.760454440437565e-05, + "loss": 9.177, + "step": 32750 + }, + { + "epoch": 2.42, + "learning_rate": 6.75543188918243e-05, + "loss": 8.8259, + "step": 32800 + }, + { + "epoch": 2.42, + "learning_rate": 6.750409337927294e-05, + "loss": 8.6121, + "step": 32850 + }, + { + "epoch": 2.43, + "learning_rate": 6.745386786672158e-05, + "loss": 8.644, + "step": 32900 + }, + { + "epoch": 2.43, + "learning_rate": 6.740364235417023e-05, + "loss": 8.5743, + "step": 32950 + }, + { + "epoch": 2.43, + "learning_rate": 
6.735341684161888e-05, + "loss": 8.7636, + "step": 33000 + }, + { + "epoch": 2.44, + "learning_rate": 6.730319132906751e-05, + "loss": 8.3064, + "step": 33050 + }, + { + "epoch": 2.44, + "learning_rate": 6.725296581651616e-05, + "loss": 8.8806, + "step": 33100 + }, + { + "epoch": 2.45, + "learning_rate": 6.72027403039648e-05, + "loss": 8.8212, + "step": 33150 + }, + { + "epoch": 2.45, + "learning_rate": 6.715251479141345e-05, + "loss": 9.5261, + "step": 33200 + }, + { + "epoch": 2.45, + "learning_rate": 6.710228927886209e-05, + "loss": 9.0764, + "step": 33250 + }, + { + "epoch": 2.46, + "learning_rate": 6.705206376631073e-05, + "loss": 7.399, + "step": 33300 + }, + { + "epoch": 2.46, + "learning_rate": 6.700183825375939e-05, + "loss": 9.4119, + "step": 33350 + }, + { + "epoch": 2.46, + "learning_rate": 6.695161274120802e-05, + "loss": 8.4576, + "step": 33400 + }, + { + "epoch": 2.47, + "learning_rate": 6.690138722865668e-05, + "loss": 8.024, + "step": 33450 + }, + { + "epoch": 2.47, + "learning_rate": 6.685116171610531e-05, + "loss": 9.1605, + "step": 33500 + }, + { + "epoch": 2.47, + "learning_rate": 6.680093620355397e-05, + "loss": 8.3661, + "step": 33550 + }, + { + "epoch": 2.48, + "learning_rate": 6.67507106910026e-05, + "loss": 8.4145, + "step": 33600 + }, + { + "epoch": 2.48, + "learning_rate": 6.670048517845124e-05, + "loss": 7.824, + "step": 33650 + }, + { + "epoch": 2.49, + "learning_rate": 6.66502596658999e-05, + "loss": 9.129, + "step": 33700 + }, + { + "epoch": 2.49, + "learning_rate": 6.660003415334853e-05, + "loss": 9.0876, + "step": 33750 + }, + { + "epoch": 2.49, + "learning_rate": 6.654980864079719e-05, + "loss": 8.6961, + "step": 33800 + }, + { + "epoch": 2.5, + "learning_rate": 6.649958312824582e-05, + "loss": 8.1584, + "step": 33850 + }, + { + "epoch": 2.5, + "learning_rate": 6.644935761569448e-05, + "loss": 8.6587, + "step": 33900 + }, + { + "epoch": 2.5, + "learning_rate": 6.639913210314311e-05, + "loss": 8.1059, + "step": 33950 + }, + { + 
"epoch": 2.51, + "learning_rate": 6.634890659059176e-05, + "loss": 9.2588, + "step": 34000 + }, + { + "epoch": 2.51, + "learning_rate": 6.629868107804041e-05, + "loss": 8.6443, + "step": 34050 + }, + { + "epoch": 2.52, + "learning_rate": 6.624845556548905e-05, + "loss": 8.8006, + "step": 34100 + }, + { + "epoch": 2.52, + "learning_rate": 6.61982300529377e-05, + "loss": 9.2288, + "step": 34150 + }, + { + "epoch": 2.52, + "learning_rate": 6.614800454038633e-05, + "loss": 9.0328, + "step": 34200 + }, + { + "epoch": 2.53, + "learning_rate": 6.609777902783499e-05, + "loss": 7.8269, + "step": 34250 + }, + { + "epoch": 2.53, + "learning_rate": 6.604755351528362e-05, + "loss": 8.5883, + "step": 34300 + }, + { + "epoch": 2.53, + "learning_rate": 6.599732800273227e-05, + "loss": 9.9388, + "step": 34350 + }, + { + "epoch": 2.54, + "learning_rate": 6.594710249018092e-05, + "loss": 8.6776, + "step": 34400 + }, + { + "epoch": 2.54, + "learning_rate": 6.589687697762956e-05, + "loss": 7.2287, + "step": 34450 + }, + { + "epoch": 2.54, + "learning_rate": 6.58466514650782e-05, + "loss": 7.7042, + "step": 34500 + }, + { + "epoch": 2.55, + "learning_rate": 6.579642595252685e-05, + "loss": 9.0004, + "step": 34550 + }, + { + "epoch": 2.55, + "learning_rate": 6.57462004399755e-05, + "loss": 9.3279, + "step": 34600 + }, + { + "epoch": 2.56, + "learning_rate": 6.569597492742414e-05, + "loss": 8.9144, + "step": 34650 + }, + { + "epoch": 2.56, + "learning_rate": 6.564574941487278e-05, + "loss": 9.3319, + "step": 34700 + }, + { + "epoch": 2.56, + "learning_rate": 6.559552390232143e-05, + "loss": 9.4986, + "step": 34750 + }, + { + "epoch": 2.57, + "learning_rate": 6.554529838977007e-05, + "loss": 9.002, + "step": 34800 + }, + { + "epoch": 2.57, + "learning_rate": 6.549507287721871e-05, + "loss": 8.6061, + "step": 34850 + }, + { + "epoch": 2.57, + "learning_rate": 6.544484736466736e-05, + "loss": 7.4598, + "step": 34900 + }, + { + "epoch": 2.58, + "learning_rate": 6.5394621852116e-05, + "loss": 
8.6618, + "step": 34950 + }, + { + "epoch": 2.58, + "learning_rate": 6.534439633956465e-05, + "loss": 9.0226, + "step": 35000 + }, + { + "epoch": 2.59, + "learning_rate": 6.529417082701329e-05, + "loss": 7.9738, + "step": 35050 + }, + { + "epoch": 2.59, + "learning_rate": 6.524394531446193e-05, + "loss": 8.7871, + "step": 35100 + }, + { + "epoch": 2.59, + "learning_rate": 6.519371980191058e-05, + "loss": 8.8744, + "step": 35150 + }, + { + "epoch": 2.6, + "learning_rate": 6.514349428935924e-05, + "loss": 8.3771, + "step": 35200 + }, + { + "epoch": 2.6, + "learning_rate": 6.509326877680787e-05, + "loss": 8.058, + "step": 35250 + }, + { + "epoch": 2.6, + "learning_rate": 6.504304326425652e-05, + "loss": 8.2627, + "step": 35300 + }, + { + "epoch": 2.61, + "learning_rate": 6.499281775170515e-05, + "loss": 8.1643, + "step": 35350 + }, + { + "epoch": 2.61, + "learning_rate": 6.49425922391538e-05, + "loss": 7.888, + "step": 35400 + }, + { + "epoch": 2.61, + "learning_rate": 6.489236672660246e-05, + "loss": 7.9235, + "step": 35450 + }, + { + "epoch": 2.62, + "learning_rate": 6.484214121405109e-05, + "loss": 8.1139, + "step": 35500 + }, + { + "epoch": 2.62, + "learning_rate": 6.479191570149974e-05, + "loss": 8.7467, + "step": 35550 + }, + { + "epoch": 2.63, + "learning_rate": 6.474169018894837e-05, + "loss": 7.4693, + "step": 35600 + }, + { + "epoch": 2.63, + "learning_rate": 6.469146467639703e-05, + "loss": 8.5167, + "step": 35650 + }, + { + "epoch": 2.63, + "learning_rate": 6.464123916384566e-05, + "loss": 9.5274, + "step": 35700 + }, + { + "epoch": 2.64, + "learning_rate": 6.459101365129432e-05, + "loss": 8.9735, + "step": 35750 + }, + { + "epoch": 2.64, + "learning_rate": 6.454078813874296e-05, + "loss": 8.1756, + "step": 35800 + }, + { + "epoch": 2.64, + "learning_rate": 6.449056262619161e-05, + "loss": 7.8084, + "step": 35850 + }, + { + "epoch": 2.65, + "learning_rate": 6.444033711364025e-05, + "loss": 8.2671, + "step": 35900 + }, + { + "epoch": 2.65, + 
"learning_rate": 6.439011160108888e-05, + "loss": 8.6628, + "step": 35950 + }, + { + "epoch": 2.66, + "learning_rate": 6.433988608853754e-05, + "loss": 9.8654, + "step": 36000 + }, + { + "epoch": 2.66, + "learning_rate": 6.428966057598617e-05, + "loss": 9.104, + "step": 36050 + }, + { + "epoch": 2.66, + "learning_rate": 6.423943506343483e-05, + "loss": 9.4156, + "step": 36100 + }, + { + "epoch": 2.67, + "learning_rate": 6.418920955088347e-05, + "loss": 8.9803, + "step": 36150 + }, + { + "epoch": 2.67, + "learning_rate": 6.413898403833212e-05, + "loss": 8.9584, + "step": 36200 + }, + { + "epoch": 2.67, + "learning_rate": 6.408875852578076e-05, + "loss": 7.3683, + "step": 36250 + }, + { + "epoch": 2.68, + "learning_rate": 6.40385330132294e-05, + "loss": 8.3277, + "step": 36300 + }, + { + "epoch": 2.68, + "learning_rate": 6.398830750067805e-05, + "loss": 9.3236, + "step": 36350 + }, + { + "epoch": 2.68, + "learning_rate": 6.393808198812669e-05, + "loss": 8.6918, + "step": 36400 + }, + { + "epoch": 2.69, + "learning_rate": 6.388785647557534e-05, + "loss": 8.9422, + "step": 36450 + }, + { + "epoch": 2.69, + "learning_rate": 6.383763096302398e-05, + "loss": 8.8438, + "step": 36500 + }, + { + "epoch": 2.7, + "learning_rate": 6.378740545047263e-05, + "loss": 8.7752, + "step": 36550 + }, + { + "epoch": 2.7, + "learning_rate": 6.373717993792127e-05, + "loss": 8.6483, + "step": 36600 + }, + { + "epoch": 2.7, + "learning_rate": 6.368695442536991e-05, + "loss": 8.5753, + "step": 36650 + }, + { + "epoch": 2.71, + "learning_rate": 6.363672891281856e-05, + "loss": 8.1893, + "step": 36700 + }, + { + "epoch": 2.71, + "learning_rate": 6.35865034002672e-05, + "loss": 8.189, + "step": 36750 + }, + { + "epoch": 2.71, + "learning_rate": 6.353627788771585e-05, + "loss": 8.2979, + "step": 36800 + }, + { + "epoch": 2.72, + "learning_rate": 6.348605237516449e-05, + "loss": 8.3904, + "step": 36850 + }, + { + "epoch": 2.72, + "learning_rate": 6.343582686261313e-05, + "loss": 9.3274, + "step": 
36900 + }, + { + "epoch": 2.73, + "learning_rate": 6.338560135006179e-05, + "loss": 7.7663, + "step": 36950 + }, + { + "epoch": 2.73, + "learning_rate": 6.333537583751042e-05, + "loss": 8.2105, + "step": 37000 + }, + { + "epoch": 2.73, + "learning_rate": 6.328515032495907e-05, + "loss": 8.035, + "step": 37050 + }, + { + "epoch": 2.74, + "learning_rate": 6.323492481240771e-05, + "loss": 9.5032, + "step": 37100 + }, + { + "epoch": 2.74, + "learning_rate": 6.318469929985635e-05, + "loss": 8.3856, + "step": 37150 + }, + { + "epoch": 2.74, + "learning_rate": 6.3134473787305e-05, + "loss": 8.9941, + "step": 37200 + }, + { + "epoch": 2.75, + "learning_rate": 6.308424827475364e-05, + "loss": 8.3987, + "step": 37250 + }, + { + "epoch": 2.75, + "learning_rate": 6.30340227622023e-05, + "loss": 9.1753, + "step": 37300 + }, + { + "epoch": 2.76, + "learning_rate": 6.298379724965093e-05, + "loss": 7.9557, + "step": 37350 + }, + { + "epoch": 2.76, + "learning_rate": 6.293357173709959e-05, + "loss": 7.725, + "step": 37400 + }, + { + "epoch": 2.76, + "learning_rate": 6.288334622454822e-05, + "loss": 8.0807, + "step": 37450 + }, + { + "epoch": 2.77, + "learning_rate": 6.283312071199688e-05, + "loss": 8.6492, + "step": 37500 + }, + { + "epoch": 2.77, + "learning_rate": 6.27828951994455e-05, + "loss": 8.4716, + "step": 37550 + }, + { + "epoch": 2.77, + "learning_rate": 6.273266968689416e-05, + "loss": 8.7209, + "step": 37600 + }, + { + "epoch": 2.78, + "learning_rate": 6.268244417434281e-05, + "loss": 8.4902, + "step": 37650 + }, + { + "epoch": 2.78, + "learning_rate": 6.263221866179144e-05, + "loss": 7.9589, + "step": 37700 + }, + { + "epoch": 2.78, + "learning_rate": 6.25819931492401e-05, + "loss": 9.3285, + "step": 37750 + }, + { + "epoch": 2.79, + "learning_rate": 6.253176763668873e-05, + "loss": 9.0506, + "step": 37800 + }, + { + "epoch": 2.79, + "learning_rate": 6.248154212413738e-05, + "loss": 7.9992, + "step": 37850 + }, + { + "epoch": 2.8, + "learning_rate": 
6.243131661158603e-05, + "loss": 8.029, + "step": 37900 + }, + { + "epoch": 2.8, + "learning_rate": 6.238109109903467e-05, + "loss": 8.6681, + "step": 37950 + }, + { + "epoch": 2.8, + "learning_rate": 6.233086558648332e-05, + "loss": 8.5906, + "step": 38000 + }, + { + "epoch": 2.81, + "learning_rate": 6.228064007393196e-05, + "loss": 10.4719, + "step": 38050 + }, + { + "epoch": 2.81, + "learning_rate": 6.22304145613806e-05, + "loss": 8.2759, + "step": 38100 + }, + { + "epoch": 2.81, + "learning_rate": 6.218018904882925e-05, + "loss": 8.2633, + "step": 38150 + }, + { + "epoch": 2.82, + "learning_rate": 6.212996353627789e-05, + "loss": 8.5218, + "step": 38200 + }, + { + "epoch": 2.82, + "learning_rate": 6.207973802372654e-05, + "loss": 8.0609, + "step": 38250 + }, + { + "epoch": 2.83, + "learning_rate": 6.202951251117518e-05, + "loss": 9.3672, + "step": 38300 + }, + { + "epoch": 2.83, + "learning_rate": 6.197928699862382e-05, + "loss": 10.1768, + "step": 38350 + }, + { + "epoch": 2.83, + "learning_rate": 6.192906148607247e-05, + "loss": 9.4389, + "step": 38400 + }, + { + "epoch": 2.84, + "learning_rate": 6.187883597352111e-05, + "loss": 7.6737, + "step": 38450 + }, + { + "epoch": 2.84, + "learning_rate": 6.182861046096976e-05, + "loss": 9.2337, + "step": 38500 + }, + { + "epoch": 2.84, + "learning_rate": 6.17783849484184e-05, + "loss": 8.7846, + "step": 38550 + }, + { + "epoch": 2.85, + "learning_rate": 6.172815943586704e-05, + "loss": 7.8709, + "step": 38600 + }, + { + "epoch": 2.85, + "learning_rate": 6.167793392331569e-05, + "loss": 8.8688, + "step": 38650 + }, + { + "epoch": 2.85, + "learning_rate": 6.162770841076435e-05, + "loss": 8.4087, + "step": 38700 + }, + { + "epoch": 2.86, + "learning_rate": 6.157748289821298e-05, + "loss": 7.7129, + "step": 38750 + }, + { + "epoch": 2.86, + "learning_rate": 6.152725738566162e-05, + "loss": 9.3196, + "step": 38800 + }, + { + "epoch": 2.87, + "learning_rate": 6.147703187311027e-05, + "loss": 8.8242, + "step": 38850 + }, + 
{ + "epoch": 2.87, + "learning_rate": 6.142680636055891e-05, + "loss": 8.4237, + "step": 38900 + }, + { + "epoch": 2.87, + "learning_rate": 6.137658084800755e-05, + "loss": 8.9383, + "step": 38950 + }, + { + "epoch": 2.88, + "learning_rate": 6.13263553354562e-05, + "loss": 8.3749, + "step": 39000 + }, + { + "epoch": 2.88, + "learning_rate": 6.127612982290485e-05, + "loss": 8.8894, + "step": 39050 + }, + { + "epoch": 2.88, + "learning_rate": 6.122590431035349e-05, + "loss": 8.2975, + "step": 39100 + }, + { + "epoch": 2.89, + "learning_rate": 6.117567879780214e-05, + "loss": 8.0517, + "step": 39150 + }, + { + "epoch": 2.89, + "learning_rate": 6.112545328525077e-05, + "loss": 8.0154, + "step": 39200 + }, + { + "epoch": 2.9, + "learning_rate": 6.107522777269943e-05, + "loss": 8.4887, + "step": 39250 + }, + { + "epoch": 2.9, + "learning_rate": 6.102500226014807e-05, + "loss": 8.7064, + "step": 39300 + }, + { + "epoch": 2.9, + "learning_rate": 6.0974776747596706e-05, + "loss": 9.7375, + "step": 39350 + }, + { + "epoch": 2.91, + "learning_rate": 6.0924551235045357e-05, + "loss": 8.8614, + "step": 39400 + }, + { + "epoch": 2.91, + "learning_rate": 6.0874325722493994e-05, + "loss": 8.302, + "step": 39450 + }, + { + "epoch": 2.91, + "learning_rate": 6.0824100209942645e-05, + "loss": 7.8469, + "step": 39500 + }, + { + "epoch": 2.92, + "learning_rate": 6.077387469739129e-05, + "loss": 9.0706, + "step": 39550 + }, + { + "epoch": 2.92, + "learning_rate": 6.072364918483994e-05, + "loss": 9.1398, + "step": 39600 + }, + { + "epoch": 2.92, + "learning_rate": 6.067342367228858e-05, + "loss": 8.1838, + "step": 39650 + }, + { + "epoch": 2.93, + "learning_rate": 6.062319815973723e-05, + "loss": 9.2303, + "step": 39700 + }, + { + "epoch": 2.93, + "learning_rate": 6.0572972647185865e-05, + "loss": 8.3715, + "step": 39750 + }, + { + "epoch": 2.94, + "learning_rate": 6.0522747134634516e-05, + "loss": 8.409, + "step": 39800 + }, + { + "epoch": 2.94, + "learning_rate": 6.047252162208315e-05, 
+ "loss": 8.6441, + "step": 39850 + }, + { + "epoch": 2.94, + "learning_rate": 6.04222961095318e-05, + "loss": 9.0975, + "step": 39900 + }, + { + "epoch": 2.95, + "learning_rate": 6.037207059698045e-05, + "loss": 8.0691, + "step": 39950 + }, + { + "epoch": 2.95, + "learning_rate": 6.0321845084429085e-05, + "loss": 8.6646, + "step": 40000 + }, + { + "epoch": 2.95, + "eval_loss": 8.163222312927246, + "eval_runtime": 957.6189, + "eval_samples_per_second": 13.678, + "eval_steps_per_second": 3.42, + "eval_wer": 0.22493805384066187, + "step": 40000 + }, + { + "epoch": 2.95, + "learning_rate": 6.0271619571877736e-05, + "loss": 8.4278, + "step": 40050 + }, + { + "epoch": 2.96, + "learning_rate": 6.022139405932637e-05, + "loss": 8.1656, + "step": 40100 + }, + { + "epoch": 2.96, + "learning_rate": 6.0171168546775024e-05, + "loss": 7.7975, + "step": 40150 + }, + { + "epoch": 2.97, + "learning_rate": 6.012094303422366e-05, + "loss": 7.5465, + "step": 40200 + }, + { + "epoch": 2.97, + "learning_rate": 6.007071752167231e-05, + "loss": 8.3986, + "step": 40250 + }, + { + "epoch": 2.97, + "learning_rate": 6.0020492009120956e-05, + "loss": 8.3762, + "step": 40300 + }, + { + "epoch": 2.98, + "learning_rate": 5.997026649656961e-05, + "loss": 8.6175, + "step": 40350 + }, + { + "epoch": 2.98, + "learning_rate": 5.9920040984018244e-05, + "loss": 8.5622, + "step": 40400 + }, + { + "epoch": 2.98, + "learning_rate": 5.9869815471466895e-05, + "loss": 8.1824, + "step": 40450 + }, + { + "epoch": 2.99, + "learning_rate": 5.981958995891553e-05, + "loss": 7.2886, + "step": 40500 + }, + { + "epoch": 2.99, + "learning_rate": 5.9769364446364177e-05, + "loss": 8.3469, + "step": 40550 + }, + { + "epoch": 2.99, + "learning_rate": 5.971913893381282e-05, + "loss": 8.6257, + "step": 40600 + }, + { + "epoch": 3.0, + "learning_rate": 5.9668913421261465e-05, + "loss": 7.7071, + "step": 40650 + }, + { + "epoch": 3.0, + "learning_rate": 5.9618687908710116e-05, + "loss": 7.8413, + "step": 40700 + }, + { + 
"epoch": 3.01, + "learning_rate": 5.956846239615875e-05, + "loss": 7.6704, + "step": 40750 + }, + { + "epoch": 3.01, + "learning_rate": 5.9518236883607404e-05, + "loss": 7.3902, + "step": 40800 + }, + { + "epoch": 3.01, + "learning_rate": 5.946801137105604e-05, + "loss": 8.3296, + "step": 40850 + }, + { + "epoch": 3.02, + "learning_rate": 5.941778585850469e-05, + "loss": 7.0884, + "step": 40900 + }, + { + "epoch": 3.02, + "learning_rate": 5.9367560345953336e-05, + "loss": 7.043, + "step": 40950 + }, + { + "epoch": 3.02, + "learning_rate": 5.931733483340198e-05, + "loss": 7.5367, + "step": 41000 + }, + { + "epoch": 3.03, + "learning_rate": 5.9267109320850624e-05, + "loss": 8.3064, + "step": 41050 + }, + { + "epoch": 3.03, + "learning_rate": 5.921688380829926e-05, + "loss": 7.6769, + "step": 41100 + }, + { + "epoch": 3.04, + "learning_rate": 5.916665829574791e-05, + "loss": 9.067, + "step": 41150 + }, + { + "epoch": 3.04, + "learning_rate": 5.911643278319655e-05, + "loss": 8.3565, + "step": 41200 + }, + { + "epoch": 3.04, + "learning_rate": 5.90662072706452e-05, + "loss": 7.8335, + "step": 41250 + }, + { + "epoch": 3.05, + "learning_rate": 5.9015981758093844e-05, + "loss": 7.9617, + "step": 41300 + }, + { + "epoch": 3.05, + "learning_rate": 5.8965756245542495e-05, + "loss": 8.6728, + "step": 41350 + }, + { + "epoch": 3.05, + "learning_rate": 5.891553073299113e-05, + "loss": 7.9142, + "step": 41400 + }, + { + "epoch": 3.06, + "learning_rate": 5.886530522043978e-05, + "loss": 7.7702, + "step": 41450 + }, + { + "epoch": 3.06, + "learning_rate": 5.881507970788842e-05, + "loss": 8.2997, + "step": 41500 + }, + { + "epoch": 3.06, + "learning_rate": 5.876485419533707e-05, + "loss": 8.1519, + "step": 41550 + }, + { + "epoch": 3.07, + "learning_rate": 5.871462868278571e-05, + "loss": 7.3762, + "step": 41600 + }, + { + "epoch": 3.07, + "learning_rate": 5.866440317023435e-05, + "loss": 7.5129, + "step": 41650 + }, + { + "epoch": 3.08, + "learning_rate": 5.8614177657683e-05, + 
"loss": 8.2537, + "step": 41700 + }, + { + "epoch": 3.08, + "learning_rate": 5.856395214513164e-05, + "loss": 8.4148, + "step": 41750 + }, + { + "epoch": 3.08, + "learning_rate": 5.851372663258029e-05, + "loss": 7.1737, + "step": 41800 + }, + { + "epoch": 3.09, + "learning_rate": 5.846350112002893e-05, + "loss": 7.2628, + "step": 41850 + }, + { + "epoch": 3.09, + "learning_rate": 5.841327560747758e-05, + "loss": 7.2933, + "step": 41900 + }, + { + "epoch": 3.09, + "learning_rate": 5.836305009492622e-05, + "loss": 7.7675, + "step": 41950 + }, + { + "epoch": 3.1, + "learning_rate": 5.831282458237487e-05, + "loss": 8.2344, + "step": 42000 + }, + { + "epoch": 3.1, + "learning_rate": 5.826259906982351e-05, + "loss": 7.1329, + "step": 42050 + }, + { + "epoch": 3.11, + "learning_rate": 5.821237355727216e-05, + "loss": 7.3924, + "step": 42100 + }, + { + "epoch": 3.11, + "learning_rate": 5.81621480447208e-05, + "loss": 6.6189, + "step": 42150 + }, + { + "epoch": 3.11, + "learning_rate": 5.811192253216944e-05, + "loss": 7.3457, + "step": 42200 + }, + { + "epoch": 3.12, + "learning_rate": 5.806169701961809e-05, + "loss": 8.9924, + "step": 42250 + }, + { + "epoch": 3.12, + "learning_rate": 5.8011471507066725e-05, + "loss": 7.6315, + "step": 42300 + }, + { + "epoch": 3.12, + "learning_rate": 5.7961245994515376e-05, + "loss": 8.4726, + "step": 42350 + }, + { + "epoch": 3.13, + "learning_rate": 5.791102048196402e-05, + "loss": 7.1755, + "step": 42400 + }, + { + "epoch": 3.13, + "learning_rate": 5.786079496941267e-05, + "loss": 7.5716, + "step": 42450 + }, + { + "epoch": 3.13, + "learning_rate": 5.781056945686131e-05, + "loss": 7.938, + "step": 42500 + }, + { + "epoch": 3.14, + "learning_rate": 5.776034394430996e-05, + "loss": 7.3833, + "step": 42550 + }, + { + "epoch": 3.14, + "learning_rate": 5.7710118431758596e-05, + "loss": 6.4276, + "step": 42600 + }, + { + "epoch": 3.15, + "learning_rate": 5.765989291920725e-05, + "loss": 6.8907, + "step": 42650 + }, + { + "epoch": 3.15, + 
"learning_rate": 5.7609667406655884e-05, + "loss": 7.7592, + "step": 42700 + }, + { + "epoch": 3.15, + "learning_rate": 5.755944189410453e-05, + "loss": 7.4997, + "step": 42750 + }, + { + "epoch": 3.16, + "learning_rate": 5.750921638155318e-05, + "loss": 7.2821, + "step": 42800 + }, + { + "epoch": 3.16, + "learning_rate": 5.745899086900182e-05, + "loss": 7.4861, + "step": 42850 + }, + { + "epoch": 3.16, + "learning_rate": 5.740876535645047e-05, + "loss": 7.9266, + "step": 42900 + }, + { + "epoch": 3.17, + "learning_rate": 5.7358539843899105e-05, + "loss": 7.6244, + "step": 42950 + }, + { + "epoch": 3.17, + "learning_rate": 5.7308314331347756e-05, + "loss": 7.382, + "step": 43000 + }, + { + "epoch": 3.18, + "learning_rate": 5.725808881879639e-05, + "loss": 8.1925, + "step": 43050 + }, + { + "epoch": 3.18, + "learning_rate": 5.7207863306245044e-05, + "loss": 8.3185, + "step": 43100 + }, + { + "epoch": 3.18, + "learning_rate": 5.715763779369369e-05, + "loss": 7.091, + "step": 43150 + }, + { + "epoch": 3.19, + "learning_rate": 5.710741228114234e-05, + "loss": 7.8352, + "step": 43200 + }, + { + "epoch": 3.19, + "learning_rate": 5.7057186768590976e-05, + "loss": 6.6085, + "step": 43250 + }, + { + "epoch": 3.19, + "learning_rate": 5.700696125603963e-05, + "loss": 7.8052, + "step": 43300 + }, + { + "epoch": 3.2, + "learning_rate": 5.6956735743488264e-05, + "loss": 8.1999, + "step": 43350 + }, + { + "epoch": 3.2, + "learning_rate": 5.690651023093691e-05, + "loss": 7.2801, + "step": 43400 + }, + { + "epoch": 3.2, + "learning_rate": 5.685628471838555e-05, + "loss": 7.6289, + "step": 43450 + }, + { + "epoch": 3.21, + "learning_rate": 5.6806059205834196e-05, + "loss": 6.8215, + "step": 43500 + }, + { + "epoch": 3.21, + "learning_rate": 5.675583369328285e-05, + "loss": 7.1678, + "step": 43550 + }, + { + "epoch": 3.22, + "learning_rate": 5.6705608180731484e-05, + "loss": 7.6612, + "step": 43600 + }, + { + "epoch": 3.22, + "learning_rate": 5.6655382668180135e-05, + "loss": 7.8899, 
+ "step": 43650 + }, + { + "epoch": 3.22, + "learning_rate": 5.660515715562877e-05, + "loss": 7.8546, + "step": 43700 + }, + { + "epoch": 3.23, + "learning_rate": 5.655493164307742e-05, + "loss": 7.319, + "step": 43750 + }, + { + "epoch": 3.23, + "learning_rate": 5.650470613052607e-05, + "loss": 7.3317, + "step": 43800 + }, + { + "epoch": 3.23, + "learning_rate": 5.645448061797471e-05, + "loss": 7.8875, + "step": 43850 + }, + { + "epoch": 3.24, + "learning_rate": 5.6404255105423355e-05, + "loss": 7.8145, + "step": 43900 + }, + { + "epoch": 3.24, + "learning_rate": 5.635402959287199e-05, + "loss": 7.0667, + "step": 43950 + }, + { + "epoch": 3.25, + "learning_rate": 5.6303804080320643e-05, + "loss": 7.7603, + "step": 44000 + }, + { + "epoch": 3.25, + "learning_rate": 5.625357856776928e-05, + "loss": 7.6111, + "step": 44050 + }, + { + "epoch": 3.25, + "learning_rate": 5.620335305521793e-05, + "loss": 7.9858, + "step": 44100 + }, + { + "epoch": 3.26, + "learning_rate": 5.6153127542666576e-05, + "loss": 8.9896, + "step": 44150 + }, + { + "epoch": 3.26, + "learning_rate": 5.6102902030115226e-05, + "loss": 8.4081, + "step": 44200 + }, + { + "epoch": 3.26, + "learning_rate": 5.6052676517563864e-05, + "loss": 7.4748, + "step": 44250 + }, + { + "epoch": 3.27, + "learning_rate": 5.6002451005012515e-05, + "loss": 8.2133, + "step": 44300 + }, + { + "epoch": 3.27, + "learning_rate": 5.595222549246115e-05, + "loss": 7.3073, + "step": 44350 + }, + { + "epoch": 3.28, + "learning_rate": 5.59019999799098e-05, + "loss": 7.9638, + "step": 44400 + }, + { + "epoch": 3.28, + "learning_rate": 5.585177446735844e-05, + "loss": 7.9653, + "step": 44450 + }, + { + "epoch": 3.28, + "learning_rate": 5.5801548954807084e-05, + "loss": 7.8583, + "step": 44500 + }, + { + "epoch": 3.29, + "learning_rate": 5.5751323442255735e-05, + "loss": 8.0561, + "step": 44550 + }, + { + "epoch": 3.29, + "learning_rate": 5.570109792970437e-05, + "loss": 8.1276, + "step": 44600 + }, + { + "epoch": 3.29, + 
"learning_rate": 5.565087241715302e-05, + "loss": 7.7357, + "step": 44650 + }, + { + "epoch": 3.3, + "learning_rate": 5.560064690460166e-05, + "loss": 7.7529, + "step": 44700 + }, + { + "epoch": 3.3, + "learning_rate": 5.555042139205031e-05, + "loss": 7.2583, + "step": 44750 + }, + { + "epoch": 3.3, + "learning_rate": 5.550019587949895e-05, + "loss": 6.4675, + "step": 44800 + }, + { + "epoch": 3.31, + "learning_rate": 5.54499703669476e-05, + "loss": 7.3658, + "step": 44850 + }, + { + "epoch": 3.31, + "learning_rate": 5.539974485439624e-05, + "loss": 8.278, + "step": 44900 + }, + { + "epoch": 3.32, + "learning_rate": 5.5349519341844894e-05, + "loss": 7.3867, + "step": 44950 + }, + { + "epoch": 3.32, + "learning_rate": 5.529929382929353e-05, + "loss": 7.4187, + "step": 45000 + }, + { + "epoch": 3.32, + "learning_rate": 5.524906831674217e-05, + "loss": 7.5281, + "step": 45050 + }, + { + "epoch": 3.33, + "learning_rate": 5.519884280419082e-05, + "loss": 7.8815, + "step": 45100 + }, + { + "epoch": 3.33, + "learning_rate": 5.514861729163946e-05, + "loss": 7.2487, + "step": 45150 + }, + { + "epoch": 3.33, + "learning_rate": 5.509839177908811e-05, + "loss": 8.3441, + "step": 45200 + }, + { + "epoch": 3.34, + "learning_rate": 5.504816626653675e-05, + "loss": 7.4892, + "step": 45250 + }, + { + "epoch": 3.34, + "learning_rate": 5.49979407539854e-05, + "loss": 7.7789, + "step": 45300 + }, + { + "epoch": 3.35, + "learning_rate": 5.494771524143404e-05, + "loss": 7.3951, + "step": 45350 + }, + { + "epoch": 3.35, + "learning_rate": 5.489748972888269e-05, + "loss": 7.8756, + "step": 45400 + }, + { + "epoch": 3.35, + "learning_rate": 5.484726421633133e-05, + "loss": 7.9274, + "step": 45450 + }, + { + "epoch": 3.36, + "learning_rate": 5.479703870377998e-05, + "loss": 8.1525, + "step": 45500 + }, + { + "epoch": 3.36, + "learning_rate": 5.4746813191228616e-05, + "loss": 7.5597, + "step": 45550 + }, + { + "epoch": 3.36, + "learning_rate": 5.469658767867726e-05, + "loss": 7.8939, + 
"step": 45600 + }, + { + "epoch": 3.37, + "learning_rate": 5.464636216612591e-05, + "loss": 6.1451, + "step": 45650 + }, + { + "epoch": 3.37, + "learning_rate": 5.459613665357455e-05, + "loss": 7.224, + "step": 45700 + }, + { + "epoch": 3.37, + "learning_rate": 5.45459111410232e-05, + "loss": 7.2489, + "step": 45750 + }, + { + "epoch": 3.38, + "learning_rate": 5.4495685628471836e-05, + "loss": 7.4162, + "step": 45800 + }, + { + "epoch": 3.38, + "learning_rate": 5.444546011592049e-05, + "loss": 6.8503, + "step": 45850 + }, + { + "epoch": 3.39, + "learning_rate": 5.4395234603369124e-05, + "loss": 6.7087, + "step": 45900 + }, + { + "epoch": 3.39, + "learning_rate": 5.4345009090817775e-05, + "loss": 6.9697, + "step": 45950 + }, + { + "epoch": 3.39, + "learning_rate": 5.429478357826642e-05, + "loss": 7.8369, + "step": 46000 + }, + { + "epoch": 3.4, + "learning_rate": 5.424455806571507e-05, + "loss": 7.7567, + "step": 46050 + }, + { + "epoch": 3.4, + "learning_rate": 5.419433255316371e-05, + "loss": 6.6241, + "step": 46100 + }, + { + "epoch": 3.4, + "learning_rate": 5.414410704061236e-05, + "loss": 7.5218, + "step": 46150 + }, + { + "epoch": 3.41, + "learning_rate": 5.4093881528060995e-05, + "loss": 7.2338, + "step": 46200 + }, + { + "epoch": 3.41, + "learning_rate": 5.404365601550964e-05, + "loss": 7.0707, + "step": 46250 + }, + { + "epoch": 3.42, + "learning_rate": 5.3993430502958283e-05, + "loss": 7.6922, + "step": 46300 + }, + { + "epoch": 3.42, + "learning_rate": 5.394320499040693e-05, + "loss": 8.6056, + "step": 46350 + }, + { + "epoch": 3.42, + "learning_rate": 5.389297947785558e-05, + "loss": 7.4641, + "step": 46400 + }, + { + "epoch": 3.43, + "learning_rate": 5.3842753965304216e-05, + "loss": 7.1716, + "step": 46450 + }, + { + "epoch": 3.43, + "learning_rate": 5.3792528452752866e-05, + "loss": 7.6382, + "step": 46500 + }, + { + "epoch": 3.43, + "learning_rate": 5.3742302940201504e-05, + "loss": 7.0739, + "step": 46550 + }, + { + "epoch": 3.44, + "learning_rate": 
5.3692077427650155e-05, + "loss": 7.8667, + "step": 46600 + }, + { + "epoch": 3.44, + "learning_rate": 5.36418519150988e-05, + "loss": 7.559, + "step": 46650 + }, + { + "epoch": 3.44, + "learning_rate": 5.359162640254744e-05, + "loss": 7.6078, + "step": 46700 + }, + { + "epoch": 3.45, + "learning_rate": 5.354140088999609e-05, + "loss": 7.7994, + "step": 46750 + }, + { + "epoch": 3.45, + "learning_rate": 5.3491175377444724e-05, + "loss": 7.0418, + "step": 46800 + }, + { + "epoch": 3.46, + "learning_rate": 5.3440949864893375e-05, + "loss": 7.3261, + "step": 46850 + }, + { + "epoch": 3.46, + "learning_rate": 5.339072435234201e-05, + "loss": 7.9914, + "step": 46900 + }, + { + "epoch": 3.46, + "learning_rate": 5.334049883979066e-05, + "loss": 7.1998, + "step": 46950 + }, + { + "epoch": 3.47, + "learning_rate": 5.329027332723931e-05, + "loss": 7.3343, + "step": 47000 + }, + { + "epoch": 3.47, + "learning_rate": 5.324004781468796e-05, + "loss": 8.1604, + "step": 47050 + }, + { + "epoch": 3.47, + "learning_rate": 5.3189822302136595e-05, + "loss": 7.5405, + "step": 47100 + }, + { + "epoch": 3.48, + "learning_rate": 5.3139596789585246e-05, + "loss": 7.9409, + "step": 47150 + }, + { + "epoch": 3.48, + "learning_rate": 5.308937127703388e-05, + "loss": 8.0573, + "step": 47200 + }, + { + "epoch": 3.49, + "learning_rate": 5.3039145764482534e-05, + "loss": 7.2927, + "step": 47250 + }, + { + "epoch": 3.49, + "learning_rate": 5.298892025193117e-05, + "loss": 6.9476, + "step": 47300 + }, + { + "epoch": 3.49, + "learning_rate": 5.2938694739379815e-05, + "loss": 7.1999, + "step": 47350 + }, + { + "epoch": 3.5, + "learning_rate": 5.2888469226828466e-05, + "loss": 7.8224, + "step": 47400 + }, + { + "epoch": 3.5, + "learning_rate": 5.2838243714277103e-05, + "loss": 8.1369, + "step": 47450 + }, + { + "epoch": 3.5, + "learning_rate": 5.2788018201725754e-05, + "loss": 6.7302, + "step": 47500 + }, + { + "epoch": 3.51, + "learning_rate": 5.273779268917439e-05, + "loss": 8.0819, + "step": 47550 
+ }, + { + "epoch": 3.51, + "learning_rate": 5.268756717662304e-05, + "loss": 7.8832, + "step": 47600 + }, + { + "epoch": 3.51, + "learning_rate": 5.263734166407168e-05, + "loss": 8.4479, + "step": 47650 + }, + { + "epoch": 3.52, + "learning_rate": 5.258711615152033e-05, + "loss": 7.7838, + "step": 47700 + }, + { + "epoch": 3.52, + "learning_rate": 5.2536890638968975e-05, + "loss": 8.3843, + "step": 47750 + }, + { + "epoch": 3.53, + "learning_rate": 5.2486665126417625e-05, + "loss": 6.9055, + "step": 47800 + }, + { + "epoch": 3.53, + "learning_rate": 5.243643961386626e-05, + "loss": 6.6339, + "step": 47850 + }, + { + "epoch": 3.53, + "learning_rate": 5.23862141013149e-05, + "loss": 7.0316, + "step": 47900 + }, + { + "epoch": 3.54, + "learning_rate": 5.233598858876355e-05, + "loss": 7.4569, + "step": 47950 + }, + { + "epoch": 3.54, + "learning_rate": 5.228576307621219e-05, + "loss": 7.6204, + "step": 48000 + }, + { + "epoch": 3.54, + "learning_rate": 5.223553756366084e-05, + "loss": 7.1085, + "step": 48050 + }, + { + "epoch": 3.55, + "learning_rate": 5.218531205110948e-05, + "loss": 7.7254, + "step": 48100 + }, + { + "epoch": 3.55, + "learning_rate": 5.2135086538558134e-05, + "loss": 7.1486, + "step": 48150 + }, + { + "epoch": 3.56, + "learning_rate": 5.208486102600677e-05, + "loss": 6.9297, + "step": 48200 + }, + { + "epoch": 3.56, + "learning_rate": 5.203463551345542e-05, + "loss": 7.5314, + "step": 48250 + }, + { + "epoch": 3.56, + "learning_rate": 5.198441000090406e-05, + "loss": 7.68, + "step": 48300 + }, + { + "epoch": 3.57, + "learning_rate": 5.193418448835271e-05, + "loss": 7.9467, + "step": 48350 + }, + { + "epoch": 3.57, + "learning_rate": 5.188395897580135e-05, + "loss": 6.7188, + "step": 48400 + }, + { + "epoch": 3.57, + "learning_rate": 5.183373346325e-05, + "loss": 7.7619, + "step": 48450 + }, + { + "epoch": 3.58, + "learning_rate": 5.178350795069864e-05, + "loss": 7.6537, + "step": 48500 + }, + { + "epoch": 3.58, + "learning_rate": 
5.173328243814728e-05, + "loss": 6.9593, + "step": 48550 + }, + { + "epoch": 3.58, + "learning_rate": 5.168305692559593e-05, + "loss": 7.4834, + "step": 48600 + }, + { + "epoch": 3.59, + "learning_rate": 5.163283141304457e-05, + "loss": 8.2864, + "step": 48650 + }, + { + "epoch": 3.59, + "learning_rate": 5.158260590049322e-05, + "loss": 7.234, + "step": 48700 + }, + { + "epoch": 3.6, + "learning_rate": 5.1532380387941856e-05, + "loss": 7.2513, + "step": 48750 + }, + { + "epoch": 3.6, + "learning_rate": 5.1482154875390506e-05, + "loss": 7.508, + "step": 48800 + }, + { + "epoch": 3.6, + "learning_rate": 5.143192936283915e-05, + "loss": 7.1513, + "step": 48850 + }, + { + "epoch": 3.61, + "learning_rate": 5.13817038502878e-05, + "loss": 7.8882, + "step": 48900 + }, + { + "epoch": 3.61, + "learning_rate": 5.133147833773644e-05, + "loss": 8.0859, + "step": 48950 + }, + { + "epoch": 3.61, + "learning_rate": 5.128125282518509e-05, + "loss": 7.5506, + "step": 49000 + }, + { + "epoch": 3.62, + "learning_rate": 5.123102731263373e-05, + "loss": 7.9777, + "step": 49050 + }, + { + "epoch": 3.62, + "learning_rate": 5.118080180008237e-05, + "loss": 8.3599, + "step": 49100 + }, + { + "epoch": 3.63, + "learning_rate": 5.1130576287531015e-05, + "loss": 7.105, + "step": 49150 + }, + { + "epoch": 3.63, + "learning_rate": 5.108035077497966e-05, + "loss": 8.2692, + "step": 49200 + }, + { + "epoch": 3.63, + "learning_rate": 5.103012526242831e-05, + "loss": 7.9098, + "step": 49250 + }, + { + "epoch": 3.64, + "learning_rate": 5.097989974987695e-05, + "loss": 7.1698, + "step": 49300 + }, + { + "epoch": 3.64, + "learning_rate": 5.09296742373256e-05, + "loss": 7.406, + "step": 49350 + }, + { + "epoch": 3.64, + "learning_rate": 5.0879448724774235e-05, + "loss": 8.3276, + "step": 49400 + }, + { + "epoch": 3.65, + "learning_rate": 5.0829223212222886e-05, + "loss": 7.5714, + "step": 49450 + }, + { + "epoch": 3.65, + "learning_rate": 5.077899769967153e-05, + "loss": 7.0839, + "step": 49500 + }, + { 
+ "epoch": 3.65, + "learning_rate": 5.0728772187120174e-05, + "loss": 7.0589, + "step": 49550 + }, + { + "epoch": 3.66, + "learning_rate": 5.067854667456882e-05, + "loss": 7.4998, + "step": 49600 + }, + { + "epoch": 3.66, + "learning_rate": 5.0628321162017455e-05, + "loss": 7.3495, + "step": 49650 + }, + { + "epoch": 3.67, + "learning_rate": 5.0578095649466106e-05, + "loss": 7.5101, + "step": 49700 + }, + { + "epoch": 3.67, + "learning_rate": 5.0527870136914743e-05, + "loss": 6.7707, + "step": 49750 + }, + { + "epoch": 3.67, + "learning_rate": 5.0477644624363394e-05, + "loss": 7.5822, + "step": 49800 + }, + { + "epoch": 3.68, + "learning_rate": 5.042741911181204e-05, + "loss": 6.5937, + "step": 49850 + }, + { + "epoch": 3.68, + "learning_rate": 5.037719359926069e-05, + "loss": 7.2497, + "step": 49900 + }, + { + "epoch": 3.68, + "learning_rate": 5.0326968086709326e-05, + "loss": 7.35, + "step": 49950 + }, + { + "epoch": 3.69, + "learning_rate": 5.027674257415798e-05, + "loss": 7.767, + "step": 50000 + }, + { + "epoch": 3.69, + "learning_rate": 5.0226517061606615e-05, + "loss": 8.3228, + "step": 50050 + }, + { + "epoch": 3.7, + "learning_rate": 5.0176291549055265e-05, + "loss": 7.6905, + "step": 50100 + }, + { + "epoch": 3.7, + "learning_rate": 5.01260660365039e-05, + "loss": 7.8275, + "step": 50150 + }, + { + "epoch": 3.7, + "learning_rate": 5.007584052395255e-05, + "loss": 8.0724, + "step": 50200 + }, + { + "epoch": 3.71, + "learning_rate": 5.00256150114012e-05, + "loss": 7.0501, + "step": 50250 + }, + { + "epoch": 3.71, + "learning_rate": 4.997538949884984e-05, + "loss": 7.4269, + "step": 50300 + }, + { + "epoch": 3.71, + "learning_rate": 4.9925163986298486e-05, + "loss": 7.5186, + "step": 50350 + }, + { + "epoch": 3.72, + "learning_rate": 4.987493847374713e-05, + "loss": 8.2606, + "step": 50400 + }, + { + "epoch": 3.72, + "learning_rate": 4.9824712961195774e-05, + "loss": 8.2097, + "step": 50450 + }, + { + "epoch": 3.73, + "learning_rate": 4.977448744864441e-05, 
+ "loss": 7.468, + "step": 50500 + }, + { + "epoch": 3.73, + "learning_rate": 4.9724261936093055e-05, + "loss": 8.2075, + "step": 50550 + }, + { + "epoch": 3.73, + "learning_rate": 4.9674036423541706e-05, + "loss": 7.3928, + "step": 50600 + }, + { + "epoch": 3.74, + "learning_rate": 4.962381091099035e-05, + "loss": 7.2907, + "step": 50650 + }, + { + "epoch": 3.74, + "learning_rate": 4.9573585398438994e-05, + "loss": 7.706, + "step": 50700 + }, + { + "epoch": 3.74, + "learning_rate": 4.952335988588764e-05, + "loss": 7.301, + "step": 50750 + }, + { + "epoch": 3.75, + "learning_rate": 4.947313437333628e-05, + "loss": 6.9109, + "step": 50800 + }, + { + "epoch": 3.75, + "learning_rate": 4.9422908860784926e-05, + "loss": 6.6967, + "step": 50850 + }, + { + "epoch": 3.75, + "learning_rate": 4.937268334823357e-05, + "loss": 5.9484, + "step": 50900 + }, + { + "epoch": 3.76, + "learning_rate": 4.9322457835682214e-05, + "loss": 7.8288, + "step": 50950 + }, + { + "epoch": 3.76, + "learning_rate": 4.9272232323130865e-05, + "loss": 7.3987, + "step": 51000 + }, + { + "epoch": 3.77, + "learning_rate": 4.92220068105795e-05, + "loss": 7.3714, + "step": 51050 + }, + { + "epoch": 3.77, + "learning_rate": 4.9171781298028147e-05, + "loss": 7.258, + "step": 51100 + }, + { + "epoch": 3.77, + "learning_rate": 4.912155578547679e-05, + "loss": 6.8541, + "step": 51150 + }, + { + "epoch": 3.78, + "learning_rate": 4.9071330272925435e-05, + "loss": 7.085, + "step": 51200 + }, + { + "epoch": 3.78, + "learning_rate": 4.902110476037408e-05, + "loss": 6.7827, + "step": 51250 + }, + { + "epoch": 3.78, + "learning_rate": 4.897087924782273e-05, + "loss": 6.6806, + "step": 51300 + }, + { + "epoch": 3.79, + "learning_rate": 4.8920653735271374e-05, + "loss": 7.2918, + "step": 51350 + }, + { + "epoch": 3.79, + "learning_rate": 4.887042822272002e-05, + "loss": 7.9022, + "step": 51400 + }, + { + "epoch": 3.8, + "learning_rate": 4.882020271016866e-05, + "loss": 7.6094, + "step": 51450 + }, + { + "epoch": 3.8, 
+ "learning_rate": 4.8769977197617306e-05, + "loss": 8.1048, + "step": 51500 + }, + { + "epoch": 3.8, + "learning_rate": 4.871975168506595e-05, + "loss": 6.9056, + "step": 51550 + }, + { + "epoch": 3.81, + "learning_rate": 4.866952617251459e-05, + "loss": 6.4347, + "step": 51600 + }, + { + "epoch": 3.81, + "learning_rate": 4.861930065996324e-05, + "loss": 7.307, + "step": 51650 + }, + { + "epoch": 3.81, + "learning_rate": 4.856907514741188e-05, + "loss": 7.649, + "step": 51700 + }, + { + "epoch": 3.82, + "learning_rate": 4.8518849634860526e-05, + "loss": 6.7706, + "step": 51750 + }, + { + "epoch": 3.82, + "learning_rate": 4.846862412230917e-05, + "loss": 6.7943, + "step": 51800 + }, + { + "epoch": 3.82, + "learning_rate": 4.8418398609757814e-05, + "loss": 7.654, + "step": 51850 + }, + { + "epoch": 3.83, + "learning_rate": 4.836817309720646e-05, + "loss": 7.6245, + "step": 51900 + }, + { + "epoch": 3.83, + "learning_rate": 4.83179475846551e-05, + "loss": 7.8284, + "step": 51950 + }, + { + "epoch": 3.84, + "learning_rate": 4.8267722072103746e-05, + "loss": 6.9516, + "step": 52000 + }, + { + "epoch": 3.84, + "learning_rate": 4.82174965595524e-05, + "loss": 7.1367, + "step": 52050 + }, + { + "epoch": 3.84, + "learning_rate": 4.816727104700104e-05, + "loss": 7.4153, + "step": 52100 + }, + { + "epoch": 3.85, + "learning_rate": 4.8117045534449685e-05, + "loss": 6.5358, + "step": 52150 + }, + { + "epoch": 3.85, + "learning_rate": 4.806682002189832e-05, + "loss": 7.5443, + "step": 52200 + }, + { + "epoch": 3.85, + "learning_rate": 4.8016594509346967e-05, + "loss": 7.8184, + "step": 52250 + }, + { + "epoch": 3.86, + "learning_rate": 4.796636899679561e-05, + "loss": 6.9702, + "step": 52300 + }, + { + "epoch": 3.86, + "learning_rate": 4.791614348424426e-05, + "loss": 8.3988, + "step": 52350 + }, + { + "epoch": 3.87, + "learning_rate": 4.7865917971692905e-05, + "loss": 8.1025, + "step": 52400 + }, + { + "epoch": 3.87, + "learning_rate": 4.781569245914155e-05, + "loss": 7.81, + 
"step": 52450 + }, + { + "epoch": 3.87, + "learning_rate": 4.7765466946590194e-05, + "loss": 6.6809, + "step": 52500 + }, + { + "epoch": 3.88, + "learning_rate": 4.771524143403884e-05, + "loss": 6.81, + "step": 52550 + }, + { + "epoch": 3.88, + "learning_rate": 4.766501592148748e-05, + "loss": 7.1717, + "step": 52600 + }, + { + "epoch": 3.88, + "learning_rate": 4.7614790408936126e-05, + "loss": 7.3114, + "step": 52650 + }, + { + "epoch": 3.89, + "learning_rate": 4.756456489638477e-05, + "loss": 7.2819, + "step": 52700 + }, + { + "epoch": 3.89, + "learning_rate": 4.7514339383833414e-05, + "loss": 6.6964, + "step": 52750 + }, + { + "epoch": 3.89, + "learning_rate": 4.746411387128206e-05, + "loss": 8.1118, + "step": 52800 + }, + { + "epoch": 3.9, + "learning_rate": 4.74138883587307e-05, + "loss": 8.1901, + "step": 52850 + }, + { + "epoch": 3.9, + "learning_rate": 4.7363662846179346e-05, + "loss": 6.8883, + "step": 52900 + }, + { + "epoch": 3.91, + "learning_rate": 4.731343733362799e-05, + "loss": 7.2554, + "step": 52950 + }, + { + "epoch": 3.91, + "learning_rate": 4.7263211821076634e-05, + "loss": 7.402, + "step": 53000 + }, + { + "epoch": 3.91, + "learning_rate": 4.721298630852528e-05, + "loss": 8.8808, + "step": 53050 + }, + { + "epoch": 3.92, + "learning_rate": 4.716276079597393e-05, + "loss": 7.1652, + "step": 53100 + }, + { + "epoch": 3.92, + "learning_rate": 4.711253528342257e-05, + "loss": 6.884, + "step": 53150 + }, + { + "epoch": 3.92, + "learning_rate": 4.706230977087122e-05, + "loss": 7.4472, + "step": 53200 + }, + { + "epoch": 3.93, + "learning_rate": 4.701208425831986e-05, + "loss": 6.8787, + "step": 53250 + }, + { + "epoch": 3.93, + "learning_rate": 4.6961858745768505e-05, + "loss": 6.9316, + "step": 53300 + }, + { + "epoch": 3.94, + "learning_rate": 4.691163323321714e-05, + "loss": 7.1614, + "step": 53350 + }, + { + "epoch": 3.94, + "learning_rate": 4.6861407720665787e-05, + "loss": 7.193, + "step": 53400 + }, + { + "epoch": 3.94, + "learning_rate": 
4.681118220811444e-05, + "loss": 7.5875, + "step": 53450 + }, + { + "epoch": 3.95, + "learning_rate": 4.676095669556308e-05, + "loss": 7.0836, + "step": 53500 + }, + { + "epoch": 3.95, + "learning_rate": 4.6710731183011725e-05, + "loss": 7.2054, + "step": 53550 + }, + { + "epoch": 3.95, + "learning_rate": 4.666050567046037e-05, + "loss": 6.95, + "step": 53600 + }, + { + "epoch": 3.96, + "learning_rate": 4.6610280157909014e-05, + "loss": 6.6366, + "step": 53650 + }, + { + "epoch": 3.96, + "learning_rate": 4.656005464535766e-05, + "loss": 6.7976, + "step": 53700 + }, + { + "epoch": 3.96, + "learning_rate": 4.65098291328063e-05, + "loss": 7.1371, + "step": 53750 + }, + { + "epoch": 3.97, + "learning_rate": 4.6459603620254946e-05, + "loss": 6.7457, + "step": 53800 + }, + { + "epoch": 3.97, + "learning_rate": 4.6409378107703597e-05, + "loss": 6.6139, + "step": 53850 + }, + { + "epoch": 3.98, + "learning_rate": 4.6359152595152234e-05, + "loss": 7.9291, + "step": 53900 + }, + { + "epoch": 3.98, + "learning_rate": 4.630892708260088e-05, + "loss": 7.4235, + "step": 53950 + }, + { + "epoch": 3.98, + "learning_rate": 4.625870157004952e-05, + "loss": 6.8609, + "step": 54000 + }, + { + "epoch": 3.99, + "learning_rate": 4.6208476057498166e-05, + "loss": 6.6128, + "step": 54050 + }, + { + "epoch": 3.99, + "learning_rate": 4.615825054494681e-05, + "loss": 7.0313, + "step": 54100 + }, + { + "epoch": 3.99, + "learning_rate": 4.610802503239546e-05, + "loss": 7.3654, + "step": 54150 + }, + { + "epoch": 4.0, + "learning_rate": 4.6057799519844105e-05, + "loss": 8.266, + "step": 54200 + }, + { + "epoch": 4.0, + "learning_rate": 4.600757400729275e-05, + "loss": 7.9471, + "step": 54250 + }, + { + "epoch": 4.01, + "learning_rate": 4.595734849474139e-05, + "loss": 6.0877, + "step": 54300 + }, + { + "epoch": 4.01, + "learning_rate": 4.590712298219004e-05, + "loss": 6.7453, + "step": 54350 + }, + { + "epoch": 4.01, + "learning_rate": 4.585689746963868e-05, + "loss": 5.8985, + "step": 54400 + 
}, + { + "epoch": 4.02, + "learning_rate": 4.580667195708732e-05, + "loss": 7.4527, + "step": 54450 + }, + { + "epoch": 4.02, + "learning_rate": 4.575644644453597e-05, + "loss": 7.0419, + "step": 54500 + }, + { + "epoch": 4.02, + "learning_rate": 4.570622093198461e-05, + "loss": 6.281, + "step": 54550 + }, + { + "epoch": 4.03, + "learning_rate": 4.565599541943326e-05, + "loss": 6.6096, + "step": 54600 + }, + { + "epoch": 4.03, + "learning_rate": 4.56057699068819e-05, + "loss": 7.0341, + "step": 54650 + }, + { + "epoch": 4.03, + "learning_rate": 4.5555544394330545e-05, + "loss": 6.621, + "step": 54700 + }, + { + "epoch": 4.04, + "learning_rate": 4.550531888177919e-05, + "loss": 7.4405, + "step": 54750 + }, + { + "epoch": 4.04, + "learning_rate": 4.5455093369227834e-05, + "loss": 7.2506, + "step": 54800 + }, + { + "epoch": 4.05, + "learning_rate": 4.540486785667648e-05, + "loss": 7.1534, + "step": 54850 + }, + { + "epoch": 4.05, + "learning_rate": 4.535464234412513e-05, + "loss": 6.5829, + "step": 54900 + }, + { + "epoch": 4.05, + "learning_rate": 4.530441683157377e-05, + "loss": 7.0338, + "step": 54950 + }, + { + "epoch": 4.06, + "learning_rate": 4.5254191319022417e-05, + "loss": 6.6234, + "step": 55000 + }, + { + "epoch": 4.06, + "learning_rate": 4.5203965806471054e-05, + "loss": 6.2412, + "step": 55050 + }, + { + "epoch": 4.06, + "learning_rate": 4.51537402939197e-05, + "loss": 6.3439, + "step": 55100 + }, + { + "epoch": 4.07, + "learning_rate": 4.510351478136834e-05, + "loss": 6.8272, + "step": 55150 + }, + { + "epoch": 4.07, + "learning_rate": 4.5053289268816986e-05, + "loss": 6.4758, + "step": 55200 + }, + { + "epoch": 4.08, + "learning_rate": 4.500306375626564e-05, + "loss": 6.434, + "step": 55250 + }, + { + "epoch": 4.08, + "learning_rate": 4.495283824371428e-05, + "loss": 6.5471, + "step": 55300 + }, + { + "epoch": 4.08, + "learning_rate": 4.4902612731162925e-05, + "loss": 6.5088, + "step": 55350 + }, + { + "epoch": 4.09, + "learning_rate": 
4.485238721861157e-05, + "loss": 6.6941, + "step": 55400 + }, + { + "epoch": 4.09, + "learning_rate": 4.480216170606021e-05, + "loss": 6.3248, + "step": 55450 + }, + { + "epoch": 4.09, + "learning_rate": 4.475193619350886e-05, + "loss": 7.2989, + "step": 55500 + }, + { + "epoch": 4.1, + "learning_rate": 4.47017106809575e-05, + "loss": 7.0947, + "step": 55550 + }, + { + "epoch": 4.1, + "learning_rate": 4.4651485168406145e-05, + "loss": 6.4896, + "step": 55600 + }, + { + "epoch": 4.1, + "learning_rate": 4.460125965585479e-05, + "loss": 5.9249, + "step": 55650 + }, + { + "epoch": 4.11, + "learning_rate": 4.455103414330343e-05, + "loss": 6.7801, + "step": 55700 + }, + { + "epoch": 4.11, + "learning_rate": 4.450080863075208e-05, + "loss": 6.2216, + "step": 55750 + }, + { + "epoch": 4.12, + "learning_rate": 4.445058311820072e-05, + "loss": 6.8346, + "step": 55800 + }, + { + "epoch": 4.12, + "learning_rate": 4.4400357605649366e-05, + "loss": 7.2863, + "step": 55850 + }, + { + "epoch": 4.12, + "learning_rate": 4.435013209309801e-05, + "loss": 7.406, + "step": 55900 + }, + { + "epoch": 4.13, + "learning_rate": 4.429990658054666e-05, + "loss": 6.0165, + "step": 55950 + }, + { + "epoch": 4.13, + "learning_rate": 4.4249681067995304e-05, + "loss": 6.8052, + "step": 56000 + }, + { + "epoch": 4.13, + "learning_rate": 4.419945555544395e-05, + "loss": 6.679, + "step": 56050 + }, + { + "epoch": 4.14, + "learning_rate": 4.414923004289259e-05, + "loss": 6.2087, + "step": 56100 + }, + { + "epoch": 4.14, + "learning_rate": 4.4099004530341237e-05, + "loss": 6.5904, + "step": 56150 + }, + { + "epoch": 4.15, + "learning_rate": 4.4048779017789874e-05, + "loss": 6.4147, + "step": 56200 + }, + { + "epoch": 4.15, + "learning_rate": 4.399855350523852e-05, + "loss": 6.6465, + "step": 56250 + }, + { + "epoch": 4.15, + "learning_rate": 4.394832799268717e-05, + "loss": 7.11, + "step": 56300 + }, + { + "epoch": 4.16, + "learning_rate": 4.389810248013581e-05, + "loss": 7.0558, + "step": 56350 + }, + 
{ + "epoch": 4.16, + "learning_rate": 4.384787696758446e-05, + "loss": 6.922, + "step": 56400 + }, + { + "epoch": 4.16, + "learning_rate": 4.37976514550331e-05, + "loss": 7.2125, + "step": 56450 + }, + { + "epoch": 4.17, + "learning_rate": 4.3747425942481745e-05, + "loss": 6.4719, + "step": 56500 + }, + { + "epoch": 4.17, + "learning_rate": 4.369720042993039e-05, + "loss": 7.204, + "step": 56550 + }, + { + "epoch": 4.17, + "learning_rate": 4.364697491737903e-05, + "loss": 7.2371, + "step": 56600 + }, + { + "epoch": 4.18, + "learning_rate": 4.359674940482768e-05, + "loss": 6.6131, + "step": 56650 + }, + { + "epoch": 4.18, + "learning_rate": 4.354652389227633e-05, + "loss": 6.6349, + "step": 56700 + }, + { + "epoch": 4.19, + "learning_rate": 4.3496298379724965e-05, + "loss": 5.9137, + "step": 56750 + }, + { + "epoch": 4.19, + "learning_rate": 4.344607286717361e-05, + "loss": 6.5402, + "step": 56800 + }, + { + "epoch": 4.19, + "learning_rate": 4.339584735462225e-05, + "loss": 7.3351, + "step": 56850 + }, + { + "epoch": 4.2, + "learning_rate": 4.33456218420709e-05, + "loss": 8.1387, + "step": 56900 + }, + { + "epoch": 4.2, + "learning_rate": 4.329539632951954e-05, + "loss": 7.0783, + "step": 56950 + }, + { + "epoch": 4.2, + "learning_rate": 4.324517081696819e-05, + "loss": 6.947, + "step": 57000 + }, + { + "epoch": 4.21, + "learning_rate": 4.3194945304416836e-05, + "loss": 6.1526, + "step": 57050 + }, + { + "epoch": 4.21, + "learning_rate": 4.314471979186548e-05, + "loss": 7.273, + "step": 57100 + }, + { + "epoch": 4.22, + "learning_rate": 4.3094494279314124e-05, + "loss": 7.0958, + "step": 57150 + }, + { + "epoch": 4.22, + "learning_rate": 4.304426876676277e-05, + "loss": 6.4413, + "step": 57200 + }, + { + "epoch": 4.22, + "learning_rate": 4.299404325421141e-05, + "loss": 6.597, + "step": 57250 + }, + { + "epoch": 4.23, + "learning_rate": 4.294381774166006e-05, + "loss": 6.6893, + "step": 57300 + }, + { + "epoch": 4.23, + "learning_rate": 4.28935922291087e-05, + 
"loss": 6.4746, + "step": 57350 + }, + { + "epoch": 4.23, + "learning_rate": 4.2843366716557345e-05, + "loss": 7.376, + "step": 57400 + }, + { + "epoch": 4.24, + "learning_rate": 4.279314120400599e-05, + "loss": 7.2823, + "step": 57450 + }, + { + "epoch": 4.24, + "learning_rate": 4.274291569145463e-05, + "loss": 6.3184, + "step": 57500 + }, + { + "epoch": 4.25, + "learning_rate": 4.269269017890328e-05, + "loss": 6.4526, + "step": 57550 + }, + { + "epoch": 4.25, + "learning_rate": 4.264246466635192e-05, + "loss": 6.7892, + "step": 57600 + }, + { + "epoch": 4.25, + "learning_rate": 4.2592239153800565e-05, + "loss": 6.2082, + "step": 57650 + }, + { + "epoch": 4.26, + "learning_rate": 4.254201364124921e-05, + "loss": 7.1488, + "step": 57700 + }, + { + "epoch": 4.26, + "learning_rate": 4.249178812869786e-05, + "loss": 6.9399, + "step": 57750 + }, + { + "epoch": 4.26, + "learning_rate": 4.2441562616146504e-05, + "loss": 6.8596, + "step": 57800 + }, + { + "epoch": 4.27, + "learning_rate": 4.239133710359515e-05, + "loss": 6.8899, + "step": 57850 + }, + { + "epoch": 4.27, + "learning_rate": 4.2341111591043785e-05, + "loss": 6.8196, + "step": 57900 + }, + { + "epoch": 4.27, + "learning_rate": 4.229088607849243e-05, + "loss": 7.5114, + "step": 57950 + }, + { + "epoch": 4.28, + "learning_rate": 4.224066056594107e-05, + "loss": 6.4122, + "step": 58000 + }, + { + "epoch": 4.28, + "learning_rate": 4.219043505338972e-05, + "loss": 6.9228, + "step": 58050 + }, + { + "epoch": 4.29, + "learning_rate": 4.214020954083837e-05, + "loss": 6.3687, + "step": 58100 + }, + { + "epoch": 4.29, + "learning_rate": 4.208998402828701e-05, + "loss": 6.8616, + "step": 58150 + }, + { + "epoch": 4.29, + "learning_rate": 4.2039758515735656e-05, + "loss": 6.002, + "step": 58200 + }, + { + "epoch": 4.3, + "learning_rate": 4.19895330031843e-05, + "loss": 6.0985, + "step": 58250 + }, + { + "epoch": 4.3, + "learning_rate": 4.1939307490632944e-05, + "loss": 6.5857, + "step": 58300 + }, + { + "epoch": 4.3, + 
"learning_rate": 4.188908197808159e-05, + "loss": 6.257, + "step": 58350 + }, + { + "epoch": 4.31, + "learning_rate": 4.183885646553023e-05, + "loss": 6.9222, + "step": 58400 + }, + { + "epoch": 4.31, + "learning_rate": 4.178863095297888e-05, + "loss": 6.7801, + "step": 58450 + }, + { + "epoch": 4.32, + "learning_rate": 4.173840544042752e-05, + "loss": 6.3861, + "step": 58500 + }, + { + "epoch": 4.32, + "learning_rate": 4.1688179927876165e-05, + "loss": 6.8685, + "step": 58550 + }, + { + "epoch": 4.32, + "learning_rate": 4.163795441532481e-05, + "loss": 6.9948, + "step": 58600 + }, + { + "epoch": 4.33, + "learning_rate": 4.158772890277345e-05, + "loss": 6.0965, + "step": 58650 + }, + { + "epoch": 4.33, + "learning_rate": 4.15375033902221e-05, + "loss": 7.282, + "step": 58700 + }, + { + "epoch": 4.33, + "learning_rate": 4.148727787767074e-05, + "loss": 7.6165, + "step": 58750 + }, + { + "epoch": 4.34, + "learning_rate": 4.143705236511939e-05, + "loss": 6.734, + "step": 58800 + }, + { + "epoch": 4.34, + "learning_rate": 4.1386826852568036e-05, + "loss": 6.0334, + "step": 58850 + }, + { + "epoch": 4.34, + "learning_rate": 4.133660134001668e-05, + "loss": 6.5306, + "step": 58900 + }, + { + "epoch": 4.35, + "learning_rate": 4.1286375827465324e-05, + "loss": 7.4324, + "step": 58950 + }, + { + "epoch": 4.35, + "learning_rate": 4.123615031491397e-05, + "loss": 7.234, + "step": 59000 + }, + { + "epoch": 4.36, + "learning_rate": 4.1185924802362605e-05, + "loss": 6.7196, + "step": 59050 + }, + { + "epoch": 4.36, + "learning_rate": 4.113569928981125e-05, + "loss": 6.0641, + "step": 59100 + }, + { + "epoch": 4.36, + "learning_rate": 4.10854737772599e-05, + "loss": 5.9373, + "step": 59150 + }, + { + "epoch": 4.37, + "learning_rate": 4.1035248264708544e-05, + "loss": 6.4428, + "step": 59200 + }, + { + "epoch": 4.37, + "learning_rate": 4.098502275215719e-05, + "loss": 6.7303, + "step": 59250 + }, + { + "epoch": 4.37, + "learning_rate": 4.093479723960583e-05, + "loss": 6.5585, + 
"step": 59300 + }, + { + "epoch": 4.38, + "learning_rate": 4.0884571727054476e-05, + "loss": 5.837, + "step": 59350 + }, + { + "epoch": 4.38, + "learning_rate": 4.083434621450312e-05, + "loss": 6.574, + "step": 59400 + }, + { + "epoch": 4.39, + "learning_rate": 4.0784120701951764e-05, + "loss": 7.4319, + "step": 59450 + }, + { + "epoch": 4.39, + "learning_rate": 4.073389518940041e-05, + "loss": 6.1092, + "step": 59500 + }, + { + "epoch": 4.39, + "learning_rate": 4.068366967684906e-05, + "loss": 6.4683, + "step": 59550 + }, + { + "epoch": 4.4, + "learning_rate": 4.06334441642977e-05, + "loss": 7.1323, + "step": 59600 + }, + { + "epoch": 4.4, + "learning_rate": 4.058321865174634e-05, + "loss": 6.7011, + "step": 59650 + }, + { + "epoch": 4.4, + "learning_rate": 4.0532993139194985e-05, + "loss": 6.7281, + "step": 59700 + }, + { + "epoch": 4.41, + "learning_rate": 4.048276762664363e-05, + "loss": 6.0361, + "step": 59750 + }, + { + "epoch": 4.41, + "learning_rate": 4.043254211409227e-05, + "loss": 6.5359, + "step": 59800 + }, + { + "epoch": 4.41, + "learning_rate": 4.0382316601540924e-05, + "loss": 7.3576, + "step": 59850 + }, + { + "epoch": 4.42, + "learning_rate": 4.033209108898957e-05, + "loss": 6.7324, + "step": 59900 + }, + { + "epoch": 4.42, + "learning_rate": 4.028186557643821e-05, + "loss": 7.1445, + "step": 59950 + }, + { + "epoch": 4.43, + "learning_rate": 4.0231640063886856e-05, + "loss": 5.426, + "step": 60000 + }, + { + "epoch": 4.43, + "eval_loss": 7.799332618713379, + "eval_runtime": 963.4222, + "eval_samples_per_second": 13.595, + "eval_steps_per_second": 3.399, + "eval_wer": 0.20775061946159337, + "step": 60000 + }, + { + "epoch": 4.43, + "learning_rate": 4.01814145513355e-05, + "loss": 6.577, + "step": 60050 + }, + { + "epoch": 4.43, + "learning_rate": 4.0131189038784144e-05, + "loss": 7.1169, + "step": 60100 + }, + { + "epoch": 4.44, + "learning_rate": 4.008096352623279e-05, + "loss": 7.2535, + "step": 60150 + }, + { + "epoch": 4.44, + "learning_rate": 
4.003073801368143e-05, + "loss": 6.2427, + "step": 60200 + }, + { + "epoch": 4.44, + "learning_rate": 3.9980512501130076e-05, + "loss": 6.3184, + "step": 60250 + }, + { + "epoch": 4.45, + "learning_rate": 3.993028698857872e-05, + "loss": 6.9348, + "step": 60300 + }, + { + "epoch": 4.45, + "learning_rate": 3.9880061476027364e-05, + "loss": 6.5074, + "step": 60350 + }, + { + "epoch": 4.46, + "learning_rate": 3.982983596347601e-05, + "loss": 7.2028, + "step": 60400 + }, + { + "epoch": 4.46, + "learning_rate": 3.977961045092465e-05, + "loss": 7.1185, + "step": 60450 + }, + { + "epoch": 4.46, + "learning_rate": 3.9729384938373296e-05, + "loss": 6.5089, + "step": 60500 + }, + { + "epoch": 4.47, + "learning_rate": 3.967915942582194e-05, + "loss": 5.8853, + "step": 60550 + }, + { + "epoch": 4.47, + "learning_rate": 3.962893391327059e-05, + "loss": 6.0402, + "step": 60600 + }, + { + "epoch": 4.47, + "learning_rate": 3.9578708400719235e-05, + "loss": 6.6078, + "step": 60650 + }, + { + "epoch": 4.48, + "learning_rate": 3.952848288816788e-05, + "loss": 6.4986, + "step": 60700 + }, + { + "epoch": 4.48, + "learning_rate": 3.947825737561652e-05, + "loss": 6.9922, + "step": 60750 + }, + { + "epoch": 4.48, + "learning_rate": 3.942803186306516e-05, + "loss": 6.8327, + "step": 60800 + }, + { + "epoch": 4.49, + "learning_rate": 3.9377806350513805e-05, + "loss": 6.7131, + "step": 60850 + }, + { + "epoch": 4.49, + "learning_rate": 3.932758083796245e-05, + "loss": 6.3372, + "step": 60900 + }, + { + "epoch": 4.5, + "learning_rate": 3.92773553254111e-05, + "loss": 6.5337, + "step": 60950 + }, + { + "epoch": 4.5, + "learning_rate": 3.9227129812859744e-05, + "loss": 7.5148, + "step": 61000 + }, + { + "epoch": 4.5, + "learning_rate": 3.917690430030839e-05, + "loss": 6.2416, + "step": 61050 + }, + { + "epoch": 4.51, + "learning_rate": 3.912667878775703e-05, + "loss": 6.3845, + "step": 61100 + }, + { + "epoch": 4.51, + "learning_rate": 3.9076453275205676e-05, + "loss": 7.0156, + "step": 61150 + 
}, + { + "epoch": 4.51, + "learning_rate": 3.902622776265432e-05, + "loss": 6.4239, + "step": 61200 + }, + { + "epoch": 4.52, + "learning_rate": 3.8976002250102964e-05, + "loss": 7.2111, + "step": 61250 + }, + { + "epoch": 4.52, + "learning_rate": 3.892577673755161e-05, + "loss": 6.5958, + "step": 61300 + }, + { + "epoch": 4.53, + "learning_rate": 3.887555122500025e-05, + "loss": 7.3125, + "step": 61350 + }, + { + "epoch": 4.53, + "learning_rate": 3.8825325712448896e-05, + "loss": 6.0597, + "step": 61400 + }, + { + "epoch": 4.53, + "learning_rate": 3.877510019989754e-05, + "loss": 6.3881, + "step": 61450 + }, + { + "epoch": 4.54, + "learning_rate": 3.8724874687346184e-05, + "loss": 6.2309, + "step": 61500 + }, + { + "epoch": 4.54, + "learning_rate": 3.867464917479483e-05, + "loss": 6.7603, + "step": 61550 + }, + { + "epoch": 4.54, + "learning_rate": 3.862442366224347e-05, + "loss": 7.386, + "step": 61600 + }, + { + "epoch": 4.55, + "learning_rate": 3.857419814969212e-05, + "loss": 7.1897, + "step": 61650 + }, + { + "epoch": 4.55, + "learning_rate": 3.852397263714077e-05, + "loss": 6.8813, + "step": 61700 + }, + { + "epoch": 4.55, + "learning_rate": 3.847374712458941e-05, + "loss": 7.9826, + "step": 61750 + }, + { + "epoch": 4.56, + "learning_rate": 3.8423521612038055e-05, + "loss": 6.4649, + "step": 61800 + }, + { + "epoch": 4.56, + "learning_rate": 3.83732960994867e-05, + "loss": 6.0005, + "step": 61850 + }, + { + "epoch": 4.57, + "learning_rate": 3.832307058693534e-05, + "loss": 6.7249, + "step": 61900 + }, + { + "epoch": 4.57, + "learning_rate": 3.827284507438398e-05, + "loss": 7.3703, + "step": 61950 + }, + { + "epoch": 4.57, + "learning_rate": 3.822261956183263e-05, + "loss": 6.3157, + "step": 62000 + }, + { + "epoch": 4.58, + "learning_rate": 3.8172394049281276e-05, + "loss": 6.3982, + "step": 62050 + }, + { + "epoch": 4.58, + "learning_rate": 3.812216853672992e-05, + "loss": 6.3995, + "step": 62100 + }, + { + "epoch": 4.58, + "learning_rate": 
3.8071943024178564e-05, + "loss": 6.9792, + "step": 62150 + }, + { + "epoch": 4.59, + "learning_rate": 3.802171751162721e-05, + "loss": 7.7098, + "step": 62200 + }, + { + "epoch": 4.59, + "learning_rate": 3.797149199907585e-05, + "loss": 7.3092, + "step": 62250 + }, + { + "epoch": 4.6, + "learning_rate": 3.7921266486524496e-05, + "loss": 6.7666, + "step": 62300 + }, + { + "epoch": 4.6, + "learning_rate": 3.787104097397314e-05, + "loss": 6.1829, + "step": 62350 + }, + { + "epoch": 4.6, + "learning_rate": 3.782081546142179e-05, + "loss": 8.2604, + "step": 62400 + }, + { + "epoch": 4.61, + "learning_rate": 3.777058994887043e-05, + "loss": 6.7275, + "step": 62450 + }, + { + "epoch": 4.61, + "learning_rate": 3.772036443631907e-05, + "loss": 6.8682, + "step": 62500 + }, + { + "epoch": 4.61, + "learning_rate": 3.7670138923767716e-05, + "loss": 7.4531, + "step": 62550 + }, + { + "epoch": 4.62, + "learning_rate": 3.761991341121636e-05, + "loss": 7.4792, + "step": 62600 + }, + { + "epoch": 4.62, + "learning_rate": 3.7569687898665004e-05, + "loss": 6.3364, + "step": 62650 + }, + { + "epoch": 4.62, + "learning_rate": 3.7519462386113655e-05, + "loss": 6.395, + "step": 62700 + }, + { + "epoch": 4.63, + "learning_rate": 3.74692368735623e-05, + "loss": 6.4644, + "step": 62750 + }, + { + "epoch": 4.63, + "learning_rate": 3.741901136101094e-05, + "loss": 7.6636, + "step": 62800 + }, + { + "epoch": 4.64, + "learning_rate": 3.736878584845959e-05, + "loss": 6.5346, + "step": 62850 + }, + { + "epoch": 4.64, + "learning_rate": 3.731856033590823e-05, + "loss": 7.7544, + "step": 62900 + }, + { + "epoch": 4.64, + "learning_rate": 3.7268334823356875e-05, + "loss": 7.1518, + "step": 62950 + }, + { + "epoch": 4.65, + "learning_rate": 3.721810931080552e-05, + "loss": 5.2845, + "step": 63000 + }, + { + "epoch": 4.65, + "learning_rate": 3.7167883798254163e-05, + "loss": 6.4635, + "step": 63050 + }, + { + "epoch": 4.65, + "learning_rate": 3.711765828570281e-05, + "loss": 6.7313, + "step": 63100 + 
}, + { + "epoch": 4.66, + "learning_rate": 3.706743277315145e-05, + "loss": 6.2767, + "step": 63150 + }, + { + "epoch": 4.66, + "learning_rate": 3.7017207260600096e-05, + "loss": 6.2349, + "step": 63200 + }, + { + "epoch": 4.67, + "learning_rate": 3.696698174804874e-05, + "loss": 7.2152, + "step": 63250 + }, + { + "epoch": 4.67, + "learning_rate": 3.6916756235497384e-05, + "loss": 6.4904, + "step": 63300 + }, + { + "epoch": 4.67, + "learning_rate": 3.686653072294603e-05, + "loss": 6.4779, + "step": 63350 + }, + { + "epoch": 4.68, + "learning_rate": 3.681630521039467e-05, + "loss": 7.0359, + "step": 63400 + }, + { + "epoch": 4.68, + "learning_rate": 3.676607969784332e-05, + "loss": 6.3846, + "step": 63450 + }, + { + "epoch": 4.68, + "learning_rate": 3.671585418529197e-05, + "loss": 6.3923, + "step": 63500 + }, + { + "epoch": 4.69, + "learning_rate": 3.666562867274061e-05, + "loss": 6.7271, + "step": 63550 + }, + { + "epoch": 4.69, + "learning_rate": 3.661540316018925e-05, + "loss": 6.6546, + "step": 63600 + }, + { + "epoch": 4.69, + "learning_rate": 3.656517764763789e-05, + "loss": 7.1482, + "step": 63650 + }, + { + "epoch": 4.7, + "learning_rate": 3.6514952135086536e-05, + "loss": 6.4931, + "step": 63700 + }, + { + "epoch": 4.7, + "learning_rate": 3.646472662253518e-05, + "loss": 5.1346, + "step": 63750 + }, + { + "epoch": 4.71, + "learning_rate": 3.641450110998383e-05, + "loss": 6.25, + "step": 63800 + }, + { + "epoch": 4.71, + "learning_rate": 3.6364275597432475e-05, + "loss": 6.5812, + "step": 63850 + }, + { + "epoch": 4.71, + "learning_rate": 3.631405008488112e-05, + "loss": 6.4797, + "step": 63900 + }, + { + "epoch": 4.72, + "learning_rate": 3.626382457232976e-05, + "loss": 6.2886, + "step": 63950 + }, + { + "epoch": 4.72, + "learning_rate": 3.621359905977841e-05, + "loss": 6.4452, + "step": 64000 + }, + { + "epoch": 4.72, + "learning_rate": 3.616337354722705e-05, + "loss": 6.5534, + "step": 64050 + }, + { + "epoch": 4.73, + "learning_rate": 
3.6113148034675695e-05, + "loss": 6.9353, + "step": 64100 + }, + { + "epoch": 4.73, + "learning_rate": 3.606292252212434e-05, + "loss": 6.1219, + "step": 64150 + }, + { + "epoch": 4.74, + "learning_rate": 3.6012697009572984e-05, + "loss": 6.8233, + "step": 64200 + }, + { + "epoch": 4.74, + "learning_rate": 3.596247149702163e-05, + "loss": 7.1924, + "step": 64250 + }, + { + "epoch": 4.74, + "learning_rate": 3.591224598447027e-05, + "loss": 7.1116, + "step": 64300 + }, + { + "epoch": 4.75, + "learning_rate": 3.5862020471918916e-05, + "loss": 7.2818, + "step": 64350 + }, + { + "epoch": 4.75, + "learning_rate": 3.581179495936756e-05, + "loss": 6.3182, + "step": 64400 + }, + { + "epoch": 4.75, + "learning_rate": 3.5761569446816204e-05, + "loss": 6.7712, + "step": 64450 + }, + { + "epoch": 4.76, + "learning_rate": 3.5711343934264855e-05, + "loss": 6.7902, + "step": 64500 + }, + { + "epoch": 4.76, + "learning_rate": 3.56611184217135e-05, + "loss": 6.5504, + "step": 64550 + }, + { + "epoch": 4.77, + "learning_rate": 3.561089290916214e-05, + "loss": 6.3599, + "step": 64600 + }, + { + "epoch": 4.77, + "learning_rate": 3.556066739661079e-05, + "loss": 6.4758, + "step": 64650 + }, + { + "epoch": 4.77, + "learning_rate": 3.551044188405943e-05, + "loss": 7.2899, + "step": 64700 + }, + { + "epoch": 4.78, + "learning_rate": 3.546021637150807e-05, + "loss": 6.6164, + "step": 64750 + }, + { + "epoch": 4.78, + "learning_rate": 3.540999085895671e-05, + "loss": 6.0466, + "step": 64800 + }, + { + "epoch": 4.78, + "learning_rate": 3.535976534640536e-05, + "loss": 6.2209, + "step": 64850 + }, + { + "epoch": 4.79, + "learning_rate": 3.530953983385401e-05, + "loss": 6.6098, + "step": 64900 + }, + { + "epoch": 4.79, + "learning_rate": 3.525931432130265e-05, + "loss": 5.959, + "step": 64950 + }, + { + "epoch": 4.79, + "learning_rate": 3.5209088808751295e-05, + "loss": 6.6942, + "step": 65000 + }, + { + "epoch": 4.8, + "learning_rate": 3.515886329619994e-05, + "loss": 7.7404, + "step": 65050 + 
}, + { + "epoch": 4.8, + "learning_rate": 3.510863778364858e-05, + "loss": 6.5342, + "step": 65100 + }, + { + "epoch": 4.81, + "learning_rate": 3.505841227109723e-05, + "loss": 6.1434, + "step": 65150 + }, + { + "epoch": 4.81, + "learning_rate": 3.500818675854587e-05, + "loss": 5.8523, + "step": 65200 + }, + { + "epoch": 4.81, + "learning_rate": 3.495796124599452e-05, + "loss": 5.7111, + "step": 65250 + }, + { + "epoch": 4.82, + "learning_rate": 3.490773573344316e-05, + "loss": 6.4092, + "step": 65300 + }, + { + "epoch": 4.82, + "learning_rate": 3.4857510220891804e-05, + "loss": 6.5057, + "step": 65350 + }, + { + "epoch": 4.82, + "learning_rate": 3.480728470834045e-05, + "loss": 5.9863, + "step": 65400 + }, + { + "epoch": 4.83, + "learning_rate": 3.475705919578909e-05, + "loss": 6.0978, + "step": 65450 + }, + { + "epoch": 4.83, + "learning_rate": 3.4706833683237736e-05, + "loss": 6.6325, + "step": 65500 + }, + { + "epoch": 4.84, + "learning_rate": 3.4656608170686387e-05, + "loss": 5.9716, + "step": 65550 + }, + { + "epoch": 4.84, + "learning_rate": 3.460638265813503e-05, + "loss": 5.9812, + "step": 65600 + }, + { + "epoch": 4.84, + "learning_rate": 3.4556157145583675e-05, + "loss": 5.6784, + "step": 65650 + }, + { + "epoch": 4.85, + "learning_rate": 3.450593163303232e-05, + "loss": 6.7303, + "step": 65700 + }, + { + "epoch": 4.85, + "learning_rate": 3.445570612048096e-05, + "loss": 7.527, + "step": 65750 + }, + { + "epoch": 4.85, + "learning_rate": 3.440548060792961e-05, + "loss": 6.3649, + "step": 65800 + }, + { + "epoch": 4.86, + "learning_rate": 3.435525509537825e-05, + "loss": 6.6456, + "step": 65850 + }, + { + "epoch": 4.86, + "learning_rate": 3.4305029582826895e-05, + "loss": 6.2518, + "step": 65900 + }, + { + "epoch": 4.86, + "learning_rate": 3.425480407027554e-05, + "loss": 6.0104, + "step": 65950 + }, + { + "epoch": 4.87, + "learning_rate": 3.420457855772418e-05, + "loss": 7.0261, + "step": 66000 + }, + { + "epoch": 4.87, + "learning_rate": 
3.415435304517283e-05, + "loss": 6.4373, + "step": 66050 + }, + { + "epoch": 4.88, + "learning_rate": 3.410412753262147e-05, + "loss": 6.3931, + "step": 66100 + }, + { + "epoch": 4.88, + "learning_rate": 3.4053902020070115e-05, + "loss": 7.0678, + "step": 66150 + }, + { + "epoch": 4.88, + "learning_rate": 3.400367650751876e-05, + "loss": 6.9086, + "step": 66200 + }, + { + "epoch": 4.89, + "learning_rate": 3.39534509949674e-05, + "loss": 6.3431, + "step": 66250 + }, + { + "epoch": 4.89, + "learning_rate": 3.3903225482416054e-05, + "loss": 7.298, + "step": 66300 + }, + { + "epoch": 4.89, + "learning_rate": 3.38529999698647e-05, + "loss": 6.4188, + "step": 66350 + }, + { + "epoch": 4.9, + "learning_rate": 3.380277445731334e-05, + "loss": 6.1998, + "step": 66400 + }, + { + "epoch": 4.9, + "learning_rate": 3.375254894476198e-05, + "loss": 6.7306, + "step": 66450 + }, + { + "epoch": 4.91, + "learning_rate": 3.3702323432210624e-05, + "loss": 6.0333, + "step": 66500 + }, + { + "epoch": 4.91, + "learning_rate": 3.365209791965927e-05, + "loss": 6.9375, + "step": 66550 + }, + { + "epoch": 4.91, + "learning_rate": 3.360187240710791e-05, + "loss": 6.4168, + "step": 66600 + }, + { + "epoch": 4.92, + "learning_rate": 3.355164689455656e-05, + "loss": 6.6486, + "step": 66650 + }, + { + "epoch": 4.92, + "learning_rate": 3.3501421382005207e-05, + "loss": 5.6693, + "step": 66700 + }, + { + "epoch": 4.92, + "learning_rate": 3.345119586945385e-05, + "loss": 6.7548, + "step": 66750 + }, + { + "epoch": 4.93, + "learning_rate": 3.3400970356902495e-05, + "loss": 8.7782, + "step": 66800 + }, + { + "epoch": 4.93, + "learning_rate": 3.335074484435114e-05, + "loss": 6.9741, + "step": 66850 + }, + { + "epoch": 4.93, + "learning_rate": 3.330051933179978e-05, + "loss": 6.423, + "step": 66900 + }, + { + "epoch": 4.94, + "learning_rate": 3.325029381924843e-05, + "loss": 6.0077, + "step": 66950 + }, + { + "epoch": 4.94, + "learning_rate": 3.320006830669707e-05, + "loss": 6.4085, + "step": 67000 + }, 
+ { + "epoch": 4.95, + "learning_rate": 3.3149842794145715e-05, + "loss": 6.383, + "step": 67050 + }, + { + "epoch": 4.95, + "learning_rate": 3.309961728159436e-05, + "loss": 5.8758, + "step": 67100 + }, + { + "epoch": 4.95, + "learning_rate": 3.3049391769043e-05, + "loss": 7.1169, + "step": 67150 + }, + { + "epoch": 4.96, + "learning_rate": 3.299916625649165e-05, + "loss": 7.5655, + "step": 67200 + }, + { + "epoch": 4.96, + "learning_rate": 3.294894074394029e-05, + "loss": 6.1727, + "step": 67250 + }, + { + "epoch": 4.96, + "learning_rate": 3.2898715231388935e-05, + "loss": 6.0568, + "step": 67300 + }, + { + "epoch": 4.97, + "learning_rate": 3.2848489718837586e-05, + "loss": 6.9697, + "step": 67350 + }, + { + "epoch": 4.97, + "learning_rate": 3.279826420628623e-05, + "loss": 6.7938, + "step": 67400 + }, + { + "epoch": 4.98, + "learning_rate": 3.2748038693734874e-05, + "loss": 6.3006, + "step": 67450 + }, + { + "epoch": 4.98, + "learning_rate": 3.269781318118352e-05, + "loss": 6.3299, + "step": 67500 + }, + { + "epoch": 4.98, + "learning_rate": 3.264758766863216e-05, + "loss": 6.3615, + "step": 67550 + }, + { + "epoch": 4.99, + "learning_rate": 3.25973621560808e-05, + "loss": 6.1106, + "step": 67600 + }, + { + "epoch": 4.99, + "learning_rate": 3.2547136643529444e-05, + "loss": 5.571, + "step": 67650 + }, + { + "epoch": 4.99, + "learning_rate": 3.2496911130978094e-05, + "loss": 6.5922, + "step": 67700 + }, + { + "epoch": 5.0, + "learning_rate": 3.244668561842674e-05, + "loss": 5.663, + "step": 67750 + }, + { + "epoch": 5.0, + "learning_rate": 3.239646010587538e-05, + "loss": 7.3669, + "step": 67800 + }, + { + "epoch": 5.0, + "learning_rate": 3.2346234593324027e-05, + "loss": 6.015, + "step": 67850 + }, + { + "epoch": 5.01, + "learning_rate": 3.229600908077267e-05, + "loss": 5.8678, + "step": 67900 + }, + { + "epoch": 5.01, + "learning_rate": 3.2245783568221315e-05, + "loss": 5.5537, + "step": 67950 + }, + { + "epoch": 5.02, + "learning_rate": 3.219555805566996e-05, 
+ "loss": 6.175, + "step": 68000 + }, + { + "epoch": 5.02, + "learning_rate": 3.21453325431186e-05, + "loss": 5.9018, + "step": 68050 + }, + { + "epoch": 5.02, + "learning_rate": 3.2095107030567254e-05, + "loss": 6.9064, + "step": 68100 + }, + { + "epoch": 5.03, + "learning_rate": 3.204488151801589e-05, + "loss": 6.8775, + "step": 68150 + }, + { + "epoch": 5.03, + "learning_rate": 3.1994656005464535e-05, + "loss": 5.6397, + "step": 68200 + }, + { + "epoch": 5.03, + "learning_rate": 3.194443049291318e-05, + "loss": 5.815, + "step": 68250 + }, + { + "epoch": 5.04, + "learning_rate": 3.189420498036182e-05, + "loss": 6.0795, + "step": 68300 + }, + { + "epoch": 5.04, + "learning_rate": 3.184397946781047e-05, + "loss": 6.8721, + "step": 68350 + }, + { + "epoch": 5.05, + "learning_rate": 3.179375395525912e-05, + "loss": 6.4936, + "step": 68400 + }, + { + "epoch": 5.05, + "learning_rate": 3.174352844270776e-05, + "loss": 5.7195, + "step": 68450 + }, + { + "epoch": 5.05, + "learning_rate": 3.1693302930156406e-05, + "loss": 5.6897, + "step": 68500 + }, + { + "epoch": 5.06, + "learning_rate": 3.164307741760505e-05, + "loss": 6.2271, + "step": 68550 + }, + { + "epoch": 5.06, + "learning_rate": 3.1592851905053694e-05, + "loss": 6.3731, + "step": 68600 + }, + { + "epoch": 5.06, + "learning_rate": 3.154262639250234e-05, + "loss": 5.8502, + "step": 68650 + }, + { + "epoch": 5.07, + "learning_rate": 3.149240087995098e-05, + "loss": 5.8768, + "step": 68700 + }, + { + "epoch": 5.07, + "learning_rate": 3.1442175367399626e-05, + "loss": 6.4265, + "step": 68750 + }, + { + "epoch": 5.07, + "learning_rate": 3.139194985484827e-05, + "loss": 5.8952, + "step": 68800 + }, + { + "epoch": 5.08, + "learning_rate": 3.1341724342296914e-05, + "loss": 5.3739, + "step": 68850 + }, + { + "epoch": 5.08, + "learning_rate": 3.129149882974556e-05, + "loss": 6.4317, + "step": 68900 + }, + { + "epoch": 5.09, + "learning_rate": 3.12412733171942e-05, + "loss": 5.6296, + "step": 68950 + }, + { + "epoch": 5.09, 
+ "learning_rate": 3.1191047804642847e-05, + "loss": 6.2448, + "step": 69000 + }, + { + "epoch": 5.09, + "learning_rate": 3.114082229209149e-05, + "loss": 6.116, + "step": 69050 + }, + { + "epoch": 5.1, + "learning_rate": 3.1090596779540135e-05, + "loss": 6.4299, + "step": 69100 + }, + { + "epoch": 5.1, + "learning_rate": 3.1040371266988785e-05, + "loss": 6.7337, + "step": 69150 + }, + { + "epoch": 5.1, + "learning_rate": 3.099014575443743e-05, + "loss": 6.6103, + "step": 69200 + }, + { + "epoch": 5.11, + "learning_rate": 3.0939920241886074e-05, + "loss": 5.6155, + "step": 69250 + }, + { + "epoch": 5.11, + "learning_rate": 3.088969472933471e-05, + "loss": 5.7131, + "step": 69300 + }, + { + "epoch": 5.12, + "learning_rate": 3.0839469216783355e-05, + "loss": 5.6799, + "step": 69350 + }, + { + "epoch": 5.12, + "learning_rate": 3.0789243704232e-05, + "loss": 5.9907, + "step": 69400 + }, + { + "epoch": 5.12, + "learning_rate": 3.073901819168064e-05, + "loss": 5.7125, + "step": 69450 + }, + { + "epoch": 5.13, + "learning_rate": 3.0688792679129294e-05, + "loss": 6.3093, + "step": 69500 + }, + { + "epoch": 5.13, + "learning_rate": 3.063856716657794e-05, + "loss": 6.1113, + "step": 69550 + }, + { + "epoch": 5.13, + "learning_rate": 3.058834165402658e-05, + "loss": 5.5845, + "step": 69600 + }, + { + "epoch": 5.14, + "learning_rate": 3.0538116141475226e-05, + "loss": 5.9267, + "step": 69650 + }, + { + "epoch": 5.14, + "learning_rate": 3.048789062892387e-05, + "loss": 6.0062, + "step": 69700 + }, + { + "epoch": 5.14, + "learning_rate": 3.0437665116372514e-05, + "loss": 6.005, + "step": 69750 + }, + { + "epoch": 5.15, + "learning_rate": 3.038743960382116e-05, + "loss": 5.854, + "step": 69800 + }, + { + "epoch": 5.15, + "learning_rate": 3.0337214091269806e-05, + "loss": 6.3468, + "step": 69850 + }, + { + "epoch": 5.16, + "learning_rate": 3.0286988578718446e-05, + "loss": 5.9127, + "step": 69900 + }, + { + "epoch": 5.16, + "learning_rate": 3.023676306616709e-05, + "loss": 6.5192, 
+ "step": 69950 + }, + { + "epoch": 5.16, + "learning_rate": 3.0186537553615734e-05, + "loss": 5.8418, + "step": 70000 + }, + { + "epoch": 5.17, + "learning_rate": 3.013631204106438e-05, + "loss": 6.0775, + "step": 70050 + }, + { + "epoch": 5.17, + "learning_rate": 3.0086086528513026e-05, + "loss": 5.926, + "step": 70100 + }, + { + "epoch": 5.17, + "learning_rate": 3.003586101596167e-05, + "loss": 5.8467, + "step": 70150 + }, + { + "epoch": 5.18, + "learning_rate": 2.9985635503410314e-05, + "loss": 6.441, + "step": 70200 + }, + { + "epoch": 5.18, + "learning_rate": 2.9935409990858958e-05, + "loss": 5.6337, + "step": 70250 + }, + { + "epoch": 5.19, + "learning_rate": 2.9885184478307606e-05, + "loss": 6.3408, + "step": 70300 + }, + { + "epoch": 5.19, + "learning_rate": 2.983495896575625e-05, + "loss": 6.0077, + "step": 70350 + }, + { + "epoch": 5.19, + "learning_rate": 2.9784733453204894e-05, + "loss": 5.7263, + "step": 70400 + }, + { + "epoch": 5.2, + "learning_rate": 2.9734507940653534e-05, + "loss": 6.8161, + "step": 70450 + }, + { + "epoch": 5.2, + "learning_rate": 2.968428242810218e-05, + "loss": 6.4292, + "step": 70500 + }, + { + "epoch": 5.2, + "learning_rate": 2.9634056915550822e-05, + "loss": 6.0751, + "step": 70550 + }, + { + "epoch": 5.21, + "learning_rate": 2.9583831402999466e-05, + "loss": 6.2439, + "step": 70600 + }, + { + "epoch": 5.21, + "learning_rate": 2.9533605890448114e-05, + "loss": 5.6406, + "step": 70650 + }, + { + "epoch": 5.22, + "learning_rate": 2.9483380377896758e-05, + "loss": 5.5623, + "step": 70700 + }, + { + "epoch": 5.22, + "learning_rate": 2.9433154865345402e-05, + "loss": 6.3392, + "step": 70750 + }, + { + "epoch": 5.22, + "learning_rate": 2.9382929352794046e-05, + "loss": 7.3752, + "step": 70800 + }, + { + "epoch": 5.23, + "learning_rate": 2.9332703840242693e-05, + "loss": 6.2126, + "step": 70850 + }, + { + "epoch": 5.23, + "learning_rate": 2.9282478327691338e-05, + "loss": 5.3583, + "step": 70900 + }, + { + "epoch": 5.23, + 
"learning_rate": 2.923225281513998e-05, + "loss": 5.4659, + "step": 70950 + }, + { + "epoch": 5.24, + "learning_rate": 2.9182027302588626e-05, + "loss": 6.1876, + "step": 71000 + }, + { + "epoch": 5.24, + "learning_rate": 2.9131801790037266e-05, + "loss": 5.8878, + "step": 71050 + }, + { + "epoch": 5.24, + "learning_rate": 2.908157627748591e-05, + "loss": 6.2974, + "step": 71100 + }, + { + "epoch": 5.25, + "learning_rate": 2.9031350764934558e-05, + "loss": 6.348, + "step": 71150 + }, + { + "epoch": 5.25, + "learning_rate": 2.8981125252383202e-05, + "loss": 5.9929, + "step": 71200 + }, + { + "epoch": 5.26, + "learning_rate": 2.8930899739831846e-05, + "loss": 5.9609, + "step": 71250 + }, + { + "epoch": 5.26, + "learning_rate": 2.888067422728049e-05, + "loss": 5.4301, + "step": 71300 + }, + { + "epoch": 5.26, + "learning_rate": 2.8830448714729137e-05, + "loss": 5.4559, + "step": 71350 + }, + { + "epoch": 5.27, + "learning_rate": 2.878022320217778e-05, + "loss": 7.1806, + "step": 71400 + }, + { + "epoch": 5.27, + "learning_rate": 2.8729997689626426e-05, + "loss": 5.6962, + "step": 71450 + }, + { + "epoch": 5.27, + "learning_rate": 2.867977217707507e-05, + "loss": 5.2751, + "step": 71500 + }, + { + "epoch": 5.28, + "learning_rate": 2.8629546664523717e-05, + "loss": 5.8732, + "step": 71550 + }, + { + "epoch": 5.28, + "learning_rate": 2.8579321151972354e-05, + "loss": 5.3111, + "step": 71600 + }, + { + "epoch": 5.29, + "learning_rate": 2.8529095639421e-05, + "loss": 6.269, + "step": 71650 + }, + { + "epoch": 5.29, + "learning_rate": 2.8478870126869646e-05, + "loss": 4.7494, + "step": 71700 + }, + { + "epoch": 5.29, + "learning_rate": 2.842864461431829e-05, + "loss": 6.2853, + "step": 71750 + }, + { + "epoch": 5.3, + "learning_rate": 2.8378419101766934e-05, + "loss": 6.7802, + "step": 71800 + }, + { + "epoch": 5.3, + "learning_rate": 2.8328193589215578e-05, + "loss": 7.7665, + "step": 71850 + }, + { + "epoch": 5.3, + "learning_rate": 2.8277968076664225e-05, + "loss": 
5.7143, + "step": 71900 + }, + { + "epoch": 5.31, + "learning_rate": 2.822774256411287e-05, + "loss": 6.1485, + "step": 71950 + }, + { + "epoch": 5.31, + "learning_rate": 2.8177517051561514e-05, + "loss": 5.9611, + "step": 72000 + }, + { + "epoch": 5.31, + "learning_rate": 2.8127291539010158e-05, + "loss": 6.9089, + "step": 72050 + }, + { + "epoch": 5.32, + "learning_rate": 2.8077066026458805e-05, + "loss": 5.3067, + "step": 72100 + }, + { + "epoch": 5.32, + "learning_rate": 2.8026840513907442e-05, + "loss": 5.7255, + "step": 72150 + }, + { + "epoch": 5.33, + "learning_rate": 2.7976615001356086e-05, + "loss": 6.6141, + "step": 72200 + }, + { + "epoch": 5.33, + "learning_rate": 2.7926389488804734e-05, + "loss": 5.1587, + "step": 72250 + }, + { + "epoch": 5.33, + "learning_rate": 2.7876163976253378e-05, + "loss": 6.2063, + "step": 72300 + }, + { + "epoch": 5.34, + "learning_rate": 2.7825938463702022e-05, + "loss": 5.9984, + "step": 72350 + }, + { + "epoch": 5.34, + "learning_rate": 2.7775712951150666e-05, + "loss": 6.2301, + "step": 72400 + }, + { + "epoch": 5.34, + "learning_rate": 2.7725487438599313e-05, + "loss": 6.0619, + "step": 72450 + }, + { + "epoch": 5.35, + "learning_rate": 2.7675261926047957e-05, + "loss": 7.1879, + "step": 72500 + }, + { + "epoch": 5.35, + "learning_rate": 2.76250364134966e-05, + "loss": 6.8024, + "step": 72550 + }, + { + "epoch": 5.36, + "learning_rate": 2.7574810900945246e-05, + "loss": 7.412, + "step": 72600 + }, + { + "epoch": 5.36, + "learning_rate": 2.7524585388393893e-05, + "loss": 6.172, + "step": 72650 + }, + { + "epoch": 5.36, + "learning_rate": 2.7474359875842537e-05, + "loss": 5.9536, + "step": 72700 + }, + { + "epoch": 5.37, + "learning_rate": 2.7424134363291178e-05, + "loss": 6.4215, + "step": 72750 + }, + { + "epoch": 5.37, + "learning_rate": 2.7373908850739822e-05, + "loss": 5.6326, + "step": 72800 + }, + { + "epoch": 5.37, + "learning_rate": 2.7323683338188466e-05, + "loss": 5.8943, + "step": 72850 + }, + { + "epoch": 
5.38, + "learning_rate": 2.727345782563711e-05, + "loss": 6.8689, + "step": 72900 + }, + { + "epoch": 5.38, + "learning_rate": 2.7223232313085757e-05, + "loss": 6.2079, + "step": 72950 + }, + { + "epoch": 5.38, + "learning_rate": 2.71730068005344e-05, + "loss": 6.4607, + "step": 73000 + }, + { + "epoch": 5.39, + "learning_rate": 2.7122781287983045e-05, + "loss": 6.0781, + "step": 73050 + }, + { + "epoch": 5.39, + "learning_rate": 2.707255577543169e-05, + "loss": 5.7624, + "step": 73100 + }, + { + "epoch": 5.4, + "learning_rate": 2.7022330262880337e-05, + "loss": 6.0385, + "step": 73150 + }, + { + "epoch": 5.4, + "learning_rate": 2.697210475032898e-05, + "loss": 5.9751, + "step": 73200 + }, + { + "epoch": 5.4, + "learning_rate": 2.6921879237777625e-05, + "loss": 6.3938, + "step": 73250 + }, + { + "epoch": 5.41, + "learning_rate": 2.6871653725226266e-05, + "loss": 5.9229, + "step": 73300 + }, + { + "epoch": 5.41, + "learning_rate": 2.682142821267491e-05, + "loss": 6.0674, + "step": 73350 + }, + { + "epoch": 5.41, + "learning_rate": 2.6771202700123554e-05, + "loss": 6.7223, + "step": 73400 + }, + { + "epoch": 5.42, + "learning_rate": 2.6720977187572198e-05, + "loss": 5.7889, + "step": 73450 + }, + { + "epoch": 5.42, + "learning_rate": 2.6670751675020845e-05, + "loss": 7.1486, + "step": 73500 + }, + { + "epoch": 5.43, + "learning_rate": 2.662052616246949e-05, + "loss": 6.1844, + "step": 73550 + }, + { + "epoch": 5.43, + "learning_rate": 2.6570300649918133e-05, + "loss": 6.198, + "step": 73600 + }, + { + "epoch": 5.43, + "learning_rate": 2.6520075137366777e-05, + "loss": 6.6778, + "step": 73650 + }, + { + "epoch": 5.44, + "learning_rate": 2.6469849624815425e-05, + "loss": 5.9788, + "step": 73700 + }, + { + "epoch": 5.44, + "learning_rate": 2.641962411226407e-05, + "loss": 6.3568, + "step": 73750 + }, + { + "epoch": 5.44, + "learning_rate": 2.6369398599712713e-05, + "loss": 5.9383, + "step": 73800 + }, + { + "epoch": 5.45, + "learning_rate": 2.6319173087161357e-05, + 
"loss": 6.4832, + "step": 73850 + }, + { + "epoch": 5.45, + "learning_rate": 2.6268947574609998e-05, + "loss": 5.883, + "step": 73900 + }, + { + "epoch": 5.45, + "learning_rate": 2.6218722062058642e-05, + "loss": 6.213, + "step": 73950 + }, + { + "epoch": 5.46, + "learning_rate": 2.616849654950729e-05, + "loss": 6.5404, + "step": 74000 + }, + { + "epoch": 5.46, + "learning_rate": 2.6118271036955933e-05, + "loss": 6.1246, + "step": 74050 + }, + { + "epoch": 5.47, + "learning_rate": 2.6068045524404577e-05, + "loss": 6.0739, + "step": 74100 + }, + { + "epoch": 5.47, + "learning_rate": 2.601782001185322e-05, + "loss": 6.2085, + "step": 74150 + }, + { + "epoch": 5.47, + "learning_rate": 2.596759449930187e-05, + "loss": 6.7059, + "step": 74200 + }, + { + "epoch": 5.48, + "learning_rate": 2.5917368986750513e-05, + "loss": 6.231, + "step": 74250 + }, + { + "epoch": 5.48, + "learning_rate": 2.5867143474199157e-05, + "loss": 6.1287, + "step": 74300 + }, + { + "epoch": 5.48, + "learning_rate": 2.58169179616478e-05, + "loss": 6.0583, + "step": 74350 + }, + { + "epoch": 5.49, + "learning_rate": 2.576669244909645e-05, + "loss": 6.1552, + "step": 74400 + }, + { + "epoch": 5.49, + "learning_rate": 2.5716466936545086e-05, + "loss": 6.4191, + "step": 74450 + }, + { + "epoch": 5.5, + "learning_rate": 2.566624142399373e-05, + "loss": 7.2899, + "step": 74500 + }, + { + "epoch": 5.5, + "learning_rate": 2.5616015911442377e-05, + "loss": 6.3234, + "step": 74550 + }, + { + "epoch": 5.5, + "learning_rate": 2.556579039889102e-05, + "loss": 5.6938, + "step": 74600 + }, + { + "epoch": 5.51, + "learning_rate": 2.5515564886339665e-05, + "loss": 5.1418, + "step": 74650 + }, + { + "epoch": 5.51, + "learning_rate": 2.546533937378831e-05, + "loss": 5.7747, + "step": 74700 + }, + { + "epoch": 5.51, + "learning_rate": 2.5415113861236957e-05, + "loss": 5.7191, + "step": 74750 + }, + { + "epoch": 5.52, + "learning_rate": 2.53648883486856e-05, + "loss": 6.0435, + "step": 74800 + }, + { + "epoch": 5.52, + 
"learning_rate": 2.5314662836134245e-05, + "loss": 6.1592, + "step": 74850 + }, + { + "epoch": 5.52, + "learning_rate": 2.526443732358289e-05, + "loss": 6.1625, + "step": 74900 + }, + { + "epoch": 5.53, + "learning_rate": 2.5214211811031536e-05, + "loss": 4.7958, + "step": 74950 + }, + { + "epoch": 5.53, + "learning_rate": 2.516398629848018e-05, + "loss": 6.0618, + "step": 75000 + }, + { + "epoch": 5.54, + "learning_rate": 2.5113760785928818e-05, + "loss": 5.754, + "step": 75050 + }, + { + "epoch": 5.54, + "learning_rate": 2.5063535273377465e-05, + "loss": 5.6346, + "step": 75100 + }, + { + "epoch": 5.54, + "learning_rate": 2.501330976082611e-05, + "loss": 5.6922, + "step": 75150 + }, + { + "epoch": 5.55, + "learning_rate": 2.4963084248274753e-05, + "loss": 6.6754, + "step": 75200 + }, + { + "epoch": 5.55, + "learning_rate": 2.4912858735723397e-05, + "loss": 5.2828, + "step": 75250 + }, + { + "epoch": 5.55, + "learning_rate": 2.4862633223172045e-05, + "loss": 5.799, + "step": 75300 + }, + { + "epoch": 5.56, + "learning_rate": 2.481240771062069e-05, + "loss": 5.8229, + "step": 75350 + }, + { + "epoch": 5.56, + "learning_rate": 2.4762182198069333e-05, + "loss": 5.1759, + "step": 75400 + }, + { + "epoch": 5.57, + "learning_rate": 2.4711956685517977e-05, + "loss": 5.9411, + "step": 75450 + }, + { + "epoch": 5.57, + "learning_rate": 2.466173117296662e-05, + "loss": 5.4522, + "step": 75500 + }, + { + "epoch": 5.57, + "learning_rate": 2.4611505660415265e-05, + "loss": 6.0731, + "step": 75550 + }, + { + "epoch": 5.58, + "learning_rate": 2.456128014786391e-05, + "loss": 5.9288, + "step": 75600 + }, + { + "epoch": 5.58, + "learning_rate": 2.4511054635312557e-05, + "loss": 5.7434, + "step": 75650 + }, + { + "epoch": 5.58, + "learning_rate": 2.44608291227612e-05, + "loss": 5.5638, + "step": 75700 + }, + { + "epoch": 5.59, + "learning_rate": 2.441060361020984e-05, + "loss": 6.423, + "step": 75750 + }, + { + "epoch": 5.59, + "learning_rate": 2.436037809765849e-05, + "loss": 
5.4612, + "step": 75800 + }, + { + "epoch": 5.59, + "learning_rate": 2.4310152585107133e-05, + "loss": 7.1213, + "step": 75850 + }, + { + "epoch": 5.6, + "learning_rate": 2.4259927072555777e-05, + "loss": 6.366, + "step": 75900 + }, + { + "epoch": 5.6, + "learning_rate": 2.420970156000442e-05, + "loss": 5.8278, + "step": 75950 + }, + { + "epoch": 5.61, + "learning_rate": 2.4159476047453065e-05, + "loss": 6.1465, + "step": 76000 + }, + { + "epoch": 5.61, + "learning_rate": 2.410925053490171e-05, + "loss": 5.7868, + "step": 76050 + }, + { + "epoch": 5.61, + "learning_rate": 2.4059025022350353e-05, + "loss": 5.8116, + "step": 76100 + }, + { + "epoch": 5.62, + "learning_rate": 2.4008799509798997e-05, + "loss": 5.7459, + "step": 76150 + }, + { + "epoch": 5.62, + "learning_rate": 2.3958573997247645e-05, + "loss": 6.2053, + "step": 76200 + }, + { + "epoch": 5.62, + "learning_rate": 2.390834848469629e-05, + "loss": 5.768, + "step": 76250 + }, + { + "epoch": 5.63, + "learning_rate": 2.385812297214493e-05, + "loss": 5.9021, + "step": 76300 + }, + { + "epoch": 5.63, + "learning_rate": 2.3807897459593577e-05, + "loss": 6.2206, + "step": 76350 + }, + { + "epoch": 5.64, + "learning_rate": 2.375767194704222e-05, + "loss": 6.3088, + "step": 76400 + }, + { + "epoch": 5.64, + "learning_rate": 2.3707446434490865e-05, + "loss": 6.0105, + "step": 76450 + }, + { + "epoch": 5.64, + "learning_rate": 2.365722092193951e-05, + "loss": 5.738, + "step": 76500 + }, + { + "epoch": 5.65, + "learning_rate": 2.3606995409388156e-05, + "loss": 6.2838, + "step": 76550 + }, + { + "epoch": 5.65, + "learning_rate": 2.3556769896836797e-05, + "loss": 5.7041, + "step": 76600 + }, + { + "epoch": 5.65, + "learning_rate": 2.350654438428544e-05, + "loss": 6.7796, + "step": 76650 + }, + { + "epoch": 5.66, + "learning_rate": 2.345631887173409e-05, + "loss": 6.4432, + "step": 76700 + }, + { + "epoch": 5.66, + "learning_rate": 2.3406093359182733e-05, + "loss": 5.5805, + "step": 76750 + }, + { + "epoch": 5.66, + 
"learning_rate": 2.3355867846631377e-05, + "loss": 5.4049, + "step": 76800 + }, + { + "epoch": 5.67, + "learning_rate": 2.330564233408002e-05, + "loss": 5.0643, + "step": 76850 + }, + { + "epoch": 5.67, + "learning_rate": 2.3255416821528665e-05, + "loss": 5.4007, + "step": 76900 + }, + { + "epoch": 5.68, + "learning_rate": 2.320519130897731e-05, + "loss": 5.3506, + "step": 76950 + }, + { + "epoch": 5.68, + "learning_rate": 2.3154965796425953e-05, + "loss": 6.5889, + "step": 77000 + }, + { + "epoch": 5.68, + "learning_rate": 2.31047402838746e-05, + "loss": 6.206, + "step": 77050 + }, + { + "epoch": 5.69, + "learning_rate": 2.3054514771323244e-05, + "loss": 6.03, + "step": 77100 + }, + { + "epoch": 5.69, + "learning_rate": 2.3004289258771885e-05, + "loss": 5.6658, + "step": 77150 + }, + { + "epoch": 5.69, + "learning_rate": 2.295406374622053e-05, + "loss": 6.5901, + "step": 77200 + }, + { + "epoch": 5.7, + "learning_rate": 2.2903838233669176e-05, + "loss": 6.3019, + "step": 77250 + }, + { + "epoch": 5.7, + "learning_rate": 2.285361272111782e-05, + "loss": 5.5744, + "step": 77300 + }, + { + "epoch": 5.71, + "learning_rate": 2.2803387208566465e-05, + "loss": 5.8269, + "step": 77350 + }, + { + "epoch": 5.71, + "learning_rate": 2.275316169601511e-05, + "loss": 6.1005, + "step": 77400 + }, + { + "epoch": 5.71, + "learning_rate": 2.2702936183463753e-05, + "loss": 6.0196, + "step": 77450 + }, + { + "epoch": 5.72, + "learning_rate": 2.2652710670912397e-05, + "loss": 5.8475, + "step": 77500 + }, + { + "epoch": 5.72, + "learning_rate": 2.260248515836104e-05, + "loss": 6.4338, + "step": 77550 + }, + { + "epoch": 5.72, + "learning_rate": 2.2552259645809688e-05, + "loss": 4.958, + "step": 77600 + }, + { + "epoch": 5.73, + "learning_rate": 2.2502034133258332e-05, + "loss": 6.4737, + "step": 77650 + }, + { + "epoch": 5.73, + "learning_rate": 2.2451808620706976e-05, + "loss": 6.3223, + "step": 77700 + }, + { + "epoch": 5.74, + "learning_rate": 2.240158310815562e-05, + "loss": 7.171, 
+ "step": 77750 + }, + { + "epoch": 5.74, + "learning_rate": 2.2351357595604264e-05, + "loss": 6.5725, + "step": 77800 + }, + { + "epoch": 5.74, + "learning_rate": 2.230113208305291e-05, + "loss": 5.7644, + "step": 77850 + }, + { + "epoch": 5.75, + "learning_rate": 2.2250906570501553e-05, + "loss": 5.6257, + "step": 77900 + }, + { + "epoch": 5.75, + "learning_rate": 2.22006810579502e-05, + "loss": 6.2325, + "step": 77950 + }, + { + "epoch": 5.75, + "learning_rate": 2.215045554539884e-05, + "loss": 6.7106, + "step": 78000 + }, + { + "epoch": 5.76, + "learning_rate": 2.2100230032847485e-05, + "loss": 5.0113, + "step": 78050 + }, + { + "epoch": 5.76, + "learning_rate": 2.205000452029613e-05, + "loss": 6.1309, + "step": 78100 + }, + { + "epoch": 5.76, + "learning_rate": 2.1999779007744776e-05, + "loss": 5.5098, + "step": 78150 + }, + { + "epoch": 5.77, + "learning_rate": 2.194955349519342e-05, + "loss": 6.6709, + "step": 78200 + }, + { + "epoch": 5.77, + "learning_rate": 2.1899327982642064e-05, + "loss": 5.8039, + "step": 78250 + }, + { + "epoch": 5.78, + "learning_rate": 2.184910247009071e-05, + "loss": 5.0271, + "step": 78300 + }, + { + "epoch": 5.78, + "learning_rate": 2.1798876957539352e-05, + "loss": 6.1648, + "step": 78350 + }, + { + "epoch": 5.78, + "learning_rate": 2.1748651444987996e-05, + "loss": 5.1838, + "step": 78400 + }, + { + "epoch": 5.79, + "learning_rate": 2.169842593243664e-05, + "loss": 7.1149, + "step": 78450 + }, + { + "epoch": 5.79, + "learning_rate": 2.1648200419885288e-05, + "loss": 5.9544, + "step": 78500 + }, + { + "epoch": 5.79, + "learning_rate": 2.1597974907333932e-05, + "loss": 6.4747, + "step": 78550 + }, + { + "epoch": 5.8, + "learning_rate": 2.1547749394782573e-05, + "loss": 5.6367, + "step": 78600 + }, + { + "epoch": 5.8, + "learning_rate": 2.149752388223122e-05, + "loss": 5.8395, + "step": 78650 + }, + { + "epoch": 5.81, + "learning_rate": 2.1447298369679864e-05, + "loss": 6.8058, + "step": 78700 + }, + { + "epoch": 5.81, + 
"learning_rate": 2.1397072857128508e-05, + "loss": 6.4977, + "step": 78750 + }, + { + "epoch": 5.81, + "learning_rate": 2.1346847344577152e-05, + "loss": 7.0943, + "step": 78800 + }, + { + "epoch": 5.82, + "learning_rate": 2.12966218320258e-05, + "loss": 6.0009, + "step": 78850 + }, + { + "epoch": 5.82, + "learning_rate": 2.124639631947444e-05, + "loss": 5.8074, + "step": 78900 + }, + { + "epoch": 5.82, + "learning_rate": 2.1196170806923084e-05, + "loss": 6.277, + "step": 78950 + }, + { + "epoch": 5.83, + "learning_rate": 2.114594529437173e-05, + "loss": 5.61, + "step": 79000 + }, + { + "epoch": 5.83, + "learning_rate": 2.1095719781820376e-05, + "loss": 5.6585, + "step": 79050 + }, + { + "epoch": 5.83, + "learning_rate": 2.104549426926902e-05, + "loss": 4.9836, + "step": 79100 + }, + { + "epoch": 5.84, + "learning_rate": 2.099526875671766e-05, + "loss": 6.1327, + "step": 79150 + }, + { + "epoch": 5.84, + "learning_rate": 2.0945043244166308e-05, + "loss": 6.2281, + "step": 79200 + }, + { + "epoch": 5.85, + "learning_rate": 2.0894817731614952e-05, + "loss": 5.9593, + "step": 79250 + }, + { + "epoch": 5.85, + "learning_rate": 2.0844592219063596e-05, + "loss": 5.1415, + "step": 79300 + }, + { + "epoch": 5.85, + "learning_rate": 2.079436670651224e-05, + "loss": 5.719, + "step": 79350 + }, + { + "epoch": 5.86, + "learning_rate": 2.0744141193960888e-05, + "loss": 6.1617, + "step": 79400 + }, + { + "epoch": 5.86, + "learning_rate": 2.069391568140953e-05, + "loss": 6.3103, + "step": 79450 + }, + { + "epoch": 5.86, + "learning_rate": 2.0643690168858172e-05, + "loss": 5.2091, + "step": 79500 + }, + { + "epoch": 5.87, + "learning_rate": 2.059346465630682e-05, + "loss": 6.1573, + "step": 79550 + }, + { + "epoch": 5.87, + "learning_rate": 2.0543239143755464e-05, + "loss": 6.3729, + "step": 79600 + }, + { + "epoch": 5.88, + "learning_rate": 2.0493013631204108e-05, + "loss": 6.1918, + "step": 79650 + }, + { + "epoch": 5.88, + "learning_rate": 2.0442788118652752e-05, + "loss": 
6.7536, + "step": 79700 + }, + { + "epoch": 5.88, + "learning_rate": 2.0392562606101396e-05, + "loss": 5.7906, + "step": 79750 + }, + { + "epoch": 5.89, + "learning_rate": 2.034233709355004e-05, + "loss": 6.3394, + "step": 79800 + }, + { + "epoch": 5.89, + "learning_rate": 2.0292111580998684e-05, + "loss": 5.8182, + "step": 79850 + }, + { + "epoch": 5.89, + "learning_rate": 2.0241886068447328e-05, + "loss": 5.8381, + "step": 79900 + }, + { + "epoch": 5.9, + "learning_rate": 2.0191660555895976e-05, + "loss": 6.4559, + "step": 79950 + }, + { + "epoch": 5.9, + "learning_rate": 2.0141435043344616e-05, + "loss": 5.5812, + "step": 80000 + }, + { + "epoch": 5.9, + "eval_loss": 7.74003791809082, + "eval_runtime": 967.3759, + "eval_samples_per_second": 13.54, + "eval_steps_per_second": 3.385, + "eval_wer": 0.19946663306874973, + "step": 80000 + }, + { + "epoch": 5.9, + "learning_rate": 2.009120953079326e-05, + "loss": 5.7727, + "step": 80050 + }, + { + "epoch": 5.91, + "learning_rate": 2.0040984018241908e-05, + "loss": 5.5779, + "step": 80100 + }, + { + "epoch": 5.91, + "learning_rate": 1.9990758505690552e-05, + "loss": 5.9231, + "step": 80150 + }, + { + "epoch": 5.92, + "learning_rate": 1.9940532993139196e-05, + "loss": 5.428, + "step": 80200 + }, + { + "epoch": 5.92, + "learning_rate": 1.989030748058784e-05, + "loss": 5.4097, + "step": 80250 + }, + { + "epoch": 5.92, + "learning_rate": 1.9840081968036484e-05, + "loss": 6.2016, + "step": 80300 + }, + { + "epoch": 5.93, + "learning_rate": 1.9789856455485128e-05, + "loss": 6.3442, + "step": 80350 + }, + { + "epoch": 5.93, + "learning_rate": 1.9739630942933772e-05, + "loss": 5.775, + "step": 80400 + }, + { + "epoch": 5.93, + "learning_rate": 1.968940543038242e-05, + "loss": 6.2853, + "step": 80450 + }, + { + "epoch": 5.94, + "learning_rate": 1.9639179917831064e-05, + "loss": 6.1317, + "step": 80500 + }, + { + "epoch": 5.94, + "learning_rate": 1.9588954405279708e-05, + "loss": 6.1315, + "step": 80550 + }, + { + "epoch": 5.95, 
+ "learning_rate": 1.9538728892728352e-05, + "loss": 5.6609, + "step": 80600 + }, + { + "epoch": 5.95, + "learning_rate": 1.9488503380176996e-05, + "loss": 5.2825, + "step": 80650 + }, + { + "epoch": 5.95, + "learning_rate": 1.943827786762564e-05, + "loss": 6.114, + "step": 80700 + }, + { + "epoch": 5.96, + "learning_rate": 1.9388052355074284e-05, + "loss": 5.5919, + "step": 80750 + }, + { + "epoch": 5.96, + "learning_rate": 1.933782684252293e-05, + "loss": 5.7829, + "step": 80800 + }, + { + "epoch": 5.96, + "learning_rate": 1.9287601329971575e-05, + "loss": 5.2979, + "step": 80850 + }, + { + "epoch": 5.97, + "learning_rate": 1.9237375817420216e-05, + "loss": 6.2099, + "step": 80900 + }, + { + "epoch": 5.97, + "learning_rate": 1.918715030486886e-05, + "loss": 6.3945, + "step": 80950 + }, + { + "epoch": 5.97, + "learning_rate": 1.9136924792317508e-05, + "loss": 6.1131, + "step": 81000 + }, + { + "epoch": 5.98, + "learning_rate": 1.908669927976615e-05, + "loss": 5.7025, + "step": 81050 + }, + { + "epoch": 5.98, + "learning_rate": 1.9036473767214796e-05, + "loss": 6.2167, + "step": 81100 + }, + { + "epoch": 5.99, + "learning_rate": 1.898624825466344e-05, + "loss": 4.7241, + "step": 81150 + }, + { + "epoch": 5.99, + "learning_rate": 1.8936022742112084e-05, + "loss": 5.4894, + "step": 81200 + }, + { + "epoch": 5.99, + "learning_rate": 1.8885797229560728e-05, + "loss": 5.5749, + "step": 81250 + }, + { + "epoch": 6.0, + "learning_rate": 1.8835571717009372e-05, + "loss": 6.286, + "step": 81300 + }, + { + "epoch": 6.0, + "learning_rate": 1.878534620445802e-05, + "loss": 5.5281, + "step": 81350 + }, + { + "epoch": 6.0, + "learning_rate": 1.8735120691906663e-05, + "loss": 5.5189, + "step": 81400 + }, + { + "epoch": 6.01, + "learning_rate": 1.8684895179355304e-05, + "loss": 5.5588, + "step": 81450 + }, + { + "epoch": 6.01, + "learning_rate": 1.863466966680395e-05, + "loss": 4.9543, + "step": 81500 + }, + { + "epoch": 6.02, + "learning_rate": 1.8584444154252596e-05, + "loss": 
6.1674, + "step": 81550 + }, + { + "epoch": 6.02, + "learning_rate": 1.853421864170124e-05, + "loss": 5.6677, + "step": 81600 + }, + { + "epoch": 6.02, + "learning_rate": 1.8483993129149884e-05, + "loss": 6.0838, + "step": 81650 + }, + { + "epoch": 6.03, + "learning_rate": 1.843376761659853e-05, + "loss": 5.4898, + "step": 81700 + }, + { + "epoch": 6.03, + "learning_rate": 1.8383542104047172e-05, + "loss": 5.2513, + "step": 81750 + }, + { + "epoch": 6.03, + "learning_rate": 1.8333316591495816e-05, + "loss": 5.2589, + "step": 81800 + }, + { + "epoch": 6.04, + "learning_rate": 1.828309107894446e-05, + "loss": 5.8669, + "step": 81850 + }, + { + "epoch": 6.04, + "learning_rate": 1.8232865566393107e-05, + "loss": 5.1666, + "step": 81900 + }, + { + "epoch": 6.04, + "learning_rate": 1.818264005384175e-05, + "loss": 6.0622, + "step": 81950 + }, + { + "epoch": 6.05, + "learning_rate": 1.8132414541290392e-05, + "loss": 5.9538, + "step": 82000 + }, + { + "epoch": 6.05, + "learning_rate": 1.808218902873904e-05, + "loss": 5.3682, + "step": 82050 + }, + { + "epoch": 6.06, + "learning_rate": 1.8031963516187684e-05, + "loss": 6.2593, + "step": 82100 + }, + { + "epoch": 6.06, + "learning_rate": 1.7981738003636328e-05, + "loss": 5.7516, + "step": 82150 + }, + { + "epoch": 6.06, + "learning_rate": 1.793151249108497e-05, + "loss": 5.034, + "step": 82200 + }, + { + "epoch": 6.07, + "learning_rate": 1.788128697853362e-05, + "loss": 5.9257, + "step": 82250 + }, + { + "epoch": 6.07, + "learning_rate": 1.783106146598226e-05, + "loss": 6.0995, + "step": 82300 + }, + { + "epoch": 6.07, + "learning_rate": 1.7780835953430904e-05, + "loss": 5.6513, + "step": 82350 + }, + { + "epoch": 6.08, + "learning_rate": 1.773061044087955e-05, + "loss": 5.4488, + "step": 82400 + }, + { + "epoch": 6.08, + "learning_rate": 1.7680384928328195e-05, + "loss": 5.9832, + "step": 82450 + }, + { + "epoch": 6.09, + "learning_rate": 1.763015941577684e-05, + "loss": 5.3573, + "step": 82500 + }, + { + "epoch": 6.09, + 
"learning_rate": 1.7579933903225483e-05, + "loss": 5.7321, + "step": 82550 + }, + { + "epoch": 6.09, + "learning_rate": 1.7529708390674127e-05, + "loss": 6.0602, + "step": 82600 + }, + { + "epoch": 6.1, + "learning_rate": 1.747948287812277e-05, + "loss": 5.6744, + "step": 82650 + }, + { + "epoch": 6.1, + "learning_rate": 1.7429257365571416e-05, + "loss": 4.8128, + "step": 82700 + }, + { + "epoch": 6.1, + "learning_rate": 1.737903185302006e-05, + "loss": 5.271, + "step": 82750 + }, + { + "epoch": 6.11, + "learning_rate": 1.7328806340468707e-05, + "loss": 6.0612, + "step": 82800 + }, + { + "epoch": 6.11, + "learning_rate": 1.727858082791735e-05, + "loss": 6.1573, + "step": 82850 + }, + { + "epoch": 6.11, + "learning_rate": 1.7228355315365992e-05, + "loss": 6.6811, + "step": 82900 + }, + { + "epoch": 6.12, + "learning_rate": 1.717812980281464e-05, + "loss": 5.7212, + "step": 82950 + }, + { + "epoch": 6.12, + "learning_rate": 1.7127904290263283e-05, + "loss": 5.7033, + "step": 83000 + }, + { + "epoch": 6.13, + "learning_rate": 1.7077678777711927e-05, + "loss": 5.6656, + "step": 83050 + }, + { + "epoch": 6.13, + "learning_rate": 1.702745326516057e-05, + "loss": 5.6811, + "step": 83100 + }, + { + "epoch": 6.13, + "learning_rate": 1.6977227752609215e-05, + "loss": 5.8367, + "step": 83150 + }, + { + "epoch": 6.14, + "learning_rate": 1.692700224005786e-05, + "loss": 5.2576, + "step": 83200 + }, + { + "epoch": 6.14, + "learning_rate": 1.6876776727506504e-05, + "loss": 5.7272, + "step": 83250 + }, + { + "epoch": 6.14, + "learning_rate": 1.682655121495515e-05, + "loss": 5.1136, + "step": 83300 + }, + { + "epoch": 6.15, + "learning_rate": 1.6776325702403795e-05, + "loss": 5.7522, + "step": 83350 + }, + { + "epoch": 6.15, + "learning_rate": 1.672610018985244e-05, + "loss": 5.89, + "step": 83400 + }, + { + "epoch": 6.16, + "learning_rate": 1.6675874677301083e-05, + "loss": 6.5674, + "step": 83450 + }, + { + "epoch": 6.16, + "learning_rate": 1.6625649164749727e-05, + "loss": 
5.241, + "step": 83500 + }, + { + "epoch": 6.16, + "learning_rate": 1.657542365219837e-05, + "loss": 6.0398, + "step": 83550 + }, + { + "epoch": 6.17, + "learning_rate": 1.6525198139647015e-05, + "loss": 5.3322, + "step": 83600 + }, + { + "epoch": 6.17, + "learning_rate": 1.6474972627095663e-05, + "loss": 6.3029, + "step": 83650 + }, + { + "epoch": 6.17, + "learning_rate": 1.6424747114544307e-05, + "loss": 5.1723, + "step": 83700 + }, + { + "epoch": 6.18, + "learning_rate": 1.6374521601992947e-05, + "loss": 5.5713, + "step": 83750 + }, + { + "epoch": 6.18, + "learning_rate": 1.632429608944159e-05, + "loss": 6.0942, + "step": 83800 + }, + { + "epoch": 6.18, + "learning_rate": 1.627407057689024e-05, + "loss": 5.8515, + "step": 83850 + }, + { + "epoch": 6.19, + "learning_rate": 1.6223845064338883e-05, + "loss": 6.5971, + "step": 83900 + }, + { + "epoch": 6.19, + "learning_rate": 1.6173619551787527e-05, + "loss": 5.87, + "step": 83950 + }, + { + "epoch": 6.2, + "learning_rate": 1.612339403923617e-05, + "loss": 5.1113, + "step": 84000 + }, + { + "epoch": 6.2, + "learning_rate": 1.6073168526684815e-05, + "loss": 5.6073, + "step": 84050 + }, + { + "epoch": 6.2, + "learning_rate": 1.602294301413346e-05, + "loss": 4.9219, + "step": 84100 + }, + { + "epoch": 6.21, + "learning_rate": 1.5972717501582103e-05, + "loss": 5.2715, + "step": 84150 + }, + { + "epoch": 6.21, + "learning_rate": 1.592249198903075e-05, + "loss": 5.2142, + "step": 84200 + }, + { + "epoch": 6.21, + "learning_rate": 1.5872266476479395e-05, + "loss": 5.7238, + "step": 84250 + }, + { + "epoch": 6.22, + "learning_rate": 1.5822040963928035e-05, + "loss": 5.2094, + "step": 84300 + }, + { + "epoch": 6.22, + "learning_rate": 1.5771815451376683e-05, + "loss": 5.6301, + "step": 84350 + }, + { + "epoch": 6.23, + "learning_rate": 1.5721589938825327e-05, + "loss": 6.3736, + "step": 84400 + }, + { + "epoch": 6.23, + "learning_rate": 1.567136442627397e-05, + "loss": 5.3024, + "step": 84450 + }, + { + "epoch": 6.23, + 
"learning_rate": 1.5621138913722615e-05, + "loss": 5.251, + "step": 84500 + }, + { + "epoch": 6.24, + "learning_rate": 1.5570913401171263e-05, + "loss": 5.0983, + "step": 84550 + }, + { + "epoch": 6.24, + "learning_rate": 1.5520687888619903e-05, + "loss": 5.3273, + "step": 84600 + }, + { + "epoch": 6.24, + "learning_rate": 1.5470462376068547e-05, + "loss": 5.0034, + "step": 84650 + }, + { + "epoch": 6.25, + "learning_rate": 1.542023686351719e-05, + "loss": 6.1106, + "step": 84700 + }, + { + "epoch": 6.25, + "learning_rate": 1.537001135096584e-05, + "loss": 6.0266, + "step": 84750 + }, + { + "epoch": 6.26, + "learning_rate": 1.5319785838414483e-05, + "loss": 5.941, + "step": 84800 + }, + { + "epoch": 6.26, + "learning_rate": 1.5269560325863123e-05, + "loss": 6.0568, + "step": 84850 + }, + { + "epoch": 6.26, + "learning_rate": 1.521933481331177e-05, + "loss": 5.2097, + "step": 84900 + }, + { + "epoch": 6.27, + "learning_rate": 1.5169109300760415e-05, + "loss": 5.66, + "step": 84950 + }, + { + "epoch": 6.27, + "learning_rate": 1.5118883788209059e-05, + "loss": 5.1255, + "step": 85000 + }, + { + "epoch": 6.27, + "learning_rate": 1.5068658275657705e-05, + "loss": 4.3753, + "step": 85050 + }, + { + "epoch": 6.28, + "learning_rate": 1.5018432763106349e-05, + "loss": 5.7544, + "step": 85100 + }, + { + "epoch": 6.28, + "learning_rate": 1.4968207250554991e-05, + "loss": 5.0477, + "step": 85150 + }, + { + "epoch": 6.28, + "learning_rate": 1.4917981738003637e-05, + "loss": 4.7382, + "step": 85200 + }, + { + "epoch": 6.29, + "learning_rate": 1.4867756225452281e-05, + "loss": 6.1112, + "step": 85250 + }, + { + "epoch": 6.29, + "learning_rate": 1.4817530712900927e-05, + "loss": 5.9275, + "step": 85300 + }, + { + "epoch": 6.3, + "learning_rate": 1.476730520034957e-05, + "loss": 4.9116, + "step": 85350 + }, + { + "epoch": 6.3, + "learning_rate": 1.4717079687798217e-05, + "loss": 5.3206, + "step": 85400 + }, + { + "epoch": 6.3, + "learning_rate": 1.4666854175246857e-05, + "loss": 
5.1605, + "step": 85450 + }, + { + "epoch": 6.31, + "learning_rate": 1.4616628662695503e-05, + "loss": 5.2704, + "step": 85500 + }, + { + "epoch": 6.31, + "learning_rate": 1.4566403150144147e-05, + "loss": 5.4004, + "step": 85550 + }, + { + "epoch": 6.31, + "learning_rate": 1.4516177637592793e-05, + "loss": 5.1312, + "step": 85600 + }, + { + "epoch": 6.32, + "learning_rate": 1.4465952125041437e-05, + "loss": 5.4173, + "step": 85650 + }, + { + "epoch": 6.32, + "learning_rate": 1.4415726612490083e-05, + "loss": 5.5278, + "step": 85700 + }, + { + "epoch": 6.33, + "learning_rate": 1.4365501099938725e-05, + "loss": 6.4264, + "step": 85750 + }, + { + "epoch": 6.33, + "learning_rate": 1.4315275587387369e-05, + "loss": 6.233, + "step": 85800 + }, + { + "epoch": 6.33, + "learning_rate": 1.4265050074836015e-05, + "loss": 5.6656, + "step": 85850 + }, + { + "epoch": 6.34, + "learning_rate": 1.4214824562284659e-05, + "loss": 5.3803, + "step": 85900 + }, + { + "epoch": 6.34, + "learning_rate": 1.4164599049733305e-05, + "loss": 6.1274, + "step": 85950 + }, + { + "epoch": 6.34, + "learning_rate": 1.4114373537181947e-05, + "loss": 5.2657, + "step": 86000 + }, + { + "epoch": 6.35, + "learning_rate": 1.4064148024630591e-05, + "loss": 6.0392, + "step": 86050 + }, + { + "epoch": 6.35, + "learning_rate": 1.4013922512079237e-05, + "loss": 5.1692, + "step": 86100 + }, + { + "epoch": 6.35, + "learning_rate": 1.396369699952788e-05, + "loss": 5.1826, + "step": 86150 + }, + { + "epoch": 6.36, + "learning_rate": 1.3913471486976526e-05, + "loss": 5.5917, + "step": 86200 + }, + { + "epoch": 6.36, + "learning_rate": 1.386324597442517e-05, + "loss": 5.1852, + "step": 86250 + }, + { + "epoch": 6.37, + "learning_rate": 1.3813020461873813e-05, + "loss": 5.1779, + "step": 86300 + }, + { + "epoch": 6.37, + "learning_rate": 1.3762794949322459e-05, + "loss": 5.6655, + "step": 86350 + }, + { + "epoch": 6.37, + "learning_rate": 1.3712569436771103e-05, + "loss": 5.6346, + "step": 86400 + }, + { + "epoch": 
6.38, + "learning_rate": 1.3662343924219748e-05, + "loss": 6.4509, + "step": 86450 + }, + { + "epoch": 6.38, + "learning_rate": 1.3612118411668392e-05, + "loss": 6.6129, + "step": 86500 + }, + { + "epoch": 6.38, + "learning_rate": 1.3561892899117038e-05, + "loss": 5.1282, + "step": 86550 + }, + { + "epoch": 6.39, + "learning_rate": 1.3511667386565679e-05, + "loss": 6.06, + "step": 86600 + }, + { + "epoch": 6.39, + "learning_rate": 1.3461441874014325e-05, + "loss": 6.2118, + "step": 86650 + }, + { + "epoch": 6.4, + "learning_rate": 1.3411216361462969e-05, + "loss": 5.9067, + "step": 86700 + }, + { + "epoch": 6.4, + "learning_rate": 1.3360990848911614e-05, + "loss": 6.5174, + "step": 86750 + }, + { + "epoch": 6.4, + "learning_rate": 1.3310765336360259e-05, + "loss": 6.0514, + "step": 86800 + }, + { + "epoch": 6.41, + "learning_rate": 1.3260539823808901e-05, + "loss": 6.2585, + "step": 86850 + }, + { + "epoch": 6.41, + "learning_rate": 1.3210314311257547e-05, + "loss": 5.8812, + "step": 86900 + }, + { + "epoch": 6.41, + "learning_rate": 1.316008879870619e-05, + "loss": 5.7082, + "step": 86950 + }, + { + "epoch": 6.42, + "learning_rate": 1.3109863286154836e-05, + "loss": 5.9844, + "step": 87000 + }, + { + "epoch": 6.42, + "learning_rate": 1.305963777360348e-05, + "loss": 5.2375, + "step": 87050 + }, + { + "epoch": 6.42, + "learning_rate": 1.3009412261052126e-05, + "loss": 5.8796, + "step": 87100 + }, + { + "epoch": 6.43, + "learning_rate": 1.2959186748500769e-05, + "loss": 5.0831, + "step": 87150 + }, + { + "epoch": 6.43, + "learning_rate": 1.2908961235949413e-05, + "loss": 5.9476, + "step": 87200 + }, + { + "epoch": 6.44, + "learning_rate": 1.2858735723398058e-05, + "loss": 5.9525, + "step": 87250 + }, + { + "epoch": 6.44, + "learning_rate": 1.2808510210846702e-05, + "loss": 4.9682, + "step": 87300 + }, + { + "epoch": 6.44, + "learning_rate": 1.2758284698295348e-05, + "loss": 5.8342, + "step": 87350 + }, + { + "epoch": 6.45, + "learning_rate": 1.2708059185743992e-05, 
+ "loss": 5.2582, + "step": 87400 + }, + { + "epoch": 6.45, + "learning_rate": 1.2657833673192635e-05, + "loss": 5.6833, + "step": 87450 + }, + { + "epoch": 6.45, + "learning_rate": 1.2607608160641279e-05, + "loss": 6.4167, + "step": 87500 + }, + { + "epoch": 6.46, + "learning_rate": 1.2557382648089924e-05, + "loss": 6.0258, + "step": 87550 + }, + { + "epoch": 6.46, + "learning_rate": 1.2507157135538568e-05, + "loss": 6.4125, + "step": 87600 + }, + { + "epoch": 6.47, + "learning_rate": 1.2456931622987213e-05, + "loss": 5.5319, + "step": 87650 + }, + { + "epoch": 6.47, + "learning_rate": 1.2406706110435858e-05, + "loss": 5.169, + "step": 87700 + }, + { + "epoch": 6.47, + "learning_rate": 1.2356480597884502e-05, + "loss": 4.8537, + "step": 87750 + }, + { + "epoch": 6.48, + "learning_rate": 1.2306255085333146e-05, + "loss": 5.6043, + "step": 87800 + }, + { + "epoch": 6.48, + "learning_rate": 1.225602957278179e-05, + "loss": 5.1859, + "step": 87850 + }, + { + "epoch": 6.48, + "learning_rate": 1.2205804060230436e-05, + "loss": 4.814, + "step": 87900 + }, + { + "epoch": 6.49, + "learning_rate": 1.2155578547679079e-05, + "loss": 5.4293, + "step": 87950 + }, + { + "epoch": 6.49, + "learning_rate": 1.2105353035127724e-05, + "loss": 6.1502, + "step": 88000 + }, + { + "epoch": 6.49, + "learning_rate": 1.2055127522576368e-05, + "loss": 6.1367, + "step": 88050 + }, + { + "epoch": 6.5, + "learning_rate": 1.2004902010025012e-05, + "loss": 6.2755, + "step": 88100 + }, + { + "epoch": 6.5, + "learning_rate": 1.1954676497473658e-05, + "loss": 5.211, + "step": 88150 + }, + { + "epoch": 6.51, + "learning_rate": 1.1904450984922302e-05, + "loss": 5.2505, + "step": 88200 + }, + { + "epoch": 6.51, + "learning_rate": 1.1854225472370946e-05, + "loss": 6.7278, + "step": 88250 + }, + { + "epoch": 6.51, + "learning_rate": 1.180399995981959e-05, + "loss": 6.1237, + "step": 88300 + }, + { + "epoch": 6.52, + "learning_rate": 1.1753774447268234e-05, + "loss": 6.1895, + "step": 88350 + }, + { + 
"epoch": 6.52, + "learning_rate": 1.1703548934716878e-05, + "loss": 5.1235, + "step": 88400 + }, + { + "epoch": 6.52, + "learning_rate": 1.1653323422165524e-05, + "loss": 5.9953, + "step": 88450 + }, + { + "epoch": 6.53, + "learning_rate": 1.1603097909614168e-05, + "loss": 6.2782, + "step": 88500 + }, + { + "epoch": 6.53, + "learning_rate": 1.1552872397062812e-05, + "loss": 5.3504, + "step": 88550 + }, + { + "epoch": 6.54, + "learning_rate": 1.1502646884511458e-05, + "loss": 5.7504, + "step": 88600 + }, + { + "epoch": 6.54, + "learning_rate": 1.14524213719601e-05, + "loss": 5.3578, + "step": 88650 + }, + { + "epoch": 6.54, + "learning_rate": 1.1402195859408746e-05, + "loss": 5.0383, + "step": 88700 + }, + { + "epoch": 6.55, + "learning_rate": 1.135197034685739e-05, + "loss": 5.9042, + "step": 88750 + }, + { + "epoch": 6.55, + "learning_rate": 1.1301744834306034e-05, + "loss": 5.155, + "step": 88800 + }, + { + "epoch": 6.55, + "learning_rate": 1.1251519321754678e-05, + "loss": 5.9763, + "step": 88850 + }, + { + "epoch": 6.56, + "learning_rate": 1.1201293809203324e-05, + "loss": 5.6412, + "step": 88900 + }, + { + "epoch": 6.56, + "learning_rate": 1.1151068296651968e-05, + "loss": 5.4027, + "step": 88950 + }, + { + "epoch": 6.56, + "learning_rate": 1.1100842784100612e-05, + "loss": 5.3177, + "step": 89000 + }, + { + "epoch": 6.57, + "learning_rate": 1.1050617271549258e-05, + "loss": 5.5425, + "step": 89050 + }, + { + "epoch": 6.57, + "learning_rate": 1.10003917589979e-05, + "loss": 5.1199, + "step": 89100 + }, + { + "epoch": 6.58, + "learning_rate": 1.0950166246446546e-05, + "loss": 5.2852, + "step": 89150 + }, + { + "epoch": 6.58, + "learning_rate": 1.089994073389519e-05, + "loss": 5.905, + "step": 89200 + }, + { + "epoch": 6.58, + "learning_rate": 1.0849715221343834e-05, + "loss": 5.7968, + "step": 89250 + }, + { + "epoch": 6.59, + "learning_rate": 1.079948970879248e-05, + "loss": 6.0684, + "step": 89300 + }, + { + "epoch": 6.59, + "learning_rate": 
1.0749264196241122e-05, + "loss": 4.8366, + "step": 89350 + }, + { + "epoch": 6.59, + "learning_rate": 1.0699038683689768e-05, + "loss": 6.2994, + "step": 89400 + }, + { + "epoch": 6.6, + "learning_rate": 1.0648813171138412e-05, + "loss": 5.2661, + "step": 89450 + }, + { + "epoch": 6.6, + "learning_rate": 1.0598587658587056e-05, + "loss": 6.3783, + "step": 89500 + }, + { + "epoch": 6.61, + "learning_rate": 1.05483621460357e-05, + "loss": 5.4387, + "step": 89550 + }, + { + "epoch": 6.61, + "learning_rate": 1.0498136633484346e-05, + "loss": 5.3958, + "step": 89600 + }, + { + "epoch": 6.61, + "learning_rate": 1.044791112093299e-05, + "loss": 5.2108, + "step": 89650 + }, + { + "epoch": 6.62, + "learning_rate": 1.0397685608381634e-05, + "loss": 5.9092, + "step": 89700 + }, + { + "epoch": 6.62, + "learning_rate": 1.034746009583028e-05, + "loss": 5.8141, + "step": 89750 + }, + { + "epoch": 6.62, + "learning_rate": 1.0297234583278922e-05, + "loss": 6.1024, + "step": 89800 + }, + { + "epoch": 6.63, + "learning_rate": 1.0247009070727568e-05, + "loss": 5.5871, + "step": 89850 + }, + { + "epoch": 6.63, + "learning_rate": 1.0196783558176212e-05, + "loss": 5.323, + "step": 89900 + }, + { + "epoch": 6.63, + "learning_rate": 1.0146558045624856e-05, + "loss": 5.9425, + "step": 89950 + }, + { + "epoch": 6.64, + "learning_rate": 1.00963325330735e-05, + "loss": 5.7013, + "step": 90000 + }, + { + "epoch": 6.64, + "learning_rate": 1.0046107020522146e-05, + "loss": 5.701, + "step": 90050 + }, + { + "epoch": 6.65, + "learning_rate": 9.99588150797079e-06, + "loss": 6.146, + "step": 90100 + }, + { + "epoch": 6.65, + "learning_rate": 9.945655995419434e-06, + "loss": 5.4734, + "step": 90150 + }, + { + "epoch": 6.65, + "learning_rate": 9.89543048286808e-06, + "loss": 5.8372, + "step": 90200 + }, + { + "epoch": 6.66, + "learning_rate": 9.845204970316722e-06, + "loss": 6.0749, + "step": 90250 + }, + { + "epoch": 6.66, + "learning_rate": 9.794979457765368e-06, + "loss": 5.2792, + "step": 90300 + 
}, + { + "epoch": 6.66, + "learning_rate": 9.74475394521401e-06, + "loss": 5.759, + "step": 90350 + }, + { + "epoch": 6.67, + "learning_rate": 9.694528432662656e-06, + "loss": 4.975, + "step": 90400 + }, + { + "epoch": 6.67, + "learning_rate": 9.6443029201113e-06, + "loss": 6.1866, + "step": 90450 + }, + { + "epoch": 6.68, + "learning_rate": 9.594077407559944e-06, + "loss": 6.1507, + "step": 90500 + }, + { + "epoch": 6.68, + "learning_rate": 9.54385189500859e-06, + "loss": 5.9567, + "step": 90550 + }, + { + "epoch": 6.68, + "learning_rate": 9.493626382457234e-06, + "loss": 5.8165, + "step": 90600 + }, + { + "epoch": 6.69, + "learning_rate": 9.443400869905878e-06, + "loss": 5.3155, + "step": 90650 + }, + { + "epoch": 6.69, + "learning_rate": 9.393175357354522e-06, + "loss": 5.5206, + "step": 90700 + }, + { + "epoch": 6.69, + "learning_rate": 9.342949844803168e-06, + "loss": 4.7732, + "step": 90750 + }, + { + "epoch": 6.7, + "learning_rate": 9.29272433225181e-06, + "loss": 5.5809, + "step": 90800 + }, + { + "epoch": 6.7, + "learning_rate": 9.242498819700456e-06, + "loss": 5.7061, + "step": 90850 + }, + { + "epoch": 6.71, + "learning_rate": 9.1922733071491e-06, + "loss": 6.1421, + "step": 90900 + }, + { + "epoch": 6.71, + "learning_rate": 9.142047794597744e-06, + "loss": 5.4665, + "step": 90950 + }, + { + "epoch": 6.71, + "learning_rate": 9.09182228204639e-06, + "loss": 5.4408, + "step": 91000 + }, + { + "epoch": 6.72, + "learning_rate": 9.041596769495034e-06, + "loss": 5.3826, + "step": 91050 + }, + { + "epoch": 6.72, + "learning_rate": 8.991371256943678e-06, + "loss": 5.9068, + "step": 91100 + }, + { + "epoch": 6.72, + "learning_rate": 8.941145744392322e-06, + "loss": 5.1764, + "step": 91150 + }, + { + "epoch": 6.73, + "learning_rate": 8.890920231840966e-06, + "loss": 5.2145, + "step": 91200 + }, + { + "epoch": 6.73, + "learning_rate": 8.84069471928961e-06, + "loss": 5.5649, + "step": 91250 + }, + { + "epoch": 6.73, + "learning_rate": 8.790469206738256e-06, + 
"loss": 6.5099, + "step": 91300 + }, + { + "epoch": 6.74, + "learning_rate": 8.7402436941869e-06, + "loss": 5.3549, + "step": 91350 + }, + { + "epoch": 6.74, + "learning_rate": 8.690018181635544e-06, + "loss": 5.5097, + "step": 91400 + }, + { + "epoch": 6.75, + "learning_rate": 8.63979266908419e-06, + "loss": 6.265, + "step": 91450 + }, + { + "epoch": 6.75, + "learning_rate": 8.589567156532832e-06, + "loss": 5.3955, + "step": 91500 + }, + { + "epoch": 6.75, + "learning_rate": 8.539341643981478e-06, + "loss": 6.4223, + "step": 91550 + }, + { + "epoch": 6.76, + "learning_rate": 8.489116131430122e-06, + "loss": 5.8701, + "step": 91600 + }, + { + "epoch": 6.76, + "learning_rate": 8.438890618878766e-06, + "loss": 5.7781, + "step": 91650 + }, + { + "epoch": 6.76, + "learning_rate": 8.38866510632741e-06, + "loss": 5.522, + "step": 91700 + }, + { + "epoch": 6.77, + "learning_rate": 8.338439593776055e-06, + "loss": 5.6781, + "step": 91750 + }, + { + "epoch": 6.77, + "learning_rate": 8.2882140812247e-06, + "loss": 5.5512, + "step": 91800 + }, + { + "epoch": 6.78, + "learning_rate": 8.237988568673344e-06, + "loss": 5.4052, + "step": 91850 + }, + { + "epoch": 6.78, + "learning_rate": 8.18776305612199e-06, + "loss": 6.6671, + "step": 91900 + }, + { + "epoch": 6.78, + "learning_rate": 8.137537543570632e-06, + "loss": 5.2754, + "step": 91950 + }, + { + "epoch": 6.79, + "learning_rate": 8.087312031019277e-06, + "loss": 6.2486, + "step": 92000 + }, + { + "epoch": 6.79, + "learning_rate": 8.037086518467921e-06, + "loss": 5.0945, + "step": 92050 + }, + { + "epoch": 6.79, + "learning_rate": 7.986861005916565e-06, + "loss": 5.3328, + "step": 92100 + }, + { + "epoch": 6.8, + "learning_rate": 7.936635493365211e-06, + "loss": 5.1418, + "step": 92150 + }, + { + "epoch": 6.8, + "learning_rate": 7.886409980813854e-06, + "loss": 5.1135, + "step": 92200 + }, + { + "epoch": 6.8, + "learning_rate": 7.8361844682625e-06, + "loss": 5.5731, + "step": 92250 + }, + { + "epoch": 6.81, + 
"learning_rate": 7.785958955711143e-06, + "loss": 5.4876, + "step": 92300 + }, + { + "epoch": 6.81, + "learning_rate": 7.735733443159787e-06, + "loss": 4.4139, + "step": 92350 + }, + { + "epoch": 6.82, + "learning_rate": 7.685507930608432e-06, + "loss": 5.5843, + "step": 92400 + }, + { + "epoch": 6.82, + "learning_rate": 7.635282418057077e-06, + "loss": 5.8209, + "step": 92450 + }, + { + "epoch": 6.82, + "learning_rate": 7.5850569055057205e-06, + "loss": 5.1681, + "step": 92500 + }, + { + "epoch": 6.83, + "learning_rate": 7.534831392954365e-06, + "loss": 5.2068, + "step": 92550 + }, + { + "epoch": 6.83, + "learning_rate": 7.48460588040301e-06, + "loss": 6.0487, + "step": 92600 + }, + { + "epoch": 6.83, + "learning_rate": 7.4343803678516535e-06, + "loss": 5.1479, + "step": 92650 + }, + { + "epoch": 6.84, + "learning_rate": 7.384154855300298e-06, + "loss": 5.7305, + "step": 92700 + }, + { + "epoch": 6.84, + "learning_rate": 7.333929342748943e-06, + "loss": 5.1534, + "step": 92750 + }, + { + "epoch": 6.85, + "learning_rate": 7.283703830197587e-06, + "loss": 6.7006, + "step": 92800 + }, + { + "epoch": 6.85, + "learning_rate": 7.233478317646232e-06, + "loss": 6.093, + "step": 92850 + }, + { + "epoch": 6.85, + "learning_rate": 7.183252805094877e-06, + "loss": 5.3032, + "step": 92900 + }, + { + "epoch": 6.86, + "learning_rate": 7.13302729254352e-06, + "loss": 5.417, + "step": 92950 + }, + { + "epoch": 6.86, + "learning_rate": 7.082801779992165e-06, + "loss": 5.2938, + "step": 93000 + }, + { + "epoch": 6.86, + "learning_rate": 7.03257626744081e-06, + "loss": 5.0712, + "step": 93050 + }, + { + "epoch": 6.87, + "learning_rate": 6.982350754889453e-06, + "loss": 5.4341, + "step": 93100 + }, + { + "epoch": 6.87, + "learning_rate": 6.932125242338098e-06, + "loss": 4.828, + "step": 93150 + }, + { + "epoch": 6.87, + "learning_rate": 6.881899729786742e-06, + "loss": 5.5724, + "step": 93200 + }, + { + "epoch": 6.88, + "learning_rate": 6.831674217235387e-06, + "loss": 4.9367, + 
"step": 93250 + }, + { + "epoch": 6.88, + "learning_rate": 6.781448704684032e-06, + "loss": 5.3421, + "step": 93300 + }, + { + "epoch": 6.89, + "learning_rate": 6.731223192132675e-06, + "loss": 5.5862, + "step": 93350 + }, + { + "epoch": 6.89, + "learning_rate": 6.68099767958132e-06, + "loss": 4.857, + "step": 93400 + }, + { + "epoch": 6.89, + "learning_rate": 6.630772167029965e-06, + "loss": 5.4573, + "step": 93450 + }, + { + "epoch": 6.9, + "learning_rate": 6.580546654478609e-06, + "loss": 5.536, + "step": 93500 + }, + { + "epoch": 6.9, + "learning_rate": 6.530321141927253e-06, + "loss": 5.818, + "step": 93550 + }, + { + "epoch": 6.9, + "learning_rate": 6.480095629375898e-06, + "loss": 5.3187, + "step": 93600 + }, + { + "epoch": 6.91, + "learning_rate": 6.429870116824542e-06, + "loss": 5.2464, + "step": 93650 + }, + { + "epoch": 6.91, + "learning_rate": 6.379644604273187e-06, + "loss": 5.4407, + "step": 93700 + }, + { + "epoch": 6.92, + "learning_rate": 6.329419091721832e-06, + "loss": 5.2338, + "step": 93750 + }, + { + "epoch": 6.92, + "learning_rate": 6.279193579170475e-06, + "loss": 5.4482, + "step": 93800 + }, + { + "epoch": 6.92, + "learning_rate": 6.22896806661912e-06, + "loss": 5.1899, + "step": 93850 + }, + { + "epoch": 6.93, + "learning_rate": 6.178742554067764e-06, + "loss": 5.4041, + "step": 93900 + }, + { + "epoch": 6.93, + "learning_rate": 6.128517041516409e-06, + "loss": 6.1085, + "step": 93950 + }, + { + "epoch": 6.93, + "learning_rate": 6.078291528965054e-06, + "loss": 5.363, + "step": 94000 + }, + { + "epoch": 6.94, + "learning_rate": 6.028066016413698e-06, + "loss": 6.0109, + "step": 94050 + }, + { + "epoch": 6.94, + "learning_rate": 5.977840503862342e-06, + "loss": 5.5074, + "step": 94100 + }, + { + "epoch": 6.94, + "learning_rate": 5.927614991310987e-06, + "loss": 5.831, + "step": 94150 + }, + { + "epoch": 6.95, + "learning_rate": 5.877389478759631e-06, + "loss": 6.5099, + "step": 94200 + }, + { + "epoch": 6.95, + "learning_rate": 
5.827163966208275e-06, + "loss": 5.213, + "step": 94250 + }, + { + "epoch": 6.96, + "learning_rate": 5.77693845365692e-06, + "loss": 5.3367, + "step": 94300 + }, + { + "epoch": 6.96, + "learning_rate": 5.726712941105564e-06, + "loss": 5.6918, + "step": 94350 + }, + { + "epoch": 6.96, + "learning_rate": 5.676487428554209e-06, + "loss": 5.8001, + "step": 94400 + }, + { + "epoch": 6.97, + "learning_rate": 5.626261916002853e-06, + "loss": 4.8384, + "step": 94450 + }, + { + "epoch": 6.97, + "learning_rate": 5.576036403451498e-06, + "loss": 5.9708, + "step": 94500 + }, + { + "epoch": 6.97, + "learning_rate": 5.525810890900142e-06, + "loss": 5.1562, + "step": 94550 + }, + { + "epoch": 6.98, + "learning_rate": 5.475585378348786e-06, + "loss": 6.0821, + "step": 94600 + }, + { + "epoch": 6.98, + "learning_rate": 5.425359865797431e-06, + "loss": 6.4639, + "step": 94650 + }, + { + "epoch": 6.99, + "learning_rate": 5.375134353246075e-06, + "loss": 5.8299, + "step": 94700 + }, + { + "epoch": 6.99, + "learning_rate": 5.324908840694719e-06, + "loss": 5.219, + "step": 94750 + }, + { + "epoch": 6.99, + "learning_rate": 5.274683328143364e-06, + "loss": 4.9022, + "step": 94800 + }, + { + "epoch": 7.0, + "learning_rate": 5.224457815592009e-06, + "loss": 5.8906, + "step": 94850 + }, + { + "epoch": 7.0, + "learning_rate": 5.174232303040653e-06, + "loss": 4.7464, + "step": 94900 + }, + { + "epoch": 7.0, + "learning_rate": 5.124006790489297e-06, + "loss": 5.2756, + "step": 94950 + }, + { + "epoch": 7.01, + "learning_rate": 5.073781277937942e-06, + "loss": 5.7393, + "step": 95000 + }, + { + "epoch": 7.01, + "learning_rate": 5.023555765386586e-06, + "loss": 5.8585, + "step": 95050 + }, + { + "epoch": 7.01, + "learning_rate": 4.97333025283523e-06, + "loss": 5.0746, + "step": 95100 + }, + { + "epoch": 7.02, + "learning_rate": 4.923104740283875e-06, + "loss": 5.2644, + "step": 95150 + }, + { + "epoch": 7.02, + "learning_rate": 4.87287922773252e-06, + "loss": 5.3518, + "step": 95200 + }, + { + 
"epoch": 7.03, + "learning_rate": 4.822653715181164e-06, + "loss": 5.6309, + "step": 95250 + }, + { + "epoch": 7.03, + "learning_rate": 4.772428202629809e-06, + "loss": 5.0614, + "step": 95300 + }, + { + "epoch": 7.03, + "learning_rate": 4.722202690078453e-06, + "loss": 5.6014, + "step": 95350 + }, + { + "epoch": 7.04, + "learning_rate": 4.671977177527097e-06, + "loss": 5.9759, + "step": 95400 + }, + { + "epoch": 7.04, + "learning_rate": 4.621751664975741e-06, + "loss": 5.6206, + "step": 95450 + }, + { + "epoch": 7.04, + "learning_rate": 4.571526152424386e-06, + "loss": 5.6578, + "step": 95500 + }, + { + "epoch": 7.05, + "learning_rate": 4.52130063987303e-06, + "loss": 4.8626, + "step": 95550 + }, + { + "epoch": 7.05, + "learning_rate": 4.471075127321675e-06, + "loss": 5.1913, + "step": 95600 + }, + { + "epoch": 7.06, + "learning_rate": 4.4208496147703196e-06, + "loss": 5.3974, + "step": 95650 + }, + { + "epoch": 7.06, + "learning_rate": 4.370624102218964e-06, + "loss": 5.0069, + "step": 95700 + }, + { + "epoch": 7.06, + "learning_rate": 4.320398589667608e-06, + "loss": 5.2273, + "step": 95750 + }, + { + "epoch": 7.07, + "learning_rate": 4.270173077116253e-06, + "loss": 6.3129, + "step": 95800 + }, + { + "epoch": 7.07, + "learning_rate": 4.219947564564897e-06, + "loss": 5.6421, + "step": 95850 + }, + { + "epoch": 7.07, + "learning_rate": 4.169722052013541e-06, + "loss": 5.9209, + "step": 95900 + }, + { + "epoch": 7.08, + "learning_rate": 4.119496539462185e-06, + "loss": 5.3418, + "step": 95950 + }, + { + "epoch": 7.08, + "learning_rate": 4.06927102691083e-06, + "loss": 5.1524, + "step": 96000 + }, + { + "epoch": 7.08, + "learning_rate": 4.0190455143594745e-06, + "loss": 5.8734, + "step": 96050 + }, + { + "epoch": 7.09, + "learning_rate": 3.968820001808119e-06, + "loss": 5.1922, + "step": 96100 + }, + { + "epoch": 7.09, + "learning_rate": 3.9185944892567635e-06, + "loss": 5.5077, + "step": 96150 + }, + { + "epoch": 7.1, + "learning_rate": 3.8683689767054076e-06, + 
"loss": 5.1205, + "step": 96200 + }, + { + "epoch": 7.1, + "learning_rate": 3.818143464154052e-06, + "loss": 5.6354, + "step": 96250 + }, + { + "epoch": 7.1, + "learning_rate": 3.7679179516026965e-06, + "loss": 5.4505, + "step": 96300 + }, + { + "epoch": 7.11, + "learning_rate": 3.717692439051341e-06, + "loss": 5.5936, + "step": 96350 + }, + { + "epoch": 7.11, + "learning_rate": 3.667466926499985e-06, + "loss": 6.0097, + "step": 96400 + }, + { + "epoch": 7.11, + "learning_rate": 3.617241413948629e-06, + "loss": 6.1105, + "step": 96450 + }, + { + "epoch": 7.12, + "learning_rate": 3.567015901397274e-06, + "loss": 6.4328, + "step": 96500 + }, + { + "epoch": 7.12, + "learning_rate": 3.5167903888459185e-06, + "loss": 5.0374, + "step": 96550 + }, + { + "epoch": 7.13, + "learning_rate": 3.4665648762945625e-06, + "loss": 5.6259, + "step": 96600 + }, + { + "epoch": 7.13, + "learning_rate": 3.4163393637432074e-06, + "loss": 5.2756, + "step": 96650 + }, + { + "epoch": 7.13, + "learning_rate": 3.3661138511918515e-06, + "loss": 5.4218, + "step": 96700 + }, + { + "epoch": 7.14, + "learning_rate": 3.315888338640496e-06, + "loss": 5.4047, + "step": 96750 + }, + { + "epoch": 7.14, + "learning_rate": 3.265662826089141e-06, + "loss": 5.6143, + "step": 96800 + }, + { + "epoch": 7.14, + "learning_rate": 3.215437313537785e-06, + "loss": 5.3633, + "step": 96850 + }, + { + "epoch": 7.15, + "learning_rate": 3.165211800986429e-06, + "loss": 4.9261, + "step": 96900 + }, + { + "epoch": 7.15, + "learning_rate": 3.114986288435074e-06, + "loss": 5.4425, + "step": 96950 + }, + { + "epoch": 7.15, + "learning_rate": 3.064760775883718e-06, + "loss": 4.9883, + "step": 97000 + }, + { + "epoch": 7.16, + "learning_rate": 3.0145352633323624e-06, + "loss": 4.4753, + "step": 97050 + }, + { + "epoch": 7.16, + "learning_rate": 2.964309750781007e-06, + "loss": 5.3568, + "step": 97100 + }, + { + "epoch": 7.17, + "learning_rate": 2.9140842382296514e-06, + "loss": 5.2898, + "step": 97150 + }, + { + "epoch": 
7.17, + "learning_rate": 2.863858725678296e-06, + "loss": 5.2691, + "step": 97200 + }, + { + "epoch": 7.17, + "learning_rate": 2.81363321312694e-06, + "loss": 4.6615, + "step": 97250 + }, + { + "epoch": 7.18, + "learning_rate": 2.7634077005755844e-06, + "loss": 5.2432, + "step": 97300 + }, + { + "epoch": 7.18, + "learning_rate": 2.713182188024229e-06, + "loss": 4.7523, + "step": 97350 + }, + { + "epoch": 7.18, + "learning_rate": 2.6629566754728733e-06, + "loss": 6.4066, + "step": 97400 + }, + { + "epoch": 7.19, + "learning_rate": 2.612731162921518e-06, + "loss": 4.928, + "step": 97450 + }, + { + "epoch": 7.19, + "learning_rate": 2.562505650370162e-06, + "loss": 5.6982, + "step": 97500 + }, + { + "epoch": 7.2, + "learning_rate": 2.5122801378188068e-06, + "loss": 5.1039, + "step": 97550 + }, + { + "epoch": 7.2, + "learning_rate": 2.4620546252674512e-06, + "loss": 5.573, + "step": 97600 + }, + { + "epoch": 7.2, + "learning_rate": 2.4118291127160953e-06, + "loss": 5.3904, + "step": 97650 + }, + { + "epoch": 7.21, + "learning_rate": 2.3616036001647398e-06, + "loss": 5.0434, + "step": 97700 + }, + { + "epoch": 7.21, + "learning_rate": 2.3113780876133842e-06, + "loss": 5.1437, + "step": 97750 + }, + { + "epoch": 7.21, + "learning_rate": 2.2611525750620287e-06, + "loss": 5.2984, + "step": 97800 + }, + { + "epoch": 7.22, + "learning_rate": 2.210927062510673e-06, + "loss": 4.6668, + "step": 97850 + }, + { + "epoch": 7.22, + "learning_rate": 2.1607015499593172e-06, + "loss": 4.0901, + "step": 97900 + }, + { + "epoch": 7.23, + "learning_rate": 2.1104760374079617e-06, + "loss": 5.1942, + "step": 97950 + }, + { + "epoch": 7.23, + "learning_rate": 2.060250524856606e-06, + "loss": 5.2536, + "step": 98000 + }, + { + "epoch": 7.23, + "learning_rate": 2.0100250123052507e-06, + "loss": 5.6535, + "step": 98050 + }, + { + "epoch": 7.24, + "learning_rate": 1.959799499753895e-06, + "loss": 4.9945, + "step": 98100 + }, + { + "epoch": 7.24, + "learning_rate": 1.9095739872025396e-06, + 
"loss": 5.3495, + "step": 98150 + }, + { + "epoch": 7.24, + "learning_rate": 1.859348474651184e-06, + "loss": 5.2008, + "step": 98200 + }, + { + "epoch": 7.25, + "learning_rate": 1.8091229620998282e-06, + "loss": 5.2892, + "step": 98250 + }, + { + "epoch": 7.25, + "learning_rate": 1.7588974495484726e-06, + "loss": 5.3336, + "step": 98300 + }, + { + "epoch": 7.25, + "learning_rate": 1.7086719369971173e-06, + "loss": 5.6491, + "step": 98350 + }, + { + "epoch": 7.26, + "learning_rate": 1.6584464244457614e-06, + "loss": 6.371, + "step": 98400 + }, + { + "epoch": 7.26, + "learning_rate": 1.608220911894406e-06, + "loss": 5.6378, + "step": 98450 + }, + { + "epoch": 7.27, + "learning_rate": 1.5579953993430503e-06, + "loss": 5.3871, + "step": 98500 + }, + { + "epoch": 7.27, + "learning_rate": 1.5077698867916948e-06, + "loss": 5.5656, + "step": 98550 + }, + { + "epoch": 7.27, + "learning_rate": 1.457544374240339e-06, + "loss": 5.2364, + "step": 98600 + }, + { + "epoch": 7.28, + "learning_rate": 1.4073188616889836e-06, + "loss": 4.9547, + "step": 98650 + }, + { + "epoch": 7.28, + "learning_rate": 1.357093349137628e-06, + "loss": 5.0353, + "step": 98700 + }, + { + "epoch": 7.28, + "learning_rate": 1.3068678365862725e-06, + "loss": 5.2683, + "step": 98750 + }, + { + "epoch": 7.29, + "learning_rate": 1.2566423240349168e-06, + "loss": 5.4399, + "step": 98800 + }, + { + "epoch": 7.29, + "learning_rate": 1.2064168114835613e-06, + "loss": 4.6897, + "step": 98850 + }, + { + "epoch": 7.3, + "learning_rate": 1.1561912989322055e-06, + "loss": 5.4834, + "step": 98900 + }, + { + "epoch": 7.3, + "learning_rate": 1.1059657863808502e-06, + "loss": 6.2428, + "step": 98950 + }, + { + "epoch": 7.3, + "learning_rate": 1.0557402738294945e-06, + "loss": 5.3142, + "step": 99000 + }, + { + "epoch": 7.31, + "learning_rate": 1.005514761278139e-06, + "loss": 5.601, + "step": 99050 + }, + { + "epoch": 7.31, + "learning_rate": 9.552892487267832e-07, + "loss": 5.5382, + "step": 99100 + }, + { + "epoch": 
7.31, + "learning_rate": 9.050637361754277e-07, + "loss": 6.1838, + "step": 99150 + }, + { + "epoch": 7.32, + "learning_rate": 8.548382236240722e-07, + "loss": 5.4926, + "step": 99200 + }, + { + "epoch": 7.32, + "learning_rate": 8.046127110727166e-07, + "loss": 5.5203, + "step": 99250 + }, + { + "epoch": 7.32, + "learning_rate": 7.543871985213609e-07, + "loss": 5.8525, + "step": 99300 + }, + { + "epoch": 7.33, + "learning_rate": 7.041616859700053e-07, + "loss": 5.5614, + "step": 99350 + }, + { + "epoch": 7.33, + "learning_rate": 6.539361734186498e-07, + "loss": 5.1589, + "step": 99400 + }, + { + "epoch": 7.34, + "learning_rate": 6.037106608672941e-07, + "loss": 4.9074, + "step": 99450 + }, + { + "epoch": 7.34, + "learning_rate": 5.534851483159385e-07, + "loss": 5.5361, + "step": 99500 + }, + { + "epoch": 7.34, + "learning_rate": 5.03259635764583e-07, + "loss": 5.632, + "step": 99550 + }, + { + "epoch": 7.35, + "learning_rate": 4.530341232132274e-07, + "loss": 5.3513, + "step": 99600 + }, + { + "epoch": 7.35, + "learning_rate": 4.0280861066187184e-07, + "loss": 5.5489, + "step": 99650 + }, + { + "epoch": 7.35, + "learning_rate": 3.525830981105162e-07, + "loss": 4.6669, + "step": 99700 + }, + { + "epoch": 7.36, + "learning_rate": 3.0235758555916064e-07, + "loss": 5.5143, + "step": 99750 + }, + { + "epoch": 7.36, + "learning_rate": 2.5213207300780506e-07, + "loss": 5.9166, + "step": 99800 + }, + { + "epoch": 7.37, + "learning_rate": 2.0190656045644946e-07, + "loss": 4.9058, + "step": 99850 + }, + { + "epoch": 7.37, + "learning_rate": 1.5168104790509386e-07, + "loss": 5.1908, + "step": 99900 + }, + { + "epoch": 7.37, + "learning_rate": 1.014555353537383e-07, + "loss": 5.2584, + "step": 99950 + }, + { + "epoch": 7.38, + "learning_rate": 5.12300228023827e-08, + "loss": 5.1109, + "step": 100000 + }, + { + "epoch": 7.38, + "eval_loss": 9.184836387634277, + "eval_runtime": 964.8586, + "eval_samples_per_second": 13.575, + "eval_steps_per_second": 3.394, + "eval_wer": 
0.21346226533954896, + "step": 100000 + } + ], + "max_steps": 100051, + "num_train_epochs": 8, + "total_flos": 0.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-100000/training_args.bin b/checkpoint-100000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..dbc064046a36220dd960e955c565bc3e2c9e3abd --- /dev/null +++ b/checkpoint-100000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b64c669f66dd7a2e54d3001ce7e31c26cc60dd58136e8ce90e6055bd0ae15eb +size 3503 diff --git a/checkpoint-20000/optimizer.pt b/checkpoint-20000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..611940c6f10d84f3570a37adde4ebacd8866fca1 --- /dev/null +++ b/checkpoint-20000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3f20cc328e6cf018f92f3b71e11bf4a9364f5a247ee5d99d4a62354ede6a516 +size 5154563651 diff --git a/checkpoint-20000/rng_state.pth b/checkpoint-20000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3e9838ac40b16f57d6976aafd83f2dfe6d64496a --- /dev/null +++ b/checkpoint-20000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9fb3410dde03074fae133541463bfebd7d0708693d5ffa17edc4fe4974c0f7eb +size 14503 diff --git a/checkpoint-20000/scheduler.pt b/checkpoint-20000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd876d1736dbcb5a2551e43c66560f2528dcc0bc --- /dev/null +++ b/checkpoint-20000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:caeda3b27b783dbb84d9e4d82bc20bd764fb8fbed5023345d4c45d753ffa45b0 +size 623 diff --git a/checkpoint-20000/stt_en_conformer_transducer_xlarge.nemo b/checkpoint-20000/stt_en_conformer_transducer_xlarge.nemo new file mode 100644 index 0000000000000000000000000000000000000000..cedb628a75939c52c7ca2667325393c721da47b1 --- /dev/null +++ 
b/checkpoint-20000/stt_en_conformer_transducer_xlarge.nemo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06c6f31b89b77d8eaf30394215a6001e812460139f4276d335e97c10cc0b632e +size 2577971200 diff --git a/checkpoint-20000/trainer_state.json b/checkpoint-20000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e2e7a8669b3854ff8b1bcbb56cacbeb3e0c3f41d --- /dev/null +++ b/checkpoint-20000/trainer_state.json @@ -0,0 +1,2425 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.4752526370140886, + "global_step": 20000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1e-05, + "loss": 178.9465, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 2e-05, + "loss": 164.9707, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 3e-05, + "loss": 142.2782, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4e-05, + "loss": 121.5122, + "step": 200 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 91.8622, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 6e-05, + "loss": 82.2062, + "step": 300 + }, + { + "epoch": 0.03, + "learning_rate": 7e-05, + "loss": 72.6893, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 8e-05, + "loss": 71.8709, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 9e-05, + "loss": 69.9995, + "step": 450 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001, + "loss": 70.6458, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 9.994977448744865e-05, + "loss": 73.9929, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 9.989954897489729e-05, + "loss": 66.52, + "step": 600 + }, + { + "epoch": 0.05, + "learning_rate": 9.984932346234594e-05, + "loss": 65.8947, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 9.979909794979458e-05, + "loss": 62.5809, + "step": 700 + }, + { + "epoch": 0.06, + 
"learning_rate": 9.974887243724323e-05, + "loss": 61.212, + "step": 750 + }, + { + "epoch": 0.06, + "learning_rate": 9.969864692469187e-05, + "loss": 68.2408, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 9.964842141214051e-05, + "loss": 61.5308, + "step": 850 + }, + { + "epoch": 0.07, + "learning_rate": 9.959819589958916e-05, + "loss": 58.9116, + "step": 900 + }, + { + "epoch": 0.07, + "learning_rate": 9.95479703870378e-05, + "loss": 60.0702, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 9.949774487448646e-05, + "loss": 57.6135, + "step": 1000 + }, + { + "epoch": 0.08, + "learning_rate": 9.944751936193509e-05, + "loss": 50.9231, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 9.939729384938373e-05, + "loss": 51.187, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 9.934706833683238e-05, + "loss": 52.1127, + "step": 1150 + }, + { + "epoch": 0.09, + "learning_rate": 9.929684282428102e-05, + "loss": 47.4608, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 9.924661731172968e-05, + "loss": 51.6108, + "step": 1250 + }, + { + "epoch": 0.1, + "learning_rate": 9.919639179917831e-05, + "loss": 46.5874, + "step": 1300 + }, + { + "epoch": 0.1, + "learning_rate": 9.914616628662697e-05, + "loss": 41.4706, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 9.90959407740756e-05, + "loss": 43.7544, + "step": 1400 + }, + { + "epoch": 0.11, + "learning_rate": 9.904571526152426e-05, + "loss": 44.6039, + "step": 1450 + }, + { + "epoch": 0.11, + "learning_rate": 9.899548974897289e-05, + "loss": 41.4384, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 9.894526423642154e-05, + "loss": 42.8289, + "step": 1550 + }, + { + "epoch": 0.12, + "learning_rate": 9.889503872387019e-05, + "loss": 39.9726, + "step": 1600 + }, + { + "epoch": 0.12, + "learning_rate": 9.884481321131882e-05, + "loss": 43.9533, + "step": 1650 + }, + { + "epoch": 0.13, + "learning_rate": 9.879458769876748e-05, + "loss": 38.7605, + "step": 1700 
+ }, + { + "epoch": 0.13, + "learning_rate": 9.87443621862161e-05, + "loss": 39.5425, + "step": 1750 + }, + { + "epoch": 0.13, + "learning_rate": 9.869413667366476e-05, + "loss": 37.588, + "step": 1800 + }, + { + "epoch": 0.14, + "learning_rate": 9.86439111611134e-05, + "loss": 39.7744, + "step": 1850 + }, + { + "epoch": 0.14, + "learning_rate": 9.859368564856205e-05, + "loss": 38.2154, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 9.85434601360107e-05, + "loss": 35.0806, + "step": 1950 + }, + { + "epoch": 0.15, + "learning_rate": 9.849323462345934e-05, + "loss": 39.061, + "step": 2000 + }, + { + "epoch": 0.15, + "learning_rate": 9.844300911090798e-05, + "loss": 35.1544, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 9.839278359835663e-05, + "loss": 38.123, + "step": 2100 + }, + { + "epoch": 0.16, + "learning_rate": 9.834255808580527e-05, + "loss": 33.1144, + "step": 2150 + }, + { + "epoch": 0.16, + "learning_rate": 9.829233257325392e-05, + "loss": 34.3476, + "step": 2200 + }, + { + "epoch": 0.17, + "learning_rate": 9.824210706070256e-05, + "loss": 29.5665, + "step": 2250 + }, + { + "epoch": 0.17, + "learning_rate": 9.81918815481512e-05, + "loss": 35.8756, + "step": 2300 + }, + { + "epoch": 0.17, + "learning_rate": 9.814165603559985e-05, + "loss": 37.2579, + "step": 2350 + }, + { + "epoch": 0.18, + "learning_rate": 9.809143052304849e-05, + "loss": 33.6245, + "step": 2400 + }, + { + "epoch": 0.18, + "learning_rate": 9.804120501049714e-05, + "loss": 35.6543, + "step": 2450 + }, + { + "epoch": 0.18, + "learning_rate": 9.799097949794578e-05, + "loss": 36.7847, + "step": 2500 + }, + { + "epoch": 0.19, + "learning_rate": 9.794075398539442e-05, + "loss": 33.463, + "step": 2550 + }, + { + "epoch": 0.19, + "learning_rate": 9.789052847284307e-05, + "loss": 32.2215, + "step": 2600 + }, + { + "epoch": 0.2, + "learning_rate": 9.784030296029171e-05, + "loss": 33.4301, + "step": 2650 + }, + { + "epoch": 0.2, + "learning_rate": 9.779007744774036e-05, + 
"loss": 29.9579, + "step": 2700 + }, + { + "epoch": 0.2, + "learning_rate": 9.773985193518901e-05, + "loss": 31.9141, + "step": 2750 + }, + { + "epoch": 0.21, + "learning_rate": 9.768962642263764e-05, + "loss": 33.2049, + "step": 2800 + }, + { + "epoch": 0.21, + "learning_rate": 9.763940091008629e-05, + "loss": 32.8774, + "step": 2850 + }, + { + "epoch": 0.21, + "learning_rate": 9.758917539753493e-05, + "loss": 29.0858, + "step": 2900 + }, + { + "epoch": 0.22, + "learning_rate": 9.753894988498358e-05, + "loss": 30.1145, + "step": 2950 + }, + { + "epoch": 0.22, + "learning_rate": 9.748872437243222e-05, + "loss": 27.6986, + "step": 3000 + }, + { + "epoch": 0.22, + "learning_rate": 9.743849885988087e-05, + "loss": 31.7807, + "step": 3050 + }, + { + "epoch": 0.23, + "learning_rate": 9.738827334732952e-05, + "loss": 30.5108, + "step": 3100 + }, + { + "epoch": 0.23, + "learning_rate": 9.733804783477815e-05, + "loss": 31.0909, + "step": 3150 + }, + { + "epoch": 0.24, + "learning_rate": 9.728782232222681e-05, + "loss": 27.9057, + "step": 3200 + }, + { + "epoch": 0.24, + "learning_rate": 9.723759680967544e-05, + "loss": 29.7323, + "step": 3250 + }, + { + "epoch": 0.24, + "learning_rate": 9.71873712971241e-05, + "loss": 29.7527, + "step": 3300 + }, + { + "epoch": 0.25, + "learning_rate": 9.713714578457273e-05, + "loss": 29.1442, + "step": 3350 + }, + { + "epoch": 0.25, + "learning_rate": 9.708692027202137e-05, + "loss": 30.8906, + "step": 3400 + }, + { + "epoch": 0.25, + "learning_rate": 9.703669475947003e-05, + "loss": 26.8419, + "step": 3450 + }, + { + "epoch": 0.26, + "learning_rate": 9.698646924691866e-05, + "loss": 29.2181, + "step": 3500 + }, + { + "epoch": 0.26, + "learning_rate": 9.693624373436732e-05, + "loss": 27.6549, + "step": 3550 + }, + { + "epoch": 0.27, + "learning_rate": 9.688601822181595e-05, + "loss": 34.0701, + "step": 3600 + }, + { + "epoch": 0.27, + "learning_rate": 9.683579270926461e-05, + "loss": 24.7487, + "step": 3650 + }, + { + "epoch": 0.27, + 
"learning_rate": 9.678556719671325e-05, + "loss": 30.0266, + "step": 3700 + }, + { + "epoch": 0.28, + "learning_rate": 9.67353416841619e-05, + "loss": 25.5011, + "step": 3750 + }, + { + "epoch": 0.28, + "learning_rate": 9.668511617161054e-05, + "loss": 26.1437, + "step": 3800 + }, + { + "epoch": 0.28, + "learning_rate": 9.663489065905918e-05, + "loss": 23.2303, + "step": 3850 + }, + { + "epoch": 0.29, + "learning_rate": 9.658466514650783e-05, + "loss": 26.357, + "step": 3900 + }, + { + "epoch": 0.29, + "learning_rate": 9.653443963395646e-05, + "loss": 27.2201, + "step": 3950 + }, + { + "epoch": 0.3, + "learning_rate": 9.648421412140512e-05, + "loss": 25.5695, + "step": 4000 + }, + { + "epoch": 0.3, + "learning_rate": 9.643398860885376e-05, + "loss": 24.8346, + "step": 4050 + }, + { + "epoch": 0.3, + "learning_rate": 9.63837630963024e-05, + "loss": 22.3957, + "step": 4100 + }, + { + "epoch": 0.31, + "learning_rate": 9.633353758375105e-05, + "loss": 24.9532, + "step": 4150 + }, + { + "epoch": 0.31, + "learning_rate": 9.628331207119969e-05, + "loss": 23.1574, + "step": 4200 + }, + { + "epoch": 0.31, + "learning_rate": 9.623308655864834e-05, + "loss": 23.7018, + "step": 4250 + }, + { + "epoch": 0.32, + "learning_rate": 9.618286104609698e-05, + "loss": 25.1433, + "step": 4300 + }, + { + "epoch": 0.32, + "learning_rate": 9.613263553354562e-05, + "loss": 25.0571, + "step": 4350 + }, + { + "epoch": 0.32, + "learning_rate": 9.608241002099427e-05, + "loss": 24.2231, + "step": 4400 + }, + { + "epoch": 0.33, + "learning_rate": 9.603218450844291e-05, + "loss": 23.0983, + "step": 4450 + }, + { + "epoch": 0.33, + "learning_rate": 9.598195899589156e-05, + "loss": 25.0078, + "step": 4500 + }, + { + "epoch": 0.34, + "learning_rate": 9.59317334833402e-05, + "loss": 20.6933, + "step": 4550 + }, + { + "epoch": 0.34, + "learning_rate": 9.588150797078884e-05, + "loss": 23.6196, + "step": 4600 + }, + { + "epoch": 0.34, + "learning_rate": 9.583128245823749e-05, + "loss": 25.2331, + "step": 
4650 + }, + { + "epoch": 0.35, + "learning_rate": 9.578105694568613e-05, + "loss": 24.7932, + "step": 4700 + }, + { + "epoch": 0.35, + "learning_rate": 9.573083143313478e-05, + "loss": 24.3586, + "step": 4750 + }, + { + "epoch": 0.35, + "learning_rate": 9.568060592058342e-05, + "loss": 22.7161, + "step": 4800 + }, + { + "epoch": 0.36, + "learning_rate": 9.563038040803208e-05, + "loss": 22.4188, + "step": 4850 + }, + { + "epoch": 0.36, + "learning_rate": 9.558015489548071e-05, + "loss": 21.6516, + "step": 4900 + }, + { + "epoch": 0.37, + "learning_rate": 9.552992938292937e-05, + "loss": 21.78, + "step": 4950 + }, + { + "epoch": 0.37, + "learning_rate": 9.5479703870378e-05, + "loss": 21.0172, + "step": 5000 + }, + { + "epoch": 0.37, + "learning_rate": 9.542947835782665e-05, + "loss": 22.4624, + "step": 5050 + }, + { + "epoch": 0.38, + "learning_rate": 9.537925284527528e-05, + "loss": 23.6615, + "step": 5100 + }, + { + "epoch": 0.38, + "learning_rate": 9.532902733272393e-05, + "loss": 21.8091, + "step": 5150 + }, + { + "epoch": 0.38, + "learning_rate": 9.527880182017259e-05, + "loss": 21.4173, + "step": 5200 + }, + { + "epoch": 0.39, + "learning_rate": 9.522857630762122e-05, + "loss": 20.5415, + "step": 5250 + }, + { + "epoch": 0.39, + "learning_rate": 9.517835079506987e-05, + "loss": 21.0639, + "step": 5300 + }, + { + "epoch": 0.39, + "learning_rate": 9.51281252825185e-05, + "loss": 21.6078, + "step": 5350 + }, + { + "epoch": 0.4, + "learning_rate": 9.507789976996716e-05, + "loss": 19.4142, + "step": 5400 + }, + { + "epoch": 0.4, + "learning_rate": 9.50276742574158e-05, + "loss": 20.2504, + "step": 5450 + }, + { + "epoch": 0.41, + "learning_rate": 9.497744874486445e-05, + "loss": 23.8683, + "step": 5500 + }, + { + "epoch": 0.41, + "learning_rate": 9.49272232323131e-05, + "loss": 19.7559, + "step": 5550 + }, + { + "epoch": 0.41, + "learning_rate": 9.487699771976174e-05, + "loss": 21.1743, + "step": 5600 + }, + { + "epoch": 0.42, + "learning_rate": 
9.482677220721038e-05, + "loss": 21.1908, + "step": 5650 + }, + { + "epoch": 0.42, + "learning_rate": 9.477654669465901e-05, + "loss": 20.9591, + "step": 5700 + }, + { + "epoch": 0.42, + "learning_rate": 9.472632118210767e-05, + "loss": 20.9036, + "step": 5750 + }, + { + "epoch": 0.43, + "learning_rate": 9.46760956695563e-05, + "loss": 22.249, + "step": 5800 + }, + { + "epoch": 0.43, + "learning_rate": 9.462587015700496e-05, + "loss": 19.1093, + "step": 5850 + }, + { + "epoch": 0.44, + "learning_rate": 9.45756446444536e-05, + "loss": 21.2714, + "step": 5900 + }, + { + "epoch": 0.44, + "learning_rate": 9.452541913190225e-05, + "loss": 21.3794, + "step": 5950 + }, + { + "epoch": 0.44, + "learning_rate": 9.447519361935089e-05, + "loss": 20.0326, + "step": 6000 + }, + { + "epoch": 0.45, + "learning_rate": 9.442496810679954e-05, + "loss": 19.8004, + "step": 6050 + }, + { + "epoch": 0.45, + "learning_rate": 9.437474259424818e-05, + "loss": 19.0229, + "step": 6100 + }, + { + "epoch": 0.45, + "learning_rate": 9.432451708169682e-05, + "loss": 17.6587, + "step": 6150 + }, + { + "epoch": 0.46, + "learning_rate": 9.427429156914547e-05, + "loss": 21.9247, + "step": 6200 + }, + { + "epoch": 0.46, + "learning_rate": 9.422406605659411e-05, + "loss": 19.743, + "step": 6250 + }, + { + "epoch": 0.46, + "learning_rate": 9.417384054404276e-05, + "loss": 22.9746, + "step": 6300 + }, + { + "epoch": 0.47, + "learning_rate": 9.41236150314914e-05, + "loss": 19.6693, + "step": 6350 + }, + { + "epoch": 0.47, + "learning_rate": 9.407338951894004e-05, + "loss": 19.1141, + "step": 6400 + }, + { + "epoch": 0.48, + "learning_rate": 9.402316400638869e-05, + "loss": 18.3847, + "step": 6450 + }, + { + "epoch": 0.48, + "learning_rate": 9.397293849383733e-05, + "loss": 18.9357, + "step": 6500 + }, + { + "epoch": 0.48, + "learning_rate": 9.392271298128598e-05, + "loss": 18.9316, + "step": 6550 + }, + { + "epoch": 0.49, + "learning_rate": 9.387248746873462e-05, + "loss": 20.9141, + "step": 6600 + }, + { 
+ "epoch": 0.49, + "learning_rate": 9.382226195618326e-05, + "loss": 18.7472, + "step": 6650 + }, + { + "epoch": 0.49, + "learning_rate": 9.377203644363192e-05, + "loss": 18.8577, + "step": 6700 + }, + { + "epoch": 0.5, + "learning_rate": 9.372181093108055e-05, + "loss": 17.8061, + "step": 6750 + }, + { + "epoch": 0.5, + "learning_rate": 9.36715854185292e-05, + "loss": 19.4687, + "step": 6800 + }, + { + "epoch": 0.51, + "learning_rate": 9.362135990597784e-05, + "loss": 19.5103, + "step": 6850 + }, + { + "epoch": 0.51, + "learning_rate": 9.357113439342648e-05, + "loss": 18.5319, + "step": 6900 + }, + { + "epoch": 0.51, + "learning_rate": 9.352090888087514e-05, + "loss": 20.16, + "step": 6950 + }, + { + "epoch": 0.52, + "learning_rate": 9.347068336832377e-05, + "loss": 18.1913, + "step": 7000 + }, + { + "epoch": 0.52, + "learning_rate": 9.342045785577243e-05, + "loss": 21.341, + "step": 7050 + }, + { + "epoch": 0.52, + "learning_rate": 9.337023234322106e-05, + "loss": 16.7701, + "step": 7100 + }, + { + "epoch": 0.53, + "learning_rate": 9.332000683066972e-05, + "loss": 18.045, + "step": 7150 + }, + { + "epoch": 0.53, + "learning_rate": 9.326978131811835e-05, + "loss": 16.0393, + "step": 7200 + }, + { + "epoch": 0.53, + "learning_rate": 9.3219555805567e-05, + "loss": 17.4833, + "step": 7250 + }, + { + "epoch": 0.54, + "learning_rate": 9.316933029301565e-05, + "loss": 17.3978, + "step": 7300 + }, + { + "epoch": 0.54, + "learning_rate": 9.31191047804643e-05, + "loss": 18.2649, + "step": 7350 + }, + { + "epoch": 0.55, + "learning_rate": 9.306887926791294e-05, + "loss": 16.3891, + "step": 7400 + }, + { + "epoch": 0.55, + "learning_rate": 9.301865375536157e-05, + "loss": 21.4399, + "step": 7450 + }, + { + "epoch": 0.55, + "learning_rate": 9.296842824281023e-05, + "loss": 16.3082, + "step": 7500 + }, + { + "epoch": 0.56, + "learning_rate": 9.291820273025886e-05, + "loss": 14.8713, + "step": 7550 + }, + { + "epoch": 0.56, + "learning_rate": 9.286797721770751e-05, + "loss": 
16.3099, + "step": 7600 + }, + { + "epoch": 0.56, + "learning_rate": 9.281775170515616e-05, + "loss": 17.8771, + "step": 7650 + }, + { + "epoch": 0.57, + "learning_rate": 9.27675261926048e-05, + "loss": 17.1421, + "step": 7700 + }, + { + "epoch": 0.57, + "learning_rate": 9.271730068005345e-05, + "loss": 16.6478, + "step": 7750 + }, + { + "epoch": 0.58, + "learning_rate": 9.266707516750209e-05, + "loss": 15.3247, + "step": 7800 + }, + { + "epoch": 0.58, + "learning_rate": 9.261684965495073e-05, + "loss": 17.6577, + "step": 7850 + }, + { + "epoch": 0.58, + "learning_rate": 9.256662414239938e-05, + "loss": 18.8549, + "step": 7900 + }, + { + "epoch": 0.59, + "learning_rate": 9.251639862984802e-05, + "loss": 17.4187, + "step": 7950 + }, + { + "epoch": 0.59, + "learning_rate": 9.246617311729667e-05, + "loss": 15.6643, + "step": 8000 + }, + { + "epoch": 0.59, + "learning_rate": 9.241594760474531e-05, + "loss": 17.1987, + "step": 8050 + }, + { + "epoch": 0.6, + "learning_rate": 9.236572209219396e-05, + "loss": 18.1712, + "step": 8100 + }, + { + "epoch": 0.6, + "learning_rate": 9.23154965796426e-05, + "loss": 15.8015, + "step": 8150 + }, + { + "epoch": 0.6, + "learning_rate": 9.226527106709124e-05, + "loss": 19.064, + "step": 8200 + }, + { + "epoch": 0.61, + "learning_rate": 9.221504555453989e-05, + "loss": 18.2748, + "step": 8250 + }, + { + "epoch": 0.61, + "learning_rate": 9.216482004198853e-05, + "loss": 15.0679, + "step": 8300 + }, + { + "epoch": 0.62, + "learning_rate": 9.211459452943718e-05, + "loss": 17.995, + "step": 8350 + }, + { + "epoch": 0.62, + "learning_rate": 9.206436901688582e-05, + "loss": 17.467, + "step": 8400 + }, + { + "epoch": 0.62, + "learning_rate": 9.201414350433448e-05, + "loss": 18.6665, + "step": 8450 + }, + { + "epoch": 0.63, + "learning_rate": 9.196391799178311e-05, + "loss": 17.2848, + "step": 8500 + }, + { + "epoch": 0.63, + "learning_rate": 9.191369247923175e-05, + "loss": 14.4767, + "step": 8550 + }, + { + "epoch": 0.63, + "learning_rate": 
9.18634669666804e-05, + "loss": 17.5444, + "step": 8600 + }, + { + "epoch": 0.64, + "learning_rate": 9.181324145412904e-05, + "loss": 14.4661, + "step": 8650 + }, + { + "epoch": 0.64, + "learning_rate": 9.176301594157768e-05, + "loss": 16.3339, + "step": 8700 + }, + { + "epoch": 0.65, + "learning_rate": 9.171279042902633e-05, + "loss": 17.5122, + "step": 8750 + }, + { + "epoch": 0.65, + "learning_rate": 9.166256491647499e-05, + "loss": 16.7631, + "step": 8800 + }, + { + "epoch": 0.65, + "learning_rate": 9.161233940392362e-05, + "loss": 16.5193, + "step": 8850 + }, + { + "epoch": 0.66, + "learning_rate": 9.156211389137227e-05, + "loss": 17.8364, + "step": 8900 + }, + { + "epoch": 0.66, + "learning_rate": 9.15118883788209e-05, + "loss": 16.2916, + "step": 8950 + }, + { + "epoch": 0.66, + "learning_rate": 9.146166286626956e-05, + "loss": 14.1719, + "step": 9000 + }, + { + "epoch": 0.67, + "learning_rate": 9.141143735371819e-05, + "loss": 18.2987, + "step": 9050 + }, + { + "epoch": 0.67, + "learning_rate": 9.136121184116684e-05, + "loss": 17.4248, + "step": 9100 + }, + { + "epoch": 0.67, + "learning_rate": 9.13109863286155e-05, + "loss": 16.1862, + "step": 9150 + }, + { + "epoch": 0.68, + "learning_rate": 9.126076081606412e-05, + "loss": 16.3134, + "step": 9200 + }, + { + "epoch": 0.68, + "learning_rate": 9.121053530351278e-05, + "loss": 14.9158, + "step": 9250 + }, + { + "epoch": 0.69, + "learning_rate": 9.116030979096141e-05, + "loss": 15.2504, + "step": 9300 + }, + { + "epoch": 0.69, + "learning_rate": 9.111008427841007e-05, + "loss": 14.1967, + "step": 9350 + }, + { + "epoch": 0.69, + "learning_rate": 9.105985876585871e-05, + "loss": 17.3165, + "step": 9400 + }, + { + "epoch": 0.7, + "learning_rate": 9.100963325330736e-05, + "loss": 14.5912, + "step": 9450 + }, + { + "epoch": 0.7, + "learning_rate": 9.0959407740756e-05, + "loss": 17.5593, + "step": 9500 + }, + { + "epoch": 0.7, + "learning_rate": 9.090918222820465e-05, + "loss": 16.3421, + "step": 9550 + }, + { + 
"epoch": 0.71, + "learning_rate": 9.085895671565329e-05, + "loss": 16.2821, + "step": 9600 + }, + { + "epoch": 0.71, + "learning_rate": 9.080873120310192e-05, + "loss": 16.4985, + "step": 9650 + }, + { + "epoch": 0.72, + "learning_rate": 9.075850569055058e-05, + "loss": 16.1138, + "step": 9700 + }, + { + "epoch": 0.72, + "learning_rate": 9.070828017799922e-05, + "loss": 16.3997, + "step": 9750 + }, + { + "epoch": 0.72, + "learning_rate": 9.065805466544787e-05, + "loss": 15.518, + "step": 9800 + }, + { + "epoch": 0.73, + "learning_rate": 9.060782915289651e-05, + "loss": 13.8424, + "step": 9850 + }, + { + "epoch": 0.73, + "learning_rate": 9.055760364034515e-05, + "loss": 15.0784, + "step": 9900 + }, + { + "epoch": 0.73, + "learning_rate": 9.05073781277938e-05, + "loss": 14.0163, + "step": 9950 + }, + { + "epoch": 0.74, + "learning_rate": 9.045715261524244e-05, + "loss": 16.7863, + "step": 10000 + }, + { + "epoch": 0.74, + "learning_rate": 9.040692710269109e-05, + "loss": 13.6715, + "step": 10050 + }, + { + "epoch": 0.75, + "learning_rate": 9.035670159013973e-05, + "loss": 15.1071, + "step": 10100 + }, + { + "epoch": 0.75, + "learning_rate": 9.030647607758837e-05, + "loss": 14.2658, + "step": 10150 + }, + { + "epoch": 0.75, + "learning_rate": 9.025625056503703e-05, + "loss": 15.1115, + "step": 10200 + }, + { + "epoch": 0.76, + "learning_rate": 9.020602505248566e-05, + "loss": 14.028, + "step": 10250 + }, + { + "epoch": 0.76, + "learning_rate": 9.015579953993431e-05, + "loss": 13.3066, + "step": 10300 + }, + { + "epoch": 0.76, + "learning_rate": 9.010557402738295e-05, + "loss": 14.1185, + "step": 10350 + }, + { + "epoch": 0.77, + "learning_rate": 9.00553485148316e-05, + "loss": 14.061, + "step": 10400 + }, + { + "epoch": 0.77, + "learning_rate": 9.000512300228024e-05, + "loss": 15.2439, + "step": 10450 + }, + { + "epoch": 0.77, + "learning_rate": 8.995489748972888e-05, + "loss": 13.3617, + "step": 10500 + }, + { + "epoch": 0.78, + "learning_rate": 
8.990467197717754e-05, + "loss": 14.5514, + "step": 10550 + }, + { + "epoch": 0.78, + "learning_rate": 8.985444646462617e-05, + "loss": 15.2426, + "step": 10600 + }, + { + "epoch": 0.79, + "learning_rate": 8.980422095207483e-05, + "loss": 16.6418, + "step": 10650 + }, + { + "epoch": 0.79, + "learning_rate": 8.975399543952346e-05, + "loss": 13.3146, + "step": 10700 + }, + { + "epoch": 0.79, + "learning_rate": 8.970376992697212e-05, + "loss": 14.9333, + "step": 10750 + }, + { + "epoch": 0.8, + "learning_rate": 8.965354441442075e-05, + "loss": 14.4502, + "step": 10800 + }, + { + "epoch": 0.8, + "learning_rate": 8.960331890186939e-05, + "loss": 14.7886, + "step": 10850 + }, + { + "epoch": 0.8, + "learning_rate": 8.955309338931805e-05, + "loss": 15.0266, + "step": 10900 + }, + { + "epoch": 0.81, + "learning_rate": 8.950286787676668e-05, + "loss": 14.543, + "step": 10950 + }, + { + "epoch": 0.81, + "learning_rate": 8.945264236421534e-05, + "loss": 15.8078, + "step": 11000 + }, + { + "epoch": 0.82, + "learning_rate": 8.940241685166397e-05, + "loss": 13.6052, + "step": 11050 + }, + { + "epoch": 0.82, + "learning_rate": 8.935219133911263e-05, + "loss": 14.2995, + "step": 11100 + }, + { + "epoch": 0.82, + "learning_rate": 8.930196582656126e-05, + "loss": 15.732, + "step": 11150 + }, + { + "epoch": 0.83, + "learning_rate": 8.925174031400991e-05, + "loss": 14.0573, + "step": 11200 + }, + { + "epoch": 0.83, + "learning_rate": 8.920151480145856e-05, + "loss": 17.5941, + "step": 11250 + }, + { + "epoch": 0.83, + "learning_rate": 8.91512892889072e-05, + "loss": 14.7829, + "step": 11300 + }, + { + "epoch": 0.84, + "learning_rate": 8.910106377635585e-05, + "loss": 14.6669, + "step": 11350 + }, + { + "epoch": 0.84, + "learning_rate": 8.905083826380448e-05, + "loss": 14.3315, + "step": 11400 + }, + { + "epoch": 0.84, + "learning_rate": 8.900061275125313e-05, + "loss": 14.2639, + "step": 11450 + }, + { + "epoch": 0.85, + "learning_rate": 8.895038723870176e-05, + "loss": 14.3226, + 
"step": 11500 + }, + { + "epoch": 0.85, + "learning_rate": 8.890016172615042e-05, + "loss": 14.4975, + "step": 11550 + }, + { + "epoch": 0.86, + "learning_rate": 8.884993621359907e-05, + "loss": 14.8436, + "step": 11600 + }, + { + "epoch": 0.86, + "learning_rate": 8.879971070104771e-05, + "loss": 13.8481, + "step": 11650 + }, + { + "epoch": 0.86, + "learning_rate": 8.874948518849635e-05, + "loss": 12.8151, + "step": 11700 + }, + { + "epoch": 0.87, + "learning_rate": 8.8699259675945e-05, + "loss": 13.1659, + "step": 11750 + }, + { + "epoch": 0.87, + "learning_rate": 8.864903416339364e-05, + "loss": 15.0919, + "step": 11800 + }, + { + "epoch": 0.87, + "learning_rate": 8.859880865084229e-05, + "loss": 14.4382, + "step": 11850 + }, + { + "epoch": 0.88, + "learning_rate": 8.854858313829093e-05, + "loss": 14.0989, + "step": 11900 + }, + { + "epoch": 0.88, + "learning_rate": 8.849835762573957e-05, + "loss": 14.5763, + "step": 11950 + }, + { + "epoch": 0.89, + "learning_rate": 8.844813211318822e-05, + "loss": 13.4144, + "step": 12000 + }, + { + "epoch": 0.89, + "learning_rate": 8.839790660063686e-05, + "loss": 15.6018, + "step": 12050 + }, + { + "epoch": 0.89, + "learning_rate": 8.83476810880855e-05, + "loss": 14.7849, + "step": 12100 + }, + { + "epoch": 0.9, + "learning_rate": 8.829745557553415e-05, + "loss": 14.441, + "step": 12150 + }, + { + "epoch": 0.9, + "learning_rate": 8.82472300629828e-05, + "loss": 14.2135, + "step": 12200 + }, + { + "epoch": 0.9, + "learning_rate": 8.819700455043144e-05, + "loss": 17.1245, + "step": 12250 + }, + { + "epoch": 0.91, + "learning_rate": 8.814677903788008e-05, + "loss": 14.6629, + "step": 12300 + }, + { + "epoch": 0.91, + "learning_rate": 8.809655352532873e-05, + "loss": 16.6715, + "step": 12350 + }, + { + "epoch": 0.91, + "learning_rate": 8.804632801277738e-05, + "loss": 13.0133, + "step": 12400 + }, + { + "epoch": 0.92, + "learning_rate": 8.799610250022601e-05, + "loss": 14.1551, + "step": 12450 + }, + { + "epoch": 0.92, + 
"learning_rate": 8.794587698767466e-05, + "loss": 14.019, + "step": 12500 + }, + { + "epoch": 0.93, + "learning_rate": 8.78956514751233e-05, + "loss": 14.4279, + "step": 12550 + }, + { + "epoch": 0.93, + "learning_rate": 8.784542596257195e-05, + "loss": 12.5293, + "step": 12600 + }, + { + "epoch": 0.93, + "learning_rate": 8.77952004500206e-05, + "loss": 15.0403, + "step": 12650 + }, + { + "epoch": 0.94, + "learning_rate": 8.774497493746924e-05, + "loss": 13.8193, + "step": 12700 + }, + { + "epoch": 0.94, + "learning_rate": 8.769474942491789e-05, + "loss": 13.1564, + "step": 12750 + }, + { + "epoch": 0.94, + "learning_rate": 8.764452391236652e-05, + "loss": 14.6415, + "step": 12800 + }, + { + "epoch": 0.95, + "learning_rate": 8.759429839981518e-05, + "loss": 12.2339, + "step": 12850 + }, + { + "epoch": 0.95, + "learning_rate": 8.754407288726381e-05, + "loss": 12.1604, + "step": 12900 + }, + { + "epoch": 0.96, + "learning_rate": 8.749384737471247e-05, + "loss": 15.4939, + "step": 12950 + }, + { + "epoch": 0.96, + "learning_rate": 8.744362186216111e-05, + "loss": 13.9713, + "step": 13000 + }, + { + "epoch": 0.96, + "learning_rate": 8.739339634960976e-05, + "loss": 14.0986, + "step": 13050 + }, + { + "epoch": 0.97, + "learning_rate": 8.73431708370584e-05, + "loss": 13.6334, + "step": 13100 + }, + { + "epoch": 0.97, + "learning_rate": 8.729294532450703e-05, + "loss": 13.5201, + "step": 13150 + }, + { + "epoch": 0.97, + "learning_rate": 8.724271981195569e-05, + "loss": 14.3793, + "step": 13200 + }, + { + "epoch": 0.98, + "learning_rate": 8.719249429940432e-05, + "loss": 13.1741, + "step": 13250 + }, + { + "epoch": 0.98, + "learning_rate": 8.714226878685298e-05, + "loss": 11.7782, + "step": 13300 + }, + { + "epoch": 0.98, + "learning_rate": 8.709204327430162e-05, + "loss": 12.2758, + "step": 13350 + }, + { + "epoch": 0.99, + "learning_rate": 8.704181776175027e-05, + "loss": 13.1723, + "step": 13400 + }, + { + "epoch": 0.99, + "learning_rate": 8.699159224919891e-05, + 
"loss": 14.0858, + "step": 13450 + }, + { + "epoch": 1.0, + "learning_rate": 8.694136673664755e-05, + "loss": 11.2836, + "step": 13500 + }, + { + "epoch": 1.0, + "learning_rate": 8.68911412240962e-05, + "loss": 15.7226, + "step": 13550 + }, + { + "epoch": 1.0, + "learning_rate": 8.684091571154484e-05, + "loss": 15.8889, + "step": 13600 + }, + { + "epoch": 1.01, + "learning_rate": 8.679069019899349e-05, + "loss": 12.2185, + "step": 13650 + }, + { + "epoch": 1.01, + "learning_rate": 8.674046468644213e-05, + "loss": 11.4647, + "step": 13700 + }, + { + "epoch": 1.01, + "learning_rate": 8.669023917389077e-05, + "loss": 13.1238, + "step": 13750 + }, + { + "epoch": 1.02, + "learning_rate": 8.664001366133942e-05, + "loss": 11.909, + "step": 13800 + }, + { + "epoch": 1.02, + "learning_rate": 8.658978814878806e-05, + "loss": 12.5478, + "step": 13850 + }, + { + "epoch": 1.03, + "learning_rate": 8.65395626362367e-05, + "loss": 13.017, + "step": 13900 + }, + { + "epoch": 1.03, + "learning_rate": 8.648933712368535e-05, + "loss": 12.9134, + "step": 13950 + }, + { + "epoch": 1.03, + "learning_rate": 8.6439111611134e-05, + "loss": 13.3485, + "step": 14000 + }, + { + "epoch": 1.04, + "learning_rate": 8.638888609858264e-05, + "loss": 11.4706, + "step": 14050 + }, + { + "epoch": 1.04, + "learning_rate": 8.633866058603128e-05, + "loss": 11.1063, + "step": 14100 + }, + { + "epoch": 1.04, + "learning_rate": 8.628843507347994e-05, + "loss": 12.7408, + "step": 14150 + }, + { + "epoch": 1.05, + "learning_rate": 8.623820956092857e-05, + "loss": 12.0689, + "step": 14200 + }, + { + "epoch": 1.05, + "learning_rate": 8.618798404837721e-05, + "loss": 11.0724, + "step": 14250 + }, + { + "epoch": 1.05, + "learning_rate": 8.613775853582586e-05, + "loss": 12.5685, + "step": 14300 + }, + { + "epoch": 1.06, + "learning_rate": 8.60875330232745e-05, + "loss": 12.7776, + "step": 14350 + }, + { + "epoch": 1.06, + "learning_rate": 8.603730751072315e-05, + "loss": 11.3066, + "step": 14400 + }, + { + "epoch": 
1.07, + "learning_rate": 8.598708199817179e-05, + "loss": 13.06, + "step": 14450 + }, + { + "epoch": 1.07, + "learning_rate": 8.593685648562045e-05, + "loss": 15.6523, + "step": 14500 + }, + { + "epoch": 1.07, + "learning_rate": 8.588663097306908e-05, + "loss": 12.019, + "step": 14550 + }, + { + "epoch": 1.08, + "learning_rate": 8.583640546051774e-05, + "loss": 11.0941, + "step": 14600 + }, + { + "epoch": 1.08, + "learning_rate": 8.578617994796637e-05, + "loss": 12.4755, + "step": 14650 + }, + { + "epoch": 1.08, + "learning_rate": 8.573595443541502e-05, + "loss": 13.7012, + "step": 14700 + }, + { + "epoch": 1.09, + "learning_rate": 8.568572892286366e-05, + "loss": 12.2024, + "step": 14750 + }, + { + "epoch": 1.09, + "learning_rate": 8.56355034103123e-05, + "loss": 12.4744, + "step": 14800 + }, + { + "epoch": 1.1, + "learning_rate": 8.558527789776096e-05, + "loss": 12.3234, + "step": 14850 + }, + { + "epoch": 1.1, + "learning_rate": 8.553505238520959e-05, + "loss": 12.5616, + "step": 14900 + }, + { + "epoch": 1.1, + "learning_rate": 8.548482687265824e-05, + "loss": 11.9559, + "step": 14950 + }, + { + "epoch": 1.11, + "learning_rate": 8.543460136010688e-05, + "loss": 12.0734, + "step": 15000 + }, + { + "epoch": 1.11, + "learning_rate": 8.538437584755553e-05, + "loss": 13.0341, + "step": 15050 + }, + { + "epoch": 1.11, + "learning_rate": 8.533415033500418e-05, + "loss": 12.7406, + "step": 15100 + }, + { + "epoch": 1.12, + "learning_rate": 8.528392482245282e-05, + "loss": 11.7258, + "step": 15150 + }, + { + "epoch": 1.12, + "learning_rate": 8.523369930990147e-05, + "loss": 11.8709, + "step": 15200 + }, + { + "epoch": 1.12, + "learning_rate": 8.518347379735011e-05, + "loss": 11.7021, + "step": 15250 + }, + { + "epoch": 1.13, + "learning_rate": 8.513324828479875e-05, + "loss": 13.2674, + "step": 15300 + }, + { + "epoch": 1.13, + "learning_rate": 8.508302277224738e-05, + "loss": 11.9099, + "step": 15350 + }, + { + "epoch": 1.14, + "learning_rate": 8.503279725969604e-05, + 
"loss": 11.7841, + "step": 15400 + }, + { + "epoch": 1.14, + "learning_rate": 8.498257174714469e-05, + "loss": 11.9573, + "step": 15450 + }, + { + "epoch": 1.14, + "learning_rate": 8.493234623459333e-05, + "loss": 11.7211, + "step": 15500 + }, + { + "epoch": 1.15, + "learning_rate": 8.488212072204197e-05, + "loss": 12.3513, + "step": 15550 + }, + { + "epoch": 1.15, + "learning_rate": 8.483189520949062e-05, + "loss": 11.0709, + "step": 15600 + }, + { + "epoch": 1.15, + "learning_rate": 8.478166969693926e-05, + "loss": 11.6544, + "step": 15650 + }, + { + "epoch": 1.16, + "learning_rate": 8.47314441843879e-05, + "loss": 11.8285, + "step": 15700 + }, + { + "epoch": 1.16, + "learning_rate": 8.468121867183655e-05, + "loss": 10.4208, + "step": 15750 + }, + { + "epoch": 1.17, + "learning_rate": 8.46309931592852e-05, + "loss": 10.7821, + "step": 15800 + }, + { + "epoch": 1.17, + "learning_rate": 8.458076764673384e-05, + "loss": 13.2724, + "step": 15850 + }, + { + "epoch": 1.17, + "learning_rate": 8.45305421341825e-05, + "loss": 10.9219, + "step": 15900 + }, + { + "epoch": 1.18, + "learning_rate": 8.448031662163113e-05, + "loss": 12.2532, + "step": 15950 + }, + { + "epoch": 1.18, + "learning_rate": 8.443009110907977e-05, + "loss": 11.0132, + "step": 16000 + }, + { + "epoch": 1.18, + "learning_rate": 8.437986559652841e-05, + "loss": 12.319, + "step": 16050 + }, + { + "epoch": 1.19, + "learning_rate": 8.432964008397706e-05, + "loss": 12.9871, + "step": 16100 + }, + { + "epoch": 1.19, + "learning_rate": 8.42794145714257e-05, + "loss": 12.0625, + "step": 16150 + }, + { + "epoch": 1.19, + "learning_rate": 8.422918905887435e-05, + "loss": 13.4629, + "step": 16200 + }, + { + "epoch": 1.2, + "learning_rate": 8.4178963546323e-05, + "loss": 10.9291, + "step": 16250 + }, + { + "epoch": 1.2, + "learning_rate": 8.412873803377163e-05, + "loss": 13.7719, + "step": 16300 + }, + { + "epoch": 1.21, + "learning_rate": 8.407851252122029e-05, + "loss": 11.3634, + "step": 16350 + }, + { + 
"epoch": 1.21, + "learning_rate": 8.402828700866892e-05, + "loss": 12.7941, + "step": 16400 + }, + { + "epoch": 1.21, + "learning_rate": 8.397806149611758e-05, + "loss": 11.8863, + "step": 16450 + }, + { + "epoch": 1.22, + "learning_rate": 8.392783598356621e-05, + "loss": 9.5225, + "step": 16500 + }, + { + "epoch": 1.22, + "learning_rate": 8.387761047101485e-05, + "loss": 12.983, + "step": 16550 + }, + { + "epoch": 1.22, + "learning_rate": 8.382738495846351e-05, + "loss": 11.8489, + "step": 16600 + }, + { + "epoch": 1.23, + "learning_rate": 8.377715944591214e-05, + "loss": 11.8122, + "step": 16650 + }, + { + "epoch": 1.23, + "learning_rate": 8.37269339333608e-05, + "loss": 12.3387, + "step": 16700 + }, + { + "epoch": 1.24, + "learning_rate": 8.367670842080943e-05, + "loss": 13.4648, + "step": 16750 + }, + { + "epoch": 1.24, + "learning_rate": 8.362648290825809e-05, + "loss": 10.2301, + "step": 16800 + }, + { + "epoch": 1.24, + "learning_rate": 8.357625739570672e-05, + "loss": 11.492, + "step": 16850 + }, + { + "epoch": 1.25, + "learning_rate": 8.352603188315538e-05, + "loss": 12.5997, + "step": 16900 + }, + { + "epoch": 1.25, + "learning_rate": 8.347580637060402e-05, + "loss": 11.5588, + "step": 16950 + }, + { + "epoch": 1.25, + "learning_rate": 8.342558085805266e-05, + "loss": 11.8627, + "step": 17000 + }, + { + "epoch": 1.26, + "learning_rate": 8.337535534550131e-05, + "loss": 13.2469, + "step": 17050 + }, + { + "epoch": 1.26, + "learning_rate": 8.332512983294994e-05, + "loss": 10.4327, + "step": 17100 + }, + { + "epoch": 1.27, + "learning_rate": 8.32749043203986e-05, + "loss": 12.7566, + "step": 17150 + }, + { + "epoch": 1.27, + "learning_rate": 8.322467880784723e-05, + "loss": 11.0729, + "step": 17200 + }, + { + "epoch": 1.27, + "learning_rate": 8.317445329529588e-05, + "loss": 12.3484, + "step": 17250 + }, + { + "epoch": 1.28, + "learning_rate": 8.312422778274453e-05, + "loss": 10.5193, + "step": 17300 + }, + { + "epoch": 1.28, + "learning_rate": 
8.307400227019317e-05, + "loss": 12.2369, + "step": 17350 + }, + { + "epoch": 1.28, + "learning_rate": 8.302377675764182e-05, + "loss": 12.2976, + "step": 17400 + }, + { + "epoch": 1.29, + "learning_rate": 8.297355124509046e-05, + "loss": 12.3852, + "step": 17450 + }, + { + "epoch": 1.29, + "learning_rate": 8.29233257325391e-05, + "loss": 11.2137, + "step": 17500 + }, + { + "epoch": 1.29, + "learning_rate": 8.287310021998775e-05, + "loss": 11.609, + "step": 17550 + }, + { + "epoch": 1.3, + "learning_rate": 8.282287470743639e-05, + "loss": 13.3339, + "step": 17600 + }, + { + "epoch": 1.3, + "learning_rate": 8.277264919488504e-05, + "loss": 11.4263, + "step": 17650 + }, + { + "epoch": 1.31, + "learning_rate": 8.272242368233368e-05, + "loss": 12.6949, + "step": 17700 + }, + { + "epoch": 1.31, + "learning_rate": 8.267219816978233e-05, + "loss": 11.4767, + "step": 17750 + }, + { + "epoch": 1.31, + "learning_rate": 8.262197265723097e-05, + "loss": 12.2225, + "step": 17800 + }, + { + "epoch": 1.32, + "learning_rate": 8.257174714467961e-05, + "loss": 11.0755, + "step": 17850 + }, + { + "epoch": 1.32, + "learning_rate": 8.252152163212826e-05, + "loss": 11.9677, + "step": 17900 + }, + { + "epoch": 1.32, + "learning_rate": 8.24712961195769e-05, + "loss": 11.098, + "step": 17950 + }, + { + "epoch": 1.33, + "learning_rate": 8.242107060702555e-05, + "loss": 11.1102, + "step": 18000 + }, + { + "epoch": 1.33, + "learning_rate": 8.237084509447419e-05, + "loss": 11.4985, + "step": 18050 + }, + { + "epoch": 1.34, + "learning_rate": 8.232061958192285e-05, + "loss": 11.7356, + "step": 18100 + }, + { + "epoch": 1.34, + "learning_rate": 8.227039406937148e-05, + "loss": 11.3336, + "step": 18150 + }, + { + "epoch": 1.34, + "learning_rate": 8.222016855682012e-05, + "loss": 11.0448, + "step": 18200 + }, + { + "epoch": 1.35, + "learning_rate": 8.216994304426877e-05, + "loss": 10.9986, + "step": 18250 + }, + { + "epoch": 1.35, + "learning_rate": 8.211971753171741e-05, + "loss": 10.768, + 
"step": 18300 + }, + { + "epoch": 1.35, + "learning_rate": 8.206949201916607e-05, + "loss": 11.6844, + "step": 18350 + }, + { + "epoch": 1.36, + "learning_rate": 8.20192665066147e-05, + "loss": 11.5615, + "step": 18400 + }, + { + "epoch": 1.36, + "learning_rate": 8.196904099406336e-05, + "loss": 11.4019, + "step": 18450 + }, + { + "epoch": 1.36, + "learning_rate": 8.191881548151199e-05, + "loss": 12.1784, + "step": 18500 + }, + { + "epoch": 1.37, + "learning_rate": 8.186858996896064e-05, + "loss": 12.4565, + "step": 18550 + }, + { + "epoch": 1.37, + "learning_rate": 8.181836445640927e-05, + "loss": 11.0557, + "step": 18600 + }, + { + "epoch": 1.38, + "learning_rate": 8.176813894385793e-05, + "loss": 12.1892, + "step": 18650 + }, + { + "epoch": 1.38, + "learning_rate": 8.171791343130658e-05, + "loss": 12.0531, + "step": 18700 + }, + { + "epoch": 1.38, + "learning_rate": 8.166768791875522e-05, + "loss": 10.1791, + "step": 18750 + }, + { + "epoch": 1.39, + "learning_rate": 8.161746240620386e-05, + "loss": 11.2501, + "step": 18800 + }, + { + "epoch": 1.39, + "learning_rate": 8.15672368936525e-05, + "loss": 9.92, + "step": 18850 + }, + { + "epoch": 1.39, + "learning_rate": 8.151701138110115e-05, + "loss": 10.0603, + "step": 18900 + }, + { + "epoch": 1.4, + "learning_rate": 8.146678586854978e-05, + "loss": 10.9477, + "step": 18950 + }, + { + "epoch": 1.4, + "learning_rate": 8.141656035599844e-05, + "loss": 9.7579, + "step": 19000 + }, + { + "epoch": 1.41, + "learning_rate": 8.136633484344708e-05, + "loss": 11.243, + "step": 19050 + }, + { + "epoch": 1.41, + "learning_rate": 8.131610933089573e-05, + "loss": 11.0069, + "step": 19100 + }, + { + "epoch": 1.41, + "learning_rate": 8.126588381834437e-05, + "loss": 9.7387, + "step": 19150 + }, + { + "epoch": 1.42, + "learning_rate": 8.121565830579302e-05, + "loss": 11.4624, + "step": 19200 + }, + { + "epoch": 1.42, + "learning_rate": 8.116543279324166e-05, + "loss": 12.1299, + "step": 19250 + }, + { + "epoch": 1.42, + 
"learning_rate": 8.11152072806903e-05, + "loss": 12.2796, + "step": 19300 + }, + { + "epoch": 1.43, + "learning_rate": 8.106498176813895e-05, + "loss": 10.3295, + "step": 19350 + }, + { + "epoch": 1.43, + "learning_rate": 8.101475625558759e-05, + "loss": 10.0709, + "step": 19400 + }, + { + "epoch": 1.43, + "learning_rate": 8.096453074303624e-05, + "loss": 11.0725, + "step": 19450 + }, + { + "epoch": 1.44, + "learning_rate": 8.091430523048488e-05, + "loss": 10.7882, + "step": 19500 + }, + { + "epoch": 1.44, + "learning_rate": 8.086407971793352e-05, + "loss": 11.4124, + "step": 19550 + }, + { + "epoch": 1.45, + "learning_rate": 8.081385420538217e-05, + "loss": 10.4941, + "step": 19600 + }, + { + "epoch": 1.45, + "learning_rate": 8.076362869283081e-05, + "loss": 11.8687, + "step": 19650 + }, + { + "epoch": 1.45, + "learning_rate": 8.071340318027946e-05, + "loss": 11.3221, + "step": 19700 + }, + { + "epoch": 1.46, + "learning_rate": 8.06631776677281e-05, + "loss": 10.2167, + "step": 19750 + }, + { + "epoch": 1.46, + "learning_rate": 8.061295215517675e-05, + "loss": 10.5425, + "step": 19800 + }, + { + "epoch": 1.46, + "learning_rate": 8.05627266426254e-05, + "loss": 11.2982, + "step": 19850 + }, + { + "epoch": 1.47, + "learning_rate": 8.051250113007403e-05, + "loss": 12.0685, + "step": 19900 + }, + { + "epoch": 1.47, + "learning_rate": 8.046227561752268e-05, + "loss": 10.6613, + "step": 19950 + }, + { + "epoch": 1.48, + "learning_rate": 8.041205010497132e-05, + "loss": 10.8245, + "step": 20000 + }, + { + "epoch": 1.48, + "eval_loss": 10.409339904785156, + "eval_runtime": 890.9956, + "eval_samples_per_second": 14.7, + "eval_steps_per_second": 3.676, + "eval_wer": 0.2624627273109067, + "step": 20000 + } + ], + "max_steps": 100051, + "num_train_epochs": 8, + "total_flos": 0.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-20000/training_args.bin b/checkpoint-20000/training_args.bin new file mode 100644 index 
0000000000000000000000000000000000000000..dbc064046a36220dd960e955c565bc3e2c9e3abd --- /dev/null +++ b/checkpoint-20000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b64c669f66dd7a2e54d3001ce7e31c26cc60dd58136e8ce90e6055bd0ae15eb +size 3503 diff --git a/checkpoint-40000/optimizer.pt b/checkpoint-40000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a63e28483d5d1ced8e247e86e31e1275fc194be --- /dev/null +++ b/checkpoint-40000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1c09a7ddf632fa2b5485de6d094cf8a763affbefcb8dc5c93001a0539bad686 +size 5154563651 diff --git a/checkpoint-40000/rng_state.pth b/checkpoint-40000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..734e853fc9655555507608b2d561873946d35db7 --- /dev/null +++ b/checkpoint-40000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31b1895952e1807b396d4e924fa1fb61ed026336fa2d9b568b14c899ec1ae878 +size 14503 diff --git a/checkpoint-40000/scheduler.pt b/checkpoint-40000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3846c4fabc9a728e1fc52d52d71d1a952f94772b --- /dev/null +++ b/checkpoint-40000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b81037f0665e42c49d437ecf24e1e38406f2a8f8a1c463379f77ea33597052a +size 623 diff --git a/checkpoint-40000/stt_en_conformer_transducer_xlarge.nemo b/checkpoint-40000/stt_en_conformer_transducer_xlarge.nemo new file mode 100644 index 0000000000000000000000000000000000000000..b2243c1fe30e7402b6bf34d068d9f703c3f50f9b --- /dev/null +++ b/checkpoint-40000/stt_en_conformer_transducer_xlarge.nemo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c822e20c23a0eb709dc03222743ce215a42db9863af172c34297cd8c402f9e4 +size 2577971200 diff --git a/checkpoint-40000/trainer_state.json 
b/checkpoint-40000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..63066814e96aa88c77bd940cfc9a3435bf3ea3b4 --- /dev/null +++ b/checkpoint-40000/trainer_state.json @@ -0,0 +1,4834 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.950505274028177, + "global_step": 40000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1e-05, + "loss": 178.9465, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 2e-05, + "loss": 164.9707, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 3e-05, + "loss": 142.2782, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4e-05, + "loss": 121.5122, + "step": 200 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 91.8622, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 6e-05, + "loss": 82.2062, + "step": 300 + }, + { + "epoch": 0.03, + "learning_rate": 7e-05, + "loss": 72.6893, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 8e-05, + "loss": 71.8709, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 9e-05, + "loss": 69.9995, + "step": 450 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001, + "loss": 70.6458, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 9.994977448744865e-05, + "loss": 73.9929, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 9.989954897489729e-05, + "loss": 66.52, + "step": 600 + }, + { + "epoch": 0.05, + "learning_rate": 9.984932346234594e-05, + "loss": 65.8947, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 9.979909794979458e-05, + "loss": 62.5809, + "step": 700 + }, + { + "epoch": 0.06, + "learning_rate": 9.974887243724323e-05, + "loss": 61.212, + "step": 750 + }, + { + "epoch": 0.06, + "learning_rate": 9.969864692469187e-05, + "loss": 68.2408, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 9.964842141214051e-05, + "loss": 61.5308, + 
"step": 850 + }, + { + "epoch": 0.07, + "learning_rate": 9.959819589958916e-05, + "loss": 58.9116, + "step": 900 + }, + { + "epoch": 0.07, + "learning_rate": 9.95479703870378e-05, + "loss": 60.0702, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 9.949774487448646e-05, + "loss": 57.6135, + "step": 1000 + }, + { + "epoch": 0.08, + "learning_rate": 9.944751936193509e-05, + "loss": 50.9231, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 9.939729384938373e-05, + "loss": 51.187, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 9.934706833683238e-05, + "loss": 52.1127, + "step": 1150 + }, + { + "epoch": 0.09, + "learning_rate": 9.929684282428102e-05, + "loss": 47.4608, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 9.924661731172968e-05, + "loss": 51.6108, + "step": 1250 + }, + { + "epoch": 0.1, + "learning_rate": 9.919639179917831e-05, + "loss": 46.5874, + "step": 1300 + }, + { + "epoch": 0.1, + "learning_rate": 9.914616628662697e-05, + "loss": 41.4706, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 9.90959407740756e-05, + "loss": 43.7544, + "step": 1400 + }, + { + "epoch": 0.11, + "learning_rate": 9.904571526152426e-05, + "loss": 44.6039, + "step": 1450 + }, + { + "epoch": 0.11, + "learning_rate": 9.899548974897289e-05, + "loss": 41.4384, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 9.894526423642154e-05, + "loss": 42.8289, + "step": 1550 + }, + { + "epoch": 0.12, + "learning_rate": 9.889503872387019e-05, + "loss": 39.9726, + "step": 1600 + }, + { + "epoch": 0.12, + "learning_rate": 9.884481321131882e-05, + "loss": 43.9533, + "step": 1650 + }, + { + "epoch": 0.13, + "learning_rate": 9.879458769876748e-05, + "loss": 38.7605, + "step": 1700 + }, + { + "epoch": 0.13, + "learning_rate": 9.87443621862161e-05, + "loss": 39.5425, + "step": 1750 + }, + { + "epoch": 0.13, + "learning_rate": 9.869413667366476e-05, + "loss": 37.588, + "step": 1800 + }, + { + "epoch": 0.14, + "learning_rate": 
9.86439111611134e-05, + "loss": 39.7744, + "step": 1850 + }, + { + "epoch": 0.14, + "learning_rate": 9.859368564856205e-05, + "loss": 38.2154, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 9.85434601360107e-05, + "loss": 35.0806, + "step": 1950 + }, + { + "epoch": 0.15, + "learning_rate": 9.849323462345934e-05, + "loss": 39.061, + "step": 2000 + }, + { + "epoch": 0.15, + "learning_rate": 9.844300911090798e-05, + "loss": 35.1544, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 9.839278359835663e-05, + "loss": 38.123, + "step": 2100 + }, + { + "epoch": 0.16, + "learning_rate": 9.834255808580527e-05, + "loss": 33.1144, + "step": 2150 + }, + { + "epoch": 0.16, + "learning_rate": 9.829233257325392e-05, + "loss": 34.3476, + "step": 2200 + }, + { + "epoch": 0.17, + "learning_rate": 9.824210706070256e-05, + "loss": 29.5665, + "step": 2250 + }, + { + "epoch": 0.17, + "learning_rate": 9.81918815481512e-05, + "loss": 35.8756, + "step": 2300 + }, + { + "epoch": 0.17, + "learning_rate": 9.814165603559985e-05, + "loss": 37.2579, + "step": 2350 + }, + { + "epoch": 0.18, + "learning_rate": 9.809143052304849e-05, + "loss": 33.6245, + "step": 2400 + }, + { + "epoch": 0.18, + "learning_rate": 9.804120501049714e-05, + "loss": 35.6543, + "step": 2450 + }, + { + "epoch": 0.18, + "learning_rate": 9.799097949794578e-05, + "loss": 36.7847, + "step": 2500 + }, + { + "epoch": 0.19, + "learning_rate": 9.794075398539442e-05, + "loss": 33.463, + "step": 2550 + }, + { + "epoch": 0.19, + "learning_rate": 9.789052847284307e-05, + "loss": 32.2215, + "step": 2600 + }, + { + "epoch": 0.2, + "learning_rate": 9.784030296029171e-05, + "loss": 33.4301, + "step": 2650 + }, + { + "epoch": 0.2, + "learning_rate": 9.779007744774036e-05, + "loss": 29.9579, + "step": 2700 + }, + { + "epoch": 0.2, + "learning_rate": 9.773985193518901e-05, + "loss": 31.9141, + "step": 2750 + }, + { + "epoch": 0.21, + "learning_rate": 9.768962642263764e-05, + "loss": 33.2049, + "step": 2800 + }, + { + 
"epoch": 0.21, + "learning_rate": 9.763940091008629e-05, + "loss": 32.8774, + "step": 2850 + }, + { + "epoch": 0.21, + "learning_rate": 9.758917539753493e-05, + "loss": 29.0858, + "step": 2900 + }, + { + "epoch": 0.22, + "learning_rate": 9.753894988498358e-05, + "loss": 30.1145, + "step": 2950 + }, + { + "epoch": 0.22, + "learning_rate": 9.748872437243222e-05, + "loss": 27.6986, + "step": 3000 + }, + { + "epoch": 0.22, + "learning_rate": 9.743849885988087e-05, + "loss": 31.7807, + "step": 3050 + }, + { + "epoch": 0.23, + "learning_rate": 9.738827334732952e-05, + "loss": 30.5108, + "step": 3100 + }, + { + "epoch": 0.23, + "learning_rate": 9.733804783477815e-05, + "loss": 31.0909, + "step": 3150 + }, + { + "epoch": 0.24, + "learning_rate": 9.728782232222681e-05, + "loss": 27.9057, + "step": 3200 + }, + { + "epoch": 0.24, + "learning_rate": 9.723759680967544e-05, + "loss": 29.7323, + "step": 3250 + }, + { + "epoch": 0.24, + "learning_rate": 9.71873712971241e-05, + "loss": 29.7527, + "step": 3300 + }, + { + "epoch": 0.25, + "learning_rate": 9.713714578457273e-05, + "loss": 29.1442, + "step": 3350 + }, + { + "epoch": 0.25, + "learning_rate": 9.708692027202137e-05, + "loss": 30.8906, + "step": 3400 + }, + { + "epoch": 0.25, + "learning_rate": 9.703669475947003e-05, + "loss": 26.8419, + "step": 3450 + }, + { + "epoch": 0.26, + "learning_rate": 9.698646924691866e-05, + "loss": 29.2181, + "step": 3500 + }, + { + "epoch": 0.26, + "learning_rate": 9.693624373436732e-05, + "loss": 27.6549, + "step": 3550 + }, + { + "epoch": 0.27, + "learning_rate": 9.688601822181595e-05, + "loss": 34.0701, + "step": 3600 + }, + { + "epoch": 0.27, + "learning_rate": 9.683579270926461e-05, + "loss": 24.7487, + "step": 3650 + }, + { + "epoch": 0.27, + "learning_rate": 9.678556719671325e-05, + "loss": 30.0266, + "step": 3700 + }, + { + "epoch": 0.28, + "learning_rate": 9.67353416841619e-05, + "loss": 25.5011, + "step": 3750 + }, + { + "epoch": 0.28, + "learning_rate": 9.668511617161054e-05, + 
"loss": 26.1437, + "step": 3800 + }, + { + "epoch": 0.28, + "learning_rate": 9.663489065905918e-05, + "loss": 23.2303, + "step": 3850 + }, + { + "epoch": 0.29, + "learning_rate": 9.658466514650783e-05, + "loss": 26.357, + "step": 3900 + }, + { + "epoch": 0.29, + "learning_rate": 9.653443963395646e-05, + "loss": 27.2201, + "step": 3950 + }, + { + "epoch": 0.3, + "learning_rate": 9.648421412140512e-05, + "loss": 25.5695, + "step": 4000 + }, + { + "epoch": 0.3, + "learning_rate": 9.643398860885376e-05, + "loss": 24.8346, + "step": 4050 + }, + { + "epoch": 0.3, + "learning_rate": 9.63837630963024e-05, + "loss": 22.3957, + "step": 4100 + }, + { + "epoch": 0.31, + "learning_rate": 9.633353758375105e-05, + "loss": 24.9532, + "step": 4150 + }, + { + "epoch": 0.31, + "learning_rate": 9.628331207119969e-05, + "loss": 23.1574, + "step": 4200 + }, + { + "epoch": 0.31, + "learning_rate": 9.623308655864834e-05, + "loss": 23.7018, + "step": 4250 + }, + { + "epoch": 0.32, + "learning_rate": 9.618286104609698e-05, + "loss": 25.1433, + "step": 4300 + }, + { + "epoch": 0.32, + "learning_rate": 9.613263553354562e-05, + "loss": 25.0571, + "step": 4350 + }, + { + "epoch": 0.32, + "learning_rate": 9.608241002099427e-05, + "loss": 24.2231, + "step": 4400 + }, + { + "epoch": 0.33, + "learning_rate": 9.603218450844291e-05, + "loss": 23.0983, + "step": 4450 + }, + { + "epoch": 0.33, + "learning_rate": 9.598195899589156e-05, + "loss": 25.0078, + "step": 4500 + }, + { + "epoch": 0.34, + "learning_rate": 9.59317334833402e-05, + "loss": 20.6933, + "step": 4550 + }, + { + "epoch": 0.34, + "learning_rate": 9.588150797078884e-05, + "loss": 23.6196, + "step": 4600 + }, + { + "epoch": 0.34, + "learning_rate": 9.583128245823749e-05, + "loss": 25.2331, + "step": 4650 + }, + { + "epoch": 0.35, + "learning_rate": 9.578105694568613e-05, + "loss": 24.7932, + "step": 4700 + }, + { + "epoch": 0.35, + "learning_rate": 9.573083143313478e-05, + "loss": 24.3586, + "step": 4750 + }, + { + "epoch": 0.35, + 
"learning_rate": 9.568060592058342e-05, + "loss": 22.7161, + "step": 4800 + }, + { + "epoch": 0.36, + "learning_rate": 9.563038040803208e-05, + "loss": 22.4188, + "step": 4850 + }, + { + "epoch": 0.36, + "learning_rate": 9.558015489548071e-05, + "loss": 21.6516, + "step": 4900 + }, + { + "epoch": 0.37, + "learning_rate": 9.552992938292937e-05, + "loss": 21.78, + "step": 4950 + }, + { + "epoch": 0.37, + "learning_rate": 9.5479703870378e-05, + "loss": 21.0172, + "step": 5000 + }, + { + "epoch": 0.37, + "learning_rate": 9.542947835782665e-05, + "loss": 22.4624, + "step": 5050 + }, + { + "epoch": 0.38, + "learning_rate": 9.537925284527528e-05, + "loss": 23.6615, + "step": 5100 + }, + { + "epoch": 0.38, + "learning_rate": 9.532902733272393e-05, + "loss": 21.8091, + "step": 5150 + }, + { + "epoch": 0.38, + "learning_rate": 9.527880182017259e-05, + "loss": 21.4173, + "step": 5200 + }, + { + "epoch": 0.39, + "learning_rate": 9.522857630762122e-05, + "loss": 20.5415, + "step": 5250 + }, + { + "epoch": 0.39, + "learning_rate": 9.517835079506987e-05, + "loss": 21.0639, + "step": 5300 + }, + { + "epoch": 0.39, + "learning_rate": 9.51281252825185e-05, + "loss": 21.6078, + "step": 5350 + }, + { + "epoch": 0.4, + "learning_rate": 9.507789976996716e-05, + "loss": 19.4142, + "step": 5400 + }, + { + "epoch": 0.4, + "learning_rate": 9.50276742574158e-05, + "loss": 20.2504, + "step": 5450 + }, + { + "epoch": 0.41, + "learning_rate": 9.497744874486445e-05, + "loss": 23.8683, + "step": 5500 + }, + { + "epoch": 0.41, + "learning_rate": 9.49272232323131e-05, + "loss": 19.7559, + "step": 5550 + }, + { + "epoch": 0.41, + "learning_rate": 9.487699771976174e-05, + "loss": 21.1743, + "step": 5600 + }, + { + "epoch": 0.42, + "learning_rate": 9.482677220721038e-05, + "loss": 21.1908, + "step": 5650 + }, + { + "epoch": 0.42, + "learning_rate": 9.477654669465901e-05, + "loss": 20.9591, + "step": 5700 + }, + { + "epoch": 0.42, + "learning_rate": 9.472632118210767e-05, + "loss": 20.9036, + "step": 
5750 + }, + { + "epoch": 0.43, + "learning_rate": 9.46760956695563e-05, + "loss": 22.249, + "step": 5800 + }, + { + "epoch": 0.43, + "learning_rate": 9.462587015700496e-05, + "loss": 19.1093, + "step": 5850 + }, + { + "epoch": 0.44, + "learning_rate": 9.45756446444536e-05, + "loss": 21.2714, + "step": 5900 + }, + { + "epoch": 0.44, + "learning_rate": 9.452541913190225e-05, + "loss": 21.3794, + "step": 5950 + }, + { + "epoch": 0.44, + "learning_rate": 9.447519361935089e-05, + "loss": 20.0326, + "step": 6000 + }, + { + "epoch": 0.45, + "learning_rate": 9.442496810679954e-05, + "loss": 19.8004, + "step": 6050 + }, + { + "epoch": 0.45, + "learning_rate": 9.437474259424818e-05, + "loss": 19.0229, + "step": 6100 + }, + { + "epoch": 0.45, + "learning_rate": 9.432451708169682e-05, + "loss": 17.6587, + "step": 6150 + }, + { + "epoch": 0.46, + "learning_rate": 9.427429156914547e-05, + "loss": 21.9247, + "step": 6200 + }, + { + "epoch": 0.46, + "learning_rate": 9.422406605659411e-05, + "loss": 19.743, + "step": 6250 + }, + { + "epoch": 0.46, + "learning_rate": 9.417384054404276e-05, + "loss": 22.9746, + "step": 6300 + }, + { + "epoch": 0.47, + "learning_rate": 9.41236150314914e-05, + "loss": 19.6693, + "step": 6350 + }, + { + "epoch": 0.47, + "learning_rate": 9.407338951894004e-05, + "loss": 19.1141, + "step": 6400 + }, + { + "epoch": 0.48, + "learning_rate": 9.402316400638869e-05, + "loss": 18.3847, + "step": 6450 + }, + { + "epoch": 0.48, + "learning_rate": 9.397293849383733e-05, + "loss": 18.9357, + "step": 6500 + }, + { + "epoch": 0.48, + "learning_rate": 9.392271298128598e-05, + "loss": 18.9316, + "step": 6550 + }, + { + "epoch": 0.49, + "learning_rate": 9.387248746873462e-05, + "loss": 20.9141, + "step": 6600 + }, + { + "epoch": 0.49, + "learning_rate": 9.382226195618326e-05, + "loss": 18.7472, + "step": 6650 + }, + { + "epoch": 0.49, + "learning_rate": 9.377203644363192e-05, + "loss": 18.8577, + "step": 6700 + }, + { + "epoch": 0.5, + "learning_rate": 
9.372181093108055e-05, + "loss": 17.8061, + "step": 6750 + }, + { + "epoch": 0.5, + "learning_rate": 9.36715854185292e-05, + "loss": 19.4687, + "step": 6800 + }, + { + "epoch": 0.51, + "learning_rate": 9.362135990597784e-05, + "loss": 19.5103, + "step": 6850 + }, + { + "epoch": 0.51, + "learning_rate": 9.357113439342648e-05, + "loss": 18.5319, + "step": 6900 + }, + { + "epoch": 0.51, + "learning_rate": 9.352090888087514e-05, + "loss": 20.16, + "step": 6950 + }, + { + "epoch": 0.52, + "learning_rate": 9.347068336832377e-05, + "loss": 18.1913, + "step": 7000 + }, + { + "epoch": 0.52, + "learning_rate": 9.342045785577243e-05, + "loss": 21.341, + "step": 7050 + }, + { + "epoch": 0.52, + "learning_rate": 9.337023234322106e-05, + "loss": 16.7701, + "step": 7100 + }, + { + "epoch": 0.53, + "learning_rate": 9.332000683066972e-05, + "loss": 18.045, + "step": 7150 + }, + { + "epoch": 0.53, + "learning_rate": 9.326978131811835e-05, + "loss": 16.0393, + "step": 7200 + }, + { + "epoch": 0.53, + "learning_rate": 9.3219555805567e-05, + "loss": 17.4833, + "step": 7250 + }, + { + "epoch": 0.54, + "learning_rate": 9.316933029301565e-05, + "loss": 17.3978, + "step": 7300 + }, + { + "epoch": 0.54, + "learning_rate": 9.31191047804643e-05, + "loss": 18.2649, + "step": 7350 + }, + { + "epoch": 0.55, + "learning_rate": 9.306887926791294e-05, + "loss": 16.3891, + "step": 7400 + }, + { + "epoch": 0.55, + "learning_rate": 9.301865375536157e-05, + "loss": 21.4399, + "step": 7450 + }, + { + "epoch": 0.55, + "learning_rate": 9.296842824281023e-05, + "loss": 16.3082, + "step": 7500 + }, + { + "epoch": 0.56, + "learning_rate": 9.291820273025886e-05, + "loss": 14.8713, + "step": 7550 + }, + { + "epoch": 0.56, + "learning_rate": 9.286797721770751e-05, + "loss": 16.3099, + "step": 7600 + }, + { + "epoch": 0.56, + "learning_rate": 9.281775170515616e-05, + "loss": 17.8771, + "step": 7650 + }, + { + "epoch": 0.57, + "learning_rate": 9.27675261926048e-05, + "loss": 17.1421, + "step": 7700 + }, + { + 
"epoch": 0.57, + "learning_rate": 9.271730068005345e-05, + "loss": 16.6478, + "step": 7750 + }, + { + "epoch": 0.58, + "learning_rate": 9.266707516750209e-05, + "loss": 15.3247, + "step": 7800 + }, + { + "epoch": 0.58, + "learning_rate": 9.261684965495073e-05, + "loss": 17.6577, + "step": 7850 + }, + { + "epoch": 0.58, + "learning_rate": 9.256662414239938e-05, + "loss": 18.8549, + "step": 7900 + }, + { + "epoch": 0.59, + "learning_rate": 9.251639862984802e-05, + "loss": 17.4187, + "step": 7950 + }, + { + "epoch": 0.59, + "learning_rate": 9.246617311729667e-05, + "loss": 15.6643, + "step": 8000 + }, + { + "epoch": 0.59, + "learning_rate": 9.241594760474531e-05, + "loss": 17.1987, + "step": 8050 + }, + { + "epoch": 0.6, + "learning_rate": 9.236572209219396e-05, + "loss": 18.1712, + "step": 8100 + }, + { + "epoch": 0.6, + "learning_rate": 9.23154965796426e-05, + "loss": 15.8015, + "step": 8150 + }, + { + "epoch": 0.6, + "learning_rate": 9.226527106709124e-05, + "loss": 19.064, + "step": 8200 + }, + { + "epoch": 0.61, + "learning_rate": 9.221504555453989e-05, + "loss": 18.2748, + "step": 8250 + }, + { + "epoch": 0.61, + "learning_rate": 9.216482004198853e-05, + "loss": 15.0679, + "step": 8300 + }, + { + "epoch": 0.62, + "learning_rate": 9.211459452943718e-05, + "loss": 17.995, + "step": 8350 + }, + { + "epoch": 0.62, + "learning_rate": 9.206436901688582e-05, + "loss": 17.467, + "step": 8400 + }, + { + "epoch": 0.62, + "learning_rate": 9.201414350433448e-05, + "loss": 18.6665, + "step": 8450 + }, + { + "epoch": 0.63, + "learning_rate": 9.196391799178311e-05, + "loss": 17.2848, + "step": 8500 + }, + { + "epoch": 0.63, + "learning_rate": 9.191369247923175e-05, + "loss": 14.4767, + "step": 8550 + }, + { + "epoch": 0.63, + "learning_rate": 9.18634669666804e-05, + "loss": 17.5444, + "step": 8600 + }, + { + "epoch": 0.64, + "learning_rate": 9.181324145412904e-05, + "loss": 14.4661, + "step": 8650 + }, + { + "epoch": 0.64, + "learning_rate": 9.176301594157768e-05, + "loss": 
16.3339, + "step": 8700 + }, + { + "epoch": 0.65, + "learning_rate": 9.171279042902633e-05, + "loss": 17.5122, + "step": 8750 + }, + { + "epoch": 0.65, + "learning_rate": 9.166256491647499e-05, + "loss": 16.7631, + "step": 8800 + }, + { + "epoch": 0.65, + "learning_rate": 9.161233940392362e-05, + "loss": 16.5193, + "step": 8850 + }, + { + "epoch": 0.66, + "learning_rate": 9.156211389137227e-05, + "loss": 17.8364, + "step": 8900 + }, + { + "epoch": 0.66, + "learning_rate": 9.15118883788209e-05, + "loss": 16.2916, + "step": 8950 + }, + { + "epoch": 0.66, + "learning_rate": 9.146166286626956e-05, + "loss": 14.1719, + "step": 9000 + }, + { + "epoch": 0.67, + "learning_rate": 9.141143735371819e-05, + "loss": 18.2987, + "step": 9050 + }, + { + "epoch": 0.67, + "learning_rate": 9.136121184116684e-05, + "loss": 17.4248, + "step": 9100 + }, + { + "epoch": 0.67, + "learning_rate": 9.13109863286155e-05, + "loss": 16.1862, + "step": 9150 + }, + { + "epoch": 0.68, + "learning_rate": 9.126076081606412e-05, + "loss": 16.3134, + "step": 9200 + }, + { + "epoch": 0.68, + "learning_rate": 9.121053530351278e-05, + "loss": 14.9158, + "step": 9250 + }, + { + "epoch": 0.69, + "learning_rate": 9.116030979096141e-05, + "loss": 15.2504, + "step": 9300 + }, + { + "epoch": 0.69, + "learning_rate": 9.111008427841007e-05, + "loss": 14.1967, + "step": 9350 + }, + { + "epoch": 0.69, + "learning_rate": 9.105985876585871e-05, + "loss": 17.3165, + "step": 9400 + }, + { + "epoch": 0.7, + "learning_rate": 9.100963325330736e-05, + "loss": 14.5912, + "step": 9450 + }, + { + "epoch": 0.7, + "learning_rate": 9.0959407740756e-05, + "loss": 17.5593, + "step": 9500 + }, + { + "epoch": 0.7, + "learning_rate": 9.090918222820465e-05, + "loss": 16.3421, + "step": 9550 + }, + { + "epoch": 0.71, + "learning_rate": 9.085895671565329e-05, + "loss": 16.2821, + "step": 9600 + }, + { + "epoch": 0.71, + "learning_rate": 9.080873120310192e-05, + "loss": 16.4985, + "step": 9650 + }, + { + "epoch": 0.72, + "learning_rate": 
9.075850569055058e-05, + "loss": 16.1138, + "step": 9700 + }, + { + "epoch": 0.72, + "learning_rate": 9.070828017799922e-05, + "loss": 16.3997, + "step": 9750 + }, + { + "epoch": 0.72, + "learning_rate": 9.065805466544787e-05, + "loss": 15.518, + "step": 9800 + }, + { + "epoch": 0.73, + "learning_rate": 9.060782915289651e-05, + "loss": 13.8424, + "step": 9850 + }, + { + "epoch": 0.73, + "learning_rate": 9.055760364034515e-05, + "loss": 15.0784, + "step": 9900 + }, + { + "epoch": 0.73, + "learning_rate": 9.05073781277938e-05, + "loss": 14.0163, + "step": 9950 + }, + { + "epoch": 0.74, + "learning_rate": 9.045715261524244e-05, + "loss": 16.7863, + "step": 10000 + }, + { + "epoch": 0.74, + "learning_rate": 9.040692710269109e-05, + "loss": 13.6715, + "step": 10050 + }, + { + "epoch": 0.75, + "learning_rate": 9.035670159013973e-05, + "loss": 15.1071, + "step": 10100 + }, + { + "epoch": 0.75, + "learning_rate": 9.030647607758837e-05, + "loss": 14.2658, + "step": 10150 + }, + { + "epoch": 0.75, + "learning_rate": 9.025625056503703e-05, + "loss": 15.1115, + "step": 10200 + }, + { + "epoch": 0.76, + "learning_rate": 9.020602505248566e-05, + "loss": 14.028, + "step": 10250 + }, + { + "epoch": 0.76, + "learning_rate": 9.015579953993431e-05, + "loss": 13.3066, + "step": 10300 + }, + { + "epoch": 0.76, + "learning_rate": 9.010557402738295e-05, + "loss": 14.1185, + "step": 10350 + }, + { + "epoch": 0.77, + "learning_rate": 9.00553485148316e-05, + "loss": 14.061, + "step": 10400 + }, + { + "epoch": 0.77, + "learning_rate": 9.000512300228024e-05, + "loss": 15.2439, + "step": 10450 + }, + { + "epoch": 0.77, + "learning_rate": 8.995489748972888e-05, + "loss": 13.3617, + "step": 10500 + }, + { + "epoch": 0.78, + "learning_rate": 8.990467197717754e-05, + "loss": 14.5514, + "step": 10550 + }, + { + "epoch": 0.78, + "learning_rate": 8.985444646462617e-05, + "loss": 15.2426, + "step": 10600 + }, + { + "epoch": 0.79, + "learning_rate": 8.980422095207483e-05, + "loss": 16.6418, + "step": 
10650 + }, + { + "epoch": 0.79, + "learning_rate": 8.975399543952346e-05, + "loss": 13.3146, + "step": 10700 + }, + { + "epoch": 0.79, + "learning_rate": 8.970376992697212e-05, + "loss": 14.9333, + "step": 10750 + }, + { + "epoch": 0.8, + "learning_rate": 8.965354441442075e-05, + "loss": 14.4502, + "step": 10800 + }, + { + "epoch": 0.8, + "learning_rate": 8.960331890186939e-05, + "loss": 14.7886, + "step": 10850 + }, + { + "epoch": 0.8, + "learning_rate": 8.955309338931805e-05, + "loss": 15.0266, + "step": 10900 + }, + { + "epoch": 0.81, + "learning_rate": 8.950286787676668e-05, + "loss": 14.543, + "step": 10950 + }, + { + "epoch": 0.81, + "learning_rate": 8.945264236421534e-05, + "loss": 15.8078, + "step": 11000 + }, + { + "epoch": 0.82, + "learning_rate": 8.940241685166397e-05, + "loss": 13.6052, + "step": 11050 + }, + { + "epoch": 0.82, + "learning_rate": 8.935219133911263e-05, + "loss": 14.2995, + "step": 11100 + }, + { + "epoch": 0.82, + "learning_rate": 8.930196582656126e-05, + "loss": 15.732, + "step": 11150 + }, + { + "epoch": 0.83, + "learning_rate": 8.925174031400991e-05, + "loss": 14.0573, + "step": 11200 + }, + { + "epoch": 0.83, + "learning_rate": 8.920151480145856e-05, + "loss": 17.5941, + "step": 11250 + }, + { + "epoch": 0.83, + "learning_rate": 8.91512892889072e-05, + "loss": 14.7829, + "step": 11300 + }, + { + "epoch": 0.84, + "learning_rate": 8.910106377635585e-05, + "loss": 14.6669, + "step": 11350 + }, + { + "epoch": 0.84, + "learning_rate": 8.905083826380448e-05, + "loss": 14.3315, + "step": 11400 + }, + { + "epoch": 0.84, + "learning_rate": 8.900061275125313e-05, + "loss": 14.2639, + "step": 11450 + }, + { + "epoch": 0.85, + "learning_rate": 8.895038723870176e-05, + "loss": 14.3226, + "step": 11500 + }, + { + "epoch": 0.85, + "learning_rate": 8.890016172615042e-05, + "loss": 14.4975, + "step": 11550 + }, + { + "epoch": 0.86, + "learning_rate": 8.884993621359907e-05, + "loss": 14.8436, + "step": 11600 + }, + { + "epoch": 0.86, + 
"learning_rate": 8.879971070104771e-05, + "loss": 13.8481, + "step": 11650 + }, + { + "epoch": 0.86, + "learning_rate": 8.874948518849635e-05, + "loss": 12.8151, + "step": 11700 + }, + { + "epoch": 0.87, + "learning_rate": 8.8699259675945e-05, + "loss": 13.1659, + "step": 11750 + }, + { + "epoch": 0.87, + "learning_rate": 8.864903416339364e-05, + "loss": 15.0919, + "step": 11800 + }, + { + "epoch": 0.87, + "learning_rate": 8.859880865084229e-05, + "loss": 14.4382, + "step": 11850 + }, + { + "epoch": 0.88, + "learning_rate": 8.854858313829093e-05, + "loss": 14.0989, + "step": 11900 + }, + { + "epoch": 0.88, + "learning_rate": 8.849835762573957e-05, + "loss": 14.5763, + "step": 11950 + }, + { + "epoch": 0.89, + "learning_rate": 8.844813211318822e-05, + "loss": 13.4144, + "step": 12000 + }, + { + "epoch": 0.89, + "learning_rate": 8.839790660063686e-05, + "loss": 15.6018, + "step": 12050 + }, + { + "epoch": 0.89, + "learning_rate": 8.83476810880855e-05, + "loss": 14.7849, + "step": 12100 + }, + { + "epoch": 0.9, + "learning_rate": 8.829745557553415e-05, + "loss": 14.441, + "step": 12150 + }, + { + "epoch": 0.9, + "learning_rate": 8.82472300629828e-05, + "loss": 14.2135, + "step": 12200 + }, + { + "epoch": 0.9, + "learning_rate": 8.819700455043144e-05, + "loss": 17.1245, + "step": 12250 + }, + { + "epoch": 0.91, + "learning_rate": 8.814677903788008e-05, + "loss": 14.6629, + "step": 12300 + }, + { + "epoch": 0.91, + "learning_rate": 8.809655352532873e-05, + "loss": 16.6715, + "step": 12350 + }, + { + "epoch": 0.91, + "learning_rate": 8.804632801277738e-05, + "loss": 13.0133, + "step": 12400 + }, + { + "epoch": 0.92, + "learning_rate": 8.799610250022601e-05, + "loss": 14.1551, + "step": 12450 + }, + { + "epoch": 0.92, + "learning_rate": 8.794587698767466e-05, + "loss": 14.019, + "step": 12500 + }, + { + "epoch": 0.93, + "learning_rate": 8.78956514751233e-05, + "loss": 14.4279, + "step": 12550 + }, + { + "epoch": 0.93, + "learning_rate": 8.784542596257195e-05, + "loss": 
12.5293, + "step": 12600 + }, + { + "epoch": 0.93, + "learning_rate": 8.77952004500206e-05, + "loss": 15.0403, + "step": 12650 + }, + { + "epoch": 0.94, + "learning_rate": 8.774497493746924e-05, + "loss": 13.8193, + "step": 12700 + }, + { + "epoch": 0.94, + "learning_rate": 8.769474942491789e-05, + "loss": 13.1564, + "step": 12750 + }, + { + "epoch": 0.94, + "learning_rate": 8.764452391236652e-05, + "loss": 14.6415, + "step": 12800 + }, + { + "epoch": 0.95, + "learning_rate": 8.759429839981518e-05, + "loss": 12.2339, + "step": 12850 + }, + { + "epoch": 0.95, + "learning_rate": 8.754407288726381e-05, + "loss": 12.1604, + "step": 12900 + }, + { + "epoch": 0.96, + "learning_rate": 8.749384737471247e-05, + "loss": 15.4939, + "step": 12950 + }, + { + "epoch": 0.96, + "learning_rate": 8.744362186216111e-05, + "loss": 13.9713, + "step": 13000 + }, + { + "epoch": 0.96, + "learning_rate": 8.739339634960976e-05, + "loss": 14.0986, + "step": 13050 + }, + { + "epoch": 0.97, + "learning_rate": 8.73431708370584e-05, + "loss": 13.6334, + "step": 13100 + }, + { + "epoch": 0.97, + "learning_rate": 8.729294532450703e-05, + "loss": 13.5201, + "step": 13150 + }, + { + "epoch": 0.97, + "learning_rate": 8.724271981195569e-05, + "loss": 14.3793, + "step": 13200 + }, + { + "epoch": 0.98, + "learning_rate": 8.719249429940432e-05, + "loss": 13.1741, + "step": 13250 + }, + { + "epoch": 0.98, + "learning_rate": 8.714226878685298e-05, + "loss": 11.7782, + "step": 13300 + }, + { + "epoch": 0.98, + "learning_rate": 8.709204327430162e-05, + "loss": 12.2758, + "step": 13350 + }, + { + "epoch": 0.99, + "learning_rate": 8.704181776175027e-05, + "loss": 13.1723, + "step": 13400 + }, + { + "epoch": 0.99, + "learning_rate": 8.699159224919891e-05, + "loss": 14.0858, + "step": 13450 + }, + { + "epoch": 1.0, + "learning_rate": 8.694136673664755e-05, + "loss": 11.2836, + "step": 13500 + }, + { + "epoch": 1.0, + "learning_rate": 8.68911412240962e-05, + "loss": 15.7226, + "step": 13550 + }, + { + "epoch": 
1.0, + "learning_rate": 8.684091571154484e-05, + "loss": 15.8889, + "step": 13600 + }, + { + "epoch": 1.01, + "learning_rate": 8.679069019899349e-05, + "loss": 12.2185, + "step": 13650 + }, + { + "epoch": 1.01, + "learning_rate": 8.674046468644213e-05, + "loss": 11.4647, + "step": 13700 + }, + { + "epoch": 1.01, + "learning_rate": 8.669023917389077e-05, + "loss": 13.1238, + "step": 13750 + }, + { + "epoch": 1.02, + "learning_rate": 8.664001366133942e-05, + "loss": 11.909, + "step": 13800 + }, + { + "epoch": 1.02, + "learning_rate": 8.658978814878806e-05, + "loss": 12.5478, + "step": 13850 + }, + { + "epoch": 1.03, + "learning_rate": 8.65395626362367e-05, + "loss": 13.017, + "step": 13900 + }, + { + "epoch": 1.03, + "learning_rate": 8.648933712368535e-05, + "loss": 12.9134, + "step": 13950 + }, + { + "epoch": 1.03, + "learning_rate": 8.6439111611134e-05, + "loss": 13.3485, + "step": 14000 + }, + { + "epoch": 1.04, + "learning_rate": 8.638888609858264e-05, + "loss": 11.4706, + "step": 14050 + }, + { + "epoch": 1.04, + "learning_rate": 8.633866058603128e-05, + "loss": 11.1063, + "step": 14100 + }, + { + "epoch": 1.04, + "learning_rate": 8.628843507347994e-05, + "loss": 12.7408, + "step": 14150 + }, + { + "epoch": 1.05, + "learning_rate": 8.623820956092857e-05, + "loss": 12.0689, + "step": 14200 + }, + { + "epoch": 1.05, + "learning_rate": 8.618798404837721e-05, + "loss": 11.0724, + "step": 14250 + }, + { + "epoch": 1.05, + "learning_rate": 8.613775853582586e-05, + "loss": 12.5685, + "step": 14300 + }, + { + "epoch": 1.06, + "learning_rate": 8.60875330232745e-05, + "loss": 12.7776, + "step": 14350 + }, + { + "epoch": 1.06, + "learning_rate": 8.603730751072315e-05, + "loss": 11.3066, + "step": 14400 + }, + { + "epoch": 1.07, + "learning_rate": 8.598708199817179e-05, + "loss": 13.06, + "step": 14450 + }, + { + "epoch": 1.07, + "learning_rate": 8.593685648562045e-05, + "loss": 15.6523, + "step": 14500 + }, + { + "epoch": 1.07, + "learning_rate": 8.588663097306908e-05, + 
"loss": 12.019, + "step": 14550 + }, + { + "epoch": 1.08, + "learning_rate": 8.583640546051774e-05, + "loss": 11.0941, + "step": 14600 + }, + { + "epoch": 1.08, + "learning_rate": 8.578617994796637e-05, + "loss": 12.4755, + "step": 14650 + }, + { + "epoch": 1.08, + "learning_rate": 8.573595443541502e-05, + "loss": 13.7012, + "step": 14700 + }, + { + "epoch": 1.09, + "learning_rate": 8.568572892286366e-05, + "loss": 12.2024, + "step": 14750 + }, + { + "epoch": 1.09, + "learning_rate": 8.56355034103123e-05, + "loss": 12.4744, + "step": 14800 + }, + { + "epoch": 1.1, + "learning_rate": 8.558527789776096e-05, + "loss": 12.3234, + "step": 14850 + }, + { + "epoch": 1.1, + "learning_rate": 8.553505238520959e-05, + "loss": 12.5616, + "step": 14900 + }, + { + "epoch": 1.1, + "learning_rate": 8.548482687265824e-05, + "loss": 11.9559, + "step": 14950 + }, + { + "epoch": 1.11, + "learning_rate": 8.543460136010688e-05, + "loss": 12.0734, + "step": 15000 + }, + { + "epoch": 1.11, + "learning_rate": 8.538437584755553e-05, + "loss": 13.0341, + "step": 15050 + }, + { + "epoch": 1.11, + "learning_rate": 8.533415033500418e-05, + "loss": 12.7406, + "step": 15100 + }, + { + "epoch": 1.12, + "learning_rate": 8.528392482245282e-05, + "loss": 11.7258, + "step": 15150 + }, + { + "epoch": 1.12, + "learning_rate": 8.523369930990147e-05, + "loss": 11.8709, + "step": 15200 + }, + { + "epoch": 1.12, + "learning_rate": 8.518347379735011e-05, + "loss": 11.7021, + "step": 15250 + }, + { + "epoch": 1.13, + "learning_rate": 8.513324828479875e-05, + "loss": 13.2674, + "step": 15300 + }, + { + "epoch": 1.13, + "learning_rate": 8.508302277224738e-05, + "loss": 11.9099, + "step": 15350 + }, + { + "epoch": 1.14, + "learning_rate": 8.503279725969604e-05, + "loss": 11.7841, + "step": 15400 + }, + { + "epoch": 1.14, + "learning_rate": 8.498257174714469e-05, + "loss": 11.9573, + "step": 15450 + }, + { + "epoch": 1.14, + "learning_rate": 8.493234623459333e-05, + "loss": 11.7211, + "step": 15500 + }, + { + 
"epoch": 1.15, + "learning_rate": 8.488212072204197e-05, + "loss": 12.3513, + "step": 15550 + }, + { + "epoch": 1.15, + "learning_rate": 8.483189520949062e-05, + "loss": 11.0709, + "step": 15600 + }, + { + "epoch": 1.15, + "learning_rate": 8.478166969693926e-05, + "loss": 11.6544, + "step": 15650 + }, + { + "epoch": 1.16, + "learning_rate": 8.47314441843879e-05, + "loss": 11.8285, + "step": 15700 + }, + { + "epoch": 1.16, + "learning_rate": 8.468121867183655e-05, + "loss": 10.4208, + "step": 15750 + }, + { + "epoch": 1.17, + "learning_rate": 8.46309931592852e-05, + "loss": 10.7821, + "step": 15800 + }, + { + "epoch": 1.17, + "learning_rate": 8.458076764673384e-05, + "loss": 13.2724, + "step": 15850 + }, + { + "epoch": 1.17, + "learning_rate": 8.45305421341825e-05, + "loss": 10.9219, + "step": 15900 + }, + { + "epoch": 1.18, + "learning_rate": 8.448031662163113e-05, + "loss": 12.2532, + "step": 15950 + }, + { + "epoch": 1.18, + "learning_rate": 8.443009110907977e-05, + "loss": 11.0132, + "step": 16000 + }, + { + "epoch": 1.18, + "learning_rate": 8.437986559652841e-05, + "loss": 12.319, + "step": 16050 + }, + { + "epoch": 1.19, + "learning_rate": 8.432964008397706e-05, + "loss": 12.9871, + "step": 16100 + }, + { + "epoch": 1.19, + "learning_rate": 8.42794145714257e-05, + "loss": 12.0625, + "step": 16150 + }, + { + "epoch": 1.19, + "learning_rate": 8.422918905887435e-05, + "loss": 13.4629, + "step": 16200 + }, + { + "epoch": 1.2, + "learning_rate": 8.4178963546323e-05, + "loss": 10.9291, + "step": 16250 + }, + { + "epoch": 1.2, + "learning_rate": 8.412873803377163e-05, + "loss": 13.7719, + "step": 16300 + }, + { + "epoch": 1.21, + "learning_rate": 8.407851252122029e-05, + "loss": 11.3634, + "step": 16350 + }, + { + "epoch": 1.21, + "learning_rate": 8.402828700866892e-05, + "loss": 12.7941, + "step": 16400 + }, + { + "epoch": 1.21, + "learning_rate": 8.397806149611758e-05, + "loss": 11.8863, + "step": 16450 + }, + { + "epoch": 1.22, + "learning_rate": 
8.392783598356621e-05, + "loss": 9.5225, + "step": 16500 + }, + { + "epoch": 1.22, + "learning_rate": 8.387761047101485e-05, + "loss": 12.983, + "step": 16550 + }, + { + "epoch": 1.22, + "learning_rate": 8.382738495846351e-05, + "loss": 11.8489, + "step": 16600 + }, + { + "epoch": 1.23, + "learning_rate": 8.377715944591214e-05, + "loss": 11.8122, + "step": 16650 + }, + { + "epoch": 1.23, + "learning_rate": 8.37269339333608e-05, + "loss": 12.3387, + "step": 16700 + }, + { + "epoch": 1.24, + "learning_rate": 8.367670842080943e-05, + "loss": 13.4648, + "step": 16750 + }, + { + "epoch": 1.24, + "learning_rate": 8.362648290825809e-05, + "loss": 10.2301, + "step": 16800 + }, + { + "epoch": 1.24, + "learning_rate": 8.357625739570672e-05, + "loss": 11.492, + "step": 16850 + }, + { + "epoch": 1.25, + "learning_rate": 8.352603188315538e-05, + "loss": 12.5997, + "step": 16900 + }, + { + "epoch": 1.25, + "learning_rate": 8.347580637060402e-05, + "loss": 11.5588, + "step": 16950 + }, + { + "epoch": 1.25, + "learning_rate": 8.342558085805266e-05, + "loss": 11.8627, + "step": 17000 + }, + { + "epoch": 1.26, + "learning_rate": 8.337535534550131e-05, + "loss": 13.2469, + "step": 17050 + }, + { + "epoch": 1.26, + "learning_rate": 8.332512983294994e-05, + "loss": 10.4327, + "step": 17100 + }, + { + "epoch": 1.27, + "learning_rate": 8.32749043203986e-05, + "loss": 12.7566, + "step": 17150 + }, + { + "epoch": 1.27, + "learning_rate": 8.322467880784723e-05, + "loss": 11.0729, + "step": 17200 + }, + { + "epoch": 1.27, + "learning_rate": 8.317445329529588e-05, + "loss": 12.3484, + "step": 17250 + }, + { + "epoch": 1.28, + "learning_rate": 8.312422778274453e-05, + "loss": 10.5193, + "step": 17300 + }, + { + "epoch": 1.28, + "learning_rate": 8.307400227019317e-05, + "loss": 12.2369, + "step": 17350 + }, + { + "epoch": 1.28, + "learning_rate": 8.302377675764182e-05, + "loss": 12.2976, + "step": 17400 + }, + { + "epoch": 1.29, + "learning_rate": 8.297355124509046e-05, + "loss": 12.3852, + 
"step": 17450 + }, + { + "epoch": 1.29, + "learning_rate": 8.29233257325391e-05, + "loss": 11.2137, + "step": 17500 + }, + { + "epoch": 1.29, + "learning_rate": 8.287310021998775e-05, + "loss": 11.609, + "step": 17550 + }, + { + "epoch": 1.3, + "learning_rate": 8.282287470743639e-05, + "loss": 13.3339, + "step": 17600 + }, + { + "epoch": 1.3, + "learning_rate": 8.277264919488504e-05, + "loss": 11.4263, + "step": 17650 + }, + { + "epoch": 1.31, + "learning_rate": 8.272242368233368e-05, + "loss": 12.6949, + "step": 17700 + }, + { + "epoch": 1.31, + "learning_rate": 8.267219816978233e-05, + "loss": 11.4767, + "step": 17750 + }, + { + "epoch": 1.31, + "learning_rate": 8.262197265723097e-05, + "loss": 12.2225, + "step": 17800 + }, + { + "epoch": 1.32, + "learning_rate": 8.257174714467961e-05, + "loss": 11.0755, + "step": 17850 + }, + { + "epoch": 1.32, + "learning_rate": 8.252152163212826e-05, + "loss": 11.9677, + "step": 17900 + }, + { + "epoch": 1.32, + "learning_rate": 8.24712961195769e-05, + "loss": 11.098, + "step": 17950 + }, + { + "epoch": 1.33, + "learning_rate": 8.242107060702555e-05, + "loss": 11.1102, + "step": 18000 + }, + { + "epoch": 1.33, + "learning_rate": 8.237084509447419e-05, + "loss": 11.4985, + "step": 18050 + }, + { + "epoch": 1.34, + "learning_rate": 8.232061958192285e-05, + "loss": 11.7356, + "step": 18100 + }, + { + "epoch": 1.34, + "learning_rate": 8.227039406937148e-05, + "loss": 11.3336, + "step": 18150 + }, + { + "epoch": 1.34, + "learning_rate": 8.222016855682012e-05, + "loss": 11.0448, + "step": 18200 + }, + { + "epoch": 1.35, + "learning_rate": 8.216994304426877e-05, + "loss": 10.9986, + "step": 18250 + }, + { + "epoch": 1.35, + "learning_rate": 8.211971753171741e-05, + "loss": 10.768, + "step": 18300 + }, + { + "epoch": 1.35, + "learning_rate": 8.206949201916607e-05, + "loss": 11.6844, + "step": 18350 + }, + { + "epoch": 1.36, + "learning_rate": 8.20192665066147e-05, + "loss": 11.5615, + "step": 18400 + }, + { + "epoch": 1.36, + 
"learning_rate": 8.196904099406336e-05, + "loss": 11.4019, + "step": 18450 + }, + { + "epoch": 1.36, + "learning_rate": 8.191881548151199e-05, + "loss": 12.1784, + "step": 18500 + }, + { + "epoch": 1.37, + "learning_rate": 8.186858996896064e-05, + "loss": 12.4565, + "step": 18550 + }, + { + "epoch": 1.37, + "learning_rate": 8.181836445640927e-05, + "loss": 11.0557, + "step": 18600 + }, + { + "epoch": 1.38, + "learning_rate": 8.176813894385793e-05, + "loss": 12.1892, + "step": 18650 + }, + { + "epoch": 1.38, + "learning_rate": 8.171791343130658e-05, + "loss": 12.0531, + "step": 18700 + }, + { + "epoch": 1.38, + "learning_rate": 8.166768791875522e-05, + "loss": 10.1791, + "step": 18750 + }, + { + "epoch": 1.39, + "learning_rate": 8.161746240620386e-05, + "loss": 11.2501, + "step": 18800 + }, + { + "epoch": 1.39, + "learning_rate": 8.15672368936525e-05, + "loss": 9.92, + "step": 18850 + }, + { + "epoch": 1.39, + "learning_rate": 8.151701138110115e-05, + "loss": 10.0603, + "step": 18900 + }, + { + "epoch": 1.4, + "learning_rate": 8.146678586854978e-05, + "loss": 10.9477, + "step": 18950 + }, + { + "epoch": 1.4, + "learning_rate": 8.141656035599844e-05, + "loss": 9.7579, + "step": 19000 + }, + { + "epoch": 1.41, + "learning_rate": 8.136633484344708e-05, + "loss": 11.243, + "step": 19050 + }, + { + "epoch": 1.41, + "learning_rate": 8.131610933089573e-05, + "loss": 11.0069, + "step": 19100 + }, + { + "epoch": 1.41, + "learning_rate": 8.126588381834437e-05, + "loss": 9.7387, + "step": 19150 + }, + { + "epoch": 1.42, + "learning_rate": 8.121565830579302e-05, + "loss": 11.4624, + "step": 19200 + }, + { + "epoch": 1.42, + "learning_rate": 8.116543279324166e-05, + "loss": 12.1299, + "step": 19250 + }, + { + "epoch": 1.42, + "learning_rate": 8.11152072806903e-05, + "loss": 12.2796, + "step": 19300 + }, + { + "epoch": 1.43, + "learning_rate": 8.106498176813895e-05, + "loss": 10.3295, + "step": 19350 + }, + { + "epoch": 1.43, + "learning_rate": 8.101475625558759e-05, + "loss": 
10.0709, + "step": 19400 + }, + { + "epoch": 1.43, + "learning_rate": 8.096453074303624e-05, + "loss": 11.0725, + "step": 19450 + }, + { + "epoch": 1.44, + "learning_rate": 8.091430523048488e-05, + "loss": 10.7882, + "step": 19500 + }, + { + "epoch": 1.44, + "learning_rate": 8.086407971793352e-05, + "loss": 11.4124, + "step": 19550 + }, + { + "epoch": 1.45, + "learning_rate": 8.081385420538217e-05, + "loss": 10.4941, + "step": 19600 + }, + { + "epoch": 1.45, + "learning_rate": 8.076362869283081e-05, + "loss": 11.8687, + "step": 19650 + }, + { + "epoch": 1.45, + "learning_rate": 8.071340318027946e-05, + "loss": 11.3221, + "step": 19700 + }, + { + "epoch": 1.46, + "learning_rate": 8.06631776677281e-05, + "loss": 10.2167, + "step": 19750 + }, + { + "epoch": 1.46, + "learning_rate": 8.061295215517675e-05, + "loss": 10.5425, + "step": 19800 + }, + { + "epoch": 1.46, + "learning_rate": 8.05627266426254e-05, + "loss": 11.2982, + "step": 19850 + }, + { + "epoch": 1.47, + "learning_rate": 8.051250113007403e-05, + "loss": 12.0685, + "step": 19900 + }, + { + "epoch": 1.47, + "learning_rate": 8.046227561752268e-05, + "loss": 10.6613, + "step": 19950 + }, + { + "epoch": 1.48, + "learning_rate": 8.041205010497132e-05, + "loss": 10.8245, + "step": 20000 + }, + { + "epoch": 1.48, + "eval_loss": 10.409339904785156, + "eval_runtime": 890.9956, + "eval_samples_per_second": 14.7, + "eval_steps_per_second": 3.676, + "eval_wer": 0.2624627273109067, + "step": 20000 + }, + { + "epoch": 1.48, + "learning_rate": 8.036182459241997e-05, + "loss": 10.671, + "step": 20050 + }, + { + "epoch": 1.48, + "learning_rate": 8.031159907986861e-05, + "loss": 11.0263, + "step": 20100 + }, + { + "epoch": 1.49, + "learning_rate": 8.026137356731725e-05, + "loss": 11.0571, + "step": 20150 + }, + { + "epoch": 1.49, + "learning_rate": 8.021114805476591e-05, + "loss": 13.0778, + "step": 20200 + }, + { + "epoch": 1.49, + "learning_rate": 8.016092254221454e-05, + "loss": 11.0495, + "step": 20250 + }, + { + 
"epoch": 1.5, + "learning_rate": 8.01106970296632e-05, + "loss": 10.6039, + "step": 20300 + }, + { + "epoch": 1.5, + "learning_rate": 8.006047151711183e-05, + "loss": 11.4221, + "step": 20350 + }, + { + "epoch": 1.5, + "learning_rate": 8.001024600456049e-05, + "loss": 10.7975, + "step": 20400 + }, + { + "epoch": 1.51, + "learning_rate": 7.996002049200912e-05, + "loss": 10.1123, + "step": 20450 + }, + { + "epoch": 1.51, + "learning_rate": 7.990979497945776e-05, + "loss": 10.2241, + "step": 20500 + }, + { + "epoch": 1.52, + "learning_rate": 7.985956946690642e-05, + "loss": 10.0191, + "step": 20550 + }, + { + "epoch": 1.52, + "learning_rate": 7.980934395435505e-05, + "loss": 10.649, + "step": 20600 + }, + { + "epoch": 1.52, + "learning_rate": 7.975911844180371e-05, + "loss": 9.6091, + "step": 20650 + }, + { + "epoch": 1.53, + "learning_rate": 7.970889292925234e-05, + "loss": 9.9386, + "step": 20700 + }, + { + "epoch": 1.53, + "learning_rate": 7.9658667416701e-05, + "loss": 11.2646, + "step": 20750 + }, + { + "epoch": 1.53, + "learning_rate": 7.960844190414964e-05, + "loss": 10.0181, + "step": 20800 + }, + { + "epoch": 1.54, + "learning_rate": 7.955821639159828e-05, + "loss": 11.9437, + "step": 20850 + }, + { + "epoch": 1.54, + "learning_rate": 7.950799087904693e-05, + "loss": 10.9254, + "step": 20900 + }, + { + "epoch": 1.55, + "learning_rate": 7.945776536649557e-05, + "loss": 11.7954, + "step": 20950 + }, + { + "epoch": 1.55, + "learning_rate": 7.940753985394422e-05, + "loss": 9.6569, + "step": 21000 + }, + { + "epoch": 1.55, + "learning_rate": 7.935731434139286e-05, + "loss": 10.6546, + "step": 21050 + }, + { + "epoch": 1.56, + "learning_rate": 7.93070888288415e-05, + "loss": 10.2795, + "step": 21100 + }, + { + "epoch": 1.56, + "learning_rate": 7.925686331629015e-05, + "loss": 10.4595, + "step": 21150 + }, + { + "epoch": 1.56, + "learning_rate": 7.920663780373879e-05, + "loss": 9.2921, + "step": 21200 + }, + { + "epoch": 1.57, + "learning_rate": 
7.915641229118744e-05, + "loss": 10.1245, + "step": 21250 + }, + { + "epoch": 1.57, + "learning_rate": 7.910618677863608e-05, + "loss": 11.2896, + "step": 21300 + }, + { + "epoch": 1.57, + "learning_rate": 7.905596126608472e-05, + "loss": 11.3328, + "step": 21350 + }, + { + "epoch": 1.58, + "learning_rate": 7.900573575353337e-05, + "loss": 10.0718, + "step": 21400 + }, + { + "epoch": 1.58, + "learning_rate": 7.895551024098201e-05, + "loss": 10.8954, + "step": 21450 + }, + { + "epoch": 1.59, + "learning_rate": 7.890528472843066e-05, + "loss": 10.2921, + "step": 21500 + }, + { + "epoch": 1.59, + "learning_rate": 7.88550592158793e-05, + "loss": 9.4609, + "step": 21550 + }, + { + "epoch": 1.59, + "learning_rate": 7.880483370332796e-05, + "loss": 11.4751, + "step": 21600 + }, + { + "epoch": 1.6, + "learning_rate": 7.875460819077659e-05, + "loss": 10.1189, + "step": 21650 + }, + { + "epoch": 1.6, + "learning_rate": 7.870438267822523e-05, + "loss": 11.6478, + "step": 21700 + }, + { + "epoch": 1.6, + "learning_rate": 7.865415716567388e-05, + "loss": 11.2943, + "step": 21750 + }, + { + "epoch": 1.61, + "learning_rate": 7.860393165312252e-05, + "loss": 11.5788, + "step": 21800 + }, + { + "epoch": 1.61, + "learning_rate": 7.855370614057116e-05, + "loss": 10.638, + "step": 21850 + }, + { + "epoch": 1.62, + "learning_rate": 7.850348062801981e-05, + "loss": 9.2895, + "step": 21900 + }, + { + "epoch": 1.62, + "learning_rate": 7.845325511546847e-05, + "loss": 11.4984, + "step": 21950 + }, + { + "epoch": 1.62, + "learning_rate": 7.84030296029171e-05, + "loss": 10.3685, + "step": 22000 + }, + { + "epoch": 1.63, + "learning_rate": 7.835280409036575e-05, + "loss": 10.0115, + "step": 22050 + }, + { + "epoch": 1.63, + "learning_rate": 7.830257857781439e-05, + "loss": 10.2941, + "step": 22100 + }, + { + "epoch": 1.63, + "learning_rate": 7.825235306526304e-05, + "loss": 10.8751, + "step": 22150 + }, + { + "epoch": 1.64, + "learning_rate": 7.820212755271167e-05, + "loss": 10.7477, + 
"step": 22200 + }, + { + "epoch": 1.64, + "learning_rate": 7.815190204016032e-05, + "loss": 12.2573, + "step": 22250 + }, + { + "epoch": 1.64, + "learning_rate": 7.810167652760897e-05, + "loss": 10.1055, + "step": 22300 + }, + { + "epoch": 1.65, + "learning_rate": 7.80514510150576e-05, + "loss": 10.7913, + "step": 22350 + }, + { + "epoch": 1.65, + "learning_rate": 7.800122550250626e-05, + "loss": 9.4701, + "step": 22400 + }, + { + "epoch": 1.66, + "learning_rate": 7.79509999899549e-05, + "loss": 9.9434, + "step": 22450 + }, + { + "epoch": 1.66, + "learning_rate": 7.790077447740355e-05, + "loss": 10.9016, + "step": 22500 + }, + { + "epoch": 1.66, + "learning_rate": 7.785054896485218e-05, + "loss": 10.1733, + "step": 22550 + }, + { + "epoch": 1.67, + "learning_rate": 7.780032345230084e-05, + "loss": 11.0693, + "step": 22600 + }, + { + "epoch": 1.67, + "learning_rate": 7.775009793974948e-05, + "loss": 10.4538, + "step": 22650 + }, + { + "epoch": 1.67, + "learning_rate": 7.769987242719813e-05, + "loss": 10.5127, + "step": 22700 + }, + { + "epoch": 1.68, + "learning_rate": 7.764964691464677e-05, + "loss": 10.1074, + "step": 22750 + }, + { + "epoch": 1.68, + "learning_rate": 7.75994214020954e-05, + "loss": 11.2803, + "step": 22800 + }, + { + "epoch": 1.69, + "learning_rate": 7.754919588954406e-05, + "loss": 10.9954, + "step": 22850 + }, + { + "epoch": 1.69, + "learning_rate": 7.749897037699269e-05, + "loss": 10.1006, + "step": 22900 + }, + { + "epoch": 1.69, + "learning_rate": 7.744874486444135e-05, + "loss": 10.9978, + "step": 22950 + }, + { + "epoch": 1.7, + "learning_rate": 7.739851935188999e-05, + "loss": 10.5885, + "step": 23000 + }, + { + "epoch": 1.7, + "learning_rate": 7.734829383933864e-05, + "loss": 10.5676, + "step": 23050 + }, + { + "epoch": 1.7, + "learning_rate": 7.729806832678728e-05, + "loss": 11.3204, + "step": 23100 + }, + { + "epoch": 1.71, + "learning_rate": 7.724784281423592e-05, + "loss": 10.5388, + "step": 23150 + }, + { + "epoch": 1.71, + 
"learning_rate": 7.719761730168457e-05, + "loss": 10.7915, + "step": 23200 + }, + { + "epoch": 1.71, + "learning_rate": 7.714739178913321e-05, + "loss": 11.9486, + "step": 23250 + }, + { + "epoch": 1.72, + "learning_rate": 7.709716627658186e-05, + "loss": 11.6693, + "step": 23300 + }, + { + "epoch": 1.72, + "learning_rate": 7.70469407640305e-05, + "loss": 9.2664, + "step": 23350 + }, + { + "epoch": 1.73, + "learning_rate": 7.699671525147914e-05, + "loss": 12.1429, + "step": 23400 + }, + { + "epoch": 1.73, + "learning_rate": 7.694648973892779e-05, + "loss": 10.1155, + "step": 23450 + }, + { + "epoch": 1.73, + "learning_rate": 7.689626422637643e-05, + "loss": 10.1562, + "step": 23500 + }, + { + "epoch": 1.74, + "learning_rate": 7.684603871382508e-05, + "loss": 11.3484, + "step": 23550 + }, + { + "epoch": 1.74, + "learning_rate": 7.679581320127372e-05, + "loss": 9.5912, + "step": 23600 + }, + { + "epoch": 1.74, + "learning_rate": 7.674558768872236e-05, + "loss": 11.1067, + "step": 23650 + }, + { + "epoch": 1.75, + "learning_rate": 7.669536217617101e-05, + "loss": 11.7182, + "step": 23700 + }, + { + "epoch": 1.75, + "learning_rate": 7.664513666361965e-05, + "loss": 10.1444, + "step": 23750 + }, + { + "epoch": 1.76, + "learning_rate": 7.659491115106831e-05, + "loss": 11.2671, + "step": 23800 + }, + { + "epoch": 1.76, + "learning_rate": 7.654468563851694e-05, + "loss": 10.9027, + "step": 23850 + }, + { + "epoch": 1.76, + "learning_rate": 7.64944601259656e-05, + "loss": 10.9078, + "step": 23900 + }, + { + "epoch": 1.77, + "learning_rate": 7.644423461341423e-05, + "loss": 10.5441, + "step": 23950 + }, + { + "epoch": 1.77, + "learning_rate": 7.639400910086287e-05, + "loss": 9.8617, + "step": 24000 + }, + { + "epoch": 1.77, + "learning_rate": 7.634378358831153e-05, + "loss": 10.8022, + "step": 24050 + }, + { + "epoch": 1.78, + "learning_rate": 7.629355807576016e-05, + "loss": 10.3082, + "step": 24100 + }, + { + "epoch": 1.78, + "learning_rate": 7.624333256320882e-05, + 
"loss": 9.8398, + "step": 24150 + }, + { + "epoch": 1.79, + "learning_rate": 7.619310705065745e-05, + "loss": 10.3631, + "step": 24200 + }, + { + "epoch": 1.79, + "learning_rate": 7.61428815381061e-05, + "loss": 10.6078, + "step": 24250 + }, + { + "epoch": 1.79, + "learning_rate": 7.609265602555474e-05, + "loss": 11.366, + "step": 24300 + }, + { + "epoch": 1.8, + "learning_rate": 7.60424305130034e-05, + "loss": 12.1154, + "step": 24350 + }, + { + "epoch": 1.8, + "learning_rate": 7.599220500045204e-05, + "loss": 11.3429, + "step": 24400 + }, + { + "epoch": 1.8, + "learning_rate": 7.594197948790068e-05, + "loss": 9.135, + "step": 24450 + }, + { + "epoch": 1.81, + "learning_rate": 7.589175397534933e-05, + "loss": 10.3796, + "step": 24500 + }, + { + "epoch": 1.81, + "learning_rate": 7.584152846279796e-05, + "loss": 10.6452, + "step": 24550 + }, + { + "epoch": 1.81, + "learning_rate": 7.579130295024661e-05, + "loss": 9.6237, + "step": 24600 + }, + { + "epoch": 1.82, + "learning_rate": 7.574107743769525e-05, + "loss": 10.7158, + "step": 24650 + }, + { + "epoch": 1.82, + "learning_rate": 7.56908519251439e-05, + "loss": 9.8296, + "step": 24700 + }, + { + "epoch": 1.83, + "learning_rate": 7.564062641259255e-05, + "loss": 10.1654, + "step": 24750 + }, + { + "epoch": 1.83, + "learning_rate": 7.559040090004119e-05, + "loss": 10.395, + "step": 24800 + }, + { + "epoch": 1.83, + "learning_rate": 7.554017538748984e-05, + "loss": 10.3067, + "step": 24850 + }, + { + "epoch": 1.84, + "learning_rate": 7.548994987493848e-05, + "loss": 10.7243, + "step": 24900 + }, + { + "epoch": 1.84, + "learning_rate": 7.543972436238712e-05, + "loss": 10.4022, + "step": 24950 + }, + { + "epoch": 1.84, + "learning_rate": 7.538949884983577e-05, + "loss": 10.5045, + "step": 25000 + }, + { + "epoch": 1.85, + "learning_rate": 7.533927333728441e-05, + "loss": 11.2205, + "step": 25050 + }, + { + "epoch": 1.85, + "learning_rate": 7.528904782473306e-05, + "loss": 10.5375, + "step": 25100 + }, + { + "epoch": 
1.86, + "learning_rate": 7.52388223121817e-05, + "loss": 10.4876, + "step": 25150 + }, + { + "epoch": 1.86, + "learning_rate": 7.518859679963034e-05, + "loss": 9.2096, + "step": 25200 + }, + { + "epoch": 1.86, + "learning_rate": 7.513837128707899e-05, + "loss": 10.0442, + "step": 25250 + }, + { + "epoch": 1.87, + "learning_rate": 7.508814577452763e-05, + "loss": 9.8174, + "step": 25300 + }, + { + "epoch": 1.87, + "learning_rate": 7.503792026197628e-05, + "loss": 10.8789, + "step": 25350 + }, + { + "epoch": 1.87, + "learning_rate": 7.498769474942492e-05, + "loss": 9.8789, + "step": 25400 + }, + { + "epoch": 1.88, + "learning_rate": 7.493746923687356e-05, + "loss": 11.1431, + "step": 25450 + }, + { + "epoch": 1.88, + "learning_rate": 7.488724372432221e-05, + "loss": 10.4659, + "step": 25500 + }, + { + "epoch": 1.88, + "learning_rate": 7.483701821177087e-05, + "loss": 10.7342, + "step": 25550 + }, + { + "epoch": 1.89, + "learning_rate": 7.47867926992195e-05, + "loss": 10.7841, + "step": 25600 + }, + { + "epoch": 1.89, + "learning_rate": 7.473656718666814e-05, + "loss": 9.6162, + "step": 25650 + }, + { + "epoch": 1.9, + "learning_rate": 7.468634167411678e-05, + "loss": 10.3568, + "step": 25700 + }, + { + "epoch": 1.9, + "learning_rate": 7.463611616156543e-05, + "loss": 9.6701, + "step": 25750 + }, + { + "epoch": 1.9, + "learning_rate": 7.458589064901407e-05, + "loss": 9.4003, + "step": 25800 + }, + { + "epoch": 1.91, + "learning_rate": 7.453566513646272e-05, + "loss": 9.6621, + "step": 25850 + }, + { + "epoch": 1.91, + "learning_rate": 7.448543962391137e-05, + "loss": 10.1086, + "step": 25900 + }, + { + "epoch": 1.91, + "learning_rate": 7.443521411136e-05, + "loss": 11.5655, + "step": 25950 + }, + { + "epoch": 1.92, + "learning_rate": 7.438498859880866e-05, + "loss": 8.9418, + "step": 26000 + }, + { + "epoch": 1.92, + "learning_rate": 7.433476308625729e-05, + "loss": 9.2415, + "step": 26050 + }, + { + "epoch": 1.93, + "learning_rate": 7.428453757370595e-05, + "loss": 
9.4192, + "step": 26100 + }, + { + "epoch": 1.93, + "learning_rate": 7.423431206115458e-05, + "loss": 9.1755, + "step": 26150 + }, + { + "epoch": 1.93, + "learning_rate": 7.418408654860322e-05, + "loss": 9.6327, + "step": 26200 + }, + { + "epoch": 1.94, + "learning_rate": 7.413386103605188e-05, + "loss": 10.3333, + "step": 26250 + }, + { + "epoch": 1.94, + "learning_rate": 7.408363552350051e-05, + "loss": 10.298, + "step": 26300 + }, + { + "epoch": 1.94, + "learning_rate": 7.403341001094917e-05, + "loss": 10.7038, + "step": 26350 + }, + { + "epoch": 1.95, + "learning_rate": 7.39831844983978e-05, + "loss": 10.5099, + "step": 26400 + }, + { + "epoch": 1.95, + "learning_rate": 7.393295898584646e-05, + "loss": 9.8063, + "step": 26450 + }, + { + "epoch": 1.95, + "learning_rate": 7.38827334732951e-05, + "loss": 9.5784, + "step": 26500 + }, + { + "epoch": 1.96, + "learning_rate": 7.383250796074375e-05, + "loss": 10.1958, + "step": 26550 + }, + { + "epoch": 1.96, + "learning_rate": 7.378228244819239e-05, + "loss": 9.6869, + "step": 26600 + }, + { + "epoch": 1.97, + "learning_rate": 7.373205693564103e-05, + "loss": 10.3761, + "step": 26650 + }, + { + "epoch": 1.97, + "learning_rate": 7.368183142308968e-05, + "loss": 11.6806, + "step": 26700 + }, + { + "epoch": 1.97, + "learning_rate": 7.363160591053832e-05, + "loss": 10.3183, + "step": 26750 + }, + { + "epoch": 1.98, + "learning_rate": 7.358138039798697e-05, + "loss": 11.041, + "step": 26800 + }, + { + "epoch": 1.98, + "learning_rate": 7.353115488543561e-05, + "loss": 9.6997, + "step": 26850 + }, + { + "epoch": 1.98, + "learning_rate": 7.348092937288425e-05, + "loss": 9.6029, + "step": 26900 + }, + { + "epoch": 1.99, + "learning_rate": 7.34307038603329e-05, + "loss": 10.3322, + "step": 26950 + }, + { + "epoch": 1.99, + "learning_rate": 7.338047834778154e-05, + "loss": 9.9009, + "step": 27000 + }, + { + "epoch": 2.0, + "learning_rate": 7.333025283523019e-05, + "loss": 10.4815, + "step": 27050 + }, + { + "epoch": 2.0, + 
"learning_rate": 7.328002732267883e-05, + "loss": 11.7049, + "step": 27100 + }, + { + "epoch": 2.0, + "learning_rate": 7.322980181012748e-05, + "loss": 10.7831, + "step": 27150 + }, + { + "epoch": 2.01, + "learning_rate": 7.317957629757612e-05, + "loss": 8.735, + "step": 27200 + }, + { + "epoch": 2.01, + "learning_rate": 7.312935078502476e-05, + "loss": 9.4056, + "step": 27250 + }, + { + "epoch": 2.01, + "learning_rate": 7.307912527247342e-05, + "loss": 10.7689, + "step": 27300 + }, + { + "epoch": 2.02, + "learning_rate": 7.302889975992205e-05, + "loss": 9.5266, + "step": 27350 + }, + { + "epoch": 2.02, + "learning_rate": 7.29786742473707e-05, + "loss": 8.2467, + "step": 27400 + }, + { + "epoch": 2.02, + "learning_rate": 7.292844873481934e-05, + "loss": 8.6572, + "step": 27450 + }, + { + "epoch": 2.03, + "learning_rate": 7.287822322226798e-05, + "loss": 8.4693, + "step": 27500 + }, + { + "epoch": 2.03, + "learning_rate": 7.282799770971663e-05, + "loss": 10.4867, + "step": 27550 + }, + { + "epoch": 2.04, + "learning_rate": 7.277777219716527e-05, + "loss": 8.9364, + "step": 27600 + }, + { + "epoch": 2.04, + "learning_rate": 7.272754668461393e-05, + "loss": 10.0109, + "step": 27650 + }, + { + "epoch": 2.04, + "learning_rate": 7.267732117206256e-05, + "loss": 9.5535, + "step": 27700 + }, + { + "epoch": 2.05, + "learning_rate": 7.262709565951122e-05, + "loss": 9.3029, + "step": 27750 + }, + { + "epoch": 2.05, + "learning_rate": 7.257687014695985e-05, + "loss": 9.854, + "step": 27800 + }, + { + "epoch": 2.05, + "learning_rate": 7.25266446344085e-05, + "loss": 9.5327, + "step": 27850 + }, + { + "epoch": 2.06, + "learning_rate": 7.247641912185714e-05, + "loss": 9.8255, + "step": 27900 + }, + { + "epoch": 2.06, + "learning_rate": 7.242619360930578e-05, + "loss": 9.9737, + "step": 27950 + }, + { + "epoch": 2.07, + "learning_rate": 7.237596809675444e-05, + "loss": 9.0471, + "step": 28000 + }, + { + "epoch": 2.07, + "learning_rate": 7.232574258420307e-05, + "loss": 10.0566, + 
"step": 28050 + }, + { + "epoch": 2.07, + "learning_rate": 7.227551707165173e-05, + "loss": 9.4781, + "step": 28100 + }, + { + "epoch": 2.08, + "learning_rate": 7.222529155910036e-05, + "loss": 8.7599, + "step": 28150 + }, + { + "epoch": 2.08, + "learning_rate": 7.217506604654901e-05, + "loss": 8.7605, + "step": 28200 + }, + { + "epoch": 2.08, + "learning_rate": 7.212484053399764e-05, + "loss": 10.061, + "step": 28250 + }, + { + "epoch": 2.09, + "learning_rate": 7.20746150214463e-05, + "loss": 9.6124, + "step": 28300 + }, + { + "epoch": 2.09, + "learning_rate": 7.202438950889495e-05, + "loss": 10.4776, + "step": 28350 + }, + { + "epoch": 2.09, + "learning_rate": 7.197416399634359e-05, + "loss": 9.2169, + "step": 28400 + }, + { + "epoch": 2.1, + "learning_rate": 7.192393848379223e-05, + "loss": 9.3654, + "step": 28450 + }, + { + "epoch": 2.1, + "learning_rate": 7.187371297124086e-05, + "loss": 9.4445, + "step": 28500 + }, + { + "epoch": 2.11, + "learning_rate": 7.182348745868952e-05, + "loss": 8.3614, + "step": 28550 + }, + { + "epoch": 2.11, + "learning_rate": 7.177326194613815e-05, + "loss": 9.1661, + "step": 28600 + }, + { + "epoch": 2.11, + "learning_rate": 7.172303643358681e-05, + "loss": 9.4976, + "step": 28650 + }, + { + "epoch": 2.12, + "learning_rate": 7.167281092103545e-05, + "loss": 9.125, + "step": 28700 + }, + { + "epoch": 2.12, + "learning_rate": 7.16225854084841e-05, + "loss": 8.9051, + "step": 28750 + }, + { + "epoch": 2.12, + "learning_rate": 7.157235989593274e-05, + "loss": 8.9753, + "step": 28800 + }, + { + "epoch": 2.13, + "learning_rate": 7.152213438338139e-05, + "loss": 9.133, + "step": 28850 + }, + { + "epoch": 2.13, + "learning_rate": 7.147190887083003e-05, + "loss": 9.9677, + "step": 28900 + }, + { + "epoch": 2.14, + "learning_rate": 7.142168335827867e-05, + "loss": 8.725, + "step": 28950 + }, + { + "epoch": 2.14, + "learning_rate": 7.137145784572732e-05, + "loss": 8.831, + "step": 29000 + }, + { + "epoch": 2.14, + "learning_rate": 
7.132123233317596e-05, + "loss": 7.8207, + "step": 29050 + }, + { + "epoch": 2.15, + "learning_rate": 7.127100682062461e-05, + "loss": 9.3707, + "step": 29100 + }, + { + "epoch": 2.15, + "learning_rate": 7.122078130807325e-05, + "loss": 10.4259, + "step": 29150 + }, + { + "epoch": 2.15, + "learning_rate": 7.11705557955219e-05, + "loss": 8.1836, + "step": 29200 + }, + { + "epoch": 2.16, + "learning_rate": 7.112033028297054e-05, + "loss": 9.0874, + "step": 29250 + }, + { + "epoch": 2.16, + "learning_rate": 7.107010477041918e-05, + "loss": 9.5957, + "step": 29300 + }, + { + "epoch": 2.16, + "learning_rate": 7.101987925786783e-05, + "loss": 8.7545, + "step": 29350 + }, + { + "epoch": 2.17, + "learning_rate": 7.096965374531647e-05, + "loss": 8.4478, + "step": 29400 + }, + { + "epoch": 2.17, + "learning_rate": 7.091942823276512e-05, + "loss": 8.601, + "step": 29450 + }, + { + "epoch": 2.18, + "learning_rate": 7.086920272021377e-05, + "loss": 9.6172, + "step": 29500 + }, + { + "epoch": 2.18, + "learning_rate": 7.08189772076624e-05, + "loss": 9.0805, + "step": 29550 + }, + { + "epoch": 2.18, + "learning_rate": 7.076875169511106e-05, + "loss": 9.6039, + "step": 29600 + }, + { + "epoch": 2.19, + "learning_rate": 7.071852618255969e-05, + "loss": 9.3622, + "step": 29650 + }, + { + "epoch": 2.19, + "learning_rate": 7.066830067000834e-05, + "loss": 8.8765, + "step": 29700 + }, + { + "epoch": 2.19, + "learning_rate": 7.061807515745699e-05, + "loss": 8.992, + "step": 29750 + }, + { + "epoch": 2.2, + "learning_rate": 7.056784964490562e-05, + "loss": 10.3564, + "step": 29800 + }, + { + "epoch": 2.2, + "learning_rate": 7.051762413235428e-05, + "loss": 8.8092, + "step": 29850 + }, + { + "epoch": 2.21, + "learning_rate": 7.046739861980291e-05, + "loss": 9.8373, + "step": 29900 + }, + { + "epoch": 2.21, + "learning_rate": 7.041717310725157e-05, + "loss": 8.004, + "step": 29950 + }, + { + "epoch": 2.21, + "learning_rate": 7.03669475947002e-05, + "loss": 9.4461, + "step": 30000 + }, + { + 
"epoch": 2.22, + "learning_rate": 7.031672208214886e-05, + "loss": 8.4964, + "step": 30050 + }, + { + "epoch": 2.22, + "learning_rate": 7.02664965695975e-05, + "loss": 10.3181, + "step": 30100 + }, + { + "epoch": 2.22, + "learning_rate": 7.021627105704615e-05, + "loss": 8.6637, + "step": 30150 + }, + { + "epoch": 2.23, + "learning_rate": 7.016604554449479e-05, + "loss": 10.1703, + "step": 30200 + }, + { + "epoch": 2.23, + "learning_rate": 7.011582003194342e-05, + "loss": 9.2846, + "step": 30250 + }, + { + "epoch": 2.24, + "learning_rate": 7.006559451939208e-05, + "loss": 8.5913, + "step": 30300 + }, + { + "epoch": 2.24, + "learning_rate": 7.001536900684071e-05, + "loss": 9.1308, + "step": 30350 + }, + { + "epoch": 2.24, + "learning_rate": 6.996514349428937e-05, + "loss": 11.2229, + "step": 30400 + }, + { + "epoch": 2.25, + "learning_rate": 6.991491798173801e-05, + "loss": 8.5923, + "step": 30450 + }, + { + "epoch": 2.25, + "learning_rate": 6.986469246918665e-05, + "loss": 9.9826, + "step": 30500 + }, + { + "epoch": 2.25, + "learning_rate": 6.98144669566353e-05, + "loss": 8.4765, + "step": 30550 + }, + { + "epoch": 2.26, + "learning_rate": 6.976424144408394e-05, + "loss": 8.7624, + "step": 30600 + }, + { + "epoch": 2.26, + "learning_rate": 6.971401593153259e-05, + "loss": 9.238, + "step": 30650 + }, + { + "epoch": 2.26, + "learning_rate": 6.966379041898123e-05, + "loss": 8.4976, + "step": 30700 + }, + { + "epoch": 2.27, + "learning_rate": 6.961356490642987e-05, + "loss": 9.1886, + "step": 30750 + }, + { + "epoch": 2.27, + "learning_rate": 6.956333939387852e-05, + "loss": 8.4443, + "step": 30800 + }, + { + "epoch": 2.28, + "learning_rate": 6.951311388132716e-05, + "loss": 8.3648, + "step": 30850 + }, + { + "epoch": 2.28, + "learning_rate": 6.94628883687758e-05, + "loss": 9.2509, + "step": 30900 + }, + { + "epoch": 2.28, + "learning_rate": 6.941266285622445e-05, + "loss": 8.3765, + "step": 30950 + }, + { + "epoch": 2.29, + "learning_rate": 6.93624373436731e-05, + 
"loss": 9.6616, + "step": 31000 + }, + { + "epoch": 2.29, + "learning_rate": 6.931221183112174e-05, + "loss": 9.658, + "step": 31050 + }, + { + "epoch": 2.29, + "learning_rate": 6.926198631857038e-05, + "loss": 8.7527, + "step": 31100 + }, + { + "epoch": 2.3, + "learning_rate": 6.921176080601903e-05, + "loss": 8.7148, + "step": 31150 + }, + { + "epoch": 2.3, + "learning_rate": 6.916153529346767e-05, + "loss": 8.5962, + "step": 31200 + }, + { + "epoch": 2.31, + "learning_rate": 6.911130978091633e-05, + "loss": 9.2625, + "step": 31250 + }, + { + "epoch": 2.31, + "learning_rate": 6.906108426836496e-05, + "loss": 8.8352, + "step": 31300 + }, + { + "epoch": 2.31, + "learning_rate": 6.90108587558136e-05, + "loss": 7.3991, + "step": 31350 + }, + { + "epoch": 2.32, + "learning_rate": 6.896063324326225e-05, + "loss": 9.9391, + "step": 31400 + }, + { + "epoch": 2.32, + "learning_rate": 6.891040773071089e-05, + "loss": 8.9575, + "step": 31450 + }, + { + "epoch": 2.32, + "learning_rate": 6.886018221815954e-05, + "loss": 7.9103, + "step": 31500 + }, + { + "epoch": 2.33, + "learning_rate": 6.880995670560818e-05, + "loss": 8.5276, + "step": 31550 + }, + { + "epoch": 2.33, + "learning_rate": 6.875973119305684e-05, + "loss": 8.5427, + "step": 31600 + }, + { + "epoch": 2.33, + "learning_rate": 6.870950568050547e-05, + "loss": 8.4672, + "step": 31650 + }, + { + "epoch": 2.34, + "learning_rate": 6.865928016795412e-05, + "loss": 8.9638, + "step": 31700 + }, + { + "epoch": 2.34, + "learning_rate": 6.860905465540276e-05, + "loss": 8.3136, + "step": 31750 + }, + { + "epoch": 2.35, + "learning_rate": 6.855882914285141e-05, + "loss": 8.8076, + "step": 31800 + }, + { + "epoch": 2.35, + "learning_rate": 6.850860363030004e-05, + "loss": 8.6041, + "step": 31850 + }, + { + "epoch": 2.35, + "learning_rate": 6.845837811774869e-05, + "loss": 9.1751, + "step": 31900 + }, + { + "epoch": 2.36, + "learning_rate": 6.840815260519735e-05, + "loss": 8.5955, + "step": 31950 + }, + { + "epoch": 2.36, + 
"learning_rate": 6.835792709264598e-05, + "loss": 9.0927, + "step": 32000 + }, + { + "epoch": 2.36, + "learning_rate": 6.830770158009463e-05, + "loss": 7.9647, + "step": 32050 + }, + { + "epoch": 2.37, + "learning_rate": 6.825747606754326e-05, + "loss": 10.2647, + "step": 32100 + }, + { + "epoch": 2.37, + "learning_rate": 6.820725055499192e-05, + "loss": 8.3442, + "step": 32150 + }, + { + "epoch": 2.38, + "learning_rate": 6.815702504244057e-05, + "loss": 9.2019, + "step": 32200 + }, + { + "epoch": 2.38, + "learning_rate": 6.810679952988921e-05, + "loss": 8.345, + "step": 32250 + }, + { + "epoch": 2.38, + "learning_rate": 6.805657401733785e-05, + "loss": 9.1835, + "step": 32300 + }, + { + "epoch": 2.39, + "learning_rate": 6.80063485047865e-05, + "loss": 9.1846, + "step": 32350 + }, + { + "epoch": 2.39, + "learning_rate": 6.795612299223514e-05, + "loss": 9.0015, + "step": 32400 + }, + { + "epoch": 2.39, + "learning_rate": 6.790589747968379e-05, + "loss": 8.2404, + "step": 32450 + }, + { + "epoch": 2.4, + "learning_rate": 6.785567196713243e-05, + "loss": 8.8715, + "step": 32500 + }, + { + "epoch": 2.4, + "learning_rate": 6.780544645458107e-05, + "loss": 8.817, + "step": 32550 + }, + { + "epoch": 2.4, + "learning_rate": 6.775522094202972e-05, + "loss": 9.2154, + "step": 32600 + }, + { + "epoch": 2.41, + "learning_rate": 6.770499542947836e-05, + "loss": 9.1914, + "step": 32650 + }, + { + "epoch": 2.41, + "learning_rate": 6.7654769916927e-05, + "loss": 9.2804, + "step": 32700 + }, + { + "epoch": 2.42, + "learning_rate": 6.760454440437565e-05, + "loss": 9.177, + "step": 32750 + }, + { + "epoch": 2.42, + "learning_rate": 6.75543188918243e-05, + "loss": 8.8259, + "step": 32800 + }, + { + "epoch": 2.42, + "learning_rate": 6.750409337927294e-05, + "loss": 8.6121, + "step": 32850 + }, + { + "epoch": 2.43, + "learning_rate": 6.745386786672158e-05, + "loss": 8.644, + "step": 32900 + }, + { + "epoch": 2.43, + "learning_rate": 6.740364235417023e-05, + "loss": 8.5743, + "step": 
32950 + }, + { + "epoch": 2.43, + "learning_rate": 6.735341684161888e-05, + "loss": 8.7636, + "step": 33000 + }, + { + "epoch": 2.44, + "learning_rate": 6.730319132906751e-05, + "loss": 8.3064, + "step": 33050 + }, + { + "epoch": 2.44, + "learning_rate": 6.725296581651616e-05, + "loss": 8.8806, + "step": 33100 + }, + { + "epoch": 2.45, + "learning_rate": 6.72027403039648e-05, + "loss": 8.8212, + "step": 33150 + }, + { + "epoch": 2.45, + "learning_rate": 6.715251479141345e-05, + "loss": 9.5261, + "step": 33200 + }, + { + "epoch": 2.45, + "learning_rate": 6.710228927886209e-05, + "loss": 9.0764, + "step": 33250 + }, + { + "epoch": 2.46, + "learning_rate": 6.705206376631073e-05, + "loss": 7.399, + "step": 33300 + }, + { + "epoch": 2.46, + "learning_rate": 6.700183825375939e-05, + "loss": 9.4119, + "step": 33350 + }, + { + "epoch": 2.46, + "learning_rate": 6.695161274120802e-05, + "loss": 8.4576, + "step": 33400 + }, + { + "epoch": 2.47, + "learning_rate": 6.690138722865668e-05, + "loss": 8.024, + "step": 33450 + }, + { + "epoch": 2.47, + "learning_rate": 6.685116171610531e-05, + "loss": 9.1605, + "step": 33500 + }, + { + "epoch": 2.47, + "learning_rate": 6.680093620355397e-05, + "loss": 8.3661, + "step": 33550 + }, + { + "epoch": 2.48, + "learning_rate": 6.67507106910026e-05, + "loss": 8.4145, + "step": 33600 + }, + { + "epoch": 2.48, + "learning_rate": 6.670048517845124e-05, + "loss": 7.824, + "step": 33650 + }, + { + "epoch": 2.49, + "learning_rate": 6.66502596658999e-05, + "loss": 9.129, + "step": 33700 + }, + { + "epoch": 2.49, + "learning_rate": 6.660003415334853e-05, + "loss": 9.0876, + "step": 33750 + }, + { + "epoch": 2.49, + "learning_rate": 6.654980864079719e-05, + "loss": 8.6961, + "step": 33800 + }, + { + "epoch": 2.5, + "learning_rate": 6.649958312824582e-05, + "loss": 8.1584, + "step": 33850 + }, + { + "epoch": 2.5, + "learning_rate": 6.644935761569448e-05, + "loss": 8.6587, + "step": 33900 + }, + { + "epoch": 2.5, + "learning_rate": 
6.639913210314311e-05, + "loss": 8.1059, + "step": 33950 + }, + { + "epoch": 2.51, + "learning_rate": 6.634890659059176e-05, + "loss": 9.2588, + "step": 34000 + }, + { + "epoch": 2.51, + "learning_rate": 6.629868107804041e-05, + "loss": 8.6443, + "step": 34050 + }, + { + "epoch": 2.52, + "learning_rate": 6.624845556548905e-05, + "loss": 8.8006, + "step": 34100 + }, + { + "epoch": 2.52, + "learning_rate": 6.61982300529377e-05, + "loss": 9.2288, + "step": 34150 + }, + { + "epoch": 2.52, + "learning_rate": 6.614800454038633e-05, + "loss": 9.0328, + "step": 34200 + }, + { + "epoch": 2.53, + "learning_rate": 6.609777902783499e-05, + "loss": 7.8269, + "step": 34250 + }, + { + "epoch": 2.53, + "learning_rate": 6.604755351528362e-05, + "loss": 8.5883, + "step": 34300 + }, + { + "epoch": 2.53, + "learning_rate": 6.599732800273227e-05, + "loss": 9.9388, + "step": 34350 + }, + { + "epoch": 2.54, + "learning_rate": 6.594710249018092e-05, + "loss": 8.6776, + "step": 34400 + }, + { + "epoch": 2.54, + "learning_rate": 6.589687697762956e-05, + "loss": 7.2287, + "step": 34450 + }, + { + "epoch": 2.54, + "learning_rate": 6.58466514650782e-05, + "loss": 7.7042, + "step": 34500 + }, + { + "epoch": 2.55, + "learning_rate": 6.579642595252685e-05, + "loss": 9.0004, + "step": 34550 + }, + { + "epoch": 2.55, + "learning_rate": 6.57462004399755e-05, + "loss": 9.3279, + "step": 34600 + }, + { + "epoch": 2.56, + "learning_rate": 6.569597492742414e-05, + "loss": 8.9144, + "step": 34650 + }, + { + "epoch": 2.56, + "learning_rate": 6.564574941487278e-05, + "loss": 9.3319, + "step": 34700 + }, + { + "epoch": 2.56, + "learning_rate": 6.559552390232143e-05, + "loss": 9.4986, + "step": 34750 + }, + { + "epoch": 2.57, + "learning_rate": 6.554529838977007e-05, + "loss": 9.002, + "step": 34800 + }, + { + "epoch": 2.57, + "learning_rate": 6.549507287721871e-05, + "loss": 8.6061, + "step": 34850 + }, + { + "epoch": 2.57, + "learning_rate": 6.544484736466736e-05, + "loss": 7.4598, + "step": 34900 + }, + { 
+ "epoch": 2.58, + "learning_rate": 6.5394621852116e-05, + "loss": 8.6618, + "step": 34950 + }, + { + "epoch": 2.58, + "learning_rate": 6.534439633956465e-05, + "loss": 9.0226, + "step": 35000 + }, + { + "epoch": 2.59, + "learning_rate": 6.529417082701329e-05, + "loss": 7.9738, + "step": 35050 + }, + { + "epoch": 2.59, + "learning_rate": 6.524394531446193e-05, + "loss": 8.7871, + "step": 35100 + }, + { + "epoch": 2.59, + "learning_rate": 6.519371980191058e-05, + "loss": 8.8744, + "step": 35150 + }, + { + "epoch": 2.6, + "learning_rate": 6.514349428935924e-05, + "loss": 8.3771, + "step": 35200 + }, + { + "epoch": 2.6, + "learning_rate": 6.509326877680787e-05, + "loss": 8.058, + "step": 35250 + }, + { + "epoch": 2.6, + "learning_rate": 6.504304326425652e-05, + "loss": 8.2627, + "step": 35300 + }, + { + "epoch": 2.61, + "learning_rate": 6.499281775170515e-05, + "loss": 8.1643, + "step": 35350 + }, + { + "epoch": 2.61, + "learning_rate": 6.49425922391538e-05, + "loss": 7.888, + "step": 35400 + }, + { + "epoch": 2.61, + "learning_rate": 6.489236672660246e-05, + "loss": 7.9235, + "step": 35450 + }, + { + "epoch": 2.62, + "learning_rate": 6.484214121405109e-05, + "loss": 8.1139, + "step": 35500 + }, + { + "epoch": 2.62, + "learning_rate": 6.479191570149974e-05, + "loss": 8.7467, + "step": 35550 + }, + { + "epoch": 2.63, + "learning_rate": 6.474169018894837e-05, + "loss": 7.4693, + "step": 35600 + }, + { + "epoch": 2.63, + "learning_rate": 6.469146467639703e-05, + "loss": 8.5167, + "step": 35650 + }, + { + "epoch": 2.63, + "learning_rate": 6.464123916384566e-05, + "loss": 9.5274, + "step": 35700 + }, + { + "epoch": 2.64, + "learning_rate": 6.459101365129432e-05, + "loss": 8.9735, + "step": 35750 + }, + { + "epoch": 2.64, + "learning_rate": 6.454078813874296e-05, + "loss": 8.1756, + "step": 35800 + }, + { + "epoch": 2.64, + "learning_rate": 6.449056262619161e-05, + "loss": 7.8084, + "step": 35850 + }, + { + "epoch": 2.65, + "learning_rate": 6.444033711364025e-05, + "loss": 
8.2671, + "step": 35900 + }, + { + "epoch": 2.65, + "learning_rate": 6.439011160108888e-05, + "loss": 8.6628, + "step": 35950 + }, + { + "epoch": 2.66, + "learning_rate": 6.433988608853754e-05, + "loss": 9.8654, + "step": 36000 + }, + { + "epoch": 2.66, + "learning_rate": 6.428966057598617e-05, + "loss": 9.104, + "step": 36050 + }, + { + "epoch": 2.66, + "learning_rate": 6.423943506343483e-05, + "loss": 9.4156, + "step": 36100 + }, + { + "epoch": 2.67, + "learning_rate": 6.418920955088347e-05, + "loss": 8.9803, + "step": 36150 + }, + { + "epoch": 2.67, + "learning_rate": 6.413898403833212e-05, + "loss": 8.9584, + "step": 36200 + }, + { + "epoch": 2.67, + "learning_rate": 6.408875852578076e-05, + "loss": 7.3683, + "step": 36250 + }, + { + "epoch": 2.68, + "learning_rate": 6.40385330132294e-05, + "loss": 8.3277, + "step": 36300 + }, + { + "epoch": 2.68, + "learning_rate": 6.398830750067805e-05, + "loss": 9.3236, + "step": 36350 + }, + { + "epoch": 2.68, + "learning_rate": 6.393808198812669e-05, + "loss": 8.6918, + "step": 36400 + }, + { + "epoch": 2.69, + "learning_rate": 6.388785647557534e-05, + "loss": 8.9422, + "step": 36450 + }, + { + "epoch": 2.69, + "learning_rate": 6.383763096302398e-05, + "loss": 8.8438, + "step": 36500 + }, + { + "epoch": 2.7, + "learning_rate": 6.378740545047263e-05, + "loss": 8.7752, + "step": 36550 + }, + { + "epoch": 2.7, + "learning_rate": 6.373717993792127e-05, + "loss": 8.6483, + "step": 36600 + }, + { + "epoch": 2.7, + "learning_rate": 6.368695442536991e-05, + "loss": 8.5753, + "step": 36650 + }, + { + "epoch": 2.71, + "learning_rate": 6.363672891281856e-05, + "loss": 8.1893, + "step": 36700 + }, + { + "epoch": 2.71, + "learning_rate": 6.35865034002672e-05, + "loss": 8.189, + "step": 36750 + }, + { + "epoch": 2.71, + "learning_rate": 6.353627788771585e-05, + "loss": 8.2979, + "step": 36800 + }, + { + "epoch": 2.72, + "learning_rate": 6.348605237516449e-05, + "loss": 8.3904, + "step": 36850 + }, + { + "epoch": 2.72, + "learning_rate": 
6.343582686261313e-05, + "loss": 9.3274, + "step": 36900 + }, + { + "epoch": 2.73, + "learning_rate": 6.338560135006179e-05, + "loss": 7.7663, + "step": 36950 + }, + { + "epoch": 2.73, + "learning_rate": 6.333537583751042e-05, + "loss": 8.2105, + "step": 37000 + }, + { + "epoch": 2.73, + "learning_rate": 6.328515032495907e-05, + "loss": 8.035, + "step": 37050 + }, + { + "epoch": 2.74, + "learning_rate": 6.323492481240771e-05, + "loss": 9.5032, + "step": 37100 + }, + { + "epoch": 2.74, + "learning_rate": 6.318469929985635e-05, + "loss": 8.3856, + "step": 37150 + }, + { + "epoch": 2.74, + "learning_rate": 6.3134473787305e-05, + "loss": 8.9941, + "step": 37200 + }, + { + "epoch": 2.75, + "learning_rate": 6.308424827475364e-05, + "loss": 8.3987, + "step": 37250 + }, + { + "epoch": 2.75, + "learning_rate": 6.30340227622023e-05, + "loss": 9.1753, + "step": 37300 + }, + { + "epoch": 2.76, + "learning_rate": 6.298379724965093e-05, + "loss": 7.9557, + "step": 37350 + }, + { + "epoch": 2.76, + "learning_rate": 6.293357173709959e-05, + "loss": 7.725, + "step": 37400 + }, + { + "epoch": 2.76, + "learning_rate": 6.288334622454822e-05, + "loss": 8.0807, + "step": 37450 + }, + { + "epoch": 2.77, + "learning_rate": 6.283312071199688e-05, + "loss": 8.6492, + "step": 37500 + }, + { + "epoch": 2.77, + "learning_rate": 6.27828951994455e-05, + "loss": 8.4716, + "step": 37550 + }, + { + "epoch": 2.77, + "learning_rate": 6.273266968689416e-05, + "loss": 8.7209, + "step": 37600 + }, + { + "epoch": 2.78, + "learning_rate": 6.268244417434281e-05, + "loss": 8.4902, + "step": 37650 + }, + { + "epoch": 2.78, + "learning_rate": 6.263221866179144e-05, + "loss": 7.9589, + "step": 37700 + }, + { + "epoch": 2.78, + "learning_rate": 6.25819931492401e-05, + "loss": 9.3285, + "step": 37750 + }, + { + "epoch": 2.79, + "learning_rate": 6.253176763668873e-05, + "loss": 9.0506, + "step": 37800 + }, + { + "epoch": 2.79, + "learning_rate": 6.248154212413738e-05, + "loss": 7.9992, + "step": 37850 + }, + { + 
"epoch": 2.8, + "learning_rate": 6.243131661158603e-05, + "loss": 8.029, + "step": 37900 + }, + { + "epoch": 2.8, + "learning_rate": 6.238109109903467e-05, + "loss": 8.6681, + "step": 37950 + }, + { + "epoch": 2.8, + "learning_rate": 6.233086558648332e-05, + "loss": 8.5906, + "step": 38000 + }, + { + "epoch": 2.81, + "learning_rate": 6.228064007393196e-05, + "loss": 10.4719, + "step": 38050 + }, + { + "epoch": 2.81, + "learning_rate": 6.22304145613806e-05, + "loss": 8.2759, + "step": 38100 + }, + { + "epoch": 2.81, + "learning_rate": 6.218018904882925e-05, + "loss": 8.2633, + "step": 38150 + }, + { + "epoch": 2.82, + "learning_rate": 6.212996353627789e-05, + "loss": 8.5218, + "step": 38200 + }, + { + "epoch": 2.82, + "learning_rate": 6.207973802372654e-05, + "loss": 8.0609, + "step": 38250 + }, + { + "epoch": 2.83, + "learning_rate": 6.202951251117518e-05, + "loss": 9.3672, + "step": 38300 + }, + { + "epoch": 2.83, + "learning_rate": 6.197928699862382e-05, + "loss": 10.1768, + "step": 38350 + }, + { + "epoch": 2.83, + "learning_rate": 6.192906148607247e-05, + "loss": 9.4389, + "step": 38400 + }, + { + "epoch": 2.84, + "learning_rate": 6.187883597352111e-05, + "loss": 7.6737, + "step": 38450 + }, + { + "epoch": 2.84, + "learning_rate": 6.182861046096976e-05, + "loss": 9.2337, + "step": 38500 + }, + { + "epoch": 2.84, + "learning_rate": 6.17783849484184e-05, + "loss": 8.7846, + "step": 38550 + }, + { + "epoch": 2.85, + "learning_rate": 6.172815943586704e-05, + "loss": 7.8709, + "step": 38600 + }, + { + "epoch": 2.85, + "learning_rate": 6.167793392331569e-05, + "loss": 8.8688, + "step": 38650 + }, + { + "epoch": 2.85, + "learning_rate": 6.162770841076435e-05, + "loss": 8.4087, + "step": 38700 + }, + { + "epoch": 2.86, + "learning_rate": 6.157748289821298e-05, + "loss": 7.7129, + "step": 38750 + }, + { + "epoch": 2.86, + "learning_rate": 6.152725738566162e-05, + "loss": 9.3196, + "step": 38800 + }, + { + "epoch": 2.87, + "learning_rate": 6.147703187311027e-05, + 
"loss": 8.8242, + "step": 38850 + }, + { + "epoch": 2.87, + "learning_rate": 6.142680636055891e-05, + "loss": 8.4237, + "step": 38900 + }, + { + "epoch": 2.87, + "learning_rate": 6.137658084800755e-05, + "loss": 8.9383, + "step": 38950 + }, + { + "epoch": 2.88, + "learning_rate": 6.13263553354562e-05, + "loss": 8.3749, + "step": 39000 + }, + { + "epoch": 2.88, + "learning_rate": 6.127612982290485e-05, + "loss": 8.8894, + "step": 39050 + }, + { + "epoch": 2.88, + "learning_rate": 6.122590431035349e-05, + "loss": 8.2975, + "step": 39100 + }, + { + "epoch": 2.89, + "learning_rate": 6.117567879780214e-05, + "loss": 8.0517, + "step": 39150 + }, + { + "epoch": 2.89, + "learning_rate": 6.112545328525077e-05, + "loss": 8.0154, + "step": 39200 + }, + { + "epoch": 2.9, + "learning_rate": 6.107522777269943e-05, + "loss": 8.4887, + "step": 39250 + }, + { + "epoch": 2.9, + "learning_rate": 6.102500226014807e-05, + "loss": 8.7064, + "step": 39300 + }, + { + "epoch": 2.9, + "learning_rate": 6.0974776747596706e-05, + "loss": 9.7375, + "step": 39350 + }, + { + "epoch": 2.91, + "learning_rate": 6.0924551235045357e-05, + "loss": 8.8614, + "step": 39400 + }, + { + "epoch": 2.91, + "learning_rate": 6.0874325722493994e-05, + "loss": 8.302, + "step": 39450 + }, + { + "epoch": 2.91, + "learning_rate": 6.0824100209942645e-05, + "loss": 7.8469, + "step": 39500 + }, + { + "epoch": 2.92, + "learning_rate": 6.077387469739129e-05, + "loss": 9.0706, + "step": 39550 + }, + { + "epoch": 2.92, + "learning_rate": 6.072364918483994e-05, + "loss": 9.1398, + "step": 39600 + }, + { + "epoch": 2.92, + "learning_rate": 6.067342367228858e-05, + "loss": 8.1838, + "step": 39650 + }, + { + "epoch": 2.93, + "learning_rate": 6.062319815973723e-05, + "loss": 9.2303, + "step": 39700 + }, + { + "epoch": 2.93, + "learning_rate": 6.0572972647185865e-05, + "loss": 8.3715, + "step": 39750 + }, + { + "epoch": 2.94, + "learning_rate": 6.0522747134634516e-05, + "loss": 8.409, + "step": 39800 + }, + { + "epoch": 2.94, + 
"learning_rate": 6.047252162208315e-05, + "loss": 8.6441, + "step": 39850 + }, + { + "epoch": 2.94, + "learning_rate": 6.04222961095318e-05, + "loss": 9.0975, + "step": 39900 + }, + { + "epoch": 2.95, + "learning_rate": 6.037207059698045e-05, + "loss": 8.0691, + "step": 39950 + }, + { + "epoch": 2.95, + "learning_rate": 6.0321845084429085e-05, + "loss": 8.6646, + "step": 40000 + }, + { + "epoch": 2.95, + "eval_loss": 8.163222312927246, + "eval_runtime": 957.6189, + "eval_samples_per_second": 13.678, + "eval_steps_per_second": 3.42, + "eval_wer": 0.22493805384066187, + "step": 40000 + } + ], + "max_steps": 100051, + "num_train_epochs": 8, + "total_flos": 0.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-40000/training_args.bin b/checkpoint-40000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..dbc064046a36220dd960e955c565bc3e2c9e3abd --- /dev/null +++ b/checkpoint-40000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b64c669f66dd7a2e54d3001ce7e31c26cc60dd58136e8ce90e6055bd0ae15eb +size 3503 diff --git a/checkpoint-60000/optimizer.pt b/checkpoint-60000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ae050385684b89e6c0ca791b8b61f5eaca60225a --- /dev/null +++ b/checkpoint-60000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:406e36deb47741922cd59f748cd1876112106ea059c820e699c269fe0d635c2b +size 5154563651 diff --git a/checkpoint-60000/rng_state.pth b/checkpoint-60000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3e8d6aae184b2c90683b21f4fd5417456d82dcf5 --- /dev/null +++ b/checkpoint-60000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3707b4b4d63eda9f45abb91e6157a5777abe5bcccebdf82df707bae7df65cf9e +size 14503 diff --git a/checkpoint-60000/scheduler.pt b/checkpoint-60000/scheduler.pt new file mode 100644 index 
0000000000000000000000000000000000000000..585b5d0470a30e8ede34629904bce7b1c7930f99 --- /dev/null +++ b/checkpoint-60000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4edddf9241e66e2708bca7527dec737063f80262825a1b055e50529066c54390 +size 623 diff --git a/checkpoint-60000/stt_en_conformer_transducer_xlarge.nemo b/checkpoint-60000/stt_en_conformer_transducer_xlarge.nemo new file mode 100644 index 0000000000000000000000000000000000000000..f6bab2466cee954a0d980fdd6528795a93bed701 --- /dev/null +++ b/checkpoint-60000/stt_en_conformer_transducer_xlarge.nemo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be893728d43d533cf97573378f9587552441031cf01aa9fdc25c779e733140f1 +size 2577971200 diff --git a/checkpoint-60000/trainer_state.json b/checkpoint-60000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..402e719b1b483dc4096b2a1e98f5671043bcb4ce --- /dev/null +++ b/checkpoint-60000/trainer_state.json @@ -0,0 +1,7243 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.4257579110422665, + "global_step": 60000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1e-05, + "loss": 178.9465, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 2e-05, + "loss": 164.9707, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 3e-05, + "loss": 142.2782, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4e-05, + "loss": 121.5122, + "step": 200 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 91.8622, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 6e-05, + "loss": 82.2062, + "step": 300 + }, + { + "epoch": 0.03, + "learning_rate": 7e-05, + "loss": 72.6893, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 8e-05, + "loss": 71.8709, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 9e-05, + "loss": 
69.9995, + "step": 450 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001, + "loss": 70.6458, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 9.994977448744865e-05, + "loss": 73.9929, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 9.989954897489729e-05, + "loss": 66.52, + "step": 600 + }, + { + "epoch": 0.05, + "learning_rate": 9.984932346234594e-05, + "loss": 65.8947, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 9.979909794979458e-05, + "loss": 62.5809, + "step": 700 + }, + { + "epoch": 0.06, + "learning_rate": 9.974887243724323e-05, + "loss": 61.212, + "step": 750 + }, + { + "epoch": 0.06, + "learning_rate": 9.969864692469187e-05, + "loss": 68.2408, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 9.964842141214051e-05, + "loss": 61.5308, + "step": 850 + }, + { + "epoch": 0.07, + "learning_rate": 9.959819589958916e-05, + "loss": 58.9116, + "step": 900 + }, + { + "epoch": 0.07, + "learning_rate": 9.95479703870378e-05, + "loss": 60.0702, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 9.949774487448646e-05, + "loss": 57.6135, + "step": 1000 + }, + { + "epoch": 0.08, + "learning_rate": 9.944751936193509e-05, + "loss": 50.9231, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 9.939729384938373e-05, + "loss": 51.187, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 9.934706833683238e-05, + "loss": 52.1127, + "step": 1150 + }, + { + "epoch": 0.09, + "learning_rate": 9.929684282428102e-05, + "loss": 47.4608, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 9.924661731172968e-05, + "loss": 51.6108, + "step": 1250 + }, + { + "epoch": 0.1, + "learning_rate": 9.919639179917831e-05, + "loss": 46.5874, + "step": 1300 + }, + { + "epoch": 0.1, + "learning_rate": 9.914616628662697e-05, + "loss": 41.4706, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 9.90959407740756e-05, + "loss": 43.7544, + "step": 1400 + }, + { + "epoch": 0.11, + "learning_rate": 9.904571526152426e-05, + 
"loss": 44.6039, + "step": 1450 + }, + { + "epoch": 0.11, + "learning_rate": 9.899548974897289e-05, + "loss": 41.4384, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 9.894526423642154e-05, + "loss": 42.8289, + "step": 1550 + }, + { + "epoch": 0.12, + "learning_rate": 9.889503872387019e-05, + "loss": 39.9726, + "step": 1600 + }, + { + "epoch": 0.12, + "learning_rate": 9.884481321131882e-05, + "loss": 43.9533, + "step": 1650 + }, + { + "epoch": 0.13, + "learning_rate": 9.879458769876748e-05, + "loss": 38.7605, + "step": 1700 + }, + { + "epoch": 0.13, + "learning_rate": 9.87443621862161e-05, + "loss": 39.5425, + "step": 1750 + }, + { + "epoch": 0.13, + "learning_rate": 9.869413667366476e-05, + "loss": 37.588, + "step": 1800 + }, + { + "epoch": 0.14, + "learning_rate": 9.86439111611134e-05, + "loss": 39.7744, + "step": 1850 + }, + { + "epoch": 0.14, + "learning_rate": 9.859368564856205e-05, + "loss": 38.2154, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 9.85434601360107e-05, + "loss": 35.0806, + "step": 1950 + }, + { + "epoch": 0.15, + "learning_rate": 9.849323462345934e-05, + "loss": 39.061, + "step": 2000 + }, + { + "epoch": 0.15, + "learning_rate": 9.844300911090798e-05, + "loss": 35.1544, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 9.839278359835663e-05, + "loss": 38.123, + "step": 2100 + }, + { + "epoch": 0.16, + "learning_rate": 9.834255808580527e-05, + "loss": 33.1144, + "step": 2150 + }, + { + "epoch": 0.16, + "learning_rate": 9.829233257325392e-05, + "loss": 34.3476, + "step": 2200 + }, + { + "epoch": 0.17, + "learning_rate": 9.824210706070256e-05, + "loss": 29.5665, + "step": 2250 + }, + { + "epoch": 0.17, + "learning_rate": 9.81918815481512e-05, + "loss": 35.8756, + "step": 2300 + }, + { + "epoch": 0.17, + "learning_rate": 9.814165603559985e-05, + "loss": 37.2579, + "step": 2350 + }, + { + "epoch": 0.18, + "learning_rate": 9.809143052304849e-05, + "loss": 33.6245, + "step": 2400 + }, + { + "epoch": 0.18, + 
"learning_rate": 9.804120501049714e-05, + "loss": 35.6543, + "step": 2450 + }, + { + "epoch": 0.18, + "learning_rate": 9.799097949794578e-05, + "loss": 36.7847, + "step": 2500 + }, + { + "epoch": 0.19, + "learning_rate": 9.794075398539442e-05, + "loss": 33.463, + "step": 2550 + }, + { + "epoch": 0.19, + "learning_rate": 9.789052847284307e-05, + "loss": 32.2215, + "step": 2600 + }, + { + "epoch": 0.2, + "learning_rate": 9.784030296029171e-05, + "loss": 33.4301, + "step": 2650 + }, + { + "epoch": 0.2, + "learning_rate": 9.779007744774036e-05, + "loss": 29.9579, + "step": 2700 + }, + { + "epoch": 0.2, + "learning_rate": 9.773985193518901e-05, + "loss": 31.9141, + "step": 2750 + }, + { + "epoch": 0.21, + "learning_rate": 9.768962642263764e-05, + "loss": 33.2049, + "step": 2800 + }, + { + "epoch": 0.21, + "learning_rate": 9.763940091008629e-05, + "loss": 32.8774, + "step": 2850 + }, + { + "epoch": 0.21, + "learning_rate": 9.758917539753493e-05, + "loss": 29.0858, + "step": 2900 + }, + { + "epoch": 0.22, + "learning_rate": 9.753894988498358e-05, + "loss": 30.1145, + "step": 2950 + }, + { + "epoch": 0.22, + "learning_rate": 9.748872437243222e-05, + "loss": 27.6986, + "step": 3000 + }, + { + "epoch": 0.22, + "learning_rate": 9.743849885988087e-05, + "loss": 31.7807, + "step": 3050 + }, + { + "epoch": 0.23, + "learning_rate": 9.738827334732952e-05, + "loss": 30.5108, + "step": 3100 + }, + { + "epoch": 0.23, + "learning_rate": 9.733804783477815e-05, + "loss": 31.0909, + "step": 3150 + }, + { + "epoch": 0.24, + "learning_rate": 9.728782232222681e-05, + "loss": 27.9057, + "step": 3200 + }, + { + "epoch": 0.24, + "learning_rate": 9.723759680967544e-05, + "loss": 29.7323, + "step": 3250 + }, + { + "epoch": 0.24, + "learning_rate": 9.71873712971241e-05, + "loss": 29.7527, + "step": 3300 + }, + { + "epoch": 0.25, + "learning_rate": 9.713714578457273e-05, + "loss": 29.1442, + "step": 3350 + }, + { + "epoch": 0.25, + "learning_rate": 9.708692027202137e-05, + "loss": 30.8906, + 
"step": 3400 + }, + { + "epoch": 0.25, + "learning_rate": 9.703669475947003e-05, + "loss": 26.8419, + "step": 3450 + }, + { + "epoch": 0.26, + "learning_rate": 9.698646924691866e-05, + "loss": 29.2181, + "step": 3500 + }, + { + "epoch": 0.26, + "learning_rate": 9.693624373436732e-05, + "loss": 27.6549, + "step": 3550 + }, + { + "epoch": 0.27, + "learning_rate": 9.688601822181595e-05, + "loss": 34.0701, + "step": 3600 + }, + { + "epoch": 0.27, + "learning_rate": 9.683579270926461e-05, + "loss": 24.7487, + "step": 3650 + }, + { + "epoch": 0.27, + "learning_rate": 9.678556719671325e-05, + "loss": 30.0266, + "step": 3700 + }, + { + "epoch": 0.28, + "learning_rate": 9.67353416841619e-05, + "loss": 25.5011, + "step": 3750 + }, + { + "epoch": 0.28, + "learning_rate": 9.668511617161054e-05, + "loss": 26.1437, + "step": 3800 + }, + { + "epoch": 0.28, + "learning_rate": 9.663489065905918e-05, + "loss": 23.2303, + "step": 3850 + }, + { + "epoch": 0.29, + "learning_rate": 9.658466514650783e-05, + "loss": 26.357, + "step": 3900 + }, + { + "epoch": 0.29, + "learning_rate": 9.653443963395646e-05, + "loss": 27.2201, + "step": 3950 + }, + { + "epoch": 0.3, + "learning_rate": 9.648421412140512e-05, + "loss": 25.5695, + "step": 4000 + }, + { + "epoch": 0.3, + "learning_rate": 9.643398860885376e-05, + "loss": 24.8346, + "step": 4050 + }, + { + "epoch": 0.3, + "learning_rate": 9.63837630963024e-05, + "loss": 22.3957, + "step": 4100 + }, + { + "epoch": 0.31, + "learning_rate": 9.633353758375105e-05, + "loss": 24.9532, + "step": 4150 + }, + { + "epoch": 0.31, + "learning_rate": 9.628331207119969e-05, + "loss": 23.1574, + "step": 4200 + }, + { + "epoch": 0.31, + "learning_rate": 9.623308655864834e-05, + "loss": 23.7018, + "step": 4250 + }, + { + "epoch": 0.32, + "learning_rate": 9.618286104609698e-05, + "loss": 25.1433, + "step": 4300 + }, + { + "epoch": 0.32, + "learning_rate": 9.613263553354562e-05, + "loss": 25.0571, + "step": 4350 + }, + { + "epoch": 0.32, + "learning_rate": 
9.608241002099427e-05, + "loss": 24.2231, + "step": 4400 + }, + { + "epoch": 0.33, + "learning_rate": 9.603218450844291e-05, + "loss": 23.0983, + "step": 4450 + }, + { + "epoch": 0.33, + "learning_rate": 9.598195899589156e-05, + "loss": 25.0078, + "step": 4500 + }, + { + "epoch": 0.34, + "learning_rate": 9.59317334833402e-05, + "loss": 20.6933, + "step": 4550 + }, + { + "epoch": 0.34, + "learning_rate": 9.588150797078884e-05, + "loss": 23.6196, + "step": 4600 + }, + { + "epoch": 0.34, + "learning_rate": 9.583128245823749e-05, + "loss": 25.2331, + "step": 4650 + }, + { + "epoch": 0.35, + "learning_rate": 9.578105694568613e-05, + "loss": 24.7932, + "step": 4700 + }, + { + "epoch": 0.35, + "learning_rate": 9.573083143313478e-05, + "loss": 24.3586, + "step": 4750 + }, + { + "epoch": 0.35, + "learning_rate": 9.568060592058342e-05, + "loss": 22.7161, + "step": 4800 + }, + { + "epoch": 0.36, + "learning_rate": 9.563038040803208e-05, + "loss": 22.4188, + "step": 4850 + }, + { + "epoch": 0.36, + "learning_rate": 9.558015489548071e-05, + "loss": 21.6516, + "step": 4900 + }, + { + "epoch": 0.37, + "learning_rate": 9.552992938292937e-05, + "loss": 21.78, + "step": 4950 + }, + { + "epoch": 0.37, + "learning_rate": 9.5479703870378e-05, + "loss": 21.0172, + "step": 5000 + }, + { + "epoch": 0.37, + "learning_rate": 9.542947835782665e-05, + "loss": 22.4624, + "step": 5050 + }, + { + "epoch": 0.38, + "learning_rate": 9.537925284527528e-05, + "loss": 23.6615, + "step": 5100 + }, + { + "epoch": 0.38, + "learning_rate": 9.532902733272393e-05, + "loss": 21.8091, + "step": 5150 + }, + { + "epoch": 0.38, + "learning_rate": 9.527880182017259e-05, + "loss": 21.4173, + "step": 5200 + }, + { + "epoch": 0.39, + "learning_rate": 9.522857630762122e-05, + "loss": 20.5415, + "step": 5250 + }, + { + "epoch": 0.39, + "learning_rate": 9.517835079506987e-05, + "loss": 21.0639, + "step": 5300 + }, + { + "epoch": 0.39, + "learning_rate": 9.51281252825185e-05, + "loss": 21.6078, + "step": 5350 + }, + { + 
"epoch": 0.4, + "learning_rate": 9.507789976996716e-05, + "loss": 19.4142, + "step": 5400 + }, + { + "epoch": 0.4, + "learning_rate": 9.50276742574158e-05, + "loss": 20.2504, + "step": 5450 + }, + { + "epoch": 0.41, + "learning_rate": 9.497744874486445e-05, + "loss": 23.8683, + "step": 5500 + }, + { + "epoch": 0.41, + "learning_rate": 9.49272232323131e-05, + "loss": 19.7559, + "step": 5550 + }, + { + "epoch": 0.41, + "learning_rate": 9.487699771976174e-05, + "loss": 21.1743, + "step": 5600 + }, + { + "epoch": 0.42, + "learning_rate": 9.482677220721038e-05, + "loss": 21.1908, + "step": 5650 + }, + { + "epoch": 0.42, + "learning_rate": 9.477654669465901e-05, + "loss": 20.9591, + "step": 5700 + }, + { + "epoch": 0.42, + "learning_rate": 9.472632118210767e-05, + "loss": 20.9036, + "step": 5750 + }, + { + "epoch": 0.43, + "learning_rate": 9.46760956695563e-05, + "loss": 22.249, + "step": 5800 + }, + { + "epoch": 0.43, + "learning_rate": 9.462587015700496e-05, + "loss": 19.1093, + "step": 5850 + }, + { + "epoch": 0.44, + "learning_rate": 9.45756446444536e-05, + "loss": 21.2714, + "step": 5900 + }, + { + "epoch": 0.44, + "learning_rate": 9.452541913190225e-05, + "loss": 21.3794, + "step": 5950 + }, + { + "epoch": 0.44, + "learning_rate": 9.447519361935089e-05, + "loss": 20.0326, + "step": 6000 + }, + { + "epoch": 0.45, + "learning_rate": 9.442496810679954e-05, + "loss": 19.8004, + "step": 6050 + }, + { + "epoch": 0.45, + "learning_rate": 9.437474259424818e-05, + "loss": 19.0229, + "step": 6100 + }, + { + "epoch": 0.45, + "learning_rate": 9.432451708169682e-05, + "loss": 17.6587, + "step": 6150 + }, + { + "epoch": 0.46, + "learning_rate": 9.427429156914547e-05, + "loss": 21.9247, + "step": 6200 + }, + { + "epoch": 0.46, + "learning_rate": 9.422406605659411e-05, + "loss": 19.743, + "step": 6250 + }, + { + "epoch": 0.46, + "learning_rate": 9.417384054404276e-05, + "loss": 22.9746, + "step": 6300 + }, + { + "epoch": 0.47, + "learning_rate": 9.41236150314914e-05, + "loss": 
19.6693, + "step": 6350 + }, + { + "epoch": 0.47, + "learning_rate": 9.407338951894004e-05, + "loss": 19.1141, + "step": 6400 + }, + { + "epoch": 0.48, + "learning_rate": 9.402316400638869e-05, + "loss": 18.3847, + "step": 6450 + }, + { + "epoch": 0.48, + "learning_rate": 9.397293849383733e-05, + "loss": 18.9357, + "step": 6500 + }, + { + "epoch": 0.48, + "learning_rate": 9.392271298128598e-05, + "loss": 18.9316, + "step": 6550 + }, + { + "epoch": 0.49, + "learning_rate": 9.387248746873462e-05, + "loss": 20.9141, + "step": 6600 + }, + { + "epoch": 0.49, + "learning_rate": 9.382226195618326e-05, + "loss": 18.7472, + "step": 6650 + }, + { + "epoch": 0.49, + "learning_rate": 9.377203644363192e-05, + "loss": 18.8577, + "step": 6700 + }, + { + "epoch": 0.5, + "learning_rate": 9.372181093108055e-05, + "loss": 17.8061, + "step": 6750 + }, + { + "epoch": 0.5, + "learning_rate": 9.36715854185292e-05, + "loss": 19.4687, + "step": 6800 + }, + { + "epoch": 0.51, + "learning_rate": 9.362135990597784e-05, + "loss": 19.5103, + "step": 6850 + }, + { + "epoch": 0.51, + "learning_rate": 9.357113439342648e-05, + "loss": 18.5319, + "step": 6900 + }, + { + "epoch": 0.51, + "learning_rate": 9.352090888087514e-05, + "loss": 20.16, + "step": 6950 + }, + { + "epoch": 0.52, + "learning_rate": 9.347068336832377e-05, + "loss": 18.1913, + "step": 7000 + }, + { + "epoch": 0.52, + "learning_rate": 9.342045785577243e-05, + "loss": 21.341, + "step": 7050 + }, + { + "epoch": 0.52, + "learning_rate": 9.337023234322106e-05, + "loss": 16.7701, + "step": 7100 + }, + { + "epoch": 0.53, + "learning_rate": 9.332000683066972e-05, + "loss": 18.045, + "step": 7150 + }, + { + "epoch": 0.53, + "learning_rate": 9.326978131811835e-05, + "loss": 16.0393, + "step": 7200 + }, + { + "epoch": 0.53, + "learning_rate": 9.3219555805567e-05, + "loss": 17.4833, + "step": 7250 + }, + { + "epoch": 0.54, + "learning_rate": 9.316933029301565e-05, + "loss": 17.3978, + "step": 7300 + }, + { + "epoch": 0.54, + "learning_rate": 
9.31191047804643e-05, + "loss": 18.2649, + "step": 7350 + }, + { + "epoch": 0.55, + "learning_rate": 9.306887926791294e-05, + "loss": 16.3891, + "step": 7400 + }, + { + "epoch": 0.55, + "learning_rate": 9.301865375536157e-05, + "loss": 21.4399, + "step": 7450 + }, + { + "epoch": 0.55, + "learning_rate": 9.296842824281023e-05, + "loss": 16.3082, + "step": 7500 + }, + { + "epoch": 0.56, + "learning_rate": 9.291820273025886e-05, + "loss": 14.8713, + "step": 7550 + }, + { + "epoch": 0.56, + "learning_rate": 9.286797721770751e-05, + "loss": 16.3099, + "step": 7600 + }, + { + "epoch": 0.56, + "learning_rate": 9.281775170515616e-05, + "loss": 17.8771, + "step": 7650 + }, + { + "epoch": 0.57, + "learning_rate": 9.27675261926048e-05, + "loss": 17.1421, + "step": 7700 + }, + { + "epoch": 0.57, + "learning_rate": 9.271730068005345e-05, + "loss": 16.6478, + "step": 7750 + }, + { + "epoch": 0.58, + "learning_rate": 9.266707516750209e-05, + "loss": 15.3247, + "step": 7800 + }, + { + "epoch": 0.58, + "learning_rate": 9.261684965495073e-05, + "loss": 17.6577, + "step": 7850 + }, + { + "epoch": 0.58, + "learning_rate": 9.256662414239938e-05, + "loss": 18.8549, + "step": 7900 + }, + { + "epoch": 0.59, + "learning_rate": 9.251639862984802e-05, + "loss": 17.4187, + "step": 7950 + }, + { + "epoch": 0.59, + "learning_rate": 9.246617311729667e-05, + "loss": 15.6643, + "step": 8000 + }, + { + "epoch": 0.59, + "learning_rate": 9.241594760474531e-05, + "loss": 17.1987, + "step": 8050 + }, + { + "epoch": 0.6, + "learning_rate": 9.236572209219396e-05, + "loss": 18.1712, + "step": 8100 + }, + { + "epoch": 0.6, + "learning_rate": 9.23154965796426e-05, + "loss": 15.8015, + "step": 8150 + }, + { + "epoch": 0.6, + "learning_rate": 9.226527106709124e-05, + "loss": 19.064, + "step": 8200 + }, + { + "epoch": 0.61, + "learning_rate": 9.221504555453989e-05, + "loss": 18.2748, + "step": 8250 + }, + { + "epoch": 0.61, + "learning_rate": 9.216482004198853e-05, + "loss": 15.0679, + "step": 8300 + }, + { + 
"epoch": 0.62, + "learning_rate": 9.211459452943718e-05, + "loss": 17.995, + "step": 8350 + }, + { + "epoch": 0.62, + "learning_rate": 9.206436901688582e-05, + "loss": 17.467, + "step": 8400 + }, + { + "epoch": 0.62, + "learning_rate": 9.201414350433448e-05, + "loss": 18.6665, + "step": 8450 + }, + { + "epoch": 0.63, + "learning_rate": 9.196391799178311e-05, + "loss": 17.2848, + "step": 8500 + }, + { + "epoch": 0.63, + "learning_rate": 9.191369247923175e-05, + "loss": 14.4767, + "step": 8550 + }, + { + "epoch": 0.63, + "learning_rate": 9.18634669666804e-05, + "loss": 17.5444, + "step": 8600 + }, + { + "epoch": 0.64, + "learning_rate": 9.181324145412904e-05, + "loss": 14.4661, + "step": 8650 + }, + { + "epoch": 0.64, + "learning_rate": 9.176301594157768e-05, + "loss": 16.3339, + "step": 8700 + }, + { + "epoch": 0.65, + "learning_rate": 9.171279042902633e-05, + "loss": 17.5122, + "step": 8750 + }, + { + "epoch": 0.65, + "learning_rate": 9.166256491647499e-05, + "loss": 16.7631, + "step": 8800 + }, + { + "epoch": 0.65, + "learning_rate": 9.161233940392362e-05, + "loss": 16.5193, + "step": 8850 + }, + { + "epoch": 0.66, + "learning_rate": 9.156211389137227e-05, + "loss": 17.8364, + "step": 8900 + }, + { + "epoch": 0.66, + "learning_rate": 9.15118883788209e-05, + "loss": 16.2916, + "step": 8950 + }, + { + "epoch": 0.66, + "learning_rate": 9.146166286626956e-05, + "loss": 14.1719, + "step": 9000 + }, + { + "epoch": 0.67, + "learning_rate": 9.141143735371819e-05, + "loss": 18.2987, + "step": 9050 + }, + { + "epoch": 0.67, + "learning_rate": 9.136121184116684e-05, + "loss": 17.4248, + "step": 9100 + }, + { + "epoch": 0.67, + "learning_rate": 9.13109863286155e-05, + "loss": 16.1862, + "step": 9150 + }, + { + "epoch": 0.68, + "learning_rate": 9.126076081606412e-05, + "loss": 16.3134, + "step": 9200 + }, + { + "epoch": 0.68, + "learning_rate": 9.121053530351278e-05, + "loss": 14.9158, + "step": 9250 + }, + { + "epoch": 0.69, + "learning_rate": 9.116030979096141e-05, + "loss": 
15.2504, + "step": 9300 + }, + { + "epoch": 0.69, + "learning_rate": 9.111008427841007e-05, + "loss": 14.1967, + "step": 9350 + }, + { + "epoch": 0.69, + "learning_rate": 9.105985876585871e-05, + "loss": 17.3165, + "step": 9400 + }, + { + "epoch": 0.7, + "learning_rate": 9.100963325330736e-05, + "loss": 14.5912, + "step": 9450 + }, + { + "epoch": 0.7, + "learning_rate": 9.0959407740756e-05, + "loss": 17.5593, + "step": 9500 + }, + { + "epoch": 0.7, + "learning_rate": 9.090918222820465e-05, + "loss": 16.3421, + "step": 9550 + }, + { + "epoch": 0.71, + "learning_rate": 9.085895671565329e-05, + "loss": 16.2821, + "step": 9600 + }, + { + "epoch": 0.71, + "learning_rate": 9.080873120310192e-05, + "loss": 16.4985, + "step": 9650 + }, + { + "epoch": 0.72, + "learning_rate": 9.075850569055058e-05, + "loss": 16.1138, + "step": 9700 + }, + { + "epoch": 0.72, + "learning_rate": 9.070828017799922e-05, + "loss": 16.3997, + "step": 9750 + }, + { + "epoch": 0.72, + "learning_rate": 9.065805466544787e-05, + "loss": 15.518, + "step": 9800 + }, + { + "epoch": 0.73, + "learning_rate": 9.060782915289651e-05, + "loss": 13.8424, + "step": 9850 + }, + { + "epoch": 0.73, + "learning_rate": 9.055760364034515e-05, + "loss": 15.0784, + "step": 9900 + }, + { + "epoch": 0.73, + "learning_rate": 9.05073781277938e-05, + "loss": 14.0163, + "step": 9950 + }, + { + "epoch": 0.74, + "learning_rate": 9.045715261524244e-05, + "loss": 16.7863, + "step": 10000 + }, + { + "epoch": 0.74, + "learning_rate": 9.040692710269109e-05, + "loss": 13.6715, + "step": 10050 + }, + { + "epoch": 0.75, + "learning_rate": 9.035670159013973e-05, + "loss": 15.1071, + "step": 10100 + }, + { + "epoch": 0.75, + "learning_rate": 9.030647607758837e-05, + "loss": 14.2658, + "step": 10150 + }, + { + "epoch": 0.75, + "learning_rate": 9.025625056503703e-05, + "loss": 15.1115, + "step": 10200 + }, + { + "epoch": 0.76, + "learning_rate": 9.020602505248566e-05, + "loss": 14.028, + "step": 10250 + }, + { + "epoch": 0.76, + 
"learning_rate": 9.015579953993431e-05, + "loss": 13.3066, + "step": 10300 + }, + { + "epoch": 0.76, + "learning_rate": 9.010557402738295e-05, + "loss": 14.1185, + "step": 10350 + }, + { + "epoch": 0.77, + "learning_rate": 9.00553485148316e-05, + "loss": 14.061, + "step": 10400 + }, + { + "epoch": 0.77, + "learning_rate": 9.000512300228024e-05, + "loss": 15.2439, + "step": 10450 + }, + { + "epoch": 0.77, + "learning_rate": 8.995489748972888e-05, + "loss": 13.3617, + "step": 10500 + }, + { + "epoch": 0.78, + "learning_rate": 8.990467197717754e-05, + "loss": 14.5514, + "step": 10550 + }, + { + "epoch": 0.78, + "learning_rate": 8.985444646462617e-05, + "loss": 15.2426, + "step": 10600 + }, + { + "epoch": 0.79, + "learning_rate": 8.980422095207483e-05, + "loss": 16.6418, + "step": 10650 + }, + { + "epoch": 0.79, + "learning_rate": 8.975399543952346e-05, + "loss": 13.3146, + "step": 10700 + }, + { + "epoch": 0.79, + "learning_rate": 8.970376992697212e-05, + "loss": 14.9333, + "step": 10750 + }, + { + "epoch": 0.8, + "learning_rate": 8.965354441442075e-05, + "loss": 14.4502, + "step": 10800 + }, + { + "epoch": 0.8, + "learning_rate": 8.960331890186939e-05, + "loss": 14.7886, + "step": 10850 + }, + { + "epoch": 0.8, + "learning_rate": 8.955309338931805e-05, + "loss": 15.0266, + "step": 10900 + }, + { + "epoch": 0.81, + "learning_rate": 8.950286787676668e-05, + "loss": 14.543, + "step": 10950 + }, + { + "epoch": 0.81, + "learning_rate": 8.945264236421534e-05, + "loss": 15.8078, + "step": 11000 + }, + { + "epoch": 0.82, + "learning_rate": 8.940241685166397e-05, + "loss": 13.6052, + "step": 11050 + }, + { + "epoch": 0.82, + "learning_rate": 8.935219133911263e-05, + "loss": 14.2995, + "step": 11100 + }, + { + "epoch": 0.82, + "learning_rate": 8.930196582656126e-05, + "loss": 15.732, + "step": 11150 + }, + { + "epoch": 0.83, + "learning_rate": 8.925174031400991e-05, + "loss": 14.0573, + "step": 11200 + }, + { + "epoch": 0.83, + "learning_rate": 8.920151480145856e-05, + "loss": 
17.5941, + "step": 11250 + }, + { + "epoch": 0.83, + "learning_rate": 8.91512892889072e-05, + "loss": 14.7829, + "step": 11300 + }, + { + "epoch": 0.84, + "learning_rate": 8.910106377635585e-05, + "loss": 14.6669, + "step": 11350 + }, + { + "epoch": 0.84, + "learning_rate": 8.905083826380448e-05, + "loss": 14.3315, + "step": 11400 + }, + { + "epoch": 0.84, + "learning_rate": 8.900061275125313e-05, + "loss": 14.2639, + "step": 11450 + }, + { + "epoch": 0.85, + "learning_rate": 8.895038723870176e-05, + "loss": 14.3226, + "step": 11500 + }, + { + "epoch": 0.85, + "learning_rate": 8.890016172615042e-05, + "loss": 14.4975, + "step": 11550 + }, + { + "epoch": 0.86, + "learning_rate": 8.884993621359907e-05, + "loss": 14.8436, + "step": 11600 + }, + { + "epoch": 0.86, + "learning_rate": 8.879971070104771e-05, + "loss": 13.8481, + "step": 11650 + }, + { + "epoch": 0.86, + "learning_rate": 8.874948518849635e-05, + "loss": 12.8151, + "step": 11700 + }, + { + "epoch": 0.87, + "learning_rate": 8.8699259675945e-05, + "loss": 13.1659, + "step": 11750 + }, + { + "epoch": 0.87, + "learning_rate": 8.864903416339364e-05, + "loss": 15.0919, + "step": 11800 + }, + { + "epoch": 0.87, + "learning_rate": 8.859880865084229e-05, + "loss": 14.4382, + "step": 11850 + }, + { + "epoch": 0.88, + "learning_rate": 8.854858313829093e-05, + "loss": 14.0989, + "step": 11900 + }, + { + "epoch": 0.88, + "learning_rate": 8.849835762573957e-05, + "loss": 14.5763, + "step": 11950 + }, + { + "epoch": 0.89, + "learning_rate": 8.844813211318822e-05, + "loss": 13.4144, + "step": 12000 + }, + { + "epoch": 0.89, + "learning_rate": 8.839790660063686e-05, + "loss": 15.6018, + "step": 12050 + }, + { + "epoch": 0.89, + "learning_rate": 8.83476810880855e-05, + "loss": 14.7849, + "step": 12100 + }, + { + "epoch": 0.9, + "learning_rate": 8.829745557553415e-05, + "loss": 14.441, + "step": 12150 + }, + { + "epoch": 0.9, + "learning_rate": 8.82472300629828e-05, + "loss": 14.2135, + "step": 12200 + }, + { + "epoch": 0.9, 
+ "learning_rate": 8.819700455043144e-05, + "loss": 17.1245, + "step": 12250 + }, + { + "epoch": 0.91, + "learning_rate": 8.814677903788008e-05, + "loss": 14.6629, + "step": 12300 + }, + { + "epoch": 0.91, + "learning_rate": 8.809655352532873e-05, + "loss": 16.6715, + "step": 12350 + }, + { + "epoch": 0.91, + "learning_rate": 8.804632801277738e-05, + "loss": 13.0133, + "step": 12400 + }, + { + "epoch": 0.92, + "learning_rate": 8.799610250022601e-05, + "loss": 14.1551, + "step": 12450 + }, + { + "epoch": 0.92, + "learning_rate": 8.794587698767466e-05, + "loss": 14.019, + "step": 12500 + }, + { + "epoch": 0.93, + "learning_rate": 8.78956514751233e-05, + "loss": 14.4279, + "step": 12550 + }, + { + "epoch": 0.93, + "learning_rate": 8.784542596257195e-05, + "loss": 12.5293, + "step": 12600 + }, + { + "epoch": 0.93, + "learning_rate": 8.77952004500206e-05, + "loss": 15.0403, + "step": 12650 + }, + { + "epoch": 0.94, + "learning_rate": 8.774497493746924e-05, + "loss": 13.8193, + "step": 12700 + }, + { + "epoch": 0.94, + "learning_rate": 8.769474942491789e-05, + "loss": 13.1564, + "step": 12750 + }, + { + "epoch": 0.94, + "learning_rate": 8.764452391236652e-05, + "loss": 14.6415, + "step": 12800 + }, + { + "epoch": 0.95, + "learning_rate": 8.759429839981518e-05, + "loss": 12.2339, + "step": 12850 + }, + { + "epoch": 0.95, + "learning_rate": 8.754407288726381e-05, + "loss": 12.1604, + "step": 12900 + }, + { + "epoch": 0.96, + "learning_rate": 8.749384737471247e-05, + "loss": 15.4939, + "step": 12950 + }, + { + "epoch": 0.96, + "learning_rate": 8.744362186216111e-05, + "loss": 13.9713, + "step": 13000 + }, + { + "epoch": 0.96, + "learning_rate": 8.739339634960976e-05, + "loss": 14.0986, + "step": 13050 + }, + { + "epoch": 0.97, + "learning_rate": 8.73431708370584e-05, + "loss": 13.6334, + "step": 13100 + }, + { + "epoch": 0.97, + "learning_rate": 8.729294532450703e-05, + "loss": 13.5201, + "step": 13150 + }, + { + "epoch": 0.97, + "learning_rate": 8.724271981195569e-05, + 
"loss": 14.3793, + "step": 13200 + }, + { + "epoch": 0.98, + "learning_rate": 8.719249429940432e-05, + "loss": 13.1741, + "step": 13250 + }, + { + "epoch": 0.98, + "learning_rate": 8.714226878685298e-05, + "loss": 11.7782, + "step": 13300 + }, + { + "epoch": 0.98, + "learning_rate": 8.709204327430162e-05, + "loss": 12.2758, + "step": 13350 + }, + { + "epoch": 0.99, + "learning_rate": 8.704181776175027e-05, + "loss": 13.1723, + "step": 13400 + }, + { + "epoch": 0.99, + "learning_rate": 8.699159224919891e-05, + "loss": 14.0858, + "step": 13450 + }, + { + "epoch": 1.0, + "learning_rate": 8.694136673664755e-05, + "loss": 11.2836, + "step": 13500 + }, + { + "epoch": 1.0, + "learning_rate": 8.68911412240962e-05, + "loss": 15.7226, + "step": 13550 + }, + { + "epoch": 1.0, + "learning_rate": 8.684091571154484e-05, + "loss": 15.8889, + "step": 13600 + }, + { + "epoch": 1.01, + "learning_rate": 8.679069019899349e-05, + "loss": 12.2185, + "step": 13650 + }, + { + "epoch": 1.01, + "learning_rate": 8.674046468644213e-05, + "loss": 11.4647, + "step": 13700 + }, + { + "epoch": 1.01, + "learning_rate": 8.669023917389077e-05, + "loss": 13.1238, + "step": 13750 + }, + { + "epoch": 1.02, + "learning_rate": 8.664001366133942e-05, + "loss": 11.909, + "step": 13800 + }, + { + "epoch": 1.02, + "learning_rate": 8.658978814878806e-05, + "loss": 12.5478, + "step": 13850 + }, + { + "epoch": 1.03, + "learning_rate": 8.65395626362367e-05, + "loss": 13.017, + "step": 13900 + }, + { + "epoch": 1.03, + "learning_rate": 8.648933712368535e-05, + "loss": 12.9134, + "step": 13950 + }, + { + "epoch": 1.03, + "learning_rate": 8.6439111611134e-05, + "loss": 13.3485, + "step": 14000 + }, + { + "epoch": 1.04, + "learning_rate": 8.638888609858264e-05, + "loss": 11.4706, + "step": 14050 + }, + { + "epoch": 1.04, + "learning_rate": 8.633866058603128e-05, + "loss": 11.1063, + "step": 14100 + }, + { + "epoch": 1.04, + "learning_rate": 8.628843507347994e-05, + "loss": 12.7408, + "step": 14150 + }, + { + 
"epoch": 1.05, + "learning_rate": 8.623820956092857e-05, + "loss": 12.0689, + "step": 14200 + }, + { + "epoch": 1.05, + "learning_rate": 8.618798404837721e-05, + "loss": 11.0724, + "step": 14250 + }, + { + "epoch": 1.05, + "learning_rate": 8.613775853582586e-05, + "loss": 12.5685, + "step": 14300 + }, + { + "epoch": 1.06, + "learning_rate": 8.60875330232745e-05, + "loss": 12.7776, + "step": 14350 + }, + { + "epoch": 1.06, + "learning_rate": 8.603730751072315e-05, + "loss": 11.3066, + "step": 14400 + }, + { + "epoch": 1.07, + "learning_rate": 8.598708199817179e-05, + "loss": 13.06, + "step": 14450 + }, + { + "epoch": 1.07, + "learning_rate": 8.593685648562045e-05, + "loss": 15.6523, + "step": 14500 + }, + { + "epoch": 1.07, + "learning_rate": 8.588663097306908e-05, + "loss": 12.019, + "step": 14550 + }, + { + "epoch": 1.08, + "learning_rate": 8.583640546051774e-05, + "loss": 11.0941, + "step": 14600 + }, + { + "epoch": 1.08, + "learning_rate": 8.578617994796637e-05, + "loss": 12.4755, + "step": 14650 + }, + { + "epoch": 1.08, + "learning_rate": 8.573595443541502e-05, + "loss": 13.7012, + "step": 14700 + }, + { + "epoch": 1.09, + "learning_rate": 8.568572892286366e-05, + "loss": 12.2024, + "step": 14750 + }, + { + "epoch": 1.09, + "learning_rate": 8.56355034103123e-05, + "loss": 12.4744, + "step": 14800 + }, + { + "epoch": 1.1, + "learning_rate": 8.558527789776096e-05, + "loss": 12.3234, + "step": 14850 + }, + { + "epoch": 1.1, + "learning_rate": 8.553505238520959e-05, + "loss": 12.5616, + "step": 14900 + }, + { + "epoch": 1.1, + "learning_rate": 8.548482687265824e-05, + "loss": 11.9559, + "step": 14950 + }, + { + "epoch": 1.11, + "learning_rate": 8.543460136010688e-05, + "loss": 12.0734, + "step": 15000 + }, + { + "epoch": 1.11, + "learning_rate": 8.538437584755553e-05, + "loss": 13.0341, + "step": 15050 + }, + { + "epoch": 1.11, + "learning_rate": 8.533415033500418e-05, + "loss": 12.7406, + "step": 15100 + }, + { + "epoch": 1.12, + "learning_rate": 
8.528392482245282e-05, + "loss": 11.7258, + "step": 15150 + }, + { + "epoch": 1.12, + "learning_rate": 8.523369930990147e-05, + "loss": 11.8709, + "step": 15200 + }, + { + "epoch": 1.12, + "learning_rate": 8.518347379735011e-05, + "loss": 11.7021, + "step": 15250 + }, + { + "epoch": 1.13, + "learning_rate": 8.513324828479875e-05, + "loss": 13.2674, + "step": 15300 + }, + { + "epoch": 1.13, + "learning_rate": 8.508302277224738e-05, + "loss": 11.9099, + "step": 15350 + }, + { + "epoch": 1.14, + "learning_rate": 8.503279725969604e-05, + "loss": 11.7841, + "step": 15400 + }, + { + "epoch": 1.14, + "learning_rate": 8.498257174714469e-05, + "loss": 11.9573, + "step": 15450 + }, + { + "epoch": 1.14, + "learning_rate": 8.493234623459333e-05, + "loss": 11.7211, + "step": 15500 + }, + { + "epoch": 1.15, + "learning_rate": 8.488212072204197e-05, + "loss": 12.3513, + "step": 15550 + }, + { + "epoch": 1.15, + "learning_rate": 8.483189520949062e-05, + "loss": 11.0709, + "step": 15600 + }, + { + "epoch": 1.15, + "learning_rate": 8.478166969693926e-05, + "loss": 11.6544, + "step": 15650 + }, + { + "epoch": 1.16, + "learning_rate": 8.47314441843879e-05, + "loss": 11.8285, + "step": 15700 + }, + { + "epoch": 1.16, + "learning_rate": 8.468121867183655e-05, + "loss": 10.4208, + "step": 15750 + }, + { + "epoch": 1.17, + "learning_rate": 8.46309931592852e-05, + "loss": 10.7821, + "step": 15800 + }, + { + "epoch": 1.17, + "learning_rate": 8.458076764673384e-05, + "loss": 13.2724, + "step": 15850 + }, + { + "epoch": 1.17, + "learning_rate": 8.45305421341825e-05, + "loss": 10.9219, + "step": 15900 + }, + { + "epoch": 1.18, + "learning_rate": 8.448031662163113e-05, + "loss": 12.2532, + "step": 15950 + }, + { + "epoch": 1.18, + "learning_rate": 8.443009110907977e-05, + "loss": 11.0132, + "step": 16000 + }, + { + "epoch": 1.18, + "learning_rate": 8.437986559652841e-05, + "loss": 12.319, + "step": 16050 + }, + { + "epoch": 1.19, + "learning_rate": 8.432964008397706e-05, + "loss": 12.9871, + 
"step": 16100 + }, + { + "epoch": 1.19, + "learning_rate": 8.42794145714257e-05, + "loss": 12.0625, + "step": 16150 + }, + { + "epoch": 1.19, + "learning_rate": 8.422918905887435e-05, + "loss": 13.4629, + "step": 16200 + }, + { + "epoch": 1.2, + "learning_rate": 8.4178963546323e-05, + "loss": 10.9291, + "step": 16250 + }, + { + "epoch": 1.2, + "learning_rate": 8.412873803377163e-05, + "loss": 13.7719, + "step": 16300 + }, + { + "epoch": 1.21, + "learning_rate": 8.407851252122029e-05, + "loss": 11.3634, + "step": 16350 + }, + { + "epoch": 1.21, + "learning_rate": 8.402828700866892e-05, + "loss": 12.7941, + "step": 16400 + }, + { + "epoch": 1.21, + "learning_rate": 8.397806149611758e-05, + "loss": 11.8863, + "step": 16450 + }, + { + "epoch": 1.22, + "learning_rate": 8.392783598356621e-05, + "loss": 9.5225, + "step": 16500 + }, + { + "epoch": 1.22, + "learning_rate": 8.387761047101485e-05, + "loss": 12.983, + "step": 16550 + }, + { + "epoch": 1.22, + "learning_rate": 8.382738495846351e-05, + "loss": 11.8489, + "step": 16600 + }, + { + "epoch": 1.23, + "learning_rate": 8.377715944591214e-05, + "loss": 11.8122, + "step": 16650 + }, + { + "epoch": 1.23, + "learning_rate": 8.37269339333608e-05, + "loss": 12.3387, + "step": 16700 + }, + { + "epoch": 1.24, + "learning_rate": 8.367670842080943e-05, + "loss": 13.4648, + "step": 16750 + }, + { + "epoch": 1.24, + "learning_rate": 8.362648290825809e-05, + "loss": 10.2301, + "step": 16800 + }, + { + "epoch": 1.24, + "learning_rate": 8.357625739570672e-05, + "loss": 11.492, + "step": 16850 + }, + { + "epoch": 1.25, + "learning_rate": 8.352603188315538e-05, + "loss": 12.5997, + "step": 16900 + }, + { + "epoch": 1.25, + "learning_rate": 8.347580637060402e-05, + "loss": 11.5588, + "step": 16950 + }, + { + "epoch": 1.25, + "learning_rate": 8.342558085805266e-05, + "loss": 11.8627, + "step": 17000 + }, + { + "epoch": 1.26, + "learning_rate": 8.337535534550131e-05, + "loss": 13.2469, + "step": 17050 + }, + { + "epoch": 1.26, + 
"learning_rate": 8.332512983294994e-05, + "loss": 10.4327, + "step": 17100 + }, + { + "epoch": 1.27, + "learning_rate": 8.32749043203986e-05, + "loss": 12.7566, + "step": 17150 + }, + { + "epoch": 1.27, + "learning_rate": 8.322467880784723e-05, + "loss": 11.0729, + "step": 17200 + }, + { + "epoch": 1.27, + "learning_rate": 8.317445329529588e-05, + "loss": 12.3484, + "step": 17250 + }, + { + "epoch": 1.28, + "learning_rate": 8.312422778274453e-05, + "loss": 10.5193, + "step": 17300 + }, + { + "epoch": 1.28, + "learning_rate": 8.307400227019317e-05, + "loss": 12.2369, + "step": 17350 + }, + { + "epoch": 1.28, + "learning_rate": 8.302377675764182e-05, + "loss": 12.2976, + "step": 17400 + }, + { + "epoch": 1.29, + "learning_rate": 8.297355124509046e-05, + "loss": 12.3852, + "step": 17450 + }, + { + "epoch": 1.29, + "learning_rate": 8.29233257325391e-05, + "loss": 11.2137, + "step": 17500 + }, + { + "epoch": 1.29, + "learning_rate": 8.287310021998775e-05, + "loss": 11.609, + "step": 17550 + }, + { + "epoch": 1.3, + "learning_rate": 8.282287470743639e-05, + "loss": 13.3339, + "step": 17600 + }, + { + "epoch": 1.3, + "learning_rate": 8.277264919488504e-05, + "loss": 11.4263, + "step": 17650 + }, + { + "epoch": 1.31, + "learning_rate": 8.272242368233368e-05, + "loss": 12.6949, + "step": 17700 + }, + { + "epoch": 1.31, + "learning_rate": 8.267219816978233e-05, + "loss": 11.4767, + "step": 17750 + }, + { + "epoch": 1.31, + "learning_rate": 8.262197265723097e-05, + "loss": 12.2225, + "step": 17800 + }, + { + "epoch": 1.32, + "learning_rate": 8.257174714467961e-05, + "loss": 11.0755, + "step": 17850 + }, + { + "epoch": 1.32, + "learning_rate": 8.252152163212826e-05, + "loss": 11.9677, + "step": 17900 + }, + { + "epoch": 1.32, + "learning_rate": 8.24712961195769e-05, + "loss": 11.098, + "step": 17950 + }, + { + "epoch": 1.33, + "learning_rate": 8.242107060702555e-05, + "loss": 11.1102, + "step": 18000 + }, + { + "epoch": 1.33, + "learning_rate": 8.237084509447419e-05, + "loss": 
11.4985, + "step": 18050 + }, + { + "epoch": 1.34, + "learning_rate": 8.232061958192285e-05, + "loss": 11.7356, + "step": 18100 + }, + { + "epoch": 1.34, + "learning_rate": 8.227039406937148e-05, + "loss": 11.3336, + "step": 18150 + }, + { + "epoch": 1.34, + "learning_rate": 8.222016855682012e-05, + "loss": 11.0448, + "step": 18200 + }, + { + "epoch": 1.35, + "learning_rate": 8.216994304426877e-05, + "loss": 10.9986, + "step": 18250 + }, + { + "epoch": 1.35, + "learning_rate": 8.211971753171741e-05, + "loss": 10.768, + "step": 18300 + }, + { + "epoch": 1.35, + "learning_rate": 8.206949201916607e-05, + "loss": 11.6844, + "step": 18350 + }, + { + "epoch": 1.36, + "learning_rate": 8.20192665066147e-05, + "loss": 11.5615, + "step": 18400 + }, + { + "epoch": 1.36, + "learning_rate": 8.196904099406336e-05, + "loss": 11.4019, + "step": 18450 + }, + { + "epoch": 1.36, + "learning_rate": 8.191881548151199e-05, + "loss": 12.1784, + "step": 18500 + }, + { + "epoch": 1.37, + "learning_rate": 8.186858996896064e-05, + "loss": 12.4565, + "step": 18550 + }, + { + "epoch": 1.37, + "learning_rate": 8.181836445640927e-05, + "loss": 11.0557, + "step": 18600 + }, + { + "epoch": 1.38, + "learning_rate": 8.176813894385793e-05, + "loss": 12.1892, + "step": 18650 + }, + { + "epoch": 1.38, + "learning_rate": 8.171791343130658e-05, + "loss": 12.0531, + "step": 18700 + }, + { + "epoch": 1.38, + "learning_rate": 8.166768791875522e-05, + "loss": 10.1791, + "step": 18750 + }, + { + "epoch": 1.39, + "learning_rate": 8.161746240620386e-05, + "loss": 11.2501, + "step": 18800 + }, + { + "epoch": 1.39, + "learning_rate": 8.15672368936525e-05, + "loss": 9.92, + "step": 18850 + }, + { + "epoch": 1.39, + "learning_rate": 8.151701138110115e-05, + "loss": 10.0603, + "step": 18900 + }, + { + "epoch": 1.4, + "learning_rate": 8.146678586854978e-05, + "loss": 10.9477, + "step": 18950 + }, + { + "epoch": 1.4, + "learning_rate": 8.141656035599844e-05, + "loss": 9.7579, + "step": 19000 + }, + { + "epoch": 1.41, 
+ "learning_rate": 8.136633484344708e-05, + "loss": 11.243, + "step": 19050 + }, + { + "epoch": 1.41, + "learning_rate": 8.131610933089573e-05, + "loss": 11.0069, + "step": 19100 + }, + { + "epoch": 1.41, + "learning_rate": 8.126588381834437e-05, + "loss": 9.7387, + "step": 19150 + }, + { + "epoch": 1.42, + "learning_rate": 8.121565830579302e-05, + "loss": 11.4624, + "step": 19200 + }, + { + "epoch": 1.42, + "learning_rate": 8.116543279324166e-05, + "loss": 12.1299, + "step": 19250 + }, + { + "epoch": 1.42, + "learning_rate": 8.11152072806903e-05, + "loss": 12.2796, + "step": 19300 + }, + { + "epoch": 1.43, + "learning_rate": 8.106498176813895e-05, + "loss": 10.3295, + "step": 19350 + }, + { + "epoch": 1.43, + "learning_rate": 8.101475625558759e-05, + "loss": 10.0709, + "step": 19400 + }, + { + "epoch": 1.43, + "learning_rate": 8.096453074303624e-05, + "loss": 11.0725, + "step": 19450 + }, + { + "epoch": 1.44, + "learning_rate": 8.091430523048488e-05, + "loss": 10.7882, + "step": 19500 + }, + { + "epoch": 1.44, + "learning_rate": 8.086407971793352e-05, + "loss": 11.4124, + "step": 19550 + }, + { + "epoch": 1.45, + "learning_rate": 8.081385420538217e-05, + "loss": 10.4941, + "step": 19600 + }, + { + "epoch": 1.45, + "learning_rate": 8.076362869283081e-05, + "loss": 11.8687, + "step": 19650 + }, + { + "epoch": 1.45, + "learning_rate": 8.071340318027946e-05, + "loss": 11.3221, + "step": 19700 + }, + { + "epoch": 1.46, + "learning_rate": 8.06631776677281e-05, + "loss": 10.2167, + "step": 19750 + }, + { + "epoch": 1.46, + "learning_rate": 8.061295215517675e-05, + "loss": 10.5425, + "step": 19800 + }, + { + "epoch": 1.46, + "learning_rate": 8.05627266426254e-05, + "loss": 11.2982, + "step": 19850 + }, + { + "epoch": 1.47, + "learning_rate": 8.051250113007403e-05, + "loss": 12.0685, + "step": 19900 + }, + { + "epoch": 1.47, + "learning_rate": 8.046227561752268e-05, + "loss": 10.6613, + "step": 19950 + }, + { + "epoch": 1.48, + "learning_rate": 8.041205010497132e-05, + 
"loss": 10.8245, + "step": 20000 + }, + { + "epoch": 1.48, + "eval_loss": 10.409339904785156, + "eval_runtime": 890.9956, + "eval_samples_per_second": 14.7, + "eval_steps_per_second": 3.676, + "eval_wer": 0.2624627273109067, + "step": 20000 + }, + { + "epoch": 1.48, + "learning_rate": 8.036182459241997e-05, + "loss": 10.671, + "step": 20050 + }, + { + "epoch": 1.48, + "learning_rate": 8.031159907986861e-05, + "loss": 11.0263, + "step": 20100 + }, + { + "epoch": 1.49, + "learning_rate": 8.026137356731725e-05, + "loss": 11.0571, + "step": 20150 + }, + { + "epoch": 1.49, + "learning_rate": 8.021114805476591e-05, + "loss": 13.0778, + "step": 20200 + }, + { + "epoch": 1.49, + "learning_rate": 8.016092254221454e-05, + "loss": 11.0495, + "step": 20250 + }, + { + "epoch": 1.5, + "learning_rate": 8.01106970296632e-05, + "loss": 10.6039, + "step": 20300 + }, + { + "epoch": 1.5, + "learning_rate": 8.006047151711183e-05, + "loss": 11.4221, + "step": 20350 + }, + { + "epoch": 1.5, + "learning_rate": 8.001024600456049e-05, + "loss": 10.7975, + "step": 20400 + }, + { + "epoch": 1.51, + "learning_rate": 7.996002049200912e-05, + "loss": 10.1123, + "step": 20450 + }, + { + "epoch": 1.51, + "learning_rate": 7.990979497945776e-05, + "loss": 10.2241, + "step": 20500 + }, + { + "epoch": 1.52, + "learning_rate": 7.985956946690642e-05, + "loss": 10.0191, + "step": 20550 + }, + { + "epoch": 1.52, + "learning_rate": 7.980934395435505e-05, + "loss": 10.649, + "step": 20600 + }, + { + "epoch": 1.52, + "learning_rate": 7.975911844180371e-05, + "loss": 9.6091, + "step": 20650 + }, + { + "epoch": 1.53, + "learning_rate": 7.970889292925234e-05, + "loss": 9.9386, + "step": 20700 + }, + { + "epoch": 1.53, + "learning_rate": 7.9658667416701e-05, + "loss": 11.2646, + "step": 20750 + }, + { + "epoch": 1.53, + "learning_rate": 7.960844190414964e-05, + "loss": 10.0181, + "step": 20800 + }, + { + "epoch": 1.54, + "learning_rate": 7.955821639159828e-05, + "loss": 11.9437, + "step": 20850 + }, + { + 
"epoch": 1.54, + "learning_rate": 7.950799087904693e-05, + "loss": 10.9254, + "step": 20900 + }, + { + "epoch": 1.55, + "learning_rate": 7.945776536649557e-05, + "loss": 11.7954, + "step": 20950 + }, + { + "epoch": 1.55, + "learning_rate": 7.940753985394422e-05, + "loss": 9.6569, + "step": 21000 + }, + { + "epoch": 1.55, + "learning_rate": 7.935731434139286e-05, + "loss": 10.6546, + "step": 21050 + }, + { + "epoch": 1.56, + "learning_rate": 7.93070888288415e-05, + "loss": 10.2795, + "step": 21100 + }, + { + "epoch": 1.56, + "learning_rate": 7.925686331629015e-05, + "loss": 10.4595, + "step": 21150 + }, + { + "epoch": 1.56, + "learning_rate": 7.920663780373879e-05, + "loss": 9.2921, + "step": 21200 + }, + { + "epoch": 1.57, + "learning_rate": 7.915641229118744e-05, + "loss": 10.1245, + "step": 21250 + }, + { + "epoch": 1.57, + "learning_rate": 7.910618677863608e-05, + "loss": 11.2896, + "step": 21300 + }, + { + "epoch": 1.57, + "learning_rate": 7.905596126608472e-05, + "loss": 11.3328, + "step": 21350 + }, + { + "epoch": 1.58, + "learning_rate": 7.900573575353337e-05, + "loss": 10.0718, + "step": 21400 + }, + { + "epoch": 1.58, + "learning_rate": 7.895551024098201e-05, + "loss": 10.8954, + "step": 21450 + }, + { + "epoch": 1.59, + "learning_rate": 7.890528472843066e-05, + "loss": 10.2921, + "step": 21500 + }, + { + "epoch": 1.59, + "learning_rate": 7.88550592158793e-05, + "loss": 9.4609, + "step": 21550 + }, + { + "epoch": 1.59, + "learning_rate": 7.880483370332796e-05, + "loss": 11.4751, + "step": 21600 + }, + { + "epoch": 1.6, + "learning_rate": 7.875460819077659e-05, + "loss": 10.1189, + "step": 21650 + }, + { + "epoch": 1.6, + "learning_rate": 7.870438267822523e-05, + "loss": 11.6478, + "step": 21700 + }, + { + "epoch": 1.6, + "learning_rate": 7.865415716567388e-05, + "loss": 11.2943, + "step": 21750 + }, + { + "epoch": 1.61, + "learning_rate": 7.860393165312252e-05, + "loss": 11.5788, + "step": 21800 + }, + { + "epoch": 1.61, + "learning_rate": 
7.855370614057116e-05, + "loss": 10.638, + "step": 21850 + }, + { + "epoch": 1.62, + "learning_rate": 7.850348062801981e-05, + "loss": 9.2895, + "step": 21900 + }, + { + "epoch": 1.62, + "learning_rate": 7.845325511546847e-05, + "loss": 11.4984, + "step": 21950 + }, + { + "epoch": 1.62, + "learning_rate": 7.84030296029171e-05, + "loss": 10.3685, + "step": 22000 + }, + { + "epoch": 1.63, + "learning_rate": 7.835280409036575e-05, + "loss": 10.0115, + "step": 22050 + }, + { + "epoch": 1.63, + "learning_rate": 7.830257857781439e-05, + "loss": 10.2941, + "step": 22100 + }, + { + "epoch": 1.63, + "learning_rate": 7.825235306526304e-05, + "loss": 10.8751, + "step": 22150 + }, + { + "epoch": 1.64, + "learning_rate": 7.820212755271167e-05, + "loss": 10.7477, + "step": 22200 + }, + { + "epoch": 1.64, + "learning_rate": 7.815190204016032e-05, + "loss": 12.2573, + "step": 22250 + }, + { + "epoch": 1.64, + "learning_rate": 7.810167652760897e-05, + "loss": 10.1055, + "step": 22300 + }, + { + "epoch": 1.65, + "learning_rate": 7.80514510150576e-05, + "loss": 10.7913, + "step": 22350 + }, + { + "epoch": 1.65, + "learning_rate": 7.800122550250626e-05, + "loss": 9.4701, + "step": 22400 + }, + { + "epoch": 1.66, + "learning_rate": 7.79509999899549e-05, + "loss": 9.9434, + "step": 22450 + }, + { + "epoch": 1.66, + "learning_rate": 7.790077447740355e-05, + "loss": 10.9016, + "step": 22500 + }, + { + "epoch": 1.66, + "learning_rate": 7.785054896485218e-05, + "loss": 10.1733, + "step": 22550 + }, + { + "epoch": 1.67, + "learning_rate": 7.780032345230084e-05, + "loss": 11.0693, + "step": 22600 + }, + { + "epoch": 1.67, + "learning_rate": 7.775009793974948e-05, + "loss": 10.4538, + "step": 22650 + }, + { + "epoch": 1.67, + "learning_rate": 7.769987242719813e-05, + "loss": 10.5127, + "step": 22700 + }, + { + "epoch": 1.68, + "learning_rate": 7.764964691464677e-05, + "loss": 10.1074, + "step": 22750 + }, + { + "epoch": 1.68, + "learning_rate": 7.75994214020954e-05, + "loss": 11.2803, + 
"step": 22800 + }, + { + "epoch": 1.69, + "learning_rate": 7.754919588954406e-05, + "loss": 10.9954, + "step": 22850 + }, + { + "epoch": 1.69, + "learning_rate": 7.749897037699269e-05, + "loss": 10.1006, + "step": 22900 + }, + { + "epoch": 1.69, + "learning_rate": 7.744874486444135e-05, + "loss": 10.9978, + "step": 22950 + }, + { + "epoch": 1.7, + "learning_rate": 7.739851935188999e-05, + "loss": 10.5885, + "step": 23000 + }, + { + "epoch": 1.7, + "learning_rate": 7.734829383933864e-05, + "loss": 10.5676, + "step": 23050 + }, + { + "epoch": 1.7, + "learning_rate": 7.729806832678728e-05, + "loss": 11.3204, + "step": 23100 + }, + { + "epoch": 1.71, + "learning_rate": 7.724784281423592e-05, + "loss": 10.5388, + "step": 23150 + }, + { + "epoch": 1.71, + "learning_rate": 7.719761730168457e-05, + "loss": 10.7915, + "step": 23200 + }, + { + "epoch": 1.71, + "learning_rate": 7.714739178913321e-05, + "loss": 11.9486, + "step": 23250 + }, + { + "epoch": 1.72, + "learning_rate": 7.709716627658186e-05, + "loss": 11.6693, + "step": 23300 + }, + { + "epoch": 1.72, + "learning_rate": 7.70469407640305e-05, + "loss": 9.2664, + "step": 23350 + }, + { + "epoch": 1.73, + "learning_rate": 7.699671525147914e-05, + "loss": 12.1429, + "step": 23400 + }, + { + "epoch": 1.73, + "learning_rate": 7.694648973892779e-05, + "loss": 10.1155, + "step": 23450 + }, + { + "epoch": 1.73, + "learning_rate": 7.689626422637643e-05, + "loss": 10.1562, + "step": 23500 + }, + { + "epoch": 1.74, + "learning_rate": 7.684603871382508e-05, + "loss": 11.3484, + "step": 23550 + }, + { + "epoch": 1.74, + "learning_rate": 7.679581320127372e-05, + "loss": 9.5912, + "step": 23600 + }, + { + "epoch": 1.74, + "learning_rate": 7.674558768872236e-05, + "loss": 11.1067, + "step": 23650 + }, + { + "epoch": 1.75, + "learning_rate": 7.669536217617101e-05, + "loss": 11.7182, + "step": 23700 + }, + { + "epoch": 1.75, + "learning_rate": 7.664513666361965e-05, + "loss": 10.1444, + "step": 23750 + }, + { + "epoch": 1.76, + 
"learning_rate": 7.659491115106831e-05, + "loss": 11.2671, + "step": 23800 + }, + { + "epoch": 1.76, + "learning_rate": 7.654468563851694e-05, + "loss": 10.9027, + "step": 23850 + }, + { + "epoch": 1.76, + "learning_rate": 7.64944601259656e-05, + "loss": 10.9078, + "step": 23900 + }, + { + "epoch": 1.77, + "learning_rate": 7.644423461341423e-05, + "loss": 10.5441, + "step": 23950 + }, + { + "epoch": 1.77, + "learning_rate": 7.639400910086287e-05, + "loss": 9.8617, + "step": 24000 + }, + { + "epoch": 1.77, + "learning_rate": 7.634378358831153e-05, + "loss": 10.8022, + "step": 24050 + }, + { + "epoch": 1.78, + "learning_rate": 7.629355807576016e-05, + "loss": 10.3082, + "step": 24100 + }, + { + "epoch": 1.78, + "learning_rate": 7.624333256320882e-05, + "loss": 9.8398, + "step": 24150 + }, + { + "epoch": 1.79, + "learning_rate": 7.619310705065745e-05, + "loss": 10.3631, + "step": 24200 + }, + { + "epoch": 1.79, + "learning_rate": 7.61428815381061e-05, + "loss": 10.6078, + "step": 24250 + }, + { + "epoch": 1.79, + "learning_rate": 7.609265602555474e-05, + "loss": 11.366, + "step": 24300 + }, + { + "epoch": 1.8, + "learning_rate": 7.60424305130034e-05, + "loss": 12.1154, + "step": 24350 + }, + { + "epoch": 1.8, + "learning_rate": 7.599220500045204e-05, + "loss": 11.3429, + "step": 24400 + }, + { + "epoch": 1.8, + "learning_rate": 7.594197948790068e-05, + "loss": 9.135, + "step": 24450 + }, + { + "epoch": 1.81, + "learning_rate": 7.589175397534933e-05, + "loss": 10.3796, + "step": 24500 + }, + { + "epoch": 1.81, + "learning_rate": 7.584152846279796e-05, + "loss": 10.6452, + "step": 24550 + }, + { + "epoch": 1.81, + "learning_rate": 7.579130295024661e-05, + "loss": 9.6237, + "step": 24600 + }, + { + "epoch": 1.82, + "learning_rate": 7.574107743769525e-05, + "loss": 10.7158, + "step": 24650 + }, + { + "epoch": 1.82, + "learning_rate": 7.56908519251439e-05, + "loss": 9.8296, + "step": 24700 + }, + { + "epoch": 1.83, + "learning_rate": 7.564062641259255e-05, + "loss": 
10.1654, + "step": 24750 + }, + { + "epoch": 1.83, + "learning_rate": 7.559040090004119e-05, + "loss": 10.395, + "step": 24800 + }, + { + "epoch": 1.83, + "learning_rate": 7.554017538748984e-05, + "loss": 10.3067, + "step": 24850 + }, + { + "epoch": 1.84, + "learning_rate": 7.548994987493848e-05, + "loss": 10.7243, + "step": 24900 + }, + { + "epoch": 1.84, + "learning_rate": 7.543972436238712e-05, + "loss": 10.4022, + "step": 24950 + }, + { + "epoch": 1.84, + "learning_rate": 7.538949884983577e-05, + "loss": 10.5045, + "step": 25000 + }, + { + "epoch": 1.85, + "learning_rate": 7.533927333728441e-05, + "loss": 11.2205, + "step": 25050 + }, + { + "epoch": 1.85, + "learning_rate": 7.528904782473306e-05, + "loss": 10.5375, + "step": 25100 + }, + { + "epoch": 1.86, + "learning_rate": 7.52388223121817e-05, + "loss": 10.4876, + "step": 25150 + }, + { + "epoch": 1.86, + "learning_rate": 7.518859679963034e-05, + "loss": 9.2096, + "step": 25200 + }, + { + "epoch": 1.86, + "learning_rate": 7.513837128707899e-05, + "loss": 10.0442, + "step": 25250 + }, + { + "epoch": 1.87, + "learning_rate": 7.508814577452763e-05, + "loss": 9.8174, + "step": 25300 + }, + { + "epoch": 1.87, + "learning_rate": 7.503792026197628e-05, + "loss": 10.8789, + "step": 25350 + }, + { + "epoch": 1.87, + "learning_rate": 7.498769474942492e-05, + "loss": 9.8789, + "step": 25400 + }, + { + "epoch": 1.88, + "learning_rate": 7.493746923687356e-05, + "loss": 11.1431, + "step": 25450 + }, + { + "epoch": 1.88, + "learning_rate": 7.488724372432221e-05, + "loss": 10.4659, + "step": 25500 + }, + { + "epoch": 1.88, + "learning_rate": 7.483701821177087e-05, + "loss": 10.7342, + "step": 25550 + }, + { + "epoch": 1.89, + "learning_rate": 7.47867926992195e-05, + "loss": 10.7841, + "step": 25600 + }, + { + "epoch": 1.89, + "learning_rate": 7.473656718666814e-05, + "loss": 9.6162, + "step": 25650 + }, + { + "epoch": 1.9, + "learning_rate": 7.468634167411678e-05, + "loss": 10.3568, + "step": 25700 + }, + { + "epoch": 1.9, 
+ "learning_rate": 7.463611616156543e-05, + "loss": 9.6701, + "step": 25750 + }, + { + "epoch": 1.9, + "learning_rate": 7.458589064901407e-05, + "loss": 9.4003, + "step": 25800 + }, + { + "epoch": 1.91, + "learning_rate": 7.453566513646272e-05, + "loss": 9.6621, + "step": 25850 + }, + { + "epoch": 1.91, + "learning_rate": 7.448543962391137e-05, + "loss": 10.1086, + "step": 25900 + }, + { + "epoch": 1.91, + "learning_rate": 7.443521411136e-05, + "loss": 11.5655, + "step": 25950 + }, + { + "epoch": 1.92, + "learning_rate": 7.438498859880866e-05, + "loss": 8.9418, + "step": 26000 + }, + { + "epoch": 1.92, + "learning_rate": 7.433476308625729e-05, + "loss": 9.2415, + "step": 26050 + }, + { + "epoch": 1.93, + "learning_rate": 7.428453757370595e-05, + "loss": 9.4192, + "step": 26100 + }, + { + "epoch": 1.93, + "learning_rate": 7.423431206115458e-05, + "loss": 9.1755, + "step": 26150 + }, + { + "epoch": 1.93, + "learning_rate": 7.418408654860322e-05, + "loss": 9.6327, + "step": 26200 + }, + { + "epoch": 1.94, + "learning_rate": 7.413386103605188e-05, + "loss": 10.3333, + "step": 26250 + }, + { + "epoch": 1.94, + "learning_rate": 7.408363552350051e-05, + "loss": 10.298, + "step": 26300 + }, + { + "epoch": 1.94, + "learning_rate": 7.403341001094917e-05, + "loss": 10.7038, + "step": 26350 + }, + { + "epoch": 1.95, + "learning_rate": 7.39831844983978e-05, + "loss": 10.5099, + "step": 26400 + }, + { + "epoch": 1.95, + "learning_rate": 7.393295898584646e-05, + "loss": 9.8063, + "step": 26450 + }, + { + "epoch": 1.95, + "learning_rate": 7.38827334732951e-05, + "loss": 9.5784, + "step": 26500 + }, + { + "epoch": 1.96, + "learning_rate": 7.383250796074375e-05, + "loss": 10.1958, + "step": 26550 + }, + { + "epoch": 1.96, + "learning_rate": 7.378228244819239e-05, + "loss": 9.6869, + "step": 26600 + }, + { + "epoch": 1.97, + "learning_rate": 7.373205693564103e-05, + "loss": 10.3761, + "step": 26650 + }, + { + "epoch": 1.97, + "learning_rate": 7.368183142308968e-05, + "loss": 11.6806, 
+ "step": 26700 + }, + { + "epoch": 1.97, + "learning_rate": 7.363160591053832e-05, + "loss": 10.3183, + "step": 26750 + }, + { + "epoch": 1.98, + "learning_rate": 7.358138039798697e-05, + "loss": 11.041, + "step": 26800 + }, + { + "epoch": 1.98, + "learning_rate": 7.353115488543561e-05, + "loss": 9.6997, + "step": 26850 + }, + { + "epoch": 1.98, + "learning_rate": 7.348092937288425e-05, + "loss": 9.6029, + "step": 26900 + }, + { + "epoch": 1.99, + "learning_rate": 7.34307038603329e-05, + "loss": 10.3322, + "step": 26950 + }, + { + "epoch": 1.99, + "learning_rate": 7.338047834778154e-05, + "loss": 9.9009, + "step": 27000 + }, + { + "epoch": 2.0, + "learning_rate": 7.333025283523019e-05, + "loss": 10.4815, + "step": 27050 + }, + { + "epoch": 2.0, + "learning_rate": 7.328002732267883e-05, + "loss": 11.7049, + "step": 27100 + }, + { + "epoch": 2.0, + "learning_rate": 7.322980181012748e-05, + "loss": 10.7831, + "step": 27150 + }, + { + "epoch": 2.01, + "learning_rate": 7.317957629757612e-05, + "loss": 8.735, + "step": 27200 + }, + { + "epoch": 2.01, + "learning_rate": 7.312935078502476e-05, + "loss": 9.4056, + "step": 27250 + }, + { + "epoch": 2.01, + "learning_rate": 7.307912527247342e-05, + "loss": 10.7689, + "step": 27300 + }, + { + "epoch": 2.02, + "learning_rate": 7.302889975992205e-05, + "loss": 9.5266, + "step": 27350 + }, + { + "epoch": 2.02, + "learning_rate": 7.29786742473707e-05, + "loss": 8.2467, + "step": 27400 + }, + { + "epoch": 2.02, + "learning_rate": 7.292844873481934e-05, + "loss": 8.6572, + "step": 27450 + }, + { + "epoch": 2.03, + "learning_rate": 7.287822322226798e-05, + "loss": 8.4693, + "step": 27500 + }, + { + "epoch": 2.03, + "learning_rate": 7.282799770971663e-05, + "loss": 10.4867, + "step": 27550 + }, + { + "epoch": 2.04, + "learning_rate": 7.277777219716527e-05, + "loss": 8.9364, + "step": 27600 + }, + { + "epoch": 2.04, + "learning_rate": 7.272754668461393e-05, + "loss": 10.0109, + "step": 27650 + }, + { + "epoch": 2.04, + 
"learning_rate": 7.267732117206256e-05, + "loss": 9.5535, + "step": 27700 + }, + { + "epoch": 2.05, + "learning_rate": 7.262709565951122e-05, + "loss": 9.3029, + "step": 27750 + }, + { + "epoch": 2.05, + "learning_rate": 7.257687014695985e-05, + "loss": 9.854, + "step": 27800 + }, + { + "epoch": 2.05, + "learning_rate": 7.25266446344085e-05, + "loss": 9.5327, + "step": 27850 + }, + { + "epoch": 2.06, + "learning_rate": 7.247641912185714e-05, + "loss": 9.8255, + "step": 27900 + }, + { + "epoch": 2.06, + "learning_rate": 7.242619360930578e-05, + "loss": 9.9737, + "step": 27950 + }, + { + "epoch": 2.07, + "learning_rate": 7.237596809675444e-05, + "loss": 9.0471, + "step": 28000 + }, + { + "epoch": 2.07, + "learning_rate": 7.232574258420307e-05, + "loss": 10.0566, + "step": 28050 + }, + { + "epoch": 2.07, + "learning_rate": 7.227551707165173e-05, + "loss": 9.4781, + "step": 28100 + }, + { + "epoch": 2.08, + "learning_rate": 7.222529155910036e-05, + "loss": 8.7599, + "step": 28150 + }, + { + "epoch": 2.08, + "learning_rate": 7.217506604654901e-05, + "loss": 8.7605, + "step": 28200 + }, + { + "epoch": 2.08, + "learning_rate": 7.212484053399764e-05, + "loss": 10.061, + "step": 28250 + }, + { + "epoch": 2.09, + "learning_rate": 7.20746150214463e-05, + "loss": 9.6124, + "step": 28300 + }, + { + "epoch": 2.09, + "learning_rate": 7.202438950889495e-05, + "loss": 10.4776, + "step": 28350 + }, + { + "epoch": 2.09, + "learning_rate": 7.197416399634359e-05, + "loss": 9.2169, + "step": 28400 + }, + { + "epoch": 2.1, + "learning_rate": 7.192393848379223e-05, + "loss": 9.3654, + "step": 28450 + }, + { + "epoch": 2.1, + "learning_rate": 7.187371297124086e-05, + "loss": 9.4445, + "step": 28500 + }, + { + "epoch": 2.11, + "learning_rate": 7.182348745868952e-05, + "loss": 8.3614, + "step": 28550 + }, + { + "epoch": 2.11, + "learning_rate": 7.177326194613815e-05, + "loss": 9.1661, + "step": 28600 + }, + { + "epoch": 2.11, + "learning_rate": 7.172303643358681e-05, + "loss": 9.4976, + 
"step": 28650 + }, + { + "epoch": 2.12, + "learning_rate": 7.167281092103545e-05, + "loss": 9.125, + "step": 28700 + }, + { + "epoch": 2.12, + "learning_rate": 7.16225854084841e-05, + "loss": 8.9051, + "step": 28750 + }, + { + "epoch": 2.12, + "learning_rate": 7.157235989593274e-05, + "loss": 8.9753, + "step": 28800 + }, + { + "epoch": 2.13, + "learning_rate": 7.152213438338139e-05, + "loss": 9.133, + "step": 28850 + }, + { + "epoch": 2.13, + "learning_rate": 7.147190887083003e-05, + "loss": 9.9677, + "step": 28900 + }, + { + "epoch": 2.14, + "learning_rate": 7.142168335827867e-05, + "loss": 8.725, + "step": 28950 + }, + { + "epoch": 2.14, + "learning_rate": 7.137145784572732e-05, + "loss": 8.831, + "step": 29000 + }, + { + "epoch": 2.14, + "learning_rate": 7.132123233317596e-05, + "loss": 7.8207, + "step": 29050 + }, + { + "epoch": 2.15, + "learning_rate": 7.127100682062461e-05, + "loss": 9.3707, + "step": 29100 + }, + { + "epoch": 2.15, + "learning_rate": 7.122078130807325e-05, + "loss": 10.4259, + "step": 29150 + }, + { + "epoch": 2.15, + "learning_rate": 7.11705557955219e-05, + "loss": 8.1836, + "step": 29200 + }, + { + "epoch": 2.16, + "learning_rate": 7.112033028297054e-05, + "loss": 9.0874, + "step": 29250 + }, + { + "epoch": 2.16, + "learning_rate": 7.107010477041918e-05, + "loss": 9.5957, + "step": 29300 + }, + { + "epoch": 2.16, + "learning_rate": 7.101987925786783e-05, + "loss": 8.7545, + "step": 29350 + }, + { + "epoch": 2.17, + "learning_rate": 7.096965374531647e-05, + "loss": 8.4478, + "step": 29400 + }, + { + "epoch": 2.17, + "learning_rate": 7.091942823276512e-05, + "loss": 8.601, + "step": 29450 + }, + { + "epoch": 2.18, + "learning_rate": 7.086920272021377e-05, + "loss": 9.6172, + "step": 29500 + }, + { + "epoch": 2.18, + "learning_rate": 7.08189772076624e-05, + "loss": 9.0805, + "step": 29550 + }, + { + "epoch": 2.18, + "learning_rate": 7.076875169511106e-05, + "loss": 9.6039, + "step": 29600 + }, + { + "epoch": 2.19, + "learning_rate": 
7.071852618255969e-05, + "loss": 9.3622, + "step": 29650 + }, + { + "epoch": 2.19, + "learning_rate": 7.066830067000834e-05, + "loss": 8.8765, + "step": 29700 + }, + { + "epoch": 2.19, + "learning_rate": 7.061807515745699e-05, + "loss": 8.992, + "step": 29750 + }, + { + "epoch": 2.2, + "learning_rate": 7.056784964490562e-05, + "loss": 10.3564, + "step": 29800 + }, + { + "epoch": 2.2, + "learning_rate": 7.051762413235428e-05, + "loss": 8.8092, + "step": 29850 + }, + { + "epoch": 2.21, + "learning_rate": 7.046739861980291e-05, + "loss": 9.8373, + "step": 29900 + }, + { + "epoch": 2.21, + "learning_rate": 7.041717310725157e-05, + "loss": 8.004, + "step": 29950 + }, + { + "epoch": 2.21, + "learning_rate": 7.03669475947002e-05, + "loss": 9.4461, + "step": 30000 + }, + { + "epoch": 2.22, + "learning_rate": 7.031672208214886e-05, + "loss": 8.4964, + "step": 30050 + }, + { + "epoch": 2.22, + "learning_rate": 7.02664965695975e-05, + "loss": 10.3181, + "step": 30100 + }, + { + "epoch": 2.22, + "learning_rate": 7.021627105704615e-05, + "loss": 8.6637, + "step": 30150 + }, + { + "epoch": 2.23, + "learning_rate": 7.016604554449479e-05, + "loss": 10.1703, + "step": 30200 + }, + { + "epoch": 2.23, + "learning_rate": 7.011582003194342e-05, + "loss": 9.2846, + "step": 30250 + }, + { + "epoch": 2.24, + "learning_rate": 7.006559451939208e-05, + "loss": 8.5913, + "step": 30300 + }, + { + "epoch": 2.24, + "learning_rate": 7.001536900684071e-05, + "loss": 9.1308, + "step": 30350 + }, + { + "epoch": 2.24, + "learning_rate": 6.996514349428937e-05, + "loss": 11.2229, + "step": 30400 + }, + { + "epoch": 2.25, + "learning_rate": 6.991491798173801e-05, + "loss": 8.5923, + "step": 30450 + }, + { + "epoch": 2.25, + "learning_rate": 6.986469246918665e-05, + "loss": 9.9826, + "step": 30500 + }, + { + "epoch": 2.25, + "learning_rate": 6.98144669566353e-05, + "loss": 8.4765, + "step": 30550 + }, + { + "epoch": 2.26, + "learning_rate": 6.976424144408394e-05, + "loss": 8.7624, + "step": 30600 + }, + 
{ + "epoch": 2.26, + "learning_rate": 6.971401593153259e-05, + "loss": 9.238, + "step": 30650 + }, + { + "epoch": 2.26, + "learning_rate": 6.966379041898123e-05, + "loss": 8.4976, + "step": 30700 + }, + { + "epoch": 2.27, + "learning_rate": 6.961356490642987e-05, + "loss": 9.1886, + "step": 30750 + }, + { + "epoch": 2.27, + "learning_rate": 6.956333939387852e-05, + "loss": 8.4443, + "step": 30800 + }, + { + "epoch": 2.28, + "learning_rate": 6.951311388132716e-05, + "loss": 8.3648, + "step": 30850 + }, + { + "epoch": 2.28, + "learning_rate": 6.94628883687758e-05, + "loss": 9.2509, + "step": 30900 + }, + { + "epoch": 2.28, + "learning_rate": 6.941266285622445e-05, + "loss": 8.3765, + "step": 30950 + }, + { + "epoch": 2.29, + "learning_rate": 6.93624373436731e-05, + "loss": 9.6616, + "step": 31000 + }, + { + "epoch": 2.29, + "learning_rate": 6.931221183112174e-05, + "loss": 9.658, + "step": 31050 + }, + { + "epoch": 2.29, + "learning_rate": 6.926198631857038e-05, + "loss": 8.7527, + "step": 31100 + }, + { + "epoch": 2.3, + "learning_rate": 6.921176080601903e-05, + "loss": 8.7148, + "step": 31150 + }, + { + "epoch": 2.3, + "learning_rate": 6.916153529346767e-05, + "loss": 8.5962, + "step": 31200 + }, + { + "epoch": 2.31, + "learning_rate": 6.911130978091633e-05, + "loss": 9.2625, + "step": 31250 + }, + { + "epoch": 2.31, + "learning_rate": 6.906108426836496e-05, + "loss": 8.8352, + "step": 31300 + }, + { + "epoch": 2.31, + "learning_rate": 6.90108587558136e-05, + "loss": 7.3991, + "step": 31350 + }, + { + "epoch": 2.32, + "learning_rate": 6.896063324326225e-05, + "loss": 9.9391, + "step": 31400 + }, + { + "epoch": 2.32, + "learning_rate": 6.891040773071089e-05, + "loss": 8.9575, + "step": 31450 + }, + { + "epoch": 2.32, + "learning_rate": 6.886018221815954e-05, + "loss": 7.9103, + "step": 31500 + }, + { + "epoch": 2.33, + "learning_rate": 6.880995670560818e-05, + "loss": 8.5276, + "step": 31550 + }, + { + "epoch": 2.33, + "learning_rate": 6.875973119305684e-05, + 
"loss": 8.5427, + "step": 31600 + }, + { + "epoch": 2.33, + "learning_rate": 6.870950568050547e-05, + "loss": 8.4672, + "step": 31650 + }, + { + "epoch": 2.34, + "learning_rate": 6.865928016795412e-05, + "loss": 8.9638, + "step": 31700 + }, + { + "epoch": 2.34, + "learning_rate": 6.860905465540276e-05, + "loss": 8.3136, + "step": 31750 + }, + { + "epoch": 2.35, + "learning_rate": 6.855882914285141e-05, + "loss": 8.8076, + "step": 31800 + }, + { + "epoch": 2.35, + "learning_rate": 6.850860363030004e-05, + "loss": 8.6041, + "step": 31850 + }, + { + "epoch": 2.35, + "learning_rate": 6.845837811774869e-05, + "loss": 9.1751, + "step": 31900 + }, + { + "epoch": 2.36, + "learning_rate": 6.840815260519735e-05, + "loss": 8.5955, + "step": 31950 + }, + { + "epoch": 2.36, + "learning_rate": 6.835792709264598e-05, + "loss": 9.0927, + "step": 32000 + }, + { + "epoch": 2.36, + "learning_rate": 6.830770158009463e-05, + "loss": 7.9647, + "step": 32050 + }, + { + "epoch": 2.37, + "learning_rate": 6.825747606754326e-05, + "loss": 10.2647, + "step": 32100 + }, + { + "epoch": 2.37, + "learning_rate": 6.820725055499192e-05, + "loss": 8.3442, + "step": 32150 + }, + { + "epoch": 2.38, + "learning_rate": 6.815702504244057e-05, + "loss": 9.2019, + "step": 32200 + }, + { + "epoch": 2.38, + "learning_rate": 6.810679952988921e-05, + "loss": 8.345, + "step": 32250 + }, + { + "epoch": 2.38, + "learning_rate": 6.805657401733785e-05, + "loss": 9.1835, + "step": 32300 + }, + { + "epoch": 2.39, + "learning_rate": 6.80063485047865e-05, + "loss": 9.1846, + "step": 32350 + }, + { + "epoch": 2.39, + "learning_rate": 6.795612299223514e-05, + "loss": 9.0015, + "step": 32400 + }, + { + "epoch": 2.39, + "learning_rate": 6.790589747968379e-05, + "loss": 8.2404, + "step": 32450 + }, + { + "epoch": 2.4, + "learning_rate": 6.785567196713243e-05, + "loss": 8.8715, + "step": 32500 + }, + { + "epoch": 2.4, + "learning_rate": 6.780544645458107e-05, + "loss": 8.817, + "step": 32550 + }, + { + "epoch": 2.4, + 
"learning_rate": 6.775522094202972e-05, + "loss": 9.2154, + "step": 32600 + }, + { + "epoch": 2.41, + "learning_rate": 6.770499542947836e-05, + "loss": 9.1914, + "step": 32650 + }, + { + "epoch": 2.41, + "learning_rate": 6.7654769916927e-05, + "loss": 9.2804, + "step": 32700 + }, + { + "epoch": 2.42, + "learning_rate": 6.760454440437565e-05, + "loss": 9.177, + "step": 32750 + }, + { + "epoch": 2.42, + "learning_rate": 6.75543188918243e-05, + "loss": 8.8259, + "step": 32800 + }, + { + "epoch": 2.42, + "learning_rate": 6.750409337927294e-05, + "loss": 8.6121, + "step": 32850 + }, + { + "epoch": 2.43, + "learning_rate": 6.745386786672158e-05, + "loss": 8.644, + "step": 32900 + }, + { + "epoch": 2.43, + "learning_rate": 6.740364235417023e-05, + "loss": 8.5743, + "step": 32950 + }, + { + "epoch": 2.43, + "learning_rate": 6.735341684161888e-05, + "loss": 8.7636, + "step": 33000 + }, + { + "epoch": 2.44, + "learning_rate": 6.730319132906751e-05, + "loss": 8.3064, + "step": 33050 + }, + { + "epoch": 2.44, + "learning_rate": 6.725296581651616e-05, + "loss": 8.8806, + "step": 33100 + }, + { + "epoch": 2.45, + "learning_rate": 6.72027403039648e-05, + "loss": 8.8212, + "step": 33150 + }, + { + "epoch": 2.45, + "learning_rate": 6.715251479141345e-05, + "loss": 9.5261, + "step": 33200 + }, + { + "epoch": 2.45, + "learning_rate": 6.710228927886209e-05, + "loss": 9.0764, + "step": 33250 + }, + { + "epoch": 2.46, + "learning_rate": 6.705206376631073e-05, + "loss": 7.399, + "step": 33300 + }, + { + "epoch": 2.46, + "learning_rate": 6.700183825375939e-05, + "loss": 9.4119, + "step": 33350 + }, + { + "epoch": 2.46, + "learning_rate": 6.695161274120802e-05, + "loss": 8.4576, + "step": 33400 + }, + { + "epoch": 2.47, + "learning_rate": 6.690138722865668e-05, + "loss": 8.024, + "step": 33450 + }, + { + "epoch": 2.47, + "learning_rate": 6.685116171610531e-05, + "loss": 9.1605, + "step": 33500 + }, + { + "epoch": 2.47, + "learning_rate": 6.680093620355397e-05, + "loss": 8.3661, + "step": 
33550 + }, + { + "epoch": 2.48, + "learning_rate": 6.67507106910026e-05, + "loss": 8.4145, + "step": 33600 + }, + { + "epoch": 2.48, + "learning_rate": 6.670048517845124e-05, + "loss": 7.824, + "step": 33650 + }, + { + "epoch": 2.49, + "learning_rate": 6.66502596658999e-05, + "loss": 9.129, + "step": 33700 + }, + { + "epoch": 2.49, + "learning_rate": 6.660003415334853e-05, + "loss": 9.0876, + "step": 33750 + }, + { + "epoch": 2.49, + "learning_rate": 6.654980864079719e-05, + "loss": 8.6961, + "step": 33800 + }, + { + "epoch": 2.5, + "learning_rate": 6.649958312824582e-05, + "loss": 8.1584, + "step": 33850 + }, + { + "epoch": 2.5, + "learning_rate": 6.644935761569448e-05, + "loss": 8.6587, + "step": 33900 + }, + { + "epoch": 2.5, + "learning_rate": 6.639913210314311e-05, + "loss": 8.1059, + "step": 33950 + }, + { + "epoch": 2.51, + "learning_rate": 6.634890659059176e-05, + "loss": 9.2588, + "step": 34000 + }, + { + "epoch": 2.51, + "learning_rate": 6.629868107804041e-05, + "loss": 8.6443, + "step": 34050 + }, + { + "epoch": 2.52, + "learning_rate": 6.624845556548905e-05, + "loss": 8.8006, + "step": 34100 + }, + { + "epoch": 2.52, + "learning_rate": 6.61982300529377e-05, + "loss": 9.2288, + "step": 34150 + }, + { + "epoch": 2.52, + "learning_rate": 6.614800454038633e-05, + "loss": 9.0328, + "step": 34200 + }, + { + "epoch": 2.53, + "learning_rate": 6.609777902783499e-05, + "loss": 7.8269, + "step": 34250 + }, + { + "epoch": 2.53, + "learning_rate": 6.604755351528362e-05, + "loss": 8.5883, + "step": 34300 + }, + { + "epoch": 2.53, + "learning_rate": 6.599732800273227e-05, + "loss": 9.9388, + "step": 34350 + }, + { + "epoch": 2.54, + "learning_rate": 6.594710249018092e-05, + "loss": 8.6776, + "step": 34400 + }, + { + "epoch": 2.54, + "learning_rate": 6.589687697762956e-05, + "loss": 7.2287, + "step": 34450 + }, + { + "epoch": 2.54, + "learning_rate": 6.58466514650782e-05, + "loss": 7.7042, + "step": 34500 + }, + { + "epoch": 2.55, + "learning_rate": 
6.579642595252685e-05, + "loss": 9.0004, + "step": 34550 + }, + { + "epoch": 2.55, + "learning_rate": 6.57462004399755e-05, + "loss": 9.3279, + "step": 34600 + }, + { + "epoch": 2.56, + "learning_rate": 6.569597492742414e-05, + "loss": 8.9144, + "step": 34650 + }, + { + "epoch": 2.56, + "learning_rate": 6.564574941487278e-05, + "loss": 9.3319, + "step": 34700 + }, + { + "epoch": 2.56, + "learning_rate": 6.559552390232143e-05, + "loss": 9.4986, + "step": 34750 + }, + { + "epoch": 2.57, + "learning_rate": 6.554529838977007e-05, + "loss": 9.002, + "step": 34800 + }, + { + "epoch": 2.57, + "learning_rate": 6.549507287721871e-05, + "loss": 8.6061, + "step": 34850 + }, + { + "epoch": 2.57, + "learning_rate": 6.544484736466736e-05, + "loss": 7.4598, + "step": 34900 + }, + { + "epoch": 2.58, + "learning_rate": 6.5394621852116e-05, + "loss": 8.6618, + "step": 34950 + }, + { + "epoch": 2.58, + "learning_rate": 6.534439633956465e-05, + "loss": 9.0226, + "step": 35000 + }, + { + "epoch": 2.59, + "learning_rate": 6.529417082701329e-05, + "loss": 7.9738, + "step": 35050 + }, + { + "epoch": 2.59, + "learning_rate": 6.524394531446193e-05, + "loss": 8.7871, + "step": 35100 + }, + { + "epoch": 2.59, + "learning_rate": 6.519371980191058e-05, + "loss": 8.8744, + "step": 35150 + }, + { + "epoch": 2.6, + "learning_rate": 6.514349428935924e-05, + "loss": 8.3771, + "step": 35200 + }, + { + "epoch": 2.6, + "learning_rate": 6.509326877680787e-05, + "loss": 8.058, + "step": 35250 + }, + { + "epoch": 2.6, + "learning_rate": 6.504304326425652e-05, + "loss": 8.2627, + "step": 35300 + }, + { + "epoch": 2.61, + "learning_rate": 6.499281775170515e-05, + "loss": 8.1643, + "step": 35350 + }, + { + "epoch": 2.61, + "learning_rate": 6.49425922391538e-05, + "loss": 7.888, + "step": 35400 + }, + { + "epoch": 2.61, + "learning_rate": 6.489236672660246e-05, + "loss": 7.9235, + "step": 35450 + }, + { + "epoch": 2.62, + "learning_rate": 6.484214121405109e-05, + "loss": 8.1139, + "step": 35500 + }, + { + 
"epoch": 2.62, + "learning_rate": 6.479191570149974e-05, + "loss": 8.7467, + "step": 35550 + }, + { + "epoch": 2.63, + "learning_rate": 6.474169018894837e-05, + "loss": 7.4693, + "step": 35600 + }, + { + "epoch": 2.63, + "learning_rate": 6.469146467639703e-05, + "loss": 8.5167, + "step": 35650 + }, + { + "epoch": 2.63, + "learning_rate": 6.464123916384566e-05, + "loss": 9.5274, + "step": 35700 + }, + { + "epoch": 2.64, + "learning_rate": 6.459101365129432e-05, + "loss": 8.9735, + "step": 35750 + }, + { + "epoch": 2.64, + "learning_rate": 6.454078813874296e-05, + "loss": 8.1756, + "step": 35800 + }, + { + "epoch": 2.64, + "learning_rate": 6.449056262619161e-05, + "loss": 7.8084, + "step": 35850 + }, + { + "epoch": 2.65, + "learning_rate": 6.444033711364025e-05, + "loss": 8.2671, + "step": 35900 + }, + { + "epoch": 2.65, + "learning_rate": 6.439011160108888e-05, + "loss": 8.6628, + "step": 35950 + }, + { + "epoch": 2.66, + "learning_rate": 6.433988608853754e-05, + "loss": 9.8654, + "step": 36000 + }, + { + "epoch": 2.66, + "learning_rate": 6.428966057598617e-05, + "loss": 9.104, + "step": 36050 + }, + { + "epoch": 2.66, + "learning_rate": 6.423943506343483e-05, + "loss": 9.4156, + "step": 36100 + }, + { + "epoch": 2.67, + "learning_rate": 6.418920955088347e-05, + "loss": 8.9803, + "step": 36150 + }, + { + "epoch": 2.67, + "learning_rate": 6.413898403833212e-05, + "loss": 8.9584, + "step": 36200 + }, + { + "epoch": 2.67, + "learning_rate": 6.408875852578076e-05, + "loss": 7.3683, + "step": 36250 + }, + { + "epoch": 2.68, + "learning_rate": 6.40385330132294e-05, + "loss": 8.3277, + "step": 36300 + }, + { + "epoch": 2.68, + "learning_rate": 6.398830750067805e-05, + "loss": 9.3236, + "step": 36350 + }, + { + "epoch": 2.68, + "learning_rate": 6.393808198812669e-05, + "loss": 8.6918, + "step": 36400 + }, + { + "epoch": 2.69, + "learning_rate": 6.388785647557534e-05, + "loss": 8.9422, + "step": 36450 + }, + { + "epoch": 2.69, + "learning_rate": 6.383763096302398e-05, + 
"loss": 8.8438, + "step": 36500 + }, + { + "epoch": 2.7, + "learning_rate": 6.378740545047263e-05, + "loss": 8.7752, + "step": 36550 + }, + { + "epoch": 2.7, + "learning_rate": 6.373717993792127e-05, + "loss": 8.6483, + "step": 36600 + }, + { + "epoch": 2.7, + "learning_rate": 6.368695442536991e-05, + "loss": 8.5753, + "step": 36650 + }, + { + "epoch": 2.71, + "learning_rate": 6.363672891281856e-05, + "loss": 8.1893, + "step": 36700 + }, + { + "epoch": 2.71, + "learning_rate": 6.35865034002672e-05, + "loss": 8.189, + "step": 36750 + }, + { + "epoch": 2.71, + "learning_rate": 6.353627788771585e-05, + "loss": 8.2979, + "step": 36800 + }, + { + "epoch": 2.72, + "learning_rate": 6.348605237516449e-05, + "loss": 8.3904, + "step": 36850 + }, + { + "epoch": 2.72, + "learning_rate": 6.343582686261313e-05, + "loss": 9.3274, + "step": 36900 + }, + { + "epoch": 2.73, + "learning_rate": 6.338560135006179e-05, + "loss": 7.7663, + "step": 36950 + }, + { + "epoch": 2.73, + "learning_rate": 6.333537583751042e-05, + "loss": 8.2105, + "step": 37000 + }, + { + "epoch": 2.73, + "learning_rate": 6.328515032495907e-05, + "loss": 8.035, + "step": 37050 + }, + { + "epoch": 2.74, + "learning_rate": 6.323492481240771e-05, + "loss": 9.5032, + "step": 37100 + }, + { + "epoch": 2.74, + "learning_rate": 6.318469929985635e-05, + "loss": 8.3856, + "step": 37150 + }, + { + "epoch": 2.74, + "learning_rate": 6.3134473787305e-05, + "loss": 8.9941, + "step": 37200 + }, + { + "epoch": 2.75, + "learning_rate": 6.308424827475364e-05, + "loss": 8.3987, + "step": 37250 + }, + { + "epoch": 2.75, + "learning_rate": 6.30340227622023e-05, + "loss": 9.1753, + "step": 37300 + }, + { + "epoch": 2.76, + "learning_rate": 6.298379724965093e-05, + "loss": 7.9557, + "step": 37350 + }, + { + "epoch": 2.76, + "learning_rate": 6.293357173709959e-05, + "loss": 7.725, + "step": 37400 + }, + { + "epoch": 2.76, + "learning_rate": 6.288334622454822e-05, + "loss": 8.0807, + "step": 37450 + }, + { + "epoch": 2.77, + 
"learning_rate": 6.283312071199688e-05, + "loss": 8.6492, + "step": 37500 + }, + { + "epoch": 2.77, + "learning_rate": 6.27828951994455e-05, + "loss": 8.4716, + "step": 37550 + }, + { + "epoch": 2.77, + "learning_rate": 6.273266968689416e-05, + "loss": 8.7209, + "step": 37600 + }, + { + "epoch": 2.78, + "learning_rate": 6.268244417434281e-05, + "loss": 8.4902, + "step": 37650 + }, + { + "epoch": 2.78, + "learning_rate": 6.263221866179144e-05, + "loss": 7.9589, + "step": 37700 + }, + { + "epoch": 2.78, + "learning_rate": 6.25819931492401e-05, + "loss": 9.3285, + "step": 37750 + }, + { + "epoch": 2.79, + "learning_rate": 6.253176763668873e-05, + "loss": 9.0506, + "step": 37800 + }, + { + "epoch": 2.79, + "learning_rate": 6.248154212413738e-05, + "loss": 7.9992, + "step": 37850 + }, + { + "epoch": 2.8, + "learning_rate": 6.243131661158603e-05, + "loss": 8.029, + "step": 37900 + }, + { + "epoch": 2.8, + "learning_rate": 6.238109109903467e-05, + "loss": 8.6681, + "step": 37950 + }, + { + "epoch": 2.8, + "learning_rate": 6.233086558648332e-05, + "loss": 8.5906, + "step": 38000 + }, + { + "epoch": 2.81, + "learning_rate": 6.228064007393196e-05, + "loss": 10.4719, + "step": 38050 + }, + { + "epoch": 2.81, + "learning_rate": 6.22304145613806e-05, + "loss": 8.2759, + "step": 38100 + }, + { + "epoch": 2.81, + "learning_rate": 6.218018904882925e-05, + "loss": 8.2633, + "step": 38150 + }, + { + "epoch": 2.82, + "learning_rate": 6.212996353627789e-05, + "loss": 8.5218, + "step": 38200 + }, + { + "epoch": 2.82, + "learning_rate": 6.207973802372654e-05, + "loss": 8.0609, + "step": 38250 + }, + { + "epoch": 2.83, + "learning_rate": 6.202951251117518e-05, + "loss": 9.3672, + "step": 38300 + }, + { + "epoch": 2.83, + "learning_rate": 6.197928699862382e-05, + "loss": 10.1768, + "step": 38350 + }, + { + "epoch": 2.83, + "learning_rate": 6.192906148607247e-05, + "loss": 9.4389, + "step": 38400 + }, + { + "epoch": 2.84, + "learning_rate": 6.187883597352111e-05, + "loss": 7.6737, + 
"step": 38450 + }, + { + "epoch": 2.84, + "learning_rate": 6.182861046096976e-05, + "loss": 9.2337, + "step": 38500 + }, + { + "epoch": 2.84, + "learning_rate": 6.17783849484184e-05, + "loss": 8.7846, + "step": 38550 + }, + { + "epoch": 2.85, + "learning_rate": 6.172815943586704e-05, + "loss": 7.8709, + "step": 38600 + }, + { + "epoch": 2.85, + "learning_rate": 6.167793392331569e-05, + "loss": 8.8688, + "step": 38650 + }, + { + "epoch": 2.85, + "learning_rate": 6.162770841076435e-05, + "loss": 8.4087, + "step": 38700 + }, + { + "epoch": 2.86, + "learning_rate": 6.157748289821298e-05, + "loss": 7.7129, + "step": 38750 + }, + { + "epoch": 2.86, + "learning_rate": 6.152725738566162e-05, + "loss": 9.3196, + "step": 38800 + }, + { + "epoch": 2.87, + "learning_rate": 6.147703187311027e-05, + "loss": 8.8242, + "step": 38850 + }, + { + "epoch": 2.87, + "learning_rate": 6.142680636055891e-05, + "loss": 8.4237, + "step": 38900 + }, + { + "epoch": 2.87, + "learning_rate": 6.137658084800755e-05, + "loss": 8.9383, + "step": 38950 + }, + { + "epoch": 2.88, + "learning_rate": 6.13263553354562e-05, + "loss": 8.3749, + "step": 39000 + }, + { + "epoch": 2.88, + "learning_rate": 6.127612982290485e-05, + "loss": 8.8894, + "step": 39050 + }, + { + "epoch": 2.88, + "learning_rate": 6.122590431035349e-05, + "loss": 8.2975, + "step": 39100 + }, + { + "epoch": 2.89, + "learning_rate": 6.117567879780214e-05, + "loss": 8.0517, + "step": 39150 + }, + { + "epoch": 2.89, + "learning_rate": 6.112545328525077e-05, + "loss": 8.0154, + "step": 39200 + }, + { + "epoch": 2.9, + "learning_rate": 6.107522777269943e-05, + "loss": 8.4887, + "step": 39250 + }, + { + "epoch": 2.9, + "learning_rate": 6.102500226014807e-05, + "loss": 8.7064, + "step": 39300 + }, + { + "epoch": 2.9, + "learning_rate": 6.0974776747596706e-05, + "loss": 9.7375, + "step": 39350 + }, + { + "epoch": 2.91, + "learning_rate": 6.0924551235045357e-05, + "loss": 8.8614, + "step": 39400 + }, + { + "epoch": 2.91, + "learning_rate": 
6.0874325722493994e-05, + "loss": 8.302, + "step": 39450 + }, + { + "epoch": 2.91, + "learning_rate": 6.0824100209942645e-05, + "loss": 7.8469, + "step": 39500 + }, + { + "epoch": 2.92, + "learning_rate": 6.077387469739129e-05, + "loss": 9.0706, + "step": 39550 + }, + { + "epoch": 2.92, + "learning_rate": 6.072364918483994e-05, + "loss": 9.1398, + "step": 39600 + }, + { + "epoch": 2.92, + "learning_rate": 6.067342367228858e-05, + "loss": 8.1838, + "step": 39650 + }, + { + "epoch": 2.93, + "learning_rate": 6.062319815973723e-05, + "loss": 9.2303, + "step": 39700 + }, + { + "epoch": 2.93, + "learning_rate": 6.0572972647185865e-05, + "loss": 8.3715, + "step": 39750 + }, + { + "epoch": 2.94, + "learning_rate": 6.0522747134634516e-05, + "loss": 8.409, + "step": 39800 + }, + { + "epoch": 2.94, + "learning_rate": 6.047252162208315e-05, + "loss": 8.6441, + "step": 39850 + }, + { + "epoch": 2.94, + "learning_rate": 6.04222961095318e-05, + "loss": 9.0975, + "step": 39900 + }, + { + "epoch": 2.95, + "learning_rate": 6.037207059698045e-05, + "loss": 8.0691, + "step": 39950 + }, + { + "epoch": 2.95, + "learning_rate": 6.0321845084429085e-05, + "loss": 8.6646, + "step": 40000 + }, + { + "epoch": 2.95, + "eval_loss": 8.163222312927246, + "eval_runtime": 957.6189, + "eval_samples_per_second": 13.678, + "eval_steps_per_second": 3.42, + "eval_wer": 0.22493805384066187, + "step": 40000 + }, + { + "epoch": 2.95, + "learning_rate": 6.0271619571877736e-05, + "loss": 8.4278, + "step": 40050 + }, + { + "epoch": 2.96, + "learning_rate": 6.022139405932637e-05, + "loss": 8.1656, + "step": 40100 + }, + { + "epoch": 2.96, + "learning_rate": 6.0171168546775024e-05, + "loss": 7.7975, + "step": 40150 + }, + { + "epoch": 2.97, + "learning_rate": 6.012094303422366e-05, + "loss": 7.5465, + "step": 40200 + }, + { + "epoch": 2.97, + "learning_rate": 6.007071752167231e-05, + "loss": 8.3986, + "step": 40250 + }, + { + "epoch": 2.97, + "learning_rate": 6.0020492009120956e-05, + "loss": 8.3762, + "step": 
40300 + }, + { + "epoch": 2.98, + "learning_rate": 5.997026649656961e-05, + "loss": 8.6175, + "step": 40350 + }, + { + "epoch": 2.98, + "learning_rate": 5.9920040984018244e-05, + "loss": 8.5622, + "step": 40400 + }, + { + "epoch": 2.98, + "learning_rate": 5.9869815471466895e-05, + "loss": 8.1824, + "step": 40450 + }, + { + "epoch": 2.99, + "learning_rate": 5.981958995891553e-05, + "loss": 7.2886, + "step": 40500 + }, + { + "epoch": 2.99, + "learning_rate": 5.9769364446364177e-05, + "loss": 8.3469, + "step": 40550 + }, + { + "epoch": 2.99, + "learning_rate": 5.971913893381282e-05, + "loss": 8.6257, + "step": 40600 + }, + { + "epoch": 3.0, + "learning_rate": 5.9668913421261465e-05, + "loss": 7.7071, + "step": 40650 + }, + { + "epoch": 3.0, + "learning_rate": 5.9618687908710116e-05, + "loss": 7.8413, + "step": 40700 + }, + { + "epoch": 3.01, + "learning_rate": 5.956846239615875e-05, + "loss": 7.6704, + "step": 40750 + }, + { + "epoch": 3.01, + "learning_rate": 5.9518236883607404e-05, + "loss": 7.3902, + "step": 40800 + }, + { + "epoch": 3.01, + "learning_rate": 5.946801137105604e-05, + "loss": 8.3296, + "step": 40850 + }, + { + "epoch": 3.02, + "learning_rate": 5.941778585850469e-05, + "loss": 7.0884, + "step": 40900 + }, + { + "epoch": 3.02, + "learning_rate": 5.9367560345953336e-05, + "loss": 7.043, + "step": 40950 + }, + { + "epoch": 3.02, + "learning_rate": 5.931733483340198e-05, + "loss": 7.5367, + "step": 41000 + }, + { + "epoch": 3.03, + "learning_rate": 5.9267109320850624e-05, + "loss": 8.3064, + "step": 41050 + }, + { + "epoch": 3.03, + "learning_rate": 5.921688380829926e-05, + "loss": 7.6769, + "step": 41100 + }, + { + "epoch": 3.04, + "learning_rate": 5.916665829574791e-05, + "loss": 9.067, + "step": 41150 + }, + { + "epoch": 3.04, + "learning_rate": 5.911643278319655e-05, + "loss": 8.3565, + "step": 41200 + }, + { + "epoch": 3.04, + "learning_rate": 5.90662072706452e-05, + "loss": 7.8335, + "step": 41250 + }, + { + "epoch": 3.05, + "learning_rate": 
5.9015981758093844e-05, + "loss": 7.9617, + "step": 41300 + }, + { + "epoch": 3.05, + "learning_rate": 5.8965756245542495e-05, + "loss": 8.6728, + "step": 41350 + }, + { + "epoch": 3.05, + "learning_rate": 5.891553073299113e-05, + "loss": 7.9142, + "step": 41400 + }, + { + "epoch": 3.06, + "learning_rate": 5.886530522043978e-05, + "loss": 7.7702, + "step": 41450 + }, + { + "epoch": 3.06, + "learning_rate": 5.881507970788842e-05, + "loss": 8.2997, + "step": 41500 + }, + { + "epoch": 3.06, + "learning_rate": 5.876485419533707e-05, + "loss": 8.1519, + "step": 41550 + }, + { + "epoch": 3.07, + "learning_rate": 5.871462868278571e-05, + "loss": 7.3762, + "step": 41600 + }, + { + "epoch": 3.07, + "learning_rate": 5.866440317023435e-05, + "loss": 7.5129, + "step": 41650 + }, + { + "epoch": 3.08, + "learning_rate": 5.8614177657683e-05, + "loss": 8.2537, + "step": 41700 + }, + { + "epoch": 3.08, + "learning_rate": 5.856395214513164e-05, + "loss": 8.4148, + "step": 41750 + }, + { + "epoch": 3.08, + "learning_rate": 5.851372663258029e-05, + "loss": 7.1737, + "step": 41800 + }, + { + "epoch": 3.09, + "learning_rate": 5.846350112002893e-05, + "loss": 7.2628, + "step": 41850 + }, + { + "epoch": 3.09, + "learning_rate": 5.841327560747758e-05, + "loss": 7.2933, + "step": 41900 + }, + { + "epoch": 3.09, + "learning_rate": 5.836305009492622e-05, + "loss": 7.7675, + "step": 41950 + }, + { + "epoch": 3.1, + "learning_rate": 5.831282458237487e-05, + "loss": 8.2344, + "step": 42000 + }, + { + "epoch": 3.1, + "learning_rate": 5.826259906982351e-05, + "loss": 7.1329, + "step": 42050 + }, + { + "epoch": 3.11, + "learning_rate": 5.821237355727216e-05, + "loss": 7.3924, + "step": 42100 + }, + { + "epoch": 3.11, + "learning_rate": 5.81621480447208e-05, + "loss": 6.6189, + "step": 42150 + }, + { + "epoch": 3.11, + "learning_rate": 5.811192253216944e-05, + "loss": 7.3457, + "step": 42200 + }, + { + "epoch": 3.12, + "learning_rate": 5.806169701961809e-05, + "loss": 8.9924, + "step": 42250 + }, + 
{ + "epoch": 3.12, + "learning_rate": 5.8011471507066725e-05, + "loss": 7.6315, + "step": 42300 + }, + { + "epoch": 3.12, + "learning_rate": 5.7961245994515376e-05, + "loss": 8.4726, + "step": 42350 + }, + { + "epoch": 3.13, + "learning_rate": 5.791102048196402e-05, + "loss": 7.1755, + "step": 42400 + }, + { + "epoch": 3.13, + "learning_rate": 5.786079496941267e-05, + "loss": 7.5716, + "step": 42450 + }, + { + "epoch": 3.13, + "learning_rate": 5.781056945686131e-05, + "loss": 7.938, + "step": 42500 + }, + { + "epoch": 3.14, + "learning_rate": 5.776034394430996e-05, + "loss": 7.3833, + "step": 42550 + }, + { + "epoch": 3.14, + "learning_rate": 5.7710118431758596e-05, + "loss": 6.4276, + "step": 42600 + }, + { + "epoch": 3.15, + "learning_rate": 5.765989291920725e-05, + "loss": 6.8907, + "step": 42650 + }, + { + "epoch": 3.15, + "learning_rate": 5.7609667406655884e-05, + "loss": 7.7592, + "step": 42700 + }, + { + "epoch": 3.15, + "learning_rate": 5.755944189410453e-05, + "loss": 7.4997, + "step": 42750 + }, + { + "epoch": 3.16, + "learning_rate": 5.750921638155318e-05, + "loss": 7.2821, + "step": 42800 + }, + { + "epoch": 3.16, + "learning_rate": 5.745899086900182e-05, + "loss": 7.4861, + "step": 42850 + }, + { + "epoch": 3.16, + "learning_rate": 5.740876535645047e-05, + "loss": 7.9266, + "step": 42900 + }, + { + "epoch": 3.17, + "learning_rate": 5.7358539843899105e-05, + "loss": 7.6244, + "step": 42950 + }, + { + "epoch": 3.17, + "learning_rate": 5.7308314331347756e-05, + "loss": 7.382, + "step": 43000 + }, + { + "epoch": 3.18, + "learning_rate": 5.725808881879639e-05, + "loss": 8.1925, + "step": 43050 + }, + { + "epoch": 3.18, + "learning_rate": 5.7207863306245044e-05, + "loss": 8.3185, + "step": 43100 + }, + { + "epoch": 3.18, + "learning_rate": 5.715763779369369e-05, + "loss": 7.091, + "step": 43150 + }, + { + "epoch": 3.19, + "learning_rate": 5.710741228114234e-05, + "loss": 7.8352, + "step": 43200 + }, + { + "epoch": 3.19, + "learning_rate": 
5.7057186768590976e-05, + "loss": 6.6085, + "step": 43250 + }, + { + "epoch": 3.19, + "learning_rate": 5.700696125603963e-05, + "loss": 7.8052, + "step": 43300 + }, + { + "epoch": 3.2, + "learning_rate": 5.6956735743488264e-05, + "loss": 8.1999, + "step": 43350 + }, + { + "epoch": 3.2, + "learning_rate": 5.690651023093691e-05, + "loss": 7.2801, + "step": 43400 + }, + { + "epoch": 3.2, + "learning_rate": 5.685628471838555e-05, + "loss": 7.6289, + "step": 43450 + }, + { + "epoch": 3.21, + "learning_rate": 5.6806059205834196e-05, + "loss": 6.8215, + "step": 43500 + }, + { + "epoch": 3.21, + "learning_rate": 5.675583369328285e-05, + "loss": 7.1678, + "step": 43550 + }, + { + "epoch": 3.22, + "learning_rate": 5.6705608180731484e-05, + "loss": 7.6612, + "step": 43600 + }, + { + "epoch": 3.22, + "learning_rate": 5.6655382668180135e-05, + "loss": 7.8899, + "step": 43650 + }, + { + "epoch": 3.22, + "learning_rate": 5.660515715562877e-05, + "loss": 7.8546, + "step": 43700 + }, + { + "epoch": 3.23, + "learning_rate": 5.655493164307742e-05, + "loss": 7.319, + "step": 43750 + }, + { + "epoch": 3.23, + "learning_rate": 5.650470613052607e-05, + "loss": 7.3317, + "step": 43800 + }, + { + "epoch": 3.23, + "learning_rate": 5.645448061797471e-05, + "loss": 7.8875, + "step": 43850 + }, + { + "epoch": 3.24, + "learning_rate": 5.6404255105423355e-05, + "loss": 7.8145, + "step": 43900 + }, + { + "epoch": 3.24, + "learning_rate": 5.635402959287199e-05, + "loss": 7.0667, + "step": 43950 + }, + { + "epoch": 3.25, + "learning_rate": 5.6303804080320643e-05, + "loss": 7.7603, + "step": 44000 + }, + { + "epoch": 3.25, + "learning_rate": 5.625357856776928e-05, + "loss": 7.6111, + "step": 44050 + }, + { + "epoch": 3.25, + "learning_rate": 5.620335305521793e-05, + "loss": 7.9858, + "step": 44100 + }, + { + "epoch": 3.26, + "learning_rate": 5.6153127542666576e-05, + "loss": 8.9896, + "step": 44150 + }, + { + "epoch": 3.26, + "learning_rate": 5.6102902030115226e-05, + "loss": 8.4081, + "step": 44200 
+ }, + { + "epoch": 3.26, + "learning_rate": 5.6052676517563864e-05, + "loss": 7.4748, + "step": 44250 + }, + { + "epoch": 3.27, + "learning_rate": 5.6002451005012515e-05, + "loss": 8.2133, + "step": 44300 + }, + { + "epoch": 3.27, + "learning_rate": 5.595222549246115e-05, + "loss": 7.3073, + "step": 44350 + }, + { + "epoch": 3.28, + "learning_rate": 5.59019999799098e-05, + "loss": 7.9638, + "step": 44400 + }, + { + "epoch": 3.28, + "learning_rate": 5.585177446735844e-05, + "loss": 7.9653, + "step": 44450 + }, + { + "epoch": 3.28, + "learning_rate": 5.5801548954807084e-05, + "loss": 7.8583, + "step": 44500 + }, + { + "epoch": 3.29, + "learning_rate": 5.5751323442255735e-05, + "loss": 8.0561, + "step": 44550 + }, + { + "epoch": 3.29, + "learning_rate": 5.570109792970437e-05, + "loss": 8.1276, + "step": 44600 + }, + { + "epoch": 3.29, + "learning_rate": 5.565087241715302e-05, + "loss": 7.7357, + "step": 44650 + }, + { + "epoch": 3.3, + "learning_rate": 5.560064690460166e-05, + "loss": 7.7529, + "step": 44700 + }, + { + "epoch": 3.3, + "learning_rate": 5.555042139205031e-05, + "loss": 7.2583, + "step": 44750 + }, + { + "epoch": 3.3, + "learning_rate": 5.550019587949895e-05, + "loss": 6.4675, + "step": 44800 + }, + { + "epoch": 3.31, + "learning_rate": 5.54499703669476e-05, + "loss": 7.3658, + "step": 44850 + }, + { + "epoch": 3.31, + "learning_rate": 5.539974485439624e-05, + "loss": 8.278, + "step": 44900 + }, + { + "epoch": 3.32, + "learning_rate": 5.5349519341844894e-05, + "loss": 7.3867, + "step": 44950 + }, + { + "epoch": 3.32, + "learning_rate": 5.529929382929353e-05, + "loss": 7.4187, + "step": 45000 + }, + { + "epoch": 3.32, + "learning_rate": 5.524906831674217e-05, + "loss": 7.5281, + "step": 45050 + }, + { + "epoch": 3.33, + "learning_rate": 5.519884280419082e-05, + "loss": 7.8815, + "step": 45100 + }, + { + "epoch": 3.33, + "learning_rate": 5.514861729163946e-05, + "loss": 7.2487, + "step": 45150 + }, + { + "epoch": 3.33, + "learning_rate": 
5.509839177908811e-05, + "loss": 8.3441, + "step": 45200 + }, + { + "epoch": 3.34, + "learning_rate": 5.504816626653675e-05, + "loss": 7.4892, + "step": 45250 + }, + { + "epoch": 3.34, + "learning_rate": 5.49979407539854e-05, + "loss": 7.7789, + "step": 45300 + }, + { + "epoch": 3.35, + "learning_rate": 5.494771524143404e-05, + "loss": 7.3951, + "step": 45350 + }, + { + "epoch": 3.35, + "learning_rate": 5.489748972888269e-05, + "loss": 7.8756, + "step": 45400 + }, + { + "epoch": 3.35, + "learning_rate": 5.484726421633133e-05, + "loss": 7.9274, + "step": 45450 + }, + { + "epoch": 3.36, + "learning_rate": 5.479703870377998e-05, + "loss": 8.1525, + "step": 45500 + }, + { + "epoch": 3.36, + "learning_rate": 5.4746813191228616e-05, + "loss": 7.5597, + "step": 45550 + }, + { + "epoch": 3.36, + "learning_rate": 5.469658767867726e-05, + "loss": 7.8939, + "step": 45600 + }, + { + "epoch": 3.37, + "learning_rate": 5.464636216612591e-05, + "loss": 6.1451, + "step": 45650 + }, + { + "epoch": 3.37, + "learning_rate": 5.459613665357455e-05, + "loss": 7.224, + "step": 45700 + }, + { + "epoch": 3.37, + "learning_rate": 5.45459111410232e-05, + "loss": 7.2489, + "step": 45750 + }, + { + "epoch": 3.38, + "learning_rate": 5.4495685628471836e-05, + "loss": 7.4162, + "step": 45800 + }, + { + "epoch": 3.38, + "learning_rate": 5.444546011592049e-05, + "loss": 6.8503, + "step": 45850 + }, + { + "epoch": 3.39, + "learning_rate": 5.4395234603369124e-05, + "loss": 6.7087, + "step": 45900 + }, + { + "epoch": 3.39, + "learning_rate": 5.4345009090817775e-05, + "loss": 6.9697, + "step": 45950 + }, + { + "epoch": 3.39, + "learning_rate": 5.429478357826642e-05, + "loss": 7.8369, + "step": 46000 + }, + { + "epoch": 3.4, + "learning_rate": 5.424455806571507e-05, + "loss": 7.7567, + "step": 46050 + }, + { + "epoch": 3.4, + "learning_rate": 5.419433255316371e-05, + "loss": 6.6241, + "step": 46100 + }, + { + "epoch": 3.4, + "learning_rate": 5.414410704061236e-05, + "loss": 7.5218, + "step": 46150 + }, + 
{ + "epoch": 3.41, + "learning_rate": 5.4093881528060995e-05, + "loss": 7.2338, + "step": 46200 + }, + { + "epoch": 3.41, + "learning_rate": 5.404365601550964e-05, + "loss": 7.0707, + "step": 46250 + }, + { + "epoch": 3.42, + "learning_rate": 5.3993430502958283e-05, + "loss": 7.6922, + "step": 46300 + }, + { + "epoch": 3.42, + "learning_rate": 5.394320499040693e-05, + "loss": 8.6056, + "step": 46350 + }, + { + "epoch": 3.42, + "learning_rate": 5.389297947785558e-05, + "loss": 7.4641, + "step": 46400 + }, + { + "epoch": 3.43, + "learning_rate": 5.3842753965304216e-05, + "loss": 7.1716, + "step": 46450 + }, + { + "epoch": 3.43, + "learning_rate": 5.3792528452752866e-05, + "loss": 7.6382, + "step": 46500 + }, + { + "epoch": 3.43, + "learning_rate": 5.3742302940201504e-05, + "loss": 7.0739, + "step": 46550 + }, + { + "epoch": 3.44, + "learning_rate": 5.3692077427650155e-05, + "loss": 7.8667, + "step": 46600 + }, + { + "epoch": 3.44, + "learning_rate": 5.36418519150988e-05, + "loss": 7.559, + "step": 46650 + }, + { + "epoch": 3.44, + "learning_rate": 5.359162640254744e-05, + "loss": 7.6078, + "step": 46700 + }, + { + "epoch": 3.45, + "learning_rate": 5.354140088999609e-05, + "loss": 7.7994, + "step": 46750 + }, + { + "epoch": 3.45, + "learning_rate": 5.3491175377444724e-05, + "loss": 7.0418, + "step": 46800 + }, + { + "epoch": 3.46, + "learning_rate": 5.3440949864893375e-05, + "loss": 7.3261, + "step": 46850 + }, + { + "epoch": 3.46, + "learning_rate": 5.339072435234201e-05, + "loss": 7.9914, + "step": 46900 + }, + { + "epoch": 3.46, + "learning_rate": 5.334049883979066e-05, + "loss": 7.1998, + "step": 46950 + }, + { + "epoch": 3.47, + "learning_rate": 5.329027332723931e-05, + "loss": 7.3343, + "step": 47000 + }, + { + "epoch": 3.47, + "learning_rate": 5.324004781468796e-05, + "loss": 8.1604, + "step": 47050 + }, + { + "epoch": 3.47, + "learning_rate": 5.3189822302136595e-05, + "loss": 7.5405, + "step": 47100 + }, + { + "epoch": 3.48, + "learning_rate": 
5.3139596789585246e-05, + "loss": 7.9409, + "step": 47150 + }, + { + "epoch": 3.48, + "learning_rate": 5.308937127703388e-05, + "loss": 8.0573, + "step": 47200 + }, + { + "epoch": 3.49, + "learning_rate": 5.3039145764482534e-05, + "loss": 7.2927, + "step": 47250 + }, + { + "epoch": 3.49, + "learning_rate": 5.298892025193117e-05, + "loss": 6.9476, + "step": 47300 + }, + { + "epoch": 3.49, + "learning_rate": 5.2938694739379815e-05, + "loss": 7.1999, + "step": 47350 + }, + { + "epoch": 3.5, + "learning_rate": 5.2888469226828466e-05, + "loss": 7.8224, + "step": 47400 + }, + { + "epoch": 3.5, + "learning_rate": 5.2838243714277103e-05, + "loss": 8.1369, + "step": 47450 + }, + { + "epoch": 3.5, + "learning_rate": 5.2788018201725754e-05, + "loss": 6.7302, + "step": 47500 + }, + { + "epoch": 3.51, + "learning_rate": 5.273779268917439e-05, + "loss": 8.0819, + "step": 47550 + }, + { + "epoch": 3.51, + "learning_rate": 5.268756717662304e-05, + "loss": 7.8832, + "step": 47600 + }, + { + "epoch": 3.51, + "learning_rate": 5.263734166407168e-05, + "loss": 8.4479, + "step": 47650 + }, + { + "epoch": 3.52, + "learning_rate": 5.258711615152033e-05, + "loss": 7.7838, + "step": 47700 + }, + { + "epoch": 3.52, + "learning_rate": 5.2536890638968975e-05, + "loss": 8.3843, + "step": 47750 + }, + { + "epoch": 3.53, + "learning_rate": 5.2486665126417625e-05, + "loss": 6.9055, + "step": 47800 + }, + { + "epoch": 3.53, + "learning_rate": 5.243643961386626e-05, + "loss": 6.6339, + "step": 47850 + }, + { + "epoch": 3.53, + "learning_rate": 5.23862141013149e-05, + "loss": 7.0316, + "step": 47900 + }, + { + "epoch": 3.54, + "learning_rate": 5.233598858876355e-05, + "loss": 7.4569, + "step": 47950 + }, + { + "epoch": 3.54, + "learning_rate": 5.228576307621219e-05, + "loss": 7.6204, + "step": 48000 + }, + { + "epoch": 3.54, + "learning_rate": 5.223553756366084e-05, + "loss": 7.1085, + "step": 48050 + }, + { + "epoch": 3.55, + "learning_rate": 5.218531205110948e-05, + "loss": 7.7254, + "step": 48100 
+ }, + { + "epoch": 3.55, + "learning_rate": 5.2135086538558134e-05, + "loss": 7.1486, + "step": 48150 + }, + { + "epoch": 3.56, + "learning_rate": 5.208486102600677e-05, + "loss": 6.9297, + "step": 48200 + }, + { + "epoch": 3.56, + "learning_rate": 5.203463551345542e-05, + "loss": 7.5314, + "step": 48250 + }, + { + "epoch": 3.56, + "learning_rate": 5.198441000090406e-05, + "loss": 7.68, + "step": 48300 + }, + { + "epoch": 3.57, + "learning_rate": 5.193418448835271e-05, + "loss": 7.9467, + "step": 48350 + }, + { + "epoch": 3.57, + "learning_rate": 5.188395897580135e-05, + "loss": 6.7188, + "step": 48400 + }, + { + "epoch": 3.57, + "learning_rate": 5.183373346325e-05, + "loss": 7.7619, + "step": 48450 + }, + { + "epoch": 3.58, + "learning_rate": 5.178350795069864e-05, + "loss": 7.6537, + "step": 48500 + }, + { + "epoch": 3.58, + "learning_rate": 5.173328243814728e-05, + "loss": 6.9593, + "step": 48550 + }, + { + "epoch": 3.58, + "learning_rate": 5.168305692559593e-05, + "loss": 7.4834, + "step": 48600 + }, + { + "epoch": 3.59, + "learning_rate": 5.163283141304457e-05, + "loss": 8.2864, + "step": 48650 + }, + { + "epoch": 3.59, + "learning_rate": 5.158260590049322e-05, + "loss": 7.234, + "step": 48700 + }, + { + "epoch": 3.6, + "learning_rate": 5.1532380387941856e-05, + "loss": 7.2513, + "step": 48750 + }, + { + "epoch": 3.6, + "learning_rate": 5.1482154875390506e-05, + "loss": 7.508, + "step": 48800 + }, + { + "epoch": 3.6, + "learning_rate": 5.143192936283915e-05, + "loss": 7.1513, + "step": 48850 + }, + { + "epoch": 3.61, + "learning_rate": 5.13817038502878e-05, + "loss": 7.8882, + "step": 48900 + }, + { + "epoch": 3.61, + "learning_rate": 5.133147833773644e-05, + "loss": 8.0859, + "step": 48950 + }, + { + "epoch": 3.61, + "learning_rate": 5.128125282518509e-05, + "loss": 7.5506, + "step": 49000 + }, + { + "epoch": 3.62, + "learning_rate": 5.123102731263373e-05, + "loss": 7.9777, + "step": 49050 + }, + { + "epoch": 3.62, + "learning_rate": 5.118080180008237e-05, + 
"loss": 8.3599, + "step": 49100 + }, + { + "epoch": 3.63, + "learning_rate": 5.1130576287531015e-05, + "loss": 7.105, + "step": 49150 + }, + { + "epoch": 3.63, + "learning_rate": 5.108035077497966e-05, + "loss": 8.2692, + "step": 49200 + }, + { + "epoch": 3.63, + "learning_rate": 5.103012526242831e-05, + "loss": 7.9098, + "step": 49250 + }, + { + "epoch": 3.64, + "learning_rate": 5.097989974987695e-05, + "loss": 7.1698, + "step": 49300 + }, + { + "epoch": 3.64, + "learning_rate": 5.09296742373256e-05, + "loss": 7.406, + "step": 49350 + }, + { + "epoch": 3.64, + "learning_rate": 5.0879448724774235e-05, + "loss": 8.3276, + "step": 49400 + }, + { + "epoch": 3.65, + "learning_rate": 5.0829223212222886e-05, + "loss": 7.5714, + "step": 49450 + }, + { + "epoch": 3.65, + "learning_rate": 5.077899769967153e-05, + "loss": 7.0839, + "step": 49500 + }, + { + "epoch": 3.65, + "learning_rate": 5.0728772187120174e-05, + "loss": 7.0589, + "step": 49550 + }, + { + "epoch": 3.66, + "learning_rate": 5.067854667456882e-05, + "loss": 7.4998, + "step": 49600 + }, + { + "epoch": 3.66, + "learning_rate": 5.0628321162017455e-05, + "loss": 7.3495, + "step": 49650 + }, + { + "epoch": 3.67, + "learning_rate": 5.0578095649466106e-05, + "loss": 7.5101, + "step": 49700 + }, + { + "epoch": 3.67, + "learning_rate": 5.0527870136914743e-05, + "loss": 6.7707, + "step": 49750 + }, + { + "epoch": 3.67, + "learning_rate": 5.0477644624363394e-05, + "loss": 7.5822, + "step": 49800 + }, + { + "epoch": 3.68, + "learning_rate": 5.042741911181204e-05, + "loss": 6.5937, + "step": 49850 + }, + { + "epoch": 3.68, + "learning_rate": 5.037719359926069e-05, + "loss": 7.2497, + "step": 49900 + }, + { + "epoch": 3.68, + "learning_rate": 5.0326968086709326e-05, + "loss": 7.35, + "step": 49950 + }, + { + "epoch": 3.69, + "learning_rate": 5.027674257415798e-05, + "loss": 7.767, + "step": 50000 + }, + { + "epoch": 3.69, + "learning_rate": 5.0226517061606615e-05, + "loss": 8.3228, + "step": 50050 + }, + { + "epoch": 3.7, 
+ "learning_rate": 5.0176291549055265e-05, + "loss": 7.6905, + "step": 50100 + }, + { + "epoch": 3.7, + "learning_rate": 5.01260660365039e-05, + "loss": 7.8275, + "step": 50150 + }, + { + "epoch": 3.7, + "learning_rate": 5.007584052395255e-05, + "loss": 8.0724, + "step": 50200 + }, + { + "epoch": 3.71, + "learning_rate": 5.00256150114012e-05, + "loss": 7.0501, + "step": 50250 + }, + { + "epoch": 3.71, + "learning_rate": 4.997538949884984e-05, + "loss": 7.4269, + "step": 50300 + }, + { + "epoch": 3.71, + "learning_rate": 4.9925163986298486e-05, + "loss": 7.5186, + "step": 50350 + }, + { + "epoch": 3.72, + "learning_rate": 4.987493847374713e-05, + "loss": 8.2606, + "step": 50400 + }, + { + "epoch": 3.72, + "learning_rate": 4.9824712961195774e-05, + "loss": 8.2097, + "step": 50450 + }, + { + "epoch": 3.73, + "learning_rate": 4.977448744864441e-05, + "loss": 7.468, + "step": 50500 + }, + { + "epoch": 3.73, + "learning_rate": 4.9724261936093055e-05, + "loss": 8.2075, + "step": 50550 + }, + { + "epoch": 3.73, + "learning_rate": 4.9674036423541706e-05, + "loss": 7.3928, + "step": 50600 + }, + { + "epoch": 3.74, + "learning_rate": 4.962381091099035e-05, + "loss": 7.2907, + "step": 50650 + }, + { + "epoch": 3.74, + "learning_rate": 4.9573585398438994e-05, + "loss": 7.706, + "step": 50700 + }, + { + "epoch": 3.74, + "learning_rate": 4.952335988588764e-05, + "loss": 7.301, + "step": 50750 + }, + { + "epoch": 3.75, + "learning_rate": 4.947313437333628e-05, + "loss": 6.9109, + "step": 50800 + }, + { + "epoch": 3.75, + "learning_rate": 4.9422908860784926e-05, + "loss": 6.6967, + "step": 50850 + }, + { + "epoch": 3.75, + "learning_rate": 4.937268334823357e-05, + "loss": 5.9484, + "step": 50900 + }, + { + "epoch": 3.76, + "learning_rate": 4.9322457835682214e-05, + "loss": 7.8288, + "step": 50950 + }, + { + "epoch": 3.76, + "learning_rate": 4.9272232323130865e-05, + "loss": 7.3987, + "step": 51000 + }, + { + "epoch": 3.77, + "learning_rate": 4.92220068105795e-05, + "loss": 7.3714, 
+ "step": 51050 + }, + { + "epoch": 3.77, + "learning_rate": 4.9171781298028147e-05, + "loss": 7.258, + "step": 51100 + }, + { + "epoch": 3.77, + "learning_rate": 4.912155578547679e-05, + "loss": 6.8541, + "step": 51150 + }, + { + "epoch": 3.78, + "learning_rate": 4.9071330272925435e-05, + "loss": 7.085, + "step": 51200 + }, + { + "epoch": 3.78, + "learning_rate": 4.902110476037408e-05, + "loss": 6.7827, + "step": 51250 + }, + { + "epoch": 3.78, + "learning_rate": 4.897087924782273e-05, + "loss": 6.6806, + "step": 51300 + }, + { + "epoch": 3.79, + "learning_rate": 4.8920653735271374e-05, + "loss": 7.2918, + "step": 51350 + }, + { + "epoch": 3.79, + "learning_rate": 4.887042822272002e-05, + "loss": 7.9022, + "step": 51400 + }, + { + "epoch": 3.8, + "learning_rate": 4.882020271016866e-05, + "loss": 7.6094, + "step": 51450 + }, + { + "epoch": 3.8, + "learning_rate": 4.8769977197617306e-05, + "loss": 8.1048, + "step": 51500 + }, + { + "epoch": 3.8, + "learning_rate": 4.871975168506595e-05, + "loss": 6.9056, + "step": 51550 + }, + { + "epoch": 3.81, + "learning_rate": 4.866952617251459e-05, + "loss": 6.4347, + "step": 51600 + }, + { + "epoch": 3.81, + "learning_rate": 4.861930065996324e-05, + "loss": 7.307, + "step": 51650 + }, + { + "epoch": 3.81, + "learning_rate": 4.856907514741188e-05, + "loss": 7.649, + "step": 51700 + }, + { + "epoch": 3.82, + "learning_rate": 4.8518849634860526e-05, + "loss": 6.7706, + "step": 51750 + }, + { + "epoch": 3.82, + "learning_rate": 4.846862412230917e-05, + "loss": 6.7943, + "step": 51800 + }, + { + "epoch": 3.82, + "learning_rate": 4.8418398609757814e-05, + "loss": 7.654, + "step": 51850 + }, + { + "epoch": 3.83, + "learning_rate": 4.836817309720646e-05, + "loss": 7.6245, + "step": 51900 + }, + { + "epoch": 3.83, + "learning_rate": 4.83179475846551e-05, + "loss": 7.8284, + "step": 51950 + }, + { + "epoch": 3.84, + "learning_rate": 4.8267722072103746e-05, + "loss": 6.9516, + "step": 52000 + }, + { + "epoch": 3.84, + "learning_rate": 
4.82174965595524e-05, + "loss": 7.1367, + "step": 52050 + }, + { + "epoch": 3.84, + "learning_rate": 4.816727104700104e-05, + "loss": 7.4153, + "step": 52100 + }, + { + "epoch": 3.85, + "learning_rate": 4.8117045534449685e-05, + "loss": 6.5358, + "step": 52150 + }, + { + "epoch": 3.85, + "learning_rate": 4.806682002189832e-05, + "loss": 7.5443, + "step": 52200 + }, + { + "epoch": 3.85, + "learning_rate": 4.8016594509346967e-05, + "loss": 7.8184, + "step": 52250 + }, + { + "epoch": 3.86, + "learning_rate": 4.796636899679561e-05, + "loss": 6.9702, + "step": 52300 + }, + { + "epoch": 3.86, + "learning_rate": 4.791614348424426e-05, + "loss": 8.3988, + "step": 52350 + }, + { + "epoch": 3.87, + "learning_rate": 4.7865917971692905e-05, + "loss": 8.1025, + "step": 52400 + }, + { + "epoch": 3.87, + "learning_rate": 4.781569245914155e-05, + "loss": 7.81, + "step": 52450 + }, + { + "epoch": 3.87, + "learning_rate": 4.7765466946590194e-05, + "loss": 6.6809, + "step": 52500 + }, + { + "epoch": 3.88, + "learning_rate": 4.771524143403884e-05, + "loss": 6.81, + "step": 52550 + }, + { + "epoch": 3.88, + "learning_rate": 4.766501592148748e-05, + "loss": 7.1717, + "step": 52600 + }, + { + "epoch": 3.88, + "learning_rate": 4.7614790408936126e-05, + "loss": 7.3114, + "step": 52650 + }, + { + "epoch": 3.89, + "learning_rate": 4.756456489638477e-05, + "loss": 7.2819, + "step": 52700 + }, + { + "epoch": 3.89, + "learning_rate": 4.7514339383833414e-05, + "loss": 6.6964, + "step": 52750 + }, + { + "epoch": 3.89, + "learning_rate": 4.746411387128206e-05, + "loss": 8.1118, + "step": 52800 + }, + { + "epoch": 3.9, + "learning_rate": 4.74138883587307e-05, + "loss": 8.1901, + "step": 52850 + }, + { + "epoch": 3.9, + "learning_rate": 4.7363662846179346e-05, + "loss": 6.8883, + "step": 52900 + }, + { + "epoch": 3.91, + "learning_rate": 4.731343733362799e-05, + "loss": 7.2554, + "step": 52950 + }, + { + "epoch": 3.91, + "learning_rate": 4.7263211821076634e-05, + "loss": 7.402, + "step": 53000 + }, 
+ { + "epoch": 3.91, + "learning_rate": 4.721298630852528e-05, + "loss": 8.8808, + "step": 53050 + }, + { + "epoch": 3.92, + "learning_rate": 4.716276079597393e-05, + "loss": 7.1652, + "step": 53100 + }, + { + "epoch": 3.92, + "learning_rate": 4.711253528342257e-05, + "loss": 6.884, + "step": 53150 + }, + { + "epoch": 3.92, + "learning_rate": 4.706230977087122e-05, + "loss": 7.4472, + "step": 53200 + }, + { + "epoch": 3.93, + "learning_rate": 4.701208425831986e-05, + "loss": 6.8787, + "step": 53250 + }, + { + "epoch": 3.93, + "learning_rate": 4.6961858745768505e-05, + "loss": 6.9316, + "step": 53300 + }, + { + "epoch": 3.94, + "learning_rate": 4.691163323321714e-05, + "loss": 7.1614, + "step": 53350 + }, + { + "epoch": 3.94, + "learning_rate": 4.6861407720665787e-05, + "loss": 7.193, + "step": 53400 + }, + { + "epoch": 3.94, + "learning_rate": 4.681118220811444e-05, + "loss": 7.5875, + "step": 53450 + }, + { + "epoch": 3.95, + "learning_rate": 4.676095669556308e-05, + "loss": 7.0836, + "step": 53500 + }, + { + "epoch": 3.95, + "learning_rate": 4.6710731183011725e-05, + "loss": 7.2054, + "step": 53550 + }, + { + "epoch": 3.95, + "learning_rate": 4.666050567046037e-05, + "loss": 6.95, + "step": 53600 + }, + { + "epoch": 3.96, + "learning_rate": 4.6610280157909014e-05, + "loss": 6.6366, + "step": 53650 + }, + { + "epoch": 3.96, + "learning_rate": 4.656005464535766e-05, + "loss": 6.7976, + "step": 53700 + }, + { + "epoch": 3.96, + "learning_rate": 4.65098291328063e-05, + "loss": 7.1371, + "step": 53750 + }, + { + "epoch": 3.97, + "learning_rate": 4.6459603620254946e-05, + "loss": 6.7457, + "step": 53800 + }, + { + "epoch": 3.97, + "learning_rate": 4.6409378107703597e-05, + "loss": 6.6139, + "step": 53850 + }, + { + "epoch": 3.98, + "learning_rate": 4.6359152595152234e-05, + "loss": 7.9291, + "step": 53900 + }, + { + "epoch": 3.98, + "learning_rate": 4.630892708260088e-05, + "loss": 7.4235, + "step": 53950 + }, + { + "epoch": 3.98, + "learning_rate": 
4.625870157004952e-05, + "loss": 6.8609, + "step": 54000 + }, + { + "epoch": 3.99, + "learning_rate": 4.6208476057498166e-05, + "loss": 6.6128, + "step": 54050 + }, + { + "epoch": 3.99, + "learning_rate": 4.615825054494681e-05, + "loss": 7.0313, + "step": 54100 + }, + { + "epoch": 3.99, + "learning_rate": 4.610802503239546e-05, + "loss": 7.3654, + "step": 54150 + }, + { + "epoch": 4.0, + "learning_rate": 4.6057799519844105e-05, + "loss": 8.266, + "step": 54200 + }, + { + "epoch": 4.0, + "learning_rate": 4.600757400729275e-05, + "loss": 7.9471, + "step": 54250 + }, + { + "epoch": 4.01, + "learning_rate": 4.595734849474139e-05, + "loss": 6.0877, + "step": 54300 + }, + { + "epoch": 4.01, + "learning_rate": 4.590712298219004e-05, + "loss": 6.7453, + "step": 54350 + }, + { + "epoch": 4.01, + "learning_rate": 4.585689746963868e-05, + "loss": 5.8985, + "step": 54400 + }, + { + "epoch": 4.02, + "learning_rate": 4.580667195708732e-05, + "loss": 7.4527, + "step": 54450 + }, + { + "epoch": 4.02, + "learning_rate": 4.575644644453597e-05, + "loss": 7.0419, + "step": 54500 + }, + { + "epoch": 4.02, + "learning_rate": 4.570622093198461e-05, + "loss": 6.281, + "step": 54550 + }, + { + "epoch": 4.03, + "learning_rate": 4.565599541943326e-05, + "loss": 6.6096, + "step": 54600 + }, + { + "epoch": 4.03, + "learning_rate": 4.56057699068819e-05, + "loss": 7.0341, + "step": 54650 + }, + { + "epoch": 4.03, + "learning_rate": 4.5555544394330545e-05, + "loss": 6.621, + "step": 54700 + }, + { + "epoch": 4.04, + "learning_rate": 4.550531888177919e-05, + "loss": 7.4405, + "step": 54750 + }, + { + "epoch": 4.04, + "learning_rate": 4.5455093369227834e-05, + "loss": 7.2506, + "step": 54800 + }, + { + "epoch": 4.05, + "learning_rate": 4.540486785667648e-05, + "loss": 7.1534, + "step": 54850 + }, + { + "epoch": 4.05, + "learning_rate": 4.535464234412513e-05, + "loss": 6.5829, + "step": 54900 + }, + { + "epoch": 4.05, + "learning_rate": 4.530441683157377e-05, + "loss": 7.0338, + "step": 54950 + }, + 
{ + "epoch": 4.06, + "learning_rate": 4.5254191319022417e-05, + "loss": 6.6234, + "step": 55000 + }, + { + "epoch": 4.06, + "learning_rate": 4.5203965806471054e-05, + "loss": 6.2412, + "step": 55050 + }, + { + "epoch": 4.06, + "learning_rate": 4.51537402939197e-05, + "loss": 6.3439, + "step": 55100 + }, + { + "epoch": 4.07, + "learning_rate": 4.510351478136834e-05, + "loss": 6.8272, + "step": 55150 + }, + { + "epoch": 4.07, + "learning_rate": 4.5053289268816986e-05, + "loss": 6.4758, + "step": 55200 + }, + { + "epoch": 4.08, + "learning_rate": 4.500306375626564e-05, + "loss": 6.434, + "step": 55250 + }, + { + "epoch": 4.08, + "learning_rate": 4.495283824371428e-05, + "loss": 6.5471, + "step": 55300 + }, + { + "epoch": 4.08, + "learning_rate": 4.4902612731162925e-05, + "loss": 6.5088, + "step": 55350 + }, + { + "epoch": 4.09, + "learning_rate": 4.485238721861157e-05, + "loss": 6.6941, + "step": 55400 + }, + { + "epoch": 4.09, + "learning_rate": 4.480216170606021e-05, + "loss": 6.3248, + "step": 55450 + }, + { + "epoch": 4.09, + "learning_rate": 4.475193619350886e-05, + "loss": 7.2989, + "step": 55500 + }, + { + "epoch": 4.1, + "learning_rate": 4.47017106809575e-05, + "loss": 7.0947, + "step": 55550 + }, + { + "epoch": 4.1, + "learning_rate": 4.4651485168406145e-05, + "loss": 6.4896, + "step": 55600 + }, + { + "epoch": 4.1, + "learning_rate": 4.460125965585479e-05, + "loss": 5.9249, + "step": 55650 + }, + { + "epoch": 4.11, + "learning_rate": 4.455103414330343e-05, + "loss": 6.7801, + "step": 55700 + }, + { + "epoch": 4.11, + "learning_rate": 4.450080863075208e-05, + "loss": 6.2216, + "step": 55750 + }, + { + "epoch": 4.12, + "learning_rate": 4.445058311820072e-05, + "loss": 6.8346, + "step": 55800 + }, + { + "epoch": 4.12, + "learning_rate": 4.4400357605649366e-05, + "loss": 7.2863, + "step": 55850 + }, + { + "epoch": 4.12, + "learning_rate": 4.435013209309801e-05, + "loss": 7.406, + "step": 55900 + }, + { + "epoch": 4.13, + "learning_rate": 4.429990658054666e-05, + 
"loss": 6.0165, + "step": 55950 + }, + { + "epoch": 4.13, + "learning_rate": 4.4249681067995304e-05, + "loss": 6.8052, + "step": 56000 + }, + { + "epoch": 4.13, + "learning_rate": 4.419945555544395e-05, + "loss": 6.679, + "step": 56050 + }, + { + "epoch": 4.14, + "learning_rate": 4.414923004289259e-05, + "loss": 6.2087, + "step": 56100 + }, + { + "epoch": 4.14, + "learning_rate": 4.4099004530341237e-05, + "loss": 6.5904, + "step": 56150 + }, + { + "epoch": 4.15, + "learning_rate": 4.4048779017789874e-05, + "loss": 6.4147, + "step": 56200 + }, + { + "epoch": 4.15, + "learning_rate": 4.399855350523852e-05, + "loss": 6.6465, + "step": 56250 + }, + { + "epoch": 4.15, + "learning_rate": 4.394832799268717e-05, + "loss": 7.11, + "step": 56300 + }, + { + "epoch": 4.16, + "learning_rate": 4.389810248013581e-05, + "loss": 7.0558, + "step": 56350 + }, + { + "epoch": 4.16, + "learning_rate": 4.384787696758446e-05, + "loss": 6.922, + "step": 56400 + }, + { + "epoch": 4.16, + "learning_rate": 4.37976514550331e-05, + "loss": 7.2125, + "step": 56450 + }, + { + "epoch": 4.17, + "learning_rate": 4.3747425942481745e-05, + "loss": 6.4719, + "step": 56500 + }, + { + "epoch": 4.17, + "learning_rate": 4.369720042993039e-05, + "loss": 7.204, + "step": 56550 + }, + { + "epoch": 4.17, + "learning_rate": 4.364697491737903e-05, + "loss": 7.2371, + "step": 56600 + }, + { + "epoch": 4.18, + "learning_rate": 4.359674940482768e-05, + "loss": 6.6131, + "step": 56650 + }, + { + "epoch": 4.18, + "learning_rate": 4.354652389227633e-05, + "loss": 6.6349, + "step": 56700 + }, + { + "epoch": 4.19, + "learning_rate": 4.3496298379724965e-05, + "loss": 5.9137, + "step": 56750 + }, + { + "epoch": 4.19, + "learning_rate": 4.344607286717361e-05, + "loss": 6.5402, + "step": 56800 + }, + { + "epoch": 4.19, + "learning_rate": 4.339584735462225e-05, + "loss": 7.3351, + "step": 56850 + }, + { + "epoch": 4.2, + "learning_rate": 4.33456218420709e-05, + "loss": 8.1387, + "step": 56900 + }, + { + "epoch": 4.2, + 
"learning_rate": 4.329539632951954e-05, + "loss": 7.0783, + "step": 56950 + }, + { + "epoch": 4.2, + "learning_rate": 4.324517081696819e-05, + "loss": 6.947, + "step": 57000 + }, + { + "epoch": 4.21, + "learning_rate": 4.3194945304416836e-05, + "loss": 6.1526, + "step": 57050 + }, + { + "epoch": 4.21, + "learning_rate": 4.314471979186548e-05, + "loss": 7.273, + "step": 57100 + }, + { + "epoch": 4.22, + "learning_rate": 4.3094494279314124e-05, + "loss": 7.0958, + "step": 57150 + }, + { + "epoch": 4.22, + "learning_rate": 4.304426876676277e-05, + "loss": 6.4413, + "step": 57200 + }, + { + "epoch": 4.22, + "learning_rate": 4.299404325421141e-05, + "loss": 6.597, + "step": 57250 + }, + { + "epoch": 4.23, + "learning_rate": 4.294381774166006e-05, + "loss": 6.6893, + "step": 57300 + }, + { + "epoch": 4.23, + "learning_rate": 4.28935922291087e-05, + "loss": 6.4746, + "step": 57350 + }, + { + "epoch": 4.23, + "learning_rate": 4.2843366716557345e-05, + "loss": 7.376, + "step": 57400 + }, + { + "epoch": 4.24, + "learning_rate": 4.279314120400599e-05, + "loss": 7.2823, + "step": 57450 + }, + { + "epoch": 4.24, + "learning_rate": 4.274291569145463e-05, + "loss": 6.3184, + "step": 57500 + }, + { + "epoch": 4.25, + "learning_rate": 4.269269017890328e-05, + "loss": 6.4526, + "step": 57550 + }, + { + "epoch": 4.25, + "learning_rate": 4.264246466635192e-05, + "loss": 6.7892, + "step": 57600 + }, + { + "epoch": 4.25, + "learning_rate": 4.2592239153800565e-05, + "loss": 6.2082, + "step": 57650 + }, + { + "epoch": 4.26, + "learning_rate": 4.254201364124921e-05, + "loss": 7.1488, + "step": 57700 + }, + { + "epoch": 4.26, + "learning_rate": 4.249178812869786e-05, + "loss": 6.9399, + "step": 57750 + }, + { + "epoch": 4.26, + "learning_rate": 4.2441562616146504e-05, + "loss": 6.8596, + "step": 57800 + }, + { + "epoch": 4.27, + "learning_rate": 4.239133710359515e-05, + "loss": 6.8899, + "step": 57850 + }, + { + "epoch": 4.27, + "learning_rate": 4.2341111591043785e-05, + "loss": 6.8196, + 
"step": 57900 + }, + { + "epoch": 4.27, + "learning_rate": 4.229088607849243e-05, + "loss": 7.5114, + "step": 57950 + }, + { + "epoch": 4.28, + "learning_rate": 4.224066056594107e-05, + "loss": 6.4122, + "step": 58000 + }, + { + "epoch": 4.28, + "learning_rate": 4.219043505338972e-05, + "loss": 6.9228, + "step": 58050 + }, + { + "epoch": 4.29, + "learning_rate": 4.214020954083837e-05, + "loss": 6.3687, + "step": 58100 + }, + { + "epoch": 4.29, + "learning_rate": 4.208998402828701e-05, + "loss": 6.8616, + "step": 58150 + }, + { + "epoch": 4.29, + "learning_rate": 4.2039758515735656e-05, + "loss": 6.002, + "step": 58200 + }, + { + "epoch": 4.3, + "learning_rate": 4.19895330031843e-05, + "loss": 6.0985, + "step": 58250 + }, + { + "epoch": 4.3, + "learning_rate": 4.1939307490632944e-05, + "loss": 6.5857, + "step": 58300 + }, + { + "epoch": 4.3, + "learning_rate": 4.188908197808159e-05, + "loss": 6.257, + "step": 58350 + }, + { + "epoch": 4.31, + "learning_rate": 4.183885646553023e-05, + "loss": 6.9222, + "step": 58400 + }, + { + "epoch": 4.31, + "learning_rate": 4.178863095297888e-05, + "loss": 6.7801, + "step": 58450 + }, + { + "epoch": 4.32, + "learning_rate": 4.173840544042752e-05, + "loss": 6.3861, + "step": 58500 + }, + { + "epoch": 4.32, + "learning_rate": 4.1688179927876165e-05, + "loss": 6.8685, + "step": 58550 + }, + { + "epoch": 4.32, + "learning_rate": 4.163795441532481e-05, + "loss": 6.9948, + "step": 58600 + }, + { + "epoch": 4.33, + "learning_rate": 4.158772890277345e-05, + "loss": 6.0965, + "step": 58650 + }, + { + "epoch": 4.33, + "learning_rate": 4.15375033902221e-05, + "loss": 7.282, + "step": 58700 + }, + { + "epoch": 4.33, + "learning_rate": 4.148727787767074e-05, + "loss": 7.6165, + "step": 58750 + }, + { + "epoch": 4.34, + "learning_rate": 4.143705236511939e-05, + "loss": 6.734, + "step": 58800 + }, + { + "epoch": 4.34, + "learning_rate": 4.1386826852568036e-05, + "loss": 6.0334, + "step": 58850 + }, + { + "epoch": 4.34, + "learning_rate": 
4.133660134001668e-05, + "loss": 6.5306, + "step": 58900 + }, + { + "epoch": 4.35, + "learning_rate": 4.1286375827465324e-05, + "loss": 7.4324, + "step": 58950 + }, + { + "epoch": 4.35, + "learning_rate": 4.123615031491397e-05, + "loss": 7.234, + "step": 59000 + }, + { + "epoch": 4.36, + "learning_rate": 4.1185924802362605e-05, + "loss": 6.7196, + "step": 59050 + }, + { + "epoch": 4.36, + "learning_rate": 4.113569928981125e-05, + "loss": 6.0641, + "step": 59100 + }, + { + "epoch": 4.36, + "learning_rate": 4.10854737772599e-05, + "loss": 5.9373, + "step": 59150 + }, + { + "epoch": 4.37, + "learning_rate": 4.1035248264708544e-05, + "loss": 6.4428, + "step": 59200 + }, + { + "epoch": 4.37, + "learning_rate": 4.098502275215719e-05, + "loss": 6.7303, + "step": 59250 + }, + { + "epoch": 4.37, + "learning_rate": 4.093479723960583e-05, + "loss": 6.5585, + "step": 59300 + }, + { + "epoch": 4.38, + "learning_rate": 4.0884571727054476e-05, + "loss": 5.837, + "step": 59350 + }, + { + "epoch": 4.38, + "learning_rate": 4.083434621450312e-05, + "loss": 6.574, + "step": 59400 + }, + { + "epoch": 4.39, + "learning_rate": 4.0784120701951764e-05, + "loss": 7.4319, + "step": 59450 + }, + { + "epoch": 4.39, + "learning_rate": 4.073389518940041e-05, + "loss": 6.1092, + "step": 59500 + }, + { + "epoch": 4.39, + "learning_rate": 4.068366967684906e-05, + "loss": 6.4683, + "step": 59550 + }, + { + "epoch": 4.4, + "learning_rate": 4.06334441642977e-05, + "loss": 7.1323, + "step": 59600 + }, + { + "epoch": 4.4, + "learning_rate": 4.058321865174634e-05, + "loss": 6.7011, + "step": 59650 + }, + { + "epoch": 4.4, + "learning_rate": 4.0532993139194985e-05, + "loss": 6.7281, + "step": 59700 + }, + { + "epoch": 4.41, + "learning_rate": 4.048276762664363e-05, + "loss": 6.0361, + "step": 59750 + }, + { + "epoch": 4.41, + "learning_rate": 4.043254211409227e-05, + "loss": 6.5359, + "step": 59800 + }, + { + "epoch": 4.41, + "learning_rate": 4.0382316601540924e-05, + "loss": 7.3576, + "step": 59850 + }, 
+ { + "epoch": 4.42, + "learning_rate": 4.033209108898957e-05, + "loss": 6.7324, + "step": 59900 + }, + { + "epoch": 4.42, + "learning_rate": 4.028186557643821e-05, + "loss": 7.1445, + "step": 59950 + }, + { + "epoch": 4.43, + "learning_rate": 4.0231640063886856e-05, + "loss": 5.426, + "step": 60000 + }, + { + "epoch": 4.43, + "eval_loss": 7.799332618713379, + "eval_runtime": 963.4222, + "eval_samples_per_second": 13.595, + "eval_steps_per_second": 3.399, + "eval_wer": 0.20775061946159337, + "step": 60000 + } + ], + "max_steps": 100051, + "num_train_epochs": 8, + "total_flos": 0.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-60000/training_args.bin b/checkpoint-60000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..dbc064046a36220dd960e955c565bc3e2c9e3abd --- /dev/null +++ b/checkpoint-60000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b64c669f66dd7a2e54d3001ce7e31c26cc60dd58136e8ce90e6055bd0ae15eb +size 3503 diff --git a/checkpoint-80000/optimizer.pt b/checkpoint-80000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d364cecd0642f27e9221e6cb2c68aca2d7326814 --- /dev/null +++ b/checkpoint-80000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56861ad8a03582034a89047c1e6397a79297e194daab37dae36192eb72f16c4a +size 5154565443 diff --git a/checkpoint-80000/rng_state.pth b/checkpoint-80000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..6d8c6c85a0341e6f0d5397e4127222d27a121847 --- /dev/null +++ b/checkpoint-80000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b36e92749442e712801d00e24ed95ea736e78f8ef065b6af0b801ae709dfb48d +size 14503 diff --git a/checkpoint-80000/scheduler.pt b/checkpoint-80000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..496dc3b4a25d0beaa6051e4f35e82ca055816e89 --- 
/dev/null +++ b/checkpoint-80000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:827a7ad0b8599273336e50134d47c6b281fcbf26c0ef32fd1bca5bf3db63fe69 +size 623 diff --git a/checkpoint-80000/stt_en_conformer_transducer_xlarge.nemo b/checkpoint-80000/stt_en_conformer_transducer_xlarge.nemo new file mode 100644 index 0000000000000000000000000000000000000000..33368f19406618f11ba83167eed1d5dc27e6ac9a --- /dev/null +++ b/checkpoint-80000/stt_en_conformer_transducer_xlarge.nemo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d9af5c4d6859c9af2c18bca5723158554500ba93753fb4ffd4923e3e72011340 +size 2577971200 diff --git a/checkpoint-80000/trainer_state.json b/checkpoint-80000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..59d2399c89d2c7546714222ca37a5c4814d610f1 --- /dev/null +++ b/checkpoint-80000/trainer_state.json @@ -0,0 +1,9652 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.901010548056354, + "global_step": 80000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1e-05, + "loss": 178.9465, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 2e-05, + "loss": 164.9707, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 3e-05, + "loss": 142.2782, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4e-05, + "loss": 121.5122, + "step": 200 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 91.8622, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 6e-05, + "loss": 82.2062, + "step": 300 + }, + { + "epoch": 0.03, + "learning_rate": 7e-05, + "loss": 72.6893, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 8e-05, + "loss": 71.8709, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 9e-05, + "loss": 69.9995, + "step": 450 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001, + "loss": 
70.6458, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 9.994977448744865e-05, + "loss": 73.9929, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 9.989954897489729e-05, + "loss": 66.52, + "step": 600 + }, + { + "epoch": 0.05, + "learning_rate": 9.984932346234594e-05, + "loss": 65.8947, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 9.979909794979458e-05, + "loss": 62.5809, + "step": 700 + }, + { + "epoch": 0.06, + "learning_rate": 9.974887243724323e-05, + "loss": 61.212, + "step": 750 + }, + { + "epoch": 0.06, + "learning_rate": 9.969864692469187e-05, + "loss": 68.2408, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 9.964842141214051e-05, + "loss": 61.5308, + "step": 850 + }, + { + "epoch": 0.07, + "learning_rate": 9.959819589958916e-05, + "loss": 58.9116, + "step": 900 + }, + { + "epoch": 0.07, + "learning_rate": 9.95479703870378e-05, + "loss": 60.0702, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 9.949774487448646e-05, + "loss": 57.6135, + "step": 1000 + }, + { + "epoch": 0.08, + "learning_rate": 9.944751936193509e-05, + "loss": 50.9231, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 9.939729384938373e-05, + "loss": 51.187, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 9.934706833683238e-05, + "loss": 52.1127, + "step": 1150 + }, + { + "epoch": 0.09, + "learning_rate": 9.929684282428102e-05, + "loss": 47.4608, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 9.924661731172968e-05, + "loss": 51.6108, + "step": 1250 + }, + { + "epoch": 0.1, + "learning_rate": 9.919639179917831e-05, + "loss": 46.5874, + "step": 1300 + }, + { + "epoch": 0.1, + "learning_rate": 9.914616628662697e-05, + "loss": 41.4706, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 9.90959407740756e-05, + "loss": 43.7544, + "step": 1400 + }, + { + "epoch": 0.11, + "learning_rate": 9.904571526152426e-05, + "loss": 44.6039, + "step": 1450 + }, + { + "epoch": 0.11, + "learning_rate": 
9.899548974897289e-05, + "loss": 41.4384, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 9.894526423642154e-05, + "loss": 42.8289, + "step": 1550 + }, + { + "epoch": 0.12, + "learning_rate": 9.889503872387019e-05, + "loss": 39.9726, + "step": 1600 + }, + { + "epoch": 0.12, + "learning_rate": 9.884481321131882e-05, + "loss": 43.9533, + "step": 1650 + }, + { + "epoch": 0.13, + "learning_rate": 9.879458769876748e-05, + "loss": 38.7605, + "step": 1700 + }, + { + "epoch": 0.13, + "learning_rate": 9.87443621862161e-05, + "loss": 39.5425, + "step": 1750 + }, + { + "epoch": 0.13, + "learning_rate": 9.869413667366476e-05, + "loss": 37.588, + "step": 1800 + }, + { + "epoch": 0.14, + "learning_rate": 9.86439111611134e-05, + "loss": 39.7744, + "step": 1850 + }, + { + "epoch": 0.14, + "learning_rate": 9.859368564856205e-05, + "loss": 38.2154, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 9.85434601360107e-05, + "loss": 35.0806, + "step": 1950 + }, + { + "epoch": 0.15, + "learning_rate": 9.849323462345934e-05, + "loss": 39.061, + "step": 2000 + }, + { + "epoch": 0.15, + "learning_rate": 9.844300911090798e-05, + "loss": 35.1544, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 9.839278359835663e-05, + "loss": 38.123, + "step": 2100 + }, + { + "epoch": 0.16, + "learning_rate": 9.834255808580527e-05, + "loss": 33.1144, + "step": 2150 + }, + { + "epoch": 0.16, + "learning_rate": 9.829233257325392e-05, + "loss": 34.3476, + "step": 2200 + }, + { + "epoch": 0.17, + "learning_rate": 9.824210706070256e-05, + "loss": 29.5665, + "step": 2250 + }, + { + "epoch": 0.17, + "learning_rate": 9.81918815481512e-05, + "loss": 35.8756, + "step": 2300 + }, + { + "epoch": 0.17, + "learning_rate": 9.814165603559985e-05, + "loss": 37.2579, + "step": 2350 + }, + { + "epoch": 0.18, + "learning_rate": 9.809143052304849e-05, + "loss": 33.6245, + "step": 2400 + }, + { + "epoch": 0.18, + "learning_rate": 9.804120501049714e-05, + "loss": 35.6543, + "step": 2450 + }, + { + 
"epoch": 0.18, + "learning_rate": 9.799097949794578e-05, + "loss": 36.7847, + "step": 2500 + }, + { + "epoch": 0.19, + "learning_rate": 9.794075398539442e-05, + "loss": 33.463, + "step": 2550 + }, + { + "epoch": 0.19, + "learning_rate": 9.789052847284307e-05, + "loss": 32.2215, + "step": 2600 + }, + { + "epoch": 0.2, + "learning_rate": 9.784030296029171e-05, + "loss": 33.4301, + "step": 2650 + }, + { + "epoch": 0.2, + "learning_rate": 9.779007744774036e-05, + "loss": 29.9579, + "step": 2700 + }, + { + "epoch": 0.2, + "learning_rate": 9.773985193518901e-05, + "loss": 31.9141, + "step": 2750 + }, + { + "epoch": 0.21, + "learning_rate": 9.768962642263764e-05, + "loss": 33.2049, + "step": 2800 + }, + { + "epoch": 0.21, + "learning_rate": 9.763940091008629e-05, + "loss": 32.8774, + "step": 2850 + }, + { + "epoch": 0.21, + "learning_rate": 9.758917539753493e-05, + "loss": 29.0858, + "step": 2900 + }, + { + "epoch": 0.22, + "learning_rate": 9.753894988498358e-05, + "loss": 30.1145, + "step": 2950 + }, + { + "epoch": 0.22, + "learning_rate": 9.748872437243222e-05, + "loss": 27.6986, + "step": 3000 + }, + { + "epoch": 0.22, + "learning_rate": 9.743849885988087e-05, + "loss": 31.7807, + "step": 3050 + }, + { + "epoch": 0.23, + "learning_rate": 9.738827334732952e-05, + "loss": 30.5108, + "step": 3100 + }, + { + "epoch": 0.23, + "learning_rate": 9.733804783477815e-05, + "loss": 31.0909, + "step": 3150 + }, + { + "epoch": 0.24, + "learning_rate": 9.728782232222681e-05, + "loss": 27.9057, + "step": 3200 + }, + { + "epoch": 0.24, + "learning_rate": 9.723759680967544e-05, + "loss": 29.7323, + "step": 3250 + }, + { + "epoch": 0.24, + "learning_rate": 9.71873712971241e-05, + "loss": 29.7527, + "step": 3300 + }, + { + "epoch": 0.25, + "learning_rate": 9.713714578457273e-05, + "loss": 29.1442, + "step": 3350 + }, + { + "epoch": 0.25, + "learning_rate": 9.708692027202137e-05, + "loss": 30.8906, + "step": 3400 + }, + { + "epoch": 0.25, + "learning_rate": 9.703669475947003e-05, + "loss": 
26.8419, + "step": 3450 + }, + { + "epoch": 0.26, + "learning_rate": 9.698646924691866e-05, + "loss": 29.2181, + "step": 3500 + }, + { + "epoch": 0.26, + "learning_rate": 9.693624373436732e-05, + "loss": 27.6549, + "step": 3550 + }, + { + "epoch": 0.27, + "learning_rate": 9.688601822181595e-05, + "loss": 34.0701, + "step": 3600 + }, + { + "epoch": 0.27, + "learning_rate": 9.683579270926461e-05, + "loss": 24.7487, + "step": 3650 + }, + { + "epoch": 0.27, + "learning_rate": 9.678556719671325e-05, + "loss": 30.0266, + "step": 3700 + }, + { + "epoch": 0.28, + "learning_rate": 9.67353416841619e-05, + "loss": 25.5011, + "step": 3750 + }, + { + "epoch": 0.28, + "learning_rate": 9.668511617161054e-05, + "loss": 26.1437, + "step": 3800 + }, + { + "epoch": 0.28, + "learning_rate": 9.663489065905918e-05, + "loss": 23.2303, + "step": 3850 + }, + { + "epoch": 0.29, + "learning_rate": 9.658466514650783e-05, + "loss": 26.357, + "step": 3900 + }, + { + "epoch": 0.29, + "learning_rate": 9.653443963395646e-05, + "loss": 27.2201, + "step": 3950 + }, + { + "epoch": 0.3, + "learning_rate": 9.648421412140512e-05, + "loss": 25.5695, + "step": 4000 + }, + { + "epoch": 0.3, + "learning_rate": 9.643398860885376e-05, + "loss": 24.8346, + "step": 4050 + }, + { + "epoch": 0.3, + "learning_rate": 9.63837630963024e-05, + "loss": 22.3957, + "step": 4100 + }, + { + "epoch": 0.31, + "learning_rate": 9.633353758375105e-05, + "loss": 24.9532, + "step": 4150 + }, + { + "epoch": 0.31, + "learning_rate": 9.628331207119969e-05, + "loss": 23.1574, + "step": 4200 + }, + { + "epoch": 0.31, + "learning_rate": 9.623308655864834e-05, + "loss": 23.7018, + "step": 4250 + }, + { + "epoch": 0.32, + "learning_rate": 9.618286104609698e-05, + "loss": 25.1433, + "step": 4300 + }, + { + "epoch": 0.32, + "learning_rate": 9.613263553354562e-05, + "loss": 25.0571, + "step": 4350 + }, + { + "epoch": 0.32, + "learning_rate": 9.608241002099427e-05, + "loss": 24.2231, + "step": 4400 + }, + { + "epoch": 0.33, + 
"learning_rate": 9.603218450844291e-05, + "loss": 23.0983, + "step": 4450 + }, + { + "epoch": 0.33, + "learning_rate": 9.598195899589156e-05, + "loss": 25.0078, + "step": 4500 + }, + { + "epoch": 0.34, + "learning_rate": 9.59317334833402e-05, + "loss": 20.6933, + "step": 4550 + }, + { + "epoch": 0.34, + "learning_rate": 9.588150797078884e-05, + "loss": 23.6196, + "step": 4600 + }, + { + "epoch": 0.34, + "learning_rate": 9.583128245823749e-05, + "loss": 25.2331, + "step": 4650 + }, + { + "epoch": 0.35, + "learning_rate": 9.578105694568613e-05, + "loss": 24.7932, + "step": 4700 + }, + { + "epoch": 0.35, + "learning_rate": 9.573083143313478e-05, + "loss": 24.3586, + "step": 4750 + }, + { + "epoch": 0.35, + "learning_rate": 9.568060592058342e-05, + "loss": 22.7161, + "step": 4800 + }, + { + "epoch": 0.36, + "learning_rate": 9.563038040803208e-05, + "loss": 22.4188, + "step": 4850 + }, + { + "epoch": 0.36, + "learning_rate": 9.558015489548071e-05, + "loss": 21.6516, + "step": 4900 + }, + { + "epoch": 0.37, + "learning_rate": 9.552992938292937e-05, + "loss": 21.78, + "step": 4950 + }, + { + "epoch": 0.37, + "learning_rate": 9.5479703870378e-05, + "loss": 21.0172, + "step": 5000 + }, + { + "epoch": 0.37, + "learning_rate": 9.542947835782665e-05, + "loss": 22.4624, + "step": 5050 + }, + { + "epoch": 0.38, + "learning_rate": 9.537925284527528e-05, + "loss": 23.6615, + "step": 5100 + }, + { + "epoch": 0.38, + "learning_rate": 9.532902733272393e-05, + "loss": 21.8091, + "step": 5150 + }, + { + "epoch": 0.38, + "learning_rate": 9.527880182017259e-05, + "loss": 21.4173, + "step": 5200 + }, + { + "epoch": 0.39, + "learning_rate": 9.522857630762122e-05, + "loss": 20.5415, + "step": 5250 + }, + { + "epoch": 0.39, + "learning_rate": 9.517835079506987e-05, + "loss": 21.0639, + "step": 5300 + }, + { + "epoch": 0.39, + "learning_rate": 9.51281252825185e-05, + "loss": 21.6078, + "step": 5350 + }, + { + "epoch": 0.4, + "learning_rate": 9.507789976996716e-05, + "loss": 19.4142, + "step": 
5400 + }, + { + "epoch": 0.4, + "learning_rate": 9.50276742574158e-05, + "loss": 20.2504, + "step": 5450 + }, + { + "epoch": 0.41, + "learning_rate": 9.497744874486445e-05, + "loss": 23.8683, + "step": 5500 + }, + { + "epoch": 0.41, + "learning_rate": 9.49272232323131e-05, + "loss": 19.7559, + "step": 5550 + }, + { + "epoch": 0.41, + "learning_rate": 9.487699771976174e-05, + "loss": 21.1743, + "step": 5600 + }, + { + "epoch": 0.42, + "learning_rate": 9.482677220721038e-05, + "loss": 21.1908, + "step": 5650 + }, + { + "epoch": 0.42, + "learning_rate": 9.477654669465901e-05, + "loss": 20.9591, + "step": 5700 + }, + { + "epoch": 0.42, + "learning_rate": 9.472632118210767e-05, + "loss": 20.9036, + "step": 5750 + }, + { + "epoch": 0.43, + "learning_rate": 9.46760956695563e-05, + "loss": 22.249, + "step": 5800 + }, + { + "epoch": 0.43, + "learning_rate": 9.462587015700496e-05, + "loss": 19.1093, + "step": 5850 + }, + { + "epoch": 0.44, + "learning_rate": 9.45756446444536e-05, + "loss": 21.2714, + "step": 5900 + }, + { + "epoch": 0.44, + "learning_rate": 9.452541913190225e-05, + "loss": 21.3794, + "step": 5950 + }, + { + "epoch": 0.44, + "learning_rate": 9.447519361935089e-05, + "loss": 20.0326, + "step": 6000 + }, + { + "epoch": 0.45, + "learning_rate": 9.442496810679954e-05, + "loss": 19.8004, + "step": 6050 + }, + { + "epoch": 0.45, + "learning_rate": 9.437474259424818e-05, + "loss": 19.0229, + "step": 6100 + }, + { + "epoch": 0.45, + "learning_rate": 9.432451708169682e-05, + "loss": 17.6587, + "step": 6150 + }, + { + "epoch": 0.46, + "learning_rate": 9.427429156914547e-05, + "loss": 21.9247, + "step": 6200 + }, + { + "epoch": 0.46, + "learning_rate": 9.422406605659411e-05, + "loss": 19.743, + "step": 6250 + }, + { + "epoch": 0.46, + "learning_rate": 9.417384054404276e-05, + "loss": 22.9746, + "step": 6300 + }, + { + "epoch": 0.47, + "learning_rate": 9.41236150314914e-05, + "loss": 19.6693, + "step": 6350 + }, + { + "epoch": 0.47, + "learning_rate": 
9.407338951894004e-05, + "loss": 19.1141, + "step": 6400 + }, + { + "epoch": 0.48, + "learning_rate": 9.402316400638869e-05, + "loss": 18.3847, + "step": 6450 + }, + { + "epoch": 0.48, + "learning_rate": 9.397293849383733e-05, + "loss": 18.9357, + "step": 6500 + }, + { + "epoch": 0.48, + "learning_rate": 9.392271298128598e-05, + "loss": 18.9316, + "step": 6550 + }, + { + "epoch": 0.49, + "learning_rate": 9.387248746873462e-05, + "loss": 20.9141, + "step": 6600 + }, + { + "epoch": 0.49, + "learning_rate": 9.382226195618326e-05, + "loss": 18.7472, + "step": 6650 + }, + { + "epoch": 0.49, + "learning_rate": 9.377203644363192e-05, + "loss": 18.8577, + "step": 6700 + }, + { + "epoch": 0.5, + "learning_rate": 9.372181093108055e-05, + "loss": 17.8061, + "step": 6750 + }, + { + "epoch": 0.5, + "learning_rate": 9.36715854185292e-05, + "loss": 19.4687, + "step": 6800 + }, + { + "epoch": 0.51, + "learning_rate": 9.362135990597784e-05, + "loss": 19.5103, + "step": 6850 + }, + { + "epoch": 0.51, + "learning_rate": 9.357113439342648e-05, + "loss": 18.5319, + "step": 6900 + }, + { + "epoch": 0.51, + "learning_rate": 9.352090888087514e-05, + "loss": 20.16, + "step": 6950 + }, + { + "epoch": 0.52, + "learning_rate": 9.347068336832377e-05, + "loss": 18.1913, + "step": 7000 + }, + { + "epoch": 0.52, + "learning_rate": 9.342045785577243e-05, + "loss": 21.341, + "step": 7050 + }, + { + "epoch": 0.52, + "learning_rate": 9.337023234322106e-05, + "loss": 16.7701, + "step": 7100 + }, + { + "epoch": 0.53, + "learning_rate": 9.332000683066972e-05, + "loss": 18.045, + "step": 7150 + }, + { + "epoch": 0.53, + "learning_rate": 9.326978131811835e-05, + "loss": 16.0393, + "step": 7200 + }, + { + "epoch": 0.53, + "learning_rate": 9.3219555805567e-05, + "loss": 17.4833, + "step": 7250 + }, + { + "epoch": 0.54, + "learning_rate": 9.316933029301565e-05, + "loss": 17.3978, + "step": 7300 + }, + { + "epoch": 0.54, + "learning_rate": 9.31191047804643e-05, + "loss": 18.2649, + "step": 7350 + }, + { + 
"epoch": 0.55, + "learning_rate": 9.306887926791294e-05, + "loss": 16.3891, + "step": 7400 + }, + { + "epoch": 0.55, + "learning_rate": 9.301865375536157e-05, + "loss": 21.4399, + "step": 7450 + }, + { + "epoch": 0.55, + "learning_rate": 9.296842824281023e-05, + "loss": 16.3082, + "step": 7500 + }, + { + "epoch": 0.56, + "learning_rate": 9.291820273025886e-05, + "loss": 14.8713, + "step": 7550 + }, + { + "epoch": 0.56, + "learning_rate": 9.286797721770751e-05, + "loss": 16.3099, + "step": 7600 + }, + { + "epoch": 0.56, + "learning_rate": 9.281775170515616e-05, + "loss": 17.8771, + "step": 7650 + }, + { + "epoch": 0.57, + "learning_rate": 9.27675261926048e-05, + "loss": 17.1421, + "step": 7700 + }, + { + "epoch": 0.57, + "learning_rate": 9.271730068005345e-05, + "loss": 16.6478, + "step": 7750 + }, + { + "epoch": 0.58, + "learning_rate": 9.266707516750209e-05, + "loss": 15.3247, + "step": 7800 + }, + { + "epoch": 0.58, + "learning_rate": 9.261684965495073e-05, + "loss": 17.6577, + "step": 7850 + }, + { + "epoch": 0.58, + "learning_rate": 9.256662414239938e-05, + "loss": 18.8549, + "step": 7900 + }, + { + "epoch": 0.59, + "learning_rate": 9.251639862984802e-05, + "loss": 17.4187, + "step": 7950 + }, + { + "epoch": 0.59, + "learning_rate": 9.246617311729667e-05, + "loss": 15.6643, + "step": 8000 + }, + { + "epoch": 0.59, + "learning_rate": 9.241594760474531e-05, + "loss": 17.1987, + "step": 8050 + }, + { + "epoch": 0.6, + "learning_rate": 9.236572209219396e-05, + "loss": 18.1712, + "step": 8100 + }, + { + "epoch": 0.6, + "learning_rate": 9.23154965796426e-05, + "loss": 15.8015, + "step": 8150 + }, + { + "epoch": 0.6, + "learning_rate": 9.226527106709124e-05, + "loss": 19.064, + "step": 8200 + }, + { + "epoch": 0.61, + "learning_rate": 9.221504555453989e-05, + "loss": 18.2748, + "step": 8250 + }, + { + "epoch": 0.61, + "learning_rate": 9.216482004198853e-05, + "loss": 15.0679, + "step": 8300 + }, + { + "epoch": 0.62, + "learning_rate": 9.211459452943718e-05, + "loss": 
17.995, + "step": 8350 + }, + { + "epoch": 0.62, + "learning_rate": 9.206436901688582e-05, + "loss": 17.467, + "step": 8400 + }, + { + "epoch": 0.62, + "learning_rate": 9.201414350433448e-05, + "loss": 18.6665, + "step": 8450 + }, + { + "epoch": 0.63, + "learning_rate": 9.196391799178311e-05, + "loss": 17.2848, + "step": 8500 + }, + { + "epoch": 0.63, + "learning_rate": 9.191369247923175e-05, + "loss": 14.4767, + "step": 8550 + }, + { + "epoch": 0.63, + "learning_rate": 9.18634669666804e-05, + "loss": 17.5444, + "step": 8600 + }, + { + "epoch": 0.64, + "learning_rate": 9.181324145412904e-05, + "loss": 14.4661, + "step": 8650 + }, + { + "epoch": 0.64, + "learning_rate": 9.176301594157768e-05, + "loss": 16.3339, + "step": 8700 + }, + { + "epoch": 0.65, + "learning_rate": 9.171279042902633e-05, + "loss": 17.5122, + "step": 8750 + }, + { + "epoch": 0.65, + "learning_rate": 9.166256491647499e-05, + "loss": 16.7631, + "step": 8800 + }, + { + "epoch": 0.65, + "learning_rate": 9.161233940392362e-05, + "loss": 16.5193, + "step": 8850 + }, + { + "epoch": 0.66, + "learning_rate": 9.156211389137227e-05, + "loss": 17.8364, + "step": 8900 + }, + { + "epoch": 0.66, + "learning_rate": 9.15118883788209e-05, + "loss": 16.2916, + "step": 8950 + }, + { + "epoch": 0.66, + "learning_rate": 9.146166286626956e-05, + "loss": 14.1719, + "step": 9000 + }, + { + "epoch": 0.67, + "learning_rate": 9.141143735371819e-05, + "loss": 18.2987, + "step": 9050 + }, + { + "epoch": 0.67, + "learning_rate": 9.136121184116684e-05, + "loss": 17.4248, + "step": 9100 + }, + { + "epoch": 0.67, + "learning_rate": 9.13109863286155e-05, + "loss": 16.1862, + "step": 9150 + }, + { + "epoch": 0.68, + "learning_rate": 9.126076081606412e-05, + "loss": 16.3134, + "step": 9200 + }, + { + "epoch": 0.68, + "learning_rate": 9.121053530351278e-05, + "loss": 14.9158, + "step": 9250 + }, + { + "epoch": 0.69, + "learning_rate": 9.116030979096141e-05, + "loss": 15.2504, + "step": 9300 + }, + { + "epoch": 0.69, + 
"learning_rate": 9.111008427841007e-05, + "loss": 14.1967, + "step": 9350 + }, + { + "epoch": 0.69, + "learning_rate": 9.105985876585871e-05, + "loss": 17.3165, + "step": 9400 + }, + { + "epoch": 0.7, + "learning_rate": 9.100963325330736e-05, + "loss": 14.5912, + "step": 9450 + }, + { + "epoch": 0.7, + "learning_rate": 9.0959407740756e-05, + "loss": 17.5593, + "step": 9500 + }, + { + "epoch": 0.7, + "learning_rate": 9.090918222820465e-05, + "loss": 16.3421, + "step": 9550 + }, + { + "epoch": 0.71, + "learning_rate": 9.085895671565329e-05, + "loss": 16.2821, + "step": 9600 + }, + { + "epoch": 0.71, + "learning_rate": 9.080873120310192e-05, + "loss": 16.4985, + "step": 9650 + }, + { + "epoch": 0.72, + "learning_rate": 9.075850569055058e-05, + "loss": 16.1138, + "step": 9700 + }, + { + "epoch": 0.72, + "learning_rate": 9.070828017799922e-05, + "loss": 16.3997, + "step": 9750 + }, + { + "epoch": 0.72, + "learning_rate": 9.065805466544787e-05, + "loss": 15.518, + "step": 9800 + }, + { + "epoch": 0.73, + "learning_rate": 9.060782915289651e-05, + "loss": 13.8424, + "step": 9850 + }, + { + "epoch": 0.73, + "learning_rate": 9.055760364034515e-05, + "loss": 15.0784, + "step": 9900 + }, + { + "epoch": 0.73, + "learning_rate": 9.05073781277938e-05, + "loss": 14.0163, + "step": 9950 + }, + { + "epoch": 0.74, + "learning_rate": 9.045715261524244e-05, + "loss": 16.7863, + "step": 10000 + }, + { + "epoch": 0.74, + "learning_rate": 9.040692710269109e-05, + "loss": 13.6715, + "step": 10050 + }, + { + "epoch": 0.75, + "learning_rate": 9.035670159013973e-05, + "loss": 15.1071, + "step": 10100 + }, + { + "epoch": 0.75, + "learning_rate": 9.030647607758837e-05, + "loss": 14.2658, + "step": 10150 + }, + { + "epoch": 0.75, + "learning_rate": 9.025625056503703e-05, + "loss": 15.1115, + "step": 10200 + }, + { + "epoch": 0.76, + "learning_rate": 9.020602505248566e-05, + "loss": 14.028, + "step": 10250 + }, + { + "epoch": 0.76, + "learning_rate": 9.015579953993431e-05, + "loss": 13.3066, + 
"step": 10300 + }, + { + "epoch": 0.76, + "learning_rate": 9.010557402738295e-05, + "loss": 14.1185, + "step": 10350 + }, + { + "epoch": 0.77, + "learning_rate": 9.00553485148316e-05, + "loss": 14.061, + "step": 10400 + }, + { + "epoch": 0.77, + "learning_rate": 9.000512300228024e-05, + "loss": 15.2439, + "step": 10450 + }, + { + "epoch": 0.77, + "learning_rate": 8.995489748972888e-05, + "loss": 13.3617, + "step": 10500 + }, + { + "epoch": 0.78, + "learning_rate": 8.990467197717754e-05, + "loss": 14.5514, + "step": 10550 + }, + { + "epoch": 0.78, + "learning_rate": 8.985444646462617e-05, + "loss": 15.2426, + "step": 10600 + }, + { + "epoch": 0.79, + "learning_rate": 8.980422095207483e-05, + "loss": 16.6418, + "step": 10650 + }, + { + "epoch": 0.79, + "learning_rate": 8.975399543952346e-05, + "loss": 13.3146, + "step": 10700 + }, + { + "epoch": 0.79, + "learning_rate": 8.970376992697212e-05, + "loss": 14.9333, + "step": 10750 + }, + { + "epoch": 0.8, + "learning_rate": 8.965354441442075e-05, + "loss": 14.4502, + "step": 10800 + }, + { + "epoch": 0.8, + "learning_rate": 8.960331890186939e-05, + "loss": 14.7886, + "step": 10850 + }, + { + "epoch": 0.8, + "learning_rate": 8.955309338931805e-05, + "loss": 15.0266, + "step": 10900 + }, + { + "epoch": 0.81, + "learning_rate": 8.950286787676668e-05, + "loss": 14.543, + "step": 10950 + }, + { + "epoch": 0.81, + "learning_rate": 8.945264236421534e-05, + "loss": 15.8078, + "step": 11000 + }, + { + "epoch": 0.82, + "learning_rate": 8.940241685166397e-05, + "loss": 13.6052, + "step": 11050 + }, + { + "epoch": 0.82, + "learning_rate": 8.935219133911263e-05, + "loss": 14.2995, + "step": 11100 + }, + { + "epoch": 0.82, + "learning_rate": 8.930196582656126e-05, + "loss": 15.732, + "step": 11150 + }, + { + "epoch": 0.83, + "learning_rate": 8.925174031400991e-05, + "loss": 14.0573, + "step": 11200 + }, + { + "epoch": 0.83, + "learning_rate": 8.920151480145856e-05, + "loss": 17.5941, + "step": 11250 + }, + { + "epoch": 0.83, + 
"learning_rate": 8.91512892889072e-05, + "loss": 14.7829, + "step": 11300 + }, + { + "epoch": 0.84, + "learning_rate": 8.910106377635585e-05, + "loss": 14.6669, + "step": 11350 + }, + { + "epoch": 0.84, + "learning_rate": 8.905083826380448e-05, + "loss": 14.3315, + "step": 11400 + }, + { + "epoch": 0.84, + "learning_rate": 8.900061275125313e-05, + "loss": 14.2639, + "step": 11450 + }, + { + "epoch": 0.85, + "learning_rate": 8.895038723870176e-05, + "loss": 14.3226, + "step": 11500 + }, + { + "epoch": 0.85, + "learning_rate": 8.890016172615042e-05, + "loss": 14.4975, + "step": 11550 + }, + { + "epoch": 0.86, + "learning_rate": 8.884993621359907e-05, + "loss": 14.8436, + "step": 11600 + }, + { + "epoch": 0.86, + "learning_rate": 8.879971070104771e-05, + "loss": 13.8481, + "step": 11650 + }, + { + "epoch": 0.86, + "learning_rate": 8.874948518849635e-05, + "loss": 12.8151, + "step": 11700 + }, + { + "epoch": 0.87, + "learning_rate": 8.8699259675945e-05, + "loss": 13.1659, + "step": 11750 + }, + { + "epoch": 0.87, + "learning_rate": 8.864903416339364e-05, + "loss": 15.0919, + "step": 11800 + }, + { + "epoch": 0.87, + "learning_rate": 8.859880865084229e-05, + "loss": 14.4382, + "step": 11850 + }, + { + "epoch": 0.88, + "learning_rate": 8.854858313829093e-05, + "loss": 14.0989, + "step": 11900 + }, + { + "epoch": 0.88, + "learning_rate": 8.849835762573957e-05, + "loss": 14.5763, + "step": 11950 + }, + { + "epoch": 0.89, + "learning_rate": 8.844813211318822e-05, + "loss": 13.4144, + "step": 12000 + }, + { + "epoch": 0.89, + "learning_rate": 8.839790660063686e-05, + "loss": 15.6018, + "step": 12050 + }, + { + "epoch": 0.89, + "learning_rate": 8.83476810880855e-05, + "loss": 14.7849, + "step": 12100 + }, + { + "epoch": 0.9, + "learning_rate": 8.829745557553415e-05, + "loss": 14.441, + "step": 12150 + }, + { + "epoch": 0.9, + "learning_rate": 8.82472300629828e-05, + "loss": 14.2135, + "step": 12200 + }, + { + "epoch": 0.9, + "learning_rate": 8.819700455043144e-05, + "loss": 
17.1245, + "step": 12250 + }, + { + "epoch": 0.91, + "learning_rate": 8.814677903788008e-05, + "loss": 14.6629, + "step": 12300 + }, + { + "epoch": 0.91, + "learning_rate": 8.809655352532873e-05, + "loss": 16.6715, + "step": 12350 + }, + { + "epoch": 0.91, + "learning_rate": 8.804632801277738e-05, + "loss": 13.0133, + "step": 12400 + }, + { + "epoch": 0.92, + "learning_rate": 8.799610250022601e-05, + "loss": 14.1551, + "step": 12450 + }, + { + "epoch": 0.92, + "learning_rate": 8.794587698767466e-05, + "loss": 14.019, + "step": 12500 + }, + { + "epoch": 0.93, + "learning_rate": 8.78956514751233e-05, + "loss": 14.4279, + "step": 12550 + }, + { + "epoch": 0.93, + "learning_rate": 8.784542596257195e-05, + "loss": 12.5293, + "step": 12600 + }, + { + "epoch": 0.93, + "learning_rate": 8.77952004500206e-05, + "loss": 15.0403, + "step": 12650 + }, + { + "epoch": 0.94, + "learning_rate": 8.774497493746924e-05, + "loss": 13.8193, + "step": 12700 + }, + { + "epoch": 0.94, + "learning_rate": 8.769474942491789e-05, + "loss": 13.1564, + "step": 12750 + }, + { + "epoch": 0.94, + "learning_rate": 8.764452391236652e-05, + "loss": 14.6415, + "step": 12800 + }, + { + "epoch": 0.95, + "learning_rate": 8.759429839981518e-05, + "loss": 12.2339, + "step": 12850 + }, + { + "epoch": 0.95, + "learning_rate": 8.754407288726381e-05, + "loss": 12.1604, + "step": 12900 + }, + { + "epoch": 0.96, + "learning_rate": 8.749384737471247e-05, + "loss": 15.4939, + "step": 12950 + }, + { + "epoch": 0.96, + "learning_rate": 8.744362186216111e-05, + "loss": 13.9713, + "step": 13000 + }, + { + "epoch": 0.96, + "learning_rate": 8.739339634960976e-05, + "loss": 14.0986, + "step": 13050 + }, + { + "epoch": 0.97, + "learning_rate": 8.73431708370584e-05, + "loss": 13.6334, + "step": 13100 + }, + { + "epoch": 0.97, + "learning_rate": 8.729294532450703e-05, + "loss": 13.5201, + "step": 13150 + }, + { + "epoch": 0.97, + "learning_rate": 8.724271981195569e-05, + "loss": 14.3793, + "step": 13200 + }, + { + "epoch": 
0.98, + "learning_rate": 8.719249429940432e-05, + "loss": 13.1741, + "step": 13250 + }, + { + "epoch": 0.98, + "learning_rate": 8.714226878685298e-05, + "loss": 11.7782, + "step": 13300 + }, + { + "epoch": 0.98, + "learning_rate": 8.709204327430162e-05, + "loss": 12.2758, + "step": 13350 + }, + { + "epoch": 0.99, + "learning_rate": 8.704181776175027e-05, + "loss": 13.1723, + "step": 13400 + }, + { + "epoch": 0.99, + "learning_rate": 8.699159224919891e-05, + "loss": 14.0858, + "step": 13450 + }, + { + "epoch": 1.0, + "learning_rate": 8.694136673664755e-05, + "loss": 11.2836, + "step": 13500 + }, + { + "epoch": 1.0, + "learning_rate": 8.68911412240962e-05, + "loss": 15.7226, + "step": 13550 + }, + { + "epoch": 1.0, + "learning_rate": 8.684091571154484e-05, + "loss": 15.8889, + "step": 13600 + }, + { + "epoch": 1.01, + "learning_rate": 8.679069019899349e-05, + "loss": 12.2185, + "step": 13650 + }, + { + "epoch": 1.01, + "learning_rate": 8.674046468644213e-05, + "loss": 11.4647, + "step": 13700 + }, + { + "epoch": 1.01, + "learning_rate": 8.669023917389077e-05, + "loss": 13.1238, + "step": 13750 + }, + { + "epoch": 1.02, + "learning_rate": 8.664001366133942e-05, + "loss": 11.909, + "step": 13800 + }, + { + "epoch": 1.02, + "learning_rate": 8.658978814878806e-05, + "loss": 12.5478, + "step": 13850 + }, + { + "epoch": 1.03, + "learning_rate": 8.65395626362367e-05, + "loss": 13.017, + "step": 13900 + }, + { + "epoch": 1.03, + "learning_rate": 8.648933712368535e-05, + "loss": 12.9134, + "step": 13950 + }, + { + "epoch": 1.03, + "learning_rate": 8.6439111611134e-05, + "loss": 13.3485, + "step": 14000 + }, + { + "epoch": 1.04, + "learning_rate": 8.638888609858264e-05, + "loss": 11.4706, + "step": 14050 + }, + { + "epoch": 1.04, + "learning_rate": 8.633866058603128e-05, + "loss": 11.1063, + "step": 14100 + }, + { + "epoch": 1.04, + "learning_rate": 8.628843507347994e-05, + "loss": 12.7408, + "step": 14150 + }, + { + "epoch": 1.05, + "learning_rate": 8.623820956092857e-05, + 
"loss": 12.0689, + "step": 14200 + }, + { + "epoch": 1.05, + "learning_rate": 8.618798404837721e-05, + "loss": 11.0724, + "step": 14250 + }, + { + "epoch": 1.05, + "learning_rate": 8.613775853582586e-05, + "loss": 12.5685, + "step": 14300 + }, + { + "epoch": 1.06, + "learning_rate": 8.60875330232745e-05, + "loss": 12.7776, + "step": 14350 + }, + { + "epoch": 1.06, + "learning_rate": 8.603730751072315e-05, + "loss": 11.3066, + "step": 14400 + }, + { + "epoch": 1.07, + "learning_rate": 8.598708199817179e-05, + "loss": 13.06, + "step": 14450 + }, + { + "epoch": 1.07, + "learning_rate": 8.593685648562045e-05, + "loss": 15.6523, + "step": 14500 + }, + { + "epoch": 1.07, + "learning_rate": 8.588663097306908e-05, + "loss": 12.019, + "step": 14550 + }, + { + "epoch": 1.08, + "learning_rate": 8.583640546051774e-05, + "loss": 11.0941, + "step": 14600 + }, + { + "epoch": 1.08, + "learning_rate": 8.578617994796637e-05, + "loss": 12.4755, + "step": 14650 + }, + { + "epoch": 1.08, + "learning_rate": 8.573595443541502e-05, + "loss": 13.7012, + "step": 14700 + }, + { + "epoch": 1.09, + "learning_rate": 8.568572892286366e-05, + "loss": 12.2024, + "step": 14750 + }, + { + "epoch": 1.09, + "learning_rate": 8.56355034103123e-05, + "loss": 12.4744, + "step": 14800 + }, + { + "epoch": 1.1, + "learning_rate": 8.558527789776096e-05, + "loss": 12.3234, + "step": 14850 + }, + { + "epoch": 1.1, + "learning_rate": 8.553505238520959e-05, + "loss": 12.5616, + "step": 14900 + }, + { + "epoch": 1.1, + "learning_rate": 8.548482687265824e-05, + "loss": 11.9559, + "step": 14950 + }, + { + "epoch": 1.11, + "learning_rate": 8.543460136010688e-05, + "loss": 12.0734, + "step": 15000 + }, + { + "epoch": 1.11, + "learning_rate": 8.538437584755553e-05, + "loss": 13.0341, + "step": 15050 + }, + { + "epoch": 1.11, + "learning_rate": 8.533415033500418e-05, + "loss": 12.7406, + "step": 15100 + }, + { + "epoch": 1.12, + "learning_rate": 8.528392482245282e-05, + "loss": 11.7258, + "step": 15150 + }, + { + 
"epoch": 1.12, + "learning_rate": 8.523369930990147e-05, + "loss": 11.8709, + "step": 15200 + }, + { + "epoch": 1.12, + "learning_rate": 8.518347379735011e-05, + "loss": 11.7021, + "step": 15250 + }, + { + "epoch": 1.13, + "learning_rate": 8.513324828479875e-05, + "loss": 13.2674, + "step": 15300 + }, + { + "epoch": 1.13, + "learning_rate": 8.508302277224738e-05, + "loss": 11.9099, + "step": 15350 + }, + { + "epoch": 1.14, + "learning_rate": 8.503279725969604e-05, + "loss": 11.7841, + "step": 15400 + }, + { + "epoch": 1.14, + "learning_rate": 8.498257174714469e-05, + "loss": 11.9573, + "step": 15450 + }, + { + "epoch": 1.14, + "learning_rate": 8.493234623459333e-05, + "loss": 11.7211, + "step": 15500 + }, + { + "epoch": 1.15, + "learning_rate": 8.488212072204197e-05, + "loss": 12.3513, + "step": 15550 + }, + { + "epoch": 1.15, + "learning_rate": 8.483189520949062e-05, + "loss": 11.0709, + "step": 15600 + }, + { + "epoch": 1.15, + "learning_rate": 8.478166969693926e-05, + "loss": 11.6544, + "step": 15650 + }, + { + "epoch": 1.16, + "learning_rate": 8.47314441843879e-05, + "loss": 11.8285, + "step": 15700 + }, + { + "epoch": 1.16, + "learning_rate": 8.468121867183655e-05, + "loss": 10.4208, + "step": 15750 + }, + { + "epoch": 1.17, + "learning_rate": 8.46309931592852e-05, + "loss": 10.7821, + "step": 15800 + }, + { + "epoch": 1.17, + "learning_rate": 8.458076764673384e-05, + "loss": 13.2724, + "step": 15850 + }, + { + "epoch": 1.17, + "learning_rate": 8.45305421341825e-05, + "loss": 10.9219, + "step": 15900 + }, + { + "epoch": 1.18, + "learning_rate": 8.448031662163113e-05, + "loss": 12.2532, + "step": 15950 + }, + { + "epoch": 1.18, + "learning_rate": 8.443009110907977e-05, + "loss": 11.0132, + "step": 16000 + }, + { + "epoch": 1.18, + "learning_rate": 8.437986559652841e-05, + "loss": 12.319, + "step": 16050 + }, + { + "epoch": 1.19, + "learning_rate": 8.432964008397706e-05, + "loss": 12.9871, + "step": 16100 + }, + { + "epoch": 1.19, + "learning_rate": 
8.42794145714257e-05, + "loss": 12.0625, + "step": 16150 + }, + { + "epoch": 1.19, + "learning_rate": 8.422918905887435e-05, + "loss": 13.4629, + "step": 16200 + }, + { + "epoch": 1.2, + "learning_rate": 8.4178963546323e-05, + "loss": 10.9291, + "step": 16250 + }, + { + "epoch": 1.2, + "learning_rate": 8.412873803377163e-05, + "loss": 13.7719, + "step": 16300 + }, + { + "epoch": 1.21, + "learning_rate": 8.407851252122029e-05, + "loss": 11.3634, + "step": 16350 + }, + { + "epoch": 1.21, + "learning_rate": 8.402828700866892e-05, + "loss": 12.7941, + "step": 16400 + }, + { + "epoch": 1.21, + "learning_rate": 8.397806149611758e-05, + "loss": 11.8863, + "step": 16450 + }, + { + "epoch": 1.22, + "learning_rate": 8.392783598356621e-05, + "loss": 9.5225, + "step": 16500 + }, + { + "epoch": 1.22, + "learning_rate": 8.387761047101485e-05, + "loss": 12.983, + "step": 16550 + }, + { + "epoch": 1.22, + "learning_rate": 8.382738495846351e-05, + "loss": 11.8489, + "step": 16600 + }, + { + "epoch": 1.23, + "learning_rate": 8.377715944591214e-05, + "loss": 11.8122, + "step": 16650 + }, + { + "epoch": 1.23, + "learning_rate": 8.37269339333608e-05, + "loss": 12.3387, + "step": 16700 + }, + { + "epoch": 1.24, + "learning_rate": 8.367670842080943e-05, + "loss": 13.4648, + "step": 16750 + }, + { + "epoch": 1.24, + "learning_rate": 8.362648290825809e-05, + "loss": 10.2301, + "step": 16800 + }, + { + "epoch": 1.24, + "learning_rate": 8.357625739570672e-05, + "loss": 11.492, + "step": 16850 + }, + { + "epoch": 1.25, + "learning_rate": 8.352603188315538e-05, + "loss": 12.5997, + "step": 16900 + }, + { + "epoch": 1.25, + "learning_rate": 8.347580637060402e-05, + "loss": 11.5588, + "step": 16950 + }, + { + "epoch": 1.25, + "learning_rate": 8.342558085805266e-05, + "loss": 11.8627, + "step": 17000 + }, + { + "epoch": 1.26, + "learning_rate": 8.337535534550131e-05, + "loss": 13.2469, + "step": 17050 + }, + { + "epoch": 1.26, + "learning_rate": 8.332512983294994e-05, + "loss": 10.4327, + "step": 
17100 + }, + { + "epoch": 1.27, + "learning_rate": 8.32749043203986e-05, + "loss": 12.7566, + "step": 17150 + }, + { + "epoch": 1.27, + "learning_rate": 8.322467880784723e-05, + "loss": 11.0729, + "step": 17200 + }, + { + "epoch": 1.27, + "learning_rate": 8.317445329529588e-05, + "loss": 12.3484, + "step": 17250 + }, + { + "epoch": 1.28, + "learning_rate": 8.312422778274453e-05, + "loss": 10.5193, + "step": 17300 + }, + { + "epoch": 1.28, + "learning_rate": 8.307400227019317e-05, + "loss": 12.2369, + "step": 17350 + }, + { + "epoch": 1.28, + "learning_rate": 8.302377675764182e-05, + "loss": 12.2976, + "step": 17400 + }, + { + "epoch": 1.29, + "learning_rate": 8.297355124509046e-05, + "loss": 12.3852, + "step": 17450 + }, + { + "epoch": 1.29, + "learning_rate": 8.29233257325391e-05, + "loss": 11.2137, + "step": 17500 + }, + { + "epoch": 1.29, + "learning_rate": 8.287310021998775e-05, + "loss": 11.609, + "step": 17550 + }, + { + "epoch": 1.3, + "learning_rate": 8.282287470743639e-05, + "loss": 13.3339, + "step": 17600 + }, + { + "epoch": 1.3, + "learning_rate": 8.277264919488504e-05, + "loss": 11.4263, + "step": 17650 + }, + { + "epoch": 1.31, + "learning_rate": 8.272242368233368e-05, + "loss": 12.6949, + "step": 17700 + }, + { + "epoch": 1.31, + "learning_rate": 8.267219816978233e-05, + "loss": 11.4767, + "step": 17750 + }, + { + "epoch": 1.31, + "learning_rate": 8.262197265723097e-05, + "loss": 12.2225, + "step": 17800 + }, + { + "epoch": 1.32, + "learning_rate": 8.257174714467961e-05, + "loss": 11.0755, + "step": 17850 + }, + { + "epoch": 1.32, + "learning_rate": 8.252152163212826e-05, + "loss": 11.9677, + "step": 17900 + }, + { + "epoch": 1.32, + "learning_rate": 8.24712961195769e-05, + "loss": 11.098, + "step": 17950 + }, + { + "epoch": 1.33, + "learning_rate": 8.242107060702555e-05, + "loss": 11.1102, + "step": 18000 + }, + { + "epoch": 1.33, + "learning_rate": 8.237084509447419e-05, + "loss": 11.4985, + "step": 18050 + }, + { + "epoch": 1.34, + 
"learning_rate": 8.232061958192285e-05, + "loss": 11.7356, + "step": 18100 + }, + { + "epoch": 1.34, + "learning_rate": 8.227039406937148e-05, + "loss": 11.3336, + "step": 18150 + }, + { + "epoch": 1.34, + "learning_rate": 8.222016855682012e-05, + "loss": 11.0448, + "step": 18200 + }, + { + "epoch": 1.35, + "learning_rate": 8.216994304426877e-05, + "loss": 10.9986, + "step": 18250 + }, + { + "epoch": 1.35, + "learning_rate": 8.211971753171741e-05, + "loss": 10.768, + "step": 18300 + }, + { + "epoch": 1.35, + "learning_rate": 8.206949201916607e-05, + "loss": 11.6844, + "step": 18350 + }, + { + "epoch": 1.36, + "learning_rate": 8.20192665066147e-05, + "loss": 11.5615, + "step": 18400 + }, + { + "epoch": 1.36, + "learning_rate": 8.196904099406336e-05, + "loss": 11.4019, + "step": 18450 + }, + { + "epoch": 1.36, + "learning_rate": 8.191881548151199e-05, + "loss": 12.1784, + "step": 18500 + }, + { + "epoch": 1.37, + "learning_rate": 8.186858996896064e-05, + "loss": 12.4565, + "step": 18550 + }, + { + "epoch": 1.37, + "learning_rate": 8.181836445640927e-05, + "loss": 11.0557, + "step": 18600 + }, + { + "epoch": 1.38, + "learning_rate": 8.176813894385793e-05, + "loss": 12.1892, + "step": 18650 + }, + { + "epoch": 1.38, + "learning_rate": 8.171791343130658e-05, + "loss": 12.0531, + "step": 18700 + }, + { + "epoch": 1.38, + "learning_rate": 8.166768791875522e-05, + "loss": 10.1791, + "step": 18750 + }, + { + "epoch": 1.39, + "learning_rate": 8.161746240620386e-05, + "loss": 11.2501, + "step": 18800 + }, + { + "epoch": 1.39, + "learning_rate": 8.15672368936525e-05, + "loss": 9.92, + "step": 18850 + }, + { + "epoch": 1.39, + "learning_rate": 8.151701138110115e-05, + "loss": 10.0603, + "step": 18900 + }, + { + "epoch": 1.4, + "learning_rate": 8.146678586854978e-05, + "loss": 10.9477, + "step": 18950 + }, + { + "epoch": 1.4, + "learning_rate": 8.141656035599844e-05, + "loss": 9.7579, + "step": 19000 + }, + { + "epoch": 1.41, + "learning_rate": 8.136633484344708e-05, + "loss": 
11.243, + "step": 19050 + }, + { + "epoch": 1.41, + "learning_rate": 8.131610933089573e-05, + "loss": 11.0069, + "step": 19100 + }, + { + "epoch": 1.41, + "learning_rate": 8.126588381834437e-05, + "loss": 9.7387, + "step": 19150 + }, + { + "epoch": 1.42, + "learning_rate": 8.121565830579302e-05, + "loss": 11.4624, + "step": 19200 + }, + { + "epoch": 1.42, + "learning_rate": 8.116543279324166e-05, + "loss": 12.1299, + "step": 19250 + }, + { + "epoch": 1.42, + "learning_rate": 8.11152072806903e-05, + "loss": 12.2796, + "step": 19300 + }, + { + "epoch": 1.43, + "learning_rate": 8.106498176813895e-05, + "loss": 10.3295, + "step": 19350 + }, + { + "epoch": 1.43, + "learning_rate": 8.101475625558759e-05, + "loss": 10.0709, + "step": 19400 + }, + { + "epoch": 1.43, + "learning_rate": 8.096453074303624e-05, + "loss": 11.0725, + "step": 19450 + }, + { + "epoch": 1.44, + "learning_rate": 8.091430523048488e-05, + "loss": 10.7882, + "step": 19500 + }, + { + "epoch": 1.44, + "learning_rate": 8.086407971793352e-05, + "loss": 11.4124, + "step": 19550 + }, + { + "epoch": 1.45, + "learning_rate": 8.081385420538217e-05, + "loss": 10.4941, + "step": 19600 + }, + { + "epoch": 1.45, + "learning_rate": 8.076362869283081e-05, + "loss": 11.8687, + "step": 19650 + }, + { + "epoch": 1.45, + "learning_rate": 8.071340318027946e-05, + "loss": 11.3221, + "step": 19700 + }, + { + "epoch": 1.46, + "learning_rate": 8.06631776677281e-05, + "loss": 10.2167, + "step": 19750 + }, + { + "epoch": 1.46, + "learning_rate": 8.061295215517675e-05, + "loss": 10.5425, + "step": 19800 + }, + { + "epoch": 1.46, + "learning_rate": 8.05627266426254e-05, + "loss": 11.2982, + "step": 19850 + }, + { + "epoch": 1.47, + "learning_rate": 8.051250113007403e-05, + "loss": 12.0685, + "step": 19900 + }, + { + "epoch": 1.47, + "learning_rate": 8.046227561752268e-05, + "loss": 10.6613, + "step": 19950 + }, + { + "epoch": 1.48, + "learning_rate": 8.041205010497132e-05, + "loss": 10.8245, + "step": 20000 + }, + { + "epoch": 
1.48, + "eval_loss": 10.409339904785156, + "eval_runtime": 890.9956, + "eval_samples_per_second": 14.7, + "eval_steps_per_second": 3.676, + "eval_wer": 0.2624627273109067, + "step": 20000 + }, + { + "epoch": 1.48, + "learning_rate": 8.036182459241997e-05, + "loss": 10.671, + "step": 20050 + }, + { + "epoch": 1.48, + "learning_rate": 8.031159907986861e-05, + "loss": 11.0263, + "step": 20100 + }, + { + "epoch": 1.49, + "learning_rate": 8.026137356731725e-05, + "loss": 11.0571, + "step": 20150 + }, + { + "epoch": 1.49, + "learning_rate": 8.021114805476591e-05, + "loss": 13.0778, + "step": 20200 + }, + { + "epoch": 1.49, + "learning_rate": 8.016092254221454e-05, + "loss": 11.0495, + "step": 20250 + }, + { + "epoch": 1.5, + "learning_rate": 8.01106970296632e-05, + "loss": 10.6039, + "step": 20300 + }, + { + "epoch": 1.5, + "learning_rate": 8.006047151711183e-05, + "loss": 11.4221, + "step": 20350 + }, + { + "epoch": 1.5, + "learning_rate": 8.001024600456049e-05, + "loss": 10.7975, + "step": 20400 + }, + { + "epoch": 1.51, + "learning_rate": 7.996002049200912e-05, + "loss": 10.1123, + "step": 20450 + }, + { + "epoch": 1.51, + "learning_rate": 7.990979497945776e-05, + "loss": 10.2241, + "step": 20500 + }, + { + "epoch": 1.52, + "learning_rate": 7.985956946690642e-05, + "loss": 10.0191, + "step": 20550 + }, + { + "epoch": 1.52, + "learning_rate": 7.980934395435505e-05, + "loss": 10.649, + "step": 20600 + }, + { + "epoch": 1.52, + "learning_rate": 7.975911844180371e-05, + "loss": 9.6091, + "step": 20650 + }, + { + "epoch": 1.53, + "learning_rate": 7.970889292925234e-05, + "loss": 9.9386, + "step": 20700 + }, + { + "epoch": 1.53, + "learning_rate": 7.9658667416701e-05, + "loss": 11.2646, + "step": 20750 + }, + { + "epoch": 1.53, + "learning_rate": 7.960844190414964e-05, + "loss": 10.0181, + "step": 20800 + }, + { + "epoch": 1.54, + "learning_rate": 7.955821639159828e-05, + "loss": 11.9437, + "step": 20850 + }, + { + "epoch": 1.54, + "learning_rate": 7.950799087904693e-05, + 
"loss": 10.9254, + "step": 20900 + }, + { + "epoch": 1.55, + "learning_rate": 7.945776536649557e-05, + "loss": 11.7954, + "step": 20950 + }, + { + "epoch": 1.55, + "learning_rate": 7.940753985394422e-05, + "loss": 9.6569, + "step": 21000 + }, + { + "epoch": 1.55, + "learning_rate": 7.935731434139286e-05, + "loss": 10.6546, + "step": 21050 + }, + { + "epoch": 1.56, + "learning_rate": 7.93070888288415e-05, + "loss": 10.2795, + "step": 21100 + }, + { + "epoch": 1.56, + "learning_rate": 7.925686331629015e-05, + "loss": 10.4595, + "step": 21150 + }, + { + "epoch": 1.56, + "learning_rate": 7.920663780373879e-05, + "loss": 9.2921, + "step": 21200 + }, + { + "epoch": 1.57, + "learning_rate": 7.915641229118744e-05, + "loss": 10.1245, + "step": 21250 + }, + { + "epoch": 1.57, + "learning_rate": 7.910618677863608e-05, + "loss": 11.2896, + "step": 21300 + }, + { + "epoch": 1.57, + "learning_rate": 7.905596126608472e-05, + "loss": 11.3328, + "step": 21350 + }, + { + "epoch": 1.58, + "learning_rate": 7.900573575353337e-05, + "loss": 10.0718, + "step": 21400 + }, + { + "epoch": 1.58, + "learning_rate": 7.895551024098201e-05, + "loss": 10.8954, + "step": 21450 + }, + { + "epoch": 1.59, + "learning_rate": 7.890528472843066e-05, + "loss": 10.2921, + "step": 21500 + }, + { + "epoch": 1.59, + "learning_rate": 7.88550592158793e-05, + "loss": 9.4609, + "step": 21550 + }, + { + "epoch": 1.59, + "learning_rate": 7.880483370332796e-05, + "loss": 11.4751, + "step": 21600 + }, + { + "epoch": 1.6, + "learning_rate": 7.875460819077659e-05, + "loss": 10.1189, + "step": 21650 + }, + { + "epoch": 1.6, + "learning_rate": 7.870438267822523e-05, + "loss": 11.6478, + "step": 21700 + }, + { + "epoch": 1.6, + "learning_rate": 7.865415716567388e-05, + "loss": 11.2943, + "step": 21750 + }, + { + "epoch": 1.61, + "learning_rate": 7.860393165312252e-05, + "loss": 11.5788, + "step": 21800 + }, + { + "epoch": 1.61, + "learning_rate": 7.855370614057116e-05, + "loss": 10.638, + "step": 21850 + }, + { + 
"epoch": 1.62, + "learning_rate": 7.850348062801981e-05, + "loss": 9.2895, + "step": 21900 + }, + { + "epoch": 1.62, + "learning_rate": 7.845325511546847e-05, + "loss": 11.4984, + "step": 21950 + }, + { + "epoch": 1.62, + "learning_rate": 7.84030296029171e-05, + "loss": 10.3685, + "step": 22000 + }, + { + "epoch": 1.63, + "learning_rate": 7.835280409036575e-05, + "loss": 10.0115, + "step": 22050 + }, + { + "epoch": 1.63, + "learning_rate": 7.830257857781439e-05, + "loss": 10.2941, + "step": 22100 + }, + { + "epoch": 1.63, + "learning_rate": 7.825235306526304e-05, + "loss": 10.8751, + "step": 22150 + }, + { + "epoch": 1.64, + "learning_rate": 7.820212755271167e-05, + "loss": 10.7477, + "step": 22200 + }, + { + "epoch": 1.64, + "learning_rate": 7.815190204016032e-05, + "loss": 12.2573, + "step": 22250 + }, + { + "epoch": 1.64, + "learning_rate": 7.810167652760897e-05, + "loss": 10.1055, + "step": 22300 + }, + { + "epoch": 1.65, + "learning_rate": 7.80514510150576e-05, + "loss": 10.7913, + "step": 22350 + }, + { + "epoch": 1.65, + "learning_rate": 7.800122550250626e-05, + "loss": 9.4701, + "step": 22400 + }, + { + "epoch": 1.66, + "learning_rate": 7.79509999899549e-05, + "loss": 9.9434, + "step": 22450 + }, + { + "epoch": 1.66, + "learning_rate": 7.790077447740355e-05, + "loss": 10.9016, + "step": 22500 + }, + { + "epoch": 1.66, + "learning_rate": 7.785054896485218e-05, + "loss": 10.1733, + "step": 22550 + }, + { + "epoch": 1.67, + "learning_rate": 7.780032345230084e-05, + "loss": 11.0693, + "step": 22600 + }, + { + "epoch": 1.67, + "learning_rate": 7.775009793974948e-05, + "loss": 10.4538, + "step": 22650 + }, + { + "epoch": 1.67, + "learning_rate": 7.769987242719813e-05, + "loss": 10.5127, + "step": 22700 + }, + { + "epoch": 1.68, + "learning_rate": 7.764964691464677e-05, + "loss": 10.1074, + "step": 22750 + }, + { + "epoch": 1.68, + "learning_rate": 7.75994214020954e-05, + "loss": 11.2803, + "step": 22800 + }, + { + "epoch": 1.69, + "learning_rate": 
7.754919588954406e-05, + "loss": 10.9954, + "step": 22850 + }, + { + "epoch": 1.69, + "learning_rate": 7.749897037699269e-05, + "loss": 10.1006, + "step": 22900 + }, + { + "epoch": 1.69, + "learning_rate": 7.744874486444135e-05, + "loss": 10.9978, + "step": 22950 + }, + { + "epoch": 1.7, + "learning_rate": 7.739851935188999e-05, + "loss": 10.5885, + "step": 23000 + }, + { + "epoch": 1.7, + "learning_rate": 7.734829383933864e-05, + "loss": 10.5676, + "step": 23050 + }, + { + "epoch": 1.7, + "learning_rate": 7.729806832678728e-05, + "loss": 11.3204, + "step": 23100 + }, + { + "epoch": 1.71, + "learning_rate": 7.724784281423592e-05, + "loss": 10.5388, + "step": 23150 + }, + { + "epoch": 1.71, + "learning_rate": 7.719761730168457e-05, + "loss": 10.7915, + "step": 23200 + }, + { + "epoch": 1.71, + "learning_rate": 7.714739178913321e-05, + "loss": 11.9486, + "step": 23250 + }, + { + "epoch": 1.72, + "learning_rate": 7.709716627658186e-05, + "loss": 11.6693, + "step": 23300 + }, + { + "epoch": 1.72, + "learning_rate": 7.70469407640305e-05, + "loss": 9.2664, + "step": 23350 + }, + { + "epoch": 1.73, + "learning_rate": 7.699671525147914e-05, + "loss": 12.1429, + "step": 23400 + }, + { + "epoch": 1.73, + "learning_rate": 7.694648973892779e-05, + "loss": 10.1155, + "step": 23450 + }, + { + "epoch": 1.73, + "learning_rate": 7.689626422637643e-05, + "loss": 10.1562, + "step": 23500 + }, + { + "epoch": 1.74, + "learning_rate": 7.684603871382508e-05, + "loss": 11.3484, + "step": 23550 + }, + { + "epoch": 1.74, + "learning_rate": 7.679581320127372e-05, + "loss": 9.5912, + "step": 23600 + }, + { + "epoch": 1.74, + "learning_rate": 7.674558768872236e-05, + "loss": 11.1067, + "step": 23650 + }, + { + "epoch": 1.75, + "learning_rate": 7.669536217617101e-05, + "loss": 11.7182, + "step": 23700 + }, + { + "epoch": 1.75, + "learning_rate": 7.664513666361965e-05, + "loss": 10.1444, + "step": 23750 + }, + { + "epoch": 1.76, + "learning_rate": 7.659491115106831e-05, + "loss": 11.2671, + 
"step": 23800 + }, + { + "epoch": 1.76, + "learning_rate": 7.654468563851694e-05, + "loss": 10.9027, + "step": 23850 + }, + { + "epoch": 1.76, + "learning_rate": 7.64944601259656e-05, + "loss": 10.9078, + "step": 23900 + }, + { + "epoch": 1.77, + "learning_rate": 7.644423461341423e-05, + "loss": 10.5441, + "step": 23950 + }, + { + "epoch": 1.77, + "learning_rate": 7.639400910086287e-05, + "loss": 9.8617, + "step": 24000 + }, + { + "epoch": 1.77, + "learning_rate": 7.634378358831153e-05, + "loss": 10.8022, + "step": 24050 + }, + { + "epoch": 1.78, + "learning_rate": 7.629355807576016e-05, + "loss": 10.3082, + "step": 24100 + }, + { + "epoch": 1.78, + "learning_rate": 7.624333256320882e-05, + "loss": 9.8398, + "step": 24150 + }, + { + "epoch": 1.79, + "learning_rate": 7.619310705065745e-05, + "loss": 10.3631, + "step": 24200 + }, + { + "epoch": 1.79, + "learning_rate": 7.61428815381061e-05, + "loss": 10.6078, + "step": 24250 + }, + { + "epoch": 1.79, + "learning_rate": 7.609265602555474e-05, + "loss": 11.366, + "step": 24300 + }, + { + "epoch": 1.8, + "learning_rate": 7.60424305130034e-05, + "loss": 12.1154, + "step": 24350 + }, + { + "epoch": 1.8, + "learning_rate": 7.599220500045204e-05, + "loss": 11.3429, + "step": 24400 + }, + { + "epoch": 1.8, + "learning_rate": 7.594197948790068e-05, + "loss": 9.135, + "step": 24450 + }, + { + "epoch": 1.81, + "learning_rate": 7.589175397534933e-05, + "loss": 10.3796, + "step": 24500 + }, + { + "epoch": 1.81, + "learning_rate": 7.584152846279796e-05, + "loss": 10.6452, + "step": 24550 + }, + { + "epoch": 1.81, + "learning_rate": 7.579130295024661e-05, + "loss": 9.6237, + "step": 24600 + }, + { + "epoch": 1.82, + "learning_rate": 7.574107743769525e-05, + "loss": 10.7158, + "step": 24650 + }, + { + "epoch": 1.82, + "learning_rate": 7.56908519251439e-05, + "loss": 9.8296, + "step": 24700 + }, + { + "epoch": 1.83, + "learning_rate": 7.564062641259255e-05, + "loss": 10.1654, + "step": 24750 + }, + { + "epoch": 1.83, + 
"learning_rate": 7.559040090004119e-05, + "loss": 10.395, + "step": 24800 + }, + { + "epoch": 1.83, + "learning_rate": 7.554017538748984e-05, + "loss": 10.3067, + "step": 24850 + }, + { + "epoch": 1.84, + "learning_rate": 7.548994987493848e-05, + "loss": 10.7243, + "step": 24900 + }, + { + "epoch": 1.84, + "learning_rate": 7.543972436238712e-05, + "loss": 10.4022, + "step": 24950 + }, + { + "epoch": 1.84, + "learning_rate": 7.538949884983577e-05, + "loss": 10.5045, + "step": 25000 + }, + { + "epoch": 1.85, + "learning_rate": 7.533927333728441e-05, + "loss": 11.2205, + "step": 25050 + }, + { + "epoch": 1.85, + "learning_rate": 7.528904782473306e-05, + "loss": 10.5375, + "step": 25100 + }, + { + "epoch": 1.86, + "learning_rate": 7.52388223121817e-05, + "loss": 10.4876, + "step": 25150 + }, + { + "epoch": 1.86, + "learning_rate": 7.518859679963034e-05, + "loss": 9.2096, + "step": 25200 + }, + { + "epoch": 1.86, + "learning_rate": 7.513837128707899e-05, + "loss": 10.0442, + "step": 25250 + }, + { + "epoch": 1.87, + "learning_rate": 7.508814577452763e-05, + "loss": 9.8174, + "step": 25300 + }, + { + "epoch": 1.87, + "learning_rate": 7.503792026197628e-05, + "loss": 10.8789, + "step": 25350 + }, + { + "epoch": 1.87, + "learning_rate": 7.498769474942492e-05, + "loss": 9.8789, + "step": 25400 + }, + { + "epoch": 1.88, + "learning_rate": 7.493746923687356e-05, + "loss": 11.1431, + "step": 25450 + }, + { + "epoch": 1.88, + "learning_rate": 7.488724372432221e-05, + "loss": 10.4659, + "step": 25500 + }, + { + "epoch": 1.88, + "learning_rate": 7.483701821177087e-05, + "loss": 10.7342, + "step": 25550 + }, + { + "epoch": 1.89, + "learning_rate": 7.47867926992195e-05, + "loss": 10.7841, + "step": 25600 + }, + { + "epoch": 1.89, + "learning_rate": 7.473656718666814e-05, + "loss": 9.6162, + "step": 25650 + }, + { + "epoch": 1.9, + "learning_rate": 7.468634167411678e-05, + "loss": 10.3568, + "step": 25700 + }, + { + "epoch": 1.9, + "learning_rate": 7.463611616156543e-05, + "loss": 
9.6701, + "step": 25750 + }, + { + "epoch": 1.9, + "learning_rate": 7.458589064901407e-05, + "loss": 9.4003, + "step": 25800 + }, + { + "epoch": 1.91, + "learning_rate": 7.453566513646272e-05, + "loss": 9.6621, + "step": 25850 + }, + { + "epoch": 1.91, + "learning_rate": 7.448543962391137e-05, + "loss": 10.1086, + "step": 25900 + }, + { + "epoch": 1.91, + "learning_rate": 7.443521411136e-05, + "loss": 11.5655, + "step": 25950 + }, + { + "epoch": 1.92, + "learning_rate": 7.438498859880866e-05, + "loss": 8.9418, + "step": 26000 + }, + { + "epoch": 1.92, + "learning_rate": 7.433476308625729e-05, + "loss": 9.2415, + "step": 26050 + }, + { + "epoch": 1.93, + "learning_rate": 7.428453757370595e-05, + "loss": 9.4192, + "step": 26100 + }, + { + "epoch": 1.93, + "learning_rate": 7.423431206115458e-05, + "loss": 9.1755, + "step": 26150 + }, + { + "epoch": 1.93, + "learning_rate": 7.418408654860322e-05, + "loss": 9.6327, + "step": 26200 + }, + { + "epoch": 1.94, + "learning_rate": 7.413386103605188e-05, + "loss": 10.3333, + "step": 26250 + }, + { + "epoch": 1.94, + "learning_rate": 7.408363552350051e-05, + "loss": 10.298, + "step": 26300 + }, + { + "epoch": 1.94, + "learning_rate": 7.403341001094917e-05, + "loss": 10.7038, + "step": 26350 + }, + { + "epoch": 1.95, + "learning_rate": 7.39831844983978e-05, + "loss": 10.5099, + "step": 26400 + }, + { + "epoch": 1.95, + "learning_rate": 7.393295898584646e-05, + "loss": 9.8063, + "step": 26450 + }, + { + "epoch": 1.95, + "learning_rate": 7.38827334732951e-05, + "loss": 9.5784, + "step": 26500 + }, + { + "epoch": 1.96, + "learning_rate": 7.383250796074375e-05, + "loss": 10.1958, + "step": 26550 + }, + { + "epoch": 1.96, + "learning_rate": 7.378228244819239e-05, + "loss": 9.6869, + "step": 26600 + }, + { + "epoch": 1.97, + "learning_rate": 7.373205693564103e-05, + "loss": 10.3761, + "step": 26650 + }, + { + "epoch": 1.97, + "learning_rate": 7.368183142308968e-05, + "loss": 11.6806, + "step": 26700 + }, + { + "epoch": 1.97, + 
"learning_rate": 7.363160591053832e-05, + "loss": 10.3183, + "step": 26750 + }, + { + "epoch": 1.98, + "learning_rate": 7.358138039798697e-05, + "loss": 11.041, + "step": 26800 + }, + { + "epoch": 1.98, + "learning_rate": 7.353115488543561e-05, + "loss": 9.6997, + "step": 26850 + }, + { + "epoch": 1.98, + "learning_rate": 7.348092937288425e-05, + "loss": 9.6029, + "step": 26900 + }, + { + "epoch": 1.99, + "learning_rate": 7.34307038603329e-05, + "loss": 10.3322, + "step": 26950 + }, + { + "epoch": 1.99, + "learning_rate": 7.338047834778154e-05, + "loss": 9.9009, + "step": 27000 + }, + { + "epoch": 2.0, + "learning_rate": 7.333025283523019e-05, + "loss": 10.4815, + "step": 27050 + }, + { + "epoch": 2.0, + "learning_rate": 7.328002732267883e-05, + "loss": 11.7049, + "step": 27100 + }, + { + "epoch": 2.0, + "learning_rate": 7.322980181012748e-05, + "loss": 10.7831, + "step": 27150 + }, + { + "epoch": 2.01, + "learning_rate": 7.317957629757612e-05, + "loss": 8.735, + "step": 27200 + }, + { + "epoch": 2.01, + "learning_rate": 7.312935078502476e-05, + "loss": 9.4056, + "step": 27250 + }, + { + "epoch": 2.01, + "learning_rate": 7.307912527247342e-05, + "loss": 10.7689, + "step": 27300 + }, + { + "epoch": 2.02, + "learning_rate": 7.302889975992205e-05, + "loss": 9.5266, + "step": 27350 + }, + { + "epoch": 2.02, + "learning_rate": 7.29786742473707e-05, + "loss": 8.2467, + "step": 27400 + }, + { + "epoch": 2.02, + "learning_rate": 7.292844873481934e-05, + "loss": 8.6572, + "step": 27450 + }, + { + "epoch": 2.03, + "learning_rate": 7.287822322226798e-05, + "loss": 8.4693, + "step": 27500 + }, + { + "epoch": 2.03, + "learning_rate": 7.282799770971663e-05, + "loss": 10.4867, + "step": 27550 + }, + { + "epoch": 2.04, + "learning_rate": 7.277777219716527e-05, + "loss": 8.9364, + "step": 27600 + }, + { + "epoch": 2.04, + "learning_rate": 7.272754668461393e-05, + "loss": 10.0109, + "step": 27650 + }, + { + "epoch": 2.04, + "learning_rate": 7.267732117206256e-05, + "loss": 9.5535, + 
"step": 27700 + }, + { + "epoch": 2.05, + "learning_rate": 7.262709565951122e-05, + "loss": 9.3029, + "step": 27750 + }, + { + "epoch": 2.05, + "learning_rate": 7.257687014695985e-05, + "loss": 9.854, + "step": 27800 + }, + { + "epoch": 2.05, + "learning_rate": 7.25266446344085e-05, + "loss": 9.5327, + "step": 27850 + }, + { + "epoch": 2.06, + "learning_rate": 7.247641912185714e-05, + "loss": 9.8255, + "step": 27900 + }, + { + "epoch": 2.06, + "learning_rate": 7.242619360930578e-05, + "loss": 9.9737, + "step": 27950 + }, + { + "epoch": 2.07, + "learning_rate": 7.237596809675444e-05, + "loss": 9.0471, + "step": 28000 + }, + { + "epoch": 2.07, + "learning_rate": 7.232574258420307e-05, + "loss": 10.0566, + "step": 28050 + }, + { + "epoch": 2.07, + "learning_rate": 7.227551707165173e-05, + "loss": 9.4781, + "step": 28100 + }, + { + "epoch": 2.08, + "learning_rate": 7.222529155910036e-05, + "loss": 8.7599, + "step": 28150 + }, + { + "epoch": 2.08, + "learning_rate": 7.217506604654901e-05, + "loss": 8.7605, + "step": 28200 + }, + { + "epoch": 2.08, + "learning_rate": 7.212484053399764e-05, + "loss": 10.061, + "step": 28250 + }, + { + "epoch": 2.09, + "learning_rate": 7.20746150214463e-05, + "loss": 9.6124, + "step": 28300 + }, + { + "epoch": 2.09, + "learning_rate": 7.202438950889495e-05, + "loss": 10.4776, + "step": 28350 + }, + { + "epoch": 2.09, + "learning_rate": 7.197416399634359e-05, + "loss": 9.2169, + "step": 28400 + }, + { + "epoch": 2.1, + "learning_rate": 7.192393848379223e-05, + "loss": 9.3654, + "step": 28450 + }, + { + "epoch": 2.1, + "learning_rate": 7.187371297124086e-05, + "loss": 9.4445, + "step": 28500 + }, + { + "epoch": 2.11, + "learning_rate": 7.182348745868952e-05, + "loss": 8.3614, + "step": 28550 + }, + { + "epoch": 2.11, + "learning_rate": 7.177326194613815e-05, + "loss": 9.1661, + "step": 28600 + }, + { + "epoch": 2.11, + "learning_rate": 7.172303643358681e-05, + "loss": 9.4976, + "step": 28650 + }, + { + "epoch": 2.12, + "learning_rate": 
7.167281092103545e-05, + "loss": 9.125, + "step": 28700 + }, + { + "epoch": 2.12, + "learning_rate": 7.16225854084841e-05, + "loss": 8.9051, + "step": 28750 + }, + { + "epoch": 2.12, + "learning_rate": 7.157235989593274e-05, + "loss": 8.9753, + "step": 28800 + }, + { + "epoch": 2.13, + "learning_rate": 7.152213438338139e-05, + "loss": 9.133, + "step": 28850 + }, + { + "epoch": 2.13, + "learning_rate": 7.147190887083003e-05, + "loss": 9.9677, + "step": 28900 + }, + { + "epoch": 2.14, + "learning_rate": 7.142168335827867e-05, + "loss": 8.725, + "step": 28950 + }, + { + "epoch": 2.14, + "learning_rate": 7.137145784572732e-05, + "loss": 8.831, + "step": 29000 + }, + { + "epoch": 2.14, + "learning_rate": 7.132123233317596e-05, + "loss": 7.8207, + "step": 29050 + }, + { + "epoch": 2.15, + "learning_rate": 7.127100682062461e-05, + "loss": 9.3707, + "step": 29100 + }, + { + "epoch": 2.15, + "learning_rate": 7.122078130807325e-05, + "loss": 10.4259, + "step": 29150 + }, + { + "epoch": 2.15, + "learning_rate": 7.11705557955219e-05, + "loss": 8.1836, + "step": 29200 + }, + { + "epoch": 2.16, + "learning_rate": 7.112033028297054e-05, + "loss": 9.0874, + "step": 29250 + }, + { + "epoch": 2.16, + "learning_rate": 7.107010477041918e-05, + "loss": 9.5957, + "step": 29300 + }, + { + "epoch": 2.16, + "learning_rate": 7.101987925786783e-05, + "loss": 8.7545, + "step": 29350 + }, + { + "epoch": 2.17, + "learning_rate": 7.096965374531647e-05, + "loss": 8.4478, + "step": 29400 + }, + { + "epoch": 2.17, + "learning_rate": 7.091942823276512e-05, + "loss": 8.601, + "step": 29450 + }, + { + "epoch": 2.18, + "learning_rate": 7.086920272021377e-05, + "loss": 9.6172, + "step": 29500 + }, + { + "epoch": 2.18, + "learning_rate": 7.08189772076624e-05, + "loss": 9.0805, + "step": 29550 + }, + { + "epoch": 2.18, + "learning_rate": 7.076875169511106e-05, + "loss": 9.6039, + "step": 29600 + }, + { + "epoch": 2.19, + "learning_rate": 7.071852618255969e-05, + "loss": 9.3622, + "step": 29650 + }, + { + 
"epoch": 2.19, + "learning_rate": 7.066830067000834e-05, + "loss": 8.8765, + "step": 29700 + }, + { + "epoch": 2.19, + "learning_rate": 7.061807515745699e-05, + "loss": 8.992, + "step": 29750 + }, + { + "epoch": 2.2, + "learning_rate": 7.056784964490562e-05, + "loss": 10.3564, + "step": 29800 + }, + { + "epoch": 2.2, + "learning_rate": 7.051762413235428e-05, + "loss": 8.8092, + "step": 29850 + }, + { + "epoch": 2.21, + "learning_rate": 7.046739861980291e-05, + "loss": 9.8373, + "step": 29900 + }, + { + "epoch": 2.21, + "learning_rate": 7.041717310725157e-05, + "loss": 8.004, + "step": 29950 + }, + { + "epoch": 2.21, + "learning_rate": 7.03669475947002e-05, + "loss": 9.4461, + "step": 30000 + }, + { + "epoch": 2.22, + "learning_rate": 7.031672208214886e-05, + "loss": 8.4964, + "step": 30050 + }, + { + "epoch": 2.22, + "learning_rate": 7.02664965695975e-05, + "loss": 10.3181, + "step": 30100 + }, + { + "epoch": 2.22, + "learning_rate": 7.021627105704615e-05, + "loss": 8.6637, + "step": 30150 + }, + { + "epoch": 2.23, + "learning_rate": 7.016604554449479e-05, + "loss": 10.1703, + "step": 30200 + }, + { + "epoch": 2.23, + "learning_rate": 7.011582003194342e-05, + "loss": 9.2846, + "step": 30250 + }, + { + "epoch": 2.24, + "learning_rate": 7.006559451939208e-05, + "loss": 8.5913, + "step": 30300 + }, + { + "epoch": 2.24, + "learning_rate": 7.001536900684071e-05, + "loss": 9.1308, + "step": 30350 + }, + { + "epoch": 2.24, + "learning_rate": 6.996514349428937e-05, + "loss": 11.2229, + "step": 30400 + }, + { + "epoch": 2.25, + "learning_rate": 6.991491798173801e-05, + "loss": 8.5923, + "step": 30450 + }, + { + "epoch": 2.25, + "learning_rate": 6.986469246918665e-05, + "loss": 9.9826, + "step": 30500 + }, + { + "epoch": 2.25, + "learning_rate": 6.98144669566353e-05, + "loss": 8.4765, + "step": 30550 + }, + { + "epoch": 2.26, + "learning_rate": 6.976424144408394e-05, + "loss": 8.7624, + "step": 30600 + }, + { + "epoch": 2.26, + "learning_rate": 6.971401593153259e-05, + 
"loss": 9.238, + "step": 30650 + }, + { + "epoch": 2.26, + "learning_rate": 6.966379041898123e-05, + "loss": 8.4976, + "step": 30700 + }, + { + "epoch": 2.27, + "learning_rate": 6.961356490642987e-05, + "loss": 9.1886, + "step": 30750 + }, + { + "epoch": 2.27, + "learning_rate": 6.956333939387852e-05, + "loss": 8.4443, + "step": 30800 + }, + { + "epoch": 2.28, + "learning_rate": 6.951311388132716e-05, + "loss": 8.3648, + "step": 30850 + }, + { + "epoch": 2.28, + "learning_rate": 6.94628883687758e-05, + "loss": 9.2509, + "step": 30900 + }, + { + "epoch": 2.28, + "learning_rate": 6.941266285622445e-05, + "loss": 8.3765, + "step": 30950 + }, + { + "epoch": 2.29, + "learning_rate": 6.93624373436731e-05, + "loss": 9.6616, + "step": 31000 + }, + { + "epoch": 2.29, + "learning_rate": 6.931221183112174e-05, + "loss": 9.658, + "step": 31050 + }, + { + "epoch": 2.29, + "learning_rate": 6.926198631857038e-05, + "loss": 8.7527, + "step": 31100 + }, + { + "epoch": 2.3, + "learning_rate": 6.921176080601903e-05, + "loss": 8.7148, + "step": 31150 + }, + { + "epoch": 2.3, + "learning_rate": 6.916153529346767e-05, + "loss": 8.5962, + "step": 31200 + }, + { + "epoch": 2.31, + "learning_rate": 6.911130978091633e-05, + "loss": 9.2625, + "step": 31250 + }, + { + "epoch": 2.31, + "learning_rate": 6.906108426836496e-05, + "loss": 8.8352, + "step": 31300 + }, + { + "epoch": 2.31, + "learning_rate": 6.90108587558136e-05, + "loss": 7.3991, + "step": 31350 + }, + { + "epoch": 2.32, + "learning_rate": 6.896063324326225e-05, + "loss": 9.9391, + "step": 31400 + }, + { + "epoch": 2.32, + "learning_rate": 6.891040773071089e-05, + "loss": 8.9575, + "step": 31450 + }, + { + "epoch": 2.32, + "learning_rate": 6.886018221815954e-05, + "loss": 7.9103, + "step": 31500 + }, + { + "epoch": 2.33, + "learning_rate": 6.880995670560818e-05, + "loss": 8.5276, + "step": 31550 + }, + { + "epoch": 2.33, + "learning_rate": 6.875973119305684e-05, + "loss": 8.5427, + "step": 31600 + }, + { + "epoch": 2.33, + 
"learning_rate": 6.870950568050547e-05, + "loss": 8.4672, + "step": 31650 + }, + { + "epoch": 2.34, + "learning_rate": 6.865928016795412e-05, + "loss": 8.9638, + "step": 31700 + }, + { + "epoch": 2.34, + "learning_rate": 6.860905465540276e-05, + "loss": 8.3136, + "step": 31750 + }, + { + "epoch": 2.35, + "learning_rate": 6.855882914285141e-05, + "loss": 8.8076, + "step": 31800 + }, + { + "epoch": 2.35, + "learning_rate": 6.850860363030004e-05, + "loss": 8.6041, + "step": 31850 + }, + { + "epoch": 2.35, + "learning_rate": 6.845837811774869e-05, + "loss": 9.1751, + "step": 31900 + }, + { + "epoch": 2.36, + "learning_rate": 6.840815260519735e-05, + "loss": 8.5955, + "step": 31950 + }, + { + "epoch": 2.36, + "learning_rate": 6.835792709264598e-05, + "loss": 9.0927, + "step": 32000 + }, + { + "epoch": 2.36, + "learning_rate": 6.830770158009463e-05, + "loss": 7.9647, + "step": 32050 + }, + { + "epoch": 2.37, + "learning_rate": 6.825747606754326e-05, + "loss": 10.2647, + "step": 32100 + }, + { + "epoch": 2.37, + "learning_rate": 6.820725055499192e-05, + "loss": 8.3442, + "step": 32150 + }, + { + "epoch": 2.38, + "learning_rate": 6.815702504244057e-05, + "loss": 9.2019, + "step": 32200 + }, + { + "epoch": 2.38, + "learning_rate": 6.810679952988921e-05, + "loss": 8.345, + "step": 32250 + }, + { + "epoch": 2.38, + "learning_rate": 6.805657401733785e-05, + "loss": 9.1835, + "step": 32300 + }, + { + "epoch": 2.39, + "learning_rate": 6.80063485047865e-05, + "loss": 9.1846, + "step": 32350 + }, + { + "epoch": 2.39, + "learning_rate": 6.795612299223514e-05, + "loss": 9.0015, + "step": 32400 + }, + { + "epoch": 2.39, + "learning_rate": 6.790589747968379e-05, + "loss": 8.2404, + "step": 32450 + }, + { + "epoch": 2.4, + "learning_rate": 6.785567196713243e-05, + "loss": 8.8715, + "step": 32500 + }, + { + "epoch": 2.4, + "learning_rate": 6.780544645458107e-05, + "loss": 8.817, + "step": 32550 + }, + { + "epoch": 2.4, + "learning_rate": 6.775522094202972e-05, + "loss": 9.2154, + 
"step": 32600 + }, + { + "epoch": 2.41, + "learning_rate": 6.770499542947836e-05, + "loss": 9.1914, + "step": 32650 + }, + { + "epoch": 2.41, + "learning_rate": 6.7654769916927e-05, + "loss": 9.2804, + "step": 32700 + }, + { + "epoch": 2.42, + "learning_rate": 6.760454440437565e-05, + "loss": 9.177, + "step": 32750 + }, + { + "epoch": 2.42, + "learning_rate": 6.75543188918243e-05, + "loss": 8.8259, + "step": 32800 + }, + { + "epoch": 2.42, + "learning_rate": 6.750409337927294e-05, + "loss": 8.6121, + "step": 32850 + }, + { + "epoch": 2.43, + "learning_rate": 6.745386786672158e-05, + "loss": 8.644, + "step": 32900 + }, + { + "epoch": 2.43, + "learning_rate": 6.740364235417023e-05, + "loss": 8.5743, + "step": 32950 + }, + { + "epoch": 2.43, + "learning_rate": 6.735341684161888e-05, + "loss": 8.7636, + "step": 33000 + }, + { + "epoch": 2.44, + "learning_rate": 6.730319132906751e-05, + "loss": 8.3064, + "step": 33050 + }, + { + "epoch": 2.44, + "learning_rate": 6.725296581651616e-05, + "loss": 8.8806, + "step": 33100 + }, + { + "epoch": 2.45, + "learning_rate": 6.72027403039648e-05, + "loss": 8.8212, + "step": 33150 + }, + { + "epoch": 2.45, + "learning_rate": 6.715251479141345e-05, + "loss": 9.5261, + "step": 33200 + }, + { + "epoch": 2.45, + "learning_rate": 6.710228927886209e-05, + "loss": 9.0764, + "step": 33250 + }, + { + "epoch": 2.46, + "learning_rate": 6.705206376631073e-05, + "loss": 7.399, + "step": 33300 + }, + { + "epoch": 2.46, + "learning_rate": 6.700183825375939e-05, + "loss": 9.4119, + "step": 33350 + }, + { + "epoch": 2.46, + "learning_rate": 6.695161274120802e-05, + "loss": 8.4576, + "step": 33400 + }, + { + "epoch": 2.47, + "learning_rate": 6.690138722865668e-05, + "loss": 8.024, + "step": 33450 + }, + { + "epoch": 2.47, + "learning_rate": 6.685116171610531e-05, + "loss": 9.1605, + "step": 33500 + }, + { + "epoch": 2.47, + "learning_rate": 6.680093620355397e-05, + "loss": 8.3661, + "step": 33550 + }, + { + "epoch": 2.48, + "learning_rate": 
6.67507106910026e-05, + "loss": 8.4145, + "step": 33600 + }, + { + "epoch": 2.48, + "learning_rate": 6.670048517845124e-05, + "loss": 7.824, + "step": 33650 + }, + { + "epoch": 2.49, + "learning_rate": 6.66502596658999e-05, + "loss": 9.129, + "step": 33700 + }, + { + "epoch": 2.49, + "learning_rate": 6.660003415334853e-05, + "loss": 9.0876, + "step": 33750 + }, + { + "epoch": 2.49, + "learning_rate": 6.654980864079719e-05, + "loss": 8.6961, + "step": 33800 + }, + { + "epoch": 2.5, + "learning_rate": 6.649958312824582e-05, + "loss": 8.1584, + "step": 33850 + }, + { + "epoch": 2.5, + "learning_rate": 6.644935761569448e-05, + "loss": 8.6587, + "step": 33900 + }, + { + "epoch": 2.5, + "learning_rate": 6.639913210314311e-05, + "loss": 8.1059, + "step": 33950 + }, + { + "epoch": 2.51, + "learning_rate": 6.634890659059176e-05, + "loss": 9.2588, + "step": 34000 + }, + { + "epoch": 2.51, + "learning_rate": 6.629868107804041e-05, + "loss": 8.6443, + "step": 34050 + }, + { + "epoch": 2.52, + "learning_rate": 6.624845556548905e-05, + "loss": 8.8006, + "step": 34100 + }, + { + "epoch": 2.52, + "learning_rate": 6.61982300529377e-05, + "loss": 9.2288, + "step": 34150 + }, + { + "epoch": 2.52, + "learning_rate": 6.614800454038633e-05, + "loss": 9.0328, + "step": 34200 + }, + { + "epoch": 2.53, + "learning_rate": 6.609777902783499e-05, + "loss": 7.8269, + "step": 34250 + }, + { + "epoch": 2.53, + "learning_rate": 6.604755351528362e-05, + "loss": 8.5883, + "step": 34300 + }, + { + "epoch": 2.53, + "learning_rate": 6.599732800273227e-05, + "loss": 9.9388, + "step": 34350 + }, + { + "epoch": 2.54, + "learning_rate": 6.594710249018092e-05, + "loss": 8.6776, + "step": 34400 + }, + { + "epoch": 2.54, + "learning_rate": 6.589687697762956e-05, + "loss": 7.2287, + "step": 34450 + }, + { + "epoch": 2.54, + "learning_rate": 6.58466514650782e-05, + "loss": 7.7042, + "step": 34500 + }, + { + "epoch": 2.55, + "learning_rate": 6.579642595252685e-05, + "loss": 9.0004, + "step": 34550 + }, + { + 
"epoch": 2.55, + "learning_rate": 6.57462004399755e-05, + "loss": 9.3279, + "step": 34600 + }, + { + "epoch": 2.56, + "learning_rate": 6.569597492742414e-05, + "loss": 8.9144, + "step": 34650 + }, + { + "epoch": 2.56, + "learning_rate": 6.564574941487278e-05, + "loss": 9.3319, + "step": 34700 + }, + { + "epoch": 2.56, + "learning_rate": 6.559552390232143e-05, + "loss": 9.4986, + "step": 34750 + }, + { + "epoch": 2.57, + "learning_rate": 6.554529838977007e-05, + "loss": 9.002, + "step": 34800 + }, + { + "epoch": 2.57, + "learning_rate": 6.549507287721871e-05, + "loss": 8.6061, + "step": 34850 + }, + { + "epoch": 2.57, + "learning_rate": 6.544484736466736e-05, + "loss": 7.4598, + "step": 34900 + }, + { + "epoch": 2.58, + "learning_rate": 6.5394621852116e-05, + "loss": 8.6618, + "step": 34950 + }, + { + "epoch": 2.58, + "learning_rate": 6.534439633956465e-05, + "loss": 9.0226, + "step": 35000 + }, + { + "epoch": 2.59, + "learning_rate": 6.529417082701329e-05, + "loss": 7.9738, + "step": 35050 + }, + { + "epoch": 2.59, + "learning_rate": 6.524394531446193e-05, + "loss": 8.7871, + "step": 35100 + }, + { + "epoch": 2.59, + "learning_rate": 6.519371980191058e-05, + "loss": 8.8744, + "step": 35150 + }, + { + "epoch": 2.6, + "learning_rate": 6.514349428935924e-05, + "loss": 8.3771, + "step": 35200 + }, + { + "epoch": 2.6, + "learning_rate": 6.509326877680787e-05, + "loss": 8.058, + "step": 35250 + }, + { + "epoch": 2.6, + "learning_rate": 6.504304326425652e-05, + "loss": 8.2627, + "step": 35300 + }, + { + "epoch": 2.61, + "learning_rate": 6.499281775170515e-05, + "loss": 8.1643, + "step": 35350 + }, + { + "epoch": 2.61, + "learning_rate": 6.49425922391538e-05, + "loss": 7.888, + "step": 35400 + }, + { + "epoch": 2.61, + "learning_rate": 6.489236672660246e-05, + "loss": 7.9235, + "step": 35450 + }, + { + "epoch": 2.62, + "learning_rate": 6.484214121405109e-05, + "loss": 8.1139, + "step": 35500 + }, + { + "epoch": 2.62, + "learning_rate": 6.479191570149974e-05, + "loss": 
8.7467, + "step": 35550 + }, + { + "epoch": 2.63, + "learning_rate": 6.474169018894837e-05, + "loss": 7.4693, + "step": 35600 + }, + { + "epoch": 2.63, + "learning_rate": 6.469146467639703e-05, + "loss": 8.5167, + "step": 35650 + }, + { + "epoch": 2.63, + "learning_rate": 6.464123916384566e-05, + "loss": 9.5274, + "step": 35700 + }, + { + "epoch": 2.64, + "learning_rate": 6.459101365129432e-05, + "loss": 8.9735, + "step": 35750 + }, + { + "epoch": 2.64, + "learning_rate": 6.454078813874296e-05, + "loss": 8.1756, + "step": 35800 + }, + { + "epoch": 2.64, + "learning_rate": 6.449056262619161e-05, + "loss": 7.8084, + "step": 35850 + }, + { + "epoch": 2.65, + "learning_rate": 6.444033711364025e-05, + "loss": 8.2671, + "step": 35900 + }, + { + "epoch": 2.65, + "learning_rate": 6.439011160108888e-05, + "loss": 8.6628, + "step": 35950 + }, + { + "epoch": 2.66, + "learning_rate": 6.433988608853754e-05, + "loss": 9.8654, + "step": 36000 + }, + { + "epoch": 2.66, + "learning_rate": 6.428966057598617e-05, + "loss": 9.104, + "step": 36050 + }, + { + "epoch": 2.66, + "learning_rate": 6.423943506343483e-05, + "loss": 9.4156, + "step": 36100 + }, + { + "epoch": 2.67, + "learning_rate": 6.418920955088347e-05, + "loss": 8.9803, + "step": 36150 + }, + { + "epoch": 2.67, + "learning_rate": 6.413898403833212e-05, + "loss": 8.9584, + "step": 36200 + }, + { + "epoch": 2.67, + "learning_rate": 6.408875852578076e-05, + "loss": 7.3683, + "step": 36250 + }, + { + "epoch": 2.68, + "learning_rate": 6.40385330132294e-05, + "loss": 8.3277, + "step": 36300 + }, + { + "epoch": 2.68, + "learning_rate": 6.398830750067805e-05, + "loss": 9.3236, + "step": 36350 + }, + { + "epoch": 2.68, + "learning_rate": 6.393808198812669e-05, + "loss": 8.6918, + "step": 36400 + }, + { + "epoch": 2.69, + "learning_rate": 6.388785647557534e-05, + "loss": 8.9422, + "step": 36450 + }, + { + "epoch": 2.69, + "learning_rate": 6.383763096302398e-05, + "loss": 8.8438, + "step": 36500 + }, + { + "epoch": 2.7, + 
"learning_rate": 6.378740545047263e-05, + "loss": 8.7752, + "step": 36550 + }, + { + "epoch": 2.7, + "learning_rate": 6.373717993792127e-05, + "loss": 8.6483, + "step": 36600 + }, + { + "epoch": 2.7, + "learning_rate": 6.368695442536991e-05, + "loss": 8.5753, + "step": 36650 + }, + { + "epoch": 2.71, + "learning_rate": 6.363672891281856e-05, + "loss": 8.1893, + "step": 36700 + }, + { + "epoch": 2.71, + "learning_rate": 6.35865034002672e-05, + "loss": 8.189, + "step": 36750 + }, + { + "epoch": 2.71, + "learning_rate": 6.353627788771585e-05, + "loss": 8.2979, + "step": 36800 + }, + { + "epoch": 2.72, + "learning_rate": 6.348605237516449e-05, + "loss": 8.3904, + "step": 36850 + }, + { + "epoch": 2.72, + "learning_rate": 6.343582686261313e-05, + "loss": 9.3274, + "step": 36900 + }, + { + "epoch": 2.73, + "learning_rate": 6.338560135006179e-05, + "loss": 7.7663, + "step": 36950 + }, + { + "epoch": 2.73, + "learning_rate": 6.333537583751042e-05, + "loss": 8.2105, + "step": 37000 + }, + { + "epoch": 2.73, + "learning_rate": 6.328515032495907e-05, + "loss": 8.035, + "step": 37050 + }, + { + "epoch": 2.74, + "learning_rate": 6.323492481240771e-05, + "loss": 9.5032, + "step": 37100 + }, + { + "epoch": 2.74, + "learning_rate": 6.318469929985635e-05, + "loss": 8.3856, + "step": 37150 + }, + { + "epoch": 2.74, + "learning_rate": 6.3134473787305e-05, + "loss": 8.9941, + "step": 37200 + }, + { + "epoch": 2.75, + "learning_rate": 6.308424827475364e-05, + "loss": 8.3987, + "step": 37250 + }, + { + "epoch": 2.75, + "learning_rate": 6.30340227622023e-05, + "loss": 9.1753, + "step": 37300 + }, + { + "epoch": 2.76, + "learning_rate": 6.298379724965093e-05, + "loss": 7.9557, + "step": 37350 + }, + { + "epoch": 2.76, + "learning_rate": 6.293357173709959e-05, + "loss": 7.725, + "step": 37400 + }, + { + "epoch": 2.76, + "learning_rate": 6.288334622454822e-05, + "loss": 8.0807, + "step": 37450 + }, + { + "epoch": 2.77, + "learning_rate": 6.283312071199688e-05, + "loss": 8.6492, + "step": 
37500 + }, + { + "epoch": 2.77, + "learning_rate": 6.27828951994455e-05, + "loss": 8.4716, + "step": 37550 + }, + { + "epoch": 2.77, + "learning_rate": 6.273266968689416e-05, + "loss": 8.7209, + "step": 37600 + }, + { + "epoch": 2.78, + "learning_rate": 6.268244417434281e-05, + "loss": 8.4902, + "step": 37650 + }, + { + "epoch": 2.78, + "learning_rate": 6.263221866179144e-05, + "loss": 7.9589, + "step": 37700 + }, + { + "epoch": 2.78, + "learning_rate": 6.25819931492401e-05, + "loss": 9.3285, + "step": 37750 + }, + { + "epoch": 2.79, + "learning_rate": 6.253176763668873e-05, + "loss": 9.0506, + "step": 37800 + }, + { + "epoch": 2.79, + "learning_rate": 6.248154212413738e-05, + "loss": 7.9992, + "step": 37850 + }, + { + "epoch": 2.8, + "learning_rate": 6.243131661158603e-05, + "loss": 8.029, + "step": 37900 + }, + { + "epoch": 2.8, + "learning_rate": 6.238109109903467e-05, + "loss": 8.6681, + "step": 37950 + }, + { + "epoch": 2.8, + "learning_rate": 6.233086558648332e-05, + "loss": 8.5906, + "step": 38000 + }, + { + "epoch": 2.81, + "learning_rate": 6.228064007393196e-05, + "loss": 10.4719, + "step": 38050 + }, + { + "epoch": 2.81, + "learning_rate": 6.22304145613806e-05, + "loss": 8.2759, + "step": 38100 + }, + { + "epoch": 2.81, + "learning_rate": 6.218018904882925e-05, + "loss": 8.2633, + "step": 38150 + }, + { + "epoch": 2.82, + "learning_rate": 6.212996353627789e-05, + "loss": 8.5218, + "step": 38200 + }, + { + "epoch": 2.82, + "learning_rate": 6.207973802372654e-05, + "loss": 8.0609, + "step": 38250 + }, + { + "epoch": 2.83, + "learning_rate": 6.202951251117518e-05, + "loss": 9.3672, + "step": 38300 + }, + { + "epoch": 2.83, + "learning_rate": 6.197928699862382e-05, + "loss": 10.1768, + "step": 38350 + }, + { + "epoch": 2.83, + "learning_rate": 6.192906148607247e-05, + "loss": 9.4389, + "step": 38400 + }, + { + "epoch": 2.84, + "learning_rate": 6.187883597352111e-05, + "loss": 7.6737, + "step": 38450 + }, + { + "epoch": 2.84, + "learning_rate": 
6.182861046096976e-05, + "loss": 9.2337, + "step": 38500 + }, + { + "epoch": 2.84, + "learning_rate": 6.17783849484184e-05, + "loss": 8.7846, + "step": 38550 + }, + { + "epoch": 2.85, + "learning_rate": 6.172815943586704e-05, + "loss": 7.8709, + "step": 38600 + }, + { + "epoch": 2.85, + "learning_rate": 6.167793392331569e-05, + "loss": 8.8688, + "step": 38650 + }, + { + "epoch": 2.85, + "learning_rate": 6.162770841076435e-05, + "loss": 8.4087, + "step": 38700 + }, + { + "epoch": 2.86, + "learning_rate": 6.157748289821298e-05, + "loss": 7.7129, + "step": 38750 + }, + { + "epoch": 2.86, + "learning_rate": 6.152725738566162e-05, + "loss": 9.3196, + "step": 38800 + }, + { + "epoch": 2.87, + "learning_rate": 6.147703187311027e-05, + "loss": 8.8242, + "step": 38850 + }, + { + "epoch": 2.87, + "learning_rate": 6.142680636055891e-05, + "loss": 8.4237, + "step": 38900 + }, + { + "epoch": 2.87, + "learning_rate": 6.137658084800755e-05, + "loss": 8.9383, + "step": 38950 + }, + { + "epoch": 2.88, + "learning_rate": 6.13263553354562e-05, + "loss": 8.3749, + "step": 39000 + }, + { + "epoch": 2.88, + "learning_rate": 6.127612982290485e-05, + "loss": 8.8894, + "step": 39050 + }, + { + "epoch": 2.88, + "learning_rate": 6.122590431035349e-05, + "loss": 8.2975, + "step": 39100 + }, + { + "epoch": 2.89, + "learning_rate": 6.117567879780214e-05, + "loss": 8.0517, + "step": 39150 + }, + { + "epoch": 2.89, + "learning_rate": 6.112545328525077e-05, + "loss": 8.0154, + "step": 39200 + }, + { + "epoch": 2.9, + "learning_rate": 6.107522777269943e-05, + "loss": 8.4887, + "step": 39250 + }, + { + "epoch": 2.9, + "learning_rate": 6.102500226014807e-05, + "loss": 8.7064, + "step": 39300 + }, + { + "epoch": 2.9, + "learning_rate": 6.0974776747596706e-05, + "loss": 9.7375, + "step": 39350 + }, + { + "epoch": 2.91, + "learning_rate": 6.0924551235045357e-05, + "loss": 8.8614, + "step": 39400 + }, + { + "epoch": 2.91, + "learning_rate": 6.0874325722493994e-05, + "loss": 8.302, + "step": 39450 + }, + 
{ + "epoch": 2.91, + "learning_rate": 6.0824100209942645e-05, + "loss": 7.8469, + "step": 39500 + }, + { + "epoch": 2.92, + "learning_rate": 6.077387469739129e-05, + "loss": 9.0706, + "step": 39550 + }, + { + "epoch": 2.92, + "learning_rate": 6.072364918483994e-05, + "loss": 9.1398, + "step": 39600 + }, + { + "epoch": 2.92, + "learning_rate": 6.067342367228858e-05, + "loss": 8.1838, + "step": 39650 + }, + { + "epoch": 2.93, + "learning_rate": 6.062319815973723e-05, + "loss": 9.2303, + "step": 39700 + }, + { + "epoch": 2.93, + "learning_rate": 6.0572972647185865e-05, + "loss": 8.3715, + "step": 39750 + }, + { + "epoch": 2.94, + "learning_rate": 6.0522747134634516e-05, + "loss": 8.409, + "step": 39800 + }, + { + "epoch": 2.94, + "learning_rate": 6.047252162208315e-05, + "loss": 8.6441, + "step": 39850 + }, + { + "epoch": 2.94, + "learning_rate": 6.04222961095318e-05, + "loss": 9.0975, + "step": 39900 + }, + { + "epoch": 2.95, + "learning_rate": 6.037207059698045e-05, + "loss": 8.0691, + "step": 39950 + }, + { + "epoch": 2.95, + "learning_rate": 6.0321845084429085e-05, + "loss": 8.6646, + "step": 40000 + }, + { + "epoch": 2.95, + "eval_loss": 8.163222312927246, + "eval_runtime": 957.6189, + "eval_samples_per_second": 13.678, + "eval_steps_per_second": 3.42, + "eval_wer": 0.22493805384066187, + "step": 40000 + }, + { + "epoch": 2.95, + "learning_rate": 6.0271619571877736e-05, + "loss": 8.4278, + "step": 40050 + }, + { + "epoch": 2.96, + "learning_rate": 6.022139405932637e-05, + "loss": 8.1656, + "step": 40100 + }, + { + "epoch": 2.96, + "learning_rate": 6.0171168546775024e-05, + "loss": 7.7975, + "step": 40150 + }, + { + "epoch": 2.97, + "learning_rate": 6.012094303422366e-05, + "loss": 7.5465, + "step": 40200 + }, + { + "epoch": 2.97, + "learning_rate": 6.007071752167231e-05, + "loss": 8.3986, + "step": 40250 + }, + { + "epoch": 2.97, + "learning_rate": 6.0020492009120956e-05, + "loss": 8.3762, + "step": 40300 + }, + { + "epoch": 2.98, + "learning_rate": 
5.997026649656961e-05, + "loss": 8.6175, + "step": 40350 + }, + { + "epoch": 2.98, + "learning_rate": 5.9920040984018244e-05, + "loss": 8.5622, + "step": 40400 + }, + { + "epoch": 2.98, + "learning_rate": 5.9869815471466895e-05, + "loss": 8.1824, + "step": 40450 + }, + { + "epoch": 2.99, + "learning_rate": 5.981958995891553e-05, + "loss": 7.2886, + "step": 40500 + }, + { + "epoch": 2.99, + "learning_rate": 5.9769364446364177e-05, + "loss": 8.3469, + "step": 40550 + }, + { + "epoch": 2.99, + "learning_rate": 5.971913893381282e-05, + "loss": 8.6257, + "step": 40600 + }, + { + "epoch": 3.0, + "learning_rate": 5.9668913421261465e-05, + "loss": 7.7071, + "step": 40650 + }, + { + "epoch": 3.0, + "learning_rate": 5.9618687908710116e-05, + "loss": 7.8413, + "step": 40700 + }, + { + "epoch": 3.01, + "learning_rate": 5.956846239615875e-05, + "loss": 7.6704, + "step": 40750 + }, + { + "epoch": 3.01, + "learning_rate": 5.9518236883607404e-05, + "loss": 7.3902, + "step": 40800 + }, + { + "epoch": 3.01, + "learning_rate": 5.946801137105604e-05, + "loss": 8.3296, + "step": 40850 + }, + { + "epoch": 3.02, + "learning_rate": 5.941778585850469e-05, + "loss": 7.0884, + "step": 40900 + }, + { + "epoch": 3.02, + "learning_rate": 5.9367560345953336e-05, + "loss": 7.043, + "step": 40950 + }, + { + "epoch": 3.02, + "learning_rate": 5.931733483340198e-05, + "loss": 7.5367, + "step": 41000 + }, + { + "epoch": 3.03, + "learning_rate": 5.9267109320850624e-05, + "loss": 8.3064, + "step": 41050 + }, + { + "epoch": 3.03, + "learning_rate": 5.921688380829926e-05, + "loss": 7.6769, + "step": 41100 + }, + { + "epoch": 3.04, + "learning_rate": 5.916665829574791e-05, + "loss": 9.067, + "step": 41150 + }, + { + "epoch": 3.04, + "learning_rate": 5.911643278319655e-05, + "loss": 8.3565, + "step": 41200 + }, + { + "epoch": 3.04, + "learning_rate": 5.90662072706452e-05, + "loss": 7.8335, + "step": 41250 + }, + { + "epoch": 3.05, + "learning_rate": 5.9015981758093844e-05, + "loss": 7.9617, + "step": 41300 
+ }, + { + "epoch": 3.05, + "learning_rate": 5.8965756245542495e-05, + "loss": 8.6728, + "step": 41350 + }, + { + "epoch": 3.05, + "learning_rate": 5.891553073299113e-05, + "loss": 7.9142, + "step": 41400 + }, + { + "epoch": 3.06, + "learning_rate": 5.886530522043978e-05, + "loss": 7.7702, + "step": 41450 + }, + { + "epoch": 3.06, + "learning_rate": 5.881507970788842e-05, + "loss": 8.2997, + "step": 41500 + }, + { + "epoch": 3.06, + "learning_rate": 5.876485419533707e-05, + "loss": 8.1519, + "step": 41550 + }, + { + "epoch": 3.07, + "learning_rate": 5.871462868278571e-05, + "loss": 7.3762, + "step": 41600 + }, + { + "epoch": 3.07, + "learning_rate": 5.866440317023435e-05, + "loss": 7.5129, + "step": 41650 + }, + { + "epoch": 3.08, + "learning_rate": 5.8614177657683e-05, + "loss": 8.2537, + "step": 41700 + }, + { + "epoch": 3.08, + "learning_rate": 5.856395214513164e-05, + "loss": 8.4148, + "step": 41750 + }, + { + "epoch": 3.08, + "learning_rate": 5.851372663258029e-05, + "loss": 7.1737, + "step": 41800 + }, + { + "epoch": 3.09, + "learning_rate": 5.846350112002893e-05, + "loss": 7.2628, + "step": 41850 + }, + { + "epoch": 3.09, + "learning_rate": 5.841327560747758e-05, + "loss": 7.2933, + "step": 41900 + }, + { + "epoch": 3.09, + "learning_rate": 5.836305009492622e-05, + "loss": 7.7675, + "step": 41950 + }, + { + "epoch": 3.1, + "learning_rate": 5.831282458237487e-05, + "loss": 8.2344, + "step": 42000 + }, + { + "epoch": 3.1, + "learning_rate": 5.826259906982351e-05, + "loss": 7.1329, + "step": 42050 + }, + { + "epoch": 3.11, + "learning_rate": 5.821237355727216e-05, + "loss": 7.3924, + "step": 42100 + }, + { + "epoch": 3.11, + "learning_rate": 5.81621480447208e-05, + "loss": 6.6189, + "step": 42150 + }, + { + "epoch": 3.11, + "learning_rate": 5.811192253216944e-05, + "loss": 7.3457, + "step": 42200 + }, + { + "epoch": 3.12, + "learning_rate": 5.806169701961809e-05, + "loss": 8.9924, + "step": 42250 + }, + { + "epoch": 3.12, + "learning_rate": 
5.8011471507066725e-05, + "loss": 7.6315, + "step": 42300 + }, + { + "epoch": 3.12, + "learning_rate": 5.7961245994515376e-05, + "loss": 8.4726, + "step": 42350 + }, + { + "epoch": 3.13, + "learning_rate": 5.791102048196402e-05, + "loss": 7.1755, + "step": 42400 + }, + { + "epoch": 3.13, + "learning_rate": 5.786079496941267e-05, + "loss": 7.5716, + "step": 42450 + }, + { + "epoch": 3.13, + "learning_rate": 5.781056945686131e-05, + "loss": 7.938, + "step": 42500 + }, + { + "epoch": 3.14, + "learning_rate": 5.776034394430996e-05, + "loss": 7.3833, + "step": 42550 + }, + { + "epoch": 3.14, + "learning_rate": 5.7710118431758596e-05, + "loss": 6.4276, + "step": 42600 + }, + { + "epoch": 3.15, + "learning_rate": 5.765989291920725e-05, + "loss": 6.8907, + "step": 42650 + }, + { + "epoch": 3.15, + "learning_rate": 5.7609667406655884e-05, + "loss": 7.7592, + "step": 42700 + }, + { + "epoch": 3.15, + "learning_rate": 5.755944189410453e-05, + "loss": 7.4997, + "step": 42750 + }, + { + "epoch": 3.16, + "learning_rate": 5.750921638155318e-05, + "loss": 7.2821, + "step": 42800 + }, + { + "epoch": 3.16, + "learning_rate": 5.745899086900182e-05, + "loss": 7.4861, + "step": 42850 + }, + { + "epoch": 3.16, + "learning_rate": 5.740876535645047e-05, + "loss": 7.9266, + "step": 42900 + }, + { + "epoch": 3.17, + "learning_rate": 5.7358539843899105e-05, + "loss": 7.6244, + "step": 42950 + }, + { + "epoch": 3.17, + "learning_rate": 5.7308314331347756e-05, + "loss": 7.382, + "step": 43000 + }, + { + "epoch": 3.18, + "learning_rate": 5.725808881879639e-05, + "loss": 8.1925, + "step": 43050 + }, + { + "epoch": 3.18, + "learning_rate": 5.7207863306245044e-05, + "loss": 8.3185, + "step": 43100 + }, + { + "epoch": 3.18, + "learning_rate": 5.715763779369369e-05, + "loss": 7.091, + "step": 43150 + }, + { + "epoch": 3.19, + "learning_rate": 5.710741228114234e-05, + "loss": 7.8352, + "step": 43200 + }, + { + "epoch": 3.19, + "learning_rate": 5.7057186768590976e-05, + "loss": 6.6085, + "step": 43250 
+ }, + { + "epoch": 3.19, + "learning_rate": 5.700696125603963e-05, + "loss": 7.8052, + "step": 43300 + }, + { + "epoch": 3.2, + "learning_rate": 5.6956735743488264e-05, + "loss": 8.1999, + "step": 43350 + }, + { + "epoch": 3.2, + "learning_rate": 5.690651023093691e-05, + "loss": 7.2801, + "step": 43400 + }, + { + "epoch": 3.2, + "learning_rate": 5.685628471838555e-05, + "loss": 7.6289, + "step": 43450 + }, + { + "epoch": 3.21, + "learning_rate": 5.6806059205834196e-05, + "loss": 6.8215, + "step": 43500 + }, + { + "epoch": 3.21, + "learning_rate": 5.675583369328285e-05, + "loss": 7.1678, + "step": 43550 + }, + { + "epoch": 3.22, + "learning_rate": 5.6705608180731484e-05, + "loss": 7.6612, + "step": 43600 + }, + { + "epoch": 3.22, + "learning_rate": 5.6655382668180135e-05, + "loss": 7.8899, + "step": 43650 + }, + { + "epoch": 3.22, + "learning_rate": 5.660515715562877e-05, + "loss": 7.8546, + "step": 43700 + }, + { + "epoch": 3.23, + "learning_rate": 5.655493164307742e-05, + "loss": 7.319, + "step": 43750 + }, + { + "epoch": 3.23, + "learning_rate": 5.650470613052607e-05, + "loss": 7.3317, + "step": 43800 + }, + { + "epoch": 3.23, + "learning_rate": 5.645448061797471e-05, + "loss": 7.8875, + "step": 43850 + }, + { + "epoch": 3.24, + "learning_rate": 5.6404255105423355e-05, + "loss": 7.8145, + "step": 43900 + }, + { + "epoch": 3.24, + "learning_rate": 5.635402959287199e-05, + "loss": 7.0667, + "step": 43950 + }, + { + "epoch": 3.25, + "learning_rate": 5.6303804080320643e-05, + "loss": 7.7603, + "step": 44000 + }, + { + "epoch": 3.25, + "learning_rate": 5.625357856776928e-05, + "loss": 7.6111, + "step": 44050 + }, + { + "epoch": 3.25, + "learning_rate": 5.620335305521793e-05, + "loss": 7.9858, + "step": 44100 + }, + { + "epoch": 3.26, + "learning_rate": 5.6153127542666576e-05, + "loss": 8.9896, + "step": 44150 + }, + { + "epoch": 3.26, + "learning_rate": 5.6102902030115226e-05, + "loss": 8.4081, + "step": 44200 + }, + { + "epoch": 3.26, + "learning_rate": 
5.6052676517563864e-05, + "loss": 7.4748, + "step": 44250 + }, + { + "epoch": 3.27, + "learning_rate": 5.6002451005012515e-05, + "loss": 8.2133, + "step": 44300 + }, + { + "epoch": 3.27, + "learning_rate": 5.595222549246115e-05, + "loss": 7.3073, + "step": 44350 + }, + { + "epoch": 3.28, + "learning_rate": 5.59019999799098e-05, + "loss": 7.9638, + "step": 44400 + }, + { + "epoch": 3.28, + "learning_rate": 5.585177446735844e-05, + "loss": 7.9653, + "step": 44450 + }, + { + "epoch": 3.28, + "learning_rate": 5.5801548954807084e-05, + "loss": 7.8583, + "step": 44500 + }, + { + "epoch": 3.29, + "learning_rate": 5.5751323442255735e-05, + "loss": 8.0561, + "step": 44550 + }, + { + "epoch": 3.29, + "learning_rate": 5.570109792970437e-05, + "loss": 8.1276, + "step": 44600 + }, + { + "epoch": 3.29, + "learning_rate": 5.565087241715302e-05, + "loss": 7.7357, + "step": 44650 + }, + { + "epoch": 3.3, + "learning_rate": 5.560064690460166e-05, + "loss": 7.7529, + "step": 44700 + }, + { + "epoch": 3.3, + "learning_rate": 5.555042139205031e-05, + "loss": 7.2583, + "step": 44750 + }, + { + "epoch": 3.3, + "learning_rate": 5.550019587949895e-05, + "loss": 6.4675, + "step": 44800 + }, + { + "epoch": 3.31, + "learning_rate": 5.54499703669476e-05, + "loss": 7.3658, + "step": 44850 + }, + { + "epoch": 3.31, + "learning_rate": 5.539974485439624e-05, + "loss": 8.278, + "step": 44900 + }, + { + "epoch": 3.32, + "learning_rate": 5.5349519341844894e-05, + "loss": 7.3867, + "step": 44950 + }, + { + "epoch": 3.32, + "learning_rate": 5.529929382929353e-05, + "loss": 7.4187, + "step": 45000 + }, + { + "epoch": 3.32, + "learning_rate": 5.524906831674217e-05, + "loss": 7.5281, + "step": 45050 + }, + { + "epoch": 3.33, + "learning_rate": 5.519884280419082e-05, + "loss": 7.8815, + "step": 45100 + }, + { + "epoch": 3.33, + "learning_rate": 5.514861729163946e-05, + "loss": 7.2487, + "step": 45150 + }, + { + "epoch": 3.33, + "learning_rate": 5.509839177908811e-05, + "loss": 8.3441, + "step": 45200 + }, 
+ { + "epoch": 3.34, + "learning_rate": 5.504816626653675e-05, + "loss": 7.4892, + "step": 45250 + }, + { + "epoch": 3.34, + "learning_rate": 5.49979407539854e-05, + "loss": 7.7789, + "step": 45300 + }, + { + "epoch": 3.35, + "learning_rate": 5.494771524143404e-05, + "loss": 7.3951, + "step": 45350 + }, + { + "epoch": 3.35, + "learning_rate": 5.489748972888269e-05, + "loss": 7.8756, + "step": 45400 + }, + { + "epoch": 3.35, + "learning_rate": 5.484726421633133e-05, + "loss": 7.9274, + "step": 45450 + }, + { + "epoch": 3.36, + "learning_rate": 5.479703870377998e-05, + "loss": 8.1525, + "step": 45500 + }, + { + "epoch": 3.36, + "learning_rate": 5.4746813191228616e-05, + "loss": 7.5597, + "step": 45550 + }, + { + "epoch": 3.36, + "learning_rate": 5.469658767867726e-05, + "loss": 7.8939, + "step": 45600 + }, + { + "epoch": 3.37, + "learning_rate": 5.464636216612591e-05, + "loss": 6.1451, + "step": 45650 + }, + { + "epoch": 3.37, + "learning_rate": 5.459613665357455e-05, + "loss": 7.224, + "step": 45700 + }, + { + "epoch": 3.37, + "learning_rate": 5.45459111410232e-05, + "loss": 7.2489, + "step": 45750 + }, + { + "epoch": 3.38, + "learning_rate": 5.4495685628471836e-05, + "loss": 7.4162, + "step": 45800 + }, + { + "epoch": 3.38, + "learning_rate": 5.444546011592049e-05, + "loss": 6.8503, + "step": 45850 + }, + { + "epoch": 3.39, + "learning_rate": 5.4395234603369124e-05, + "loss": 6.7087, + "step": 45900 + }, + { + "epoch": 3.39, + "learning_rate": 5.4345009090817775e-05, + "loss": 6.9697, + "step": 45950 + }, + { + "epoch": 3.39, + "learning_rate": 5.429478357826642e-05, + "loss": 7.8369, + "step": 46000 + }, + { + "epoch": 3.4, + "learning_rate": 5.424455806571507e-05, + "loss": 7.7567, + "step": 46050 + }, + { + "epoch": 3.4, + "learning_rate": 5.419433255316371e-05, + "loss": 6.6241, + "step": 46100 + }, + { + "epoch": 3.4, + "learning_rate": 5.414410704061236e-05, + "loss": 7.5218, + "step": 46150 + }, + { + "epoch": 3.41, + "learning_rate": 5.4093881528060995e-05, 
+ "loss": 7.2338, + "step": 46200 + }, + { + "epoch": 3.41, + "learning_rate": 5.404365601550964e-05, + "loss": 7.0707, + "step": 46250 + }, + { + "epoch": 3.42, + "learning_rate": 5.3993430502958283e-05, + "loss": 7.6922, + "step": 46300 + }, + { + "epoch": 3.42, + "learning_rate": 5.394320499040693e-05, + "loss": 8.6056, + "step": 46350 + }, + { + "epoch": 3.42, + "learning_rate": 5.389297947785558e-05, + "loss": 7.4641, + "step": 46400 + }, + { + "epoch": 3.43, + "learning_rate": 5.3842753965304216e-05, + "loss": 7.1716, + "step": 46450 + }, + { + "epoch": 3.43, + "learning_rate": 5.3792528452752866e-05, + "loss": 7.6382, + "step": 46500 + }, + { + "epoch": 3.43, + "learning_rate": 5.3742302940201504e-05, + "loss": 7.0739, + "step": 46550 + }, + { + "epoch": 3.44, + "learning_rate": 5.3692077427650155e-05, + "loss": 7.8667, + "step": 46600 + }, + { + "epoch": 3.44, + "learning_rate": 5.36418519150988e-05, + "loss": 7.559, + "step": 46650 + }, + { + "epoch": 3.44, + "learning_rate": 5.359162640254744e-05, + "loss": 7.6078, + "step": 46700 + }, + { + "epoch": 3.45, + "learning_rate": 5.354140088999609e-05, + "loss": 7.7994, + "step": 46750 + }, + { + "epoch": 3.45, + "learning_rate": 5.3491175377444724e-05, + "loss": 7.0418, + "step": 46800 + }, + { + "epoch": 3.46, + "learning_rate": 5.3440949864893375e-05, + "loss": 7.3261, + "step": 46850 + }, + { + "epoch": 3.46, + "learning_rate": 5.339072435234201e-05, + "loss": 7.9914, + "step": 46900 + }, + { + "epoch": 3.46, + "learning_rate": 5.334049883979066e-05, + "loss": 7.1998, + "step": 46950 + }, + { + "epoch": 3.47, + "learning_rate": 5.329027332723931e-05, + "loss": 7.3343, + "step": 47000 + }, + { + "epoch": 3.47, + "learning_rate": 5.324004781468796e-05, + "loss": 8.1604, + "step": 47050 + }, + { + "epoch": 3.47, + "learning_rate": 5.3189822302136595e-05, + "loss": 7.5405, + "step": 47100 + }, + { + "epoch": 3.48, + "learning_rate": 5.3139596789585246e-05, + "loss": 7.9409, + "step": 47150 + }, + { + "epoch": 
3.48, + "learning_rate": 5.308937127703388e-05, + "loss": 8.0573, + "step": 47200 + }, + { + "epoch": 3.49, + "learning_rate": 5.3039145764482534e-05, + "loss": 7.2927, + "step": 47250 + }, + { + "epoch": 3.49, + "learning_rate": 5.298892025193117e-05, + "loss": 6.9476, + "step": 47300 + }, + { + "epoch": 3.49, + "learning_rate": 5.2938694739379815e-05, + "loss": 7.1999, + "step": 47350 + }, + { + "epoch": 3.5, + "learning_rate": 5.2888469226828466e-05, + "loss": 7.8224, + "step": 47400 + }, + { + "epoch": 3.5, + "learning_rate": 5.2838243714277103e-05, + "loss": 8.1369, + "step": 47450 + }, + { + "epoch": 3.5, + "learning_rate": 5.2788018201725754e-05, + "loss": 6.7302, + "step": 47500 + }, + { + "epoch": 3.51, + "learning_rate": 5.273779268917439e-05, + "loss": 8.0819, + "step": 47550 + }, + { + "epoch": 3.51, + "learning_rate": 5.268756717662304e-05, + "loss": 7.8832, + "step": 47600 + }, + { + "epoch": 3.51, + "learning_rate": 5.263734166407168e-05, + "loss": 8.4479, + "step": 47650 + }, + { + "epoch": 3.52, + "learning_rate": 5.258711615152033e-05, + "loss": 7.7838, + "step": 47700 + }, + { + "epoch": 3.52, + "learning_rate": 5.2536890638968975e-05, + "loss": 8.3843, + "step": 47750 + }, + { + "epoch": 3.53, + "learning_rate": 5.2486665126417625e-05, + "loss": 6.9055, + "step": 47800 + }, + { + "epoch": 3.53, + "learning_rate": 5.243643961386626e-05, + "loss": 6.6339, + "step": 47850 + }, + { + "epoch": 3.53, + "learning_rate": 5.23862141013149e-05, + "loss": 7.0316, + "step": 47900 + }, + { + "epoch": 3.54, + "learning_rate": 5.233598858876355e-05, + "loss": 7.4569, + "step": 47950 + }, + { + "epoch": 3.54, + "learning_rate": 5.228576307621219e-05, + "loss": 7.6204, + "step": 48000 + }, + { + "epoch": 3.54, + "learning_rate": 5.223553756366084e-05, + "loss": 7.1085, + "step": 48050 + }, + { + "epoch": 3.55, + "learning_rate": 5.218531205110948e-05, + "loss": 7.7254, + "step": 48100 + }, + { + "epoch": 3.55, + "learning_rate": 5.2135086538558134e-05, + "loss": 
7.1486, + "step": 48150 + }, + { + "epoch": 3.56, + "learning_rate": 5.208486102600677e-05, + "loss": 6.9297, + "step": 48200 + }, + { + "epoch": 3.56, + "learning_rate": 5.203463551345542e-05, + "loss": 7.5314, + "step": 48250 + }, + { + "epoch": 3.56, + "learning_rate": 5.198441000090406e-05, + "loss": 7.68, + "step": 48300 + }, + { + "epoch": 3.57, + "learning_rate": 5.193418448835271e-05, + "loss": 7.9467, + "step": 48350 + }, + { + "epoch": 3.57, + "learning_rate": 5.188395897580135e-05, + "loss": 6.7188, + "step": 48400 + }, + { + "epoch": 3.57, + "learning_rate": 5.183373346325e-05, + "loss": 7.7619, + "step": 48450 + }, + { + "epoch": 3.58, + "learning_rate": 5.178350795069864e-05, + "loss": 7.6537, + "step": 48500 + }, + { + "epoch": 3.58, + "learning_rate": 5.173328243814728e-05, + "loss": 6.9593, + "step": 48550 + }, + { + "epoch": 3.58, + "learning_rate": 5.168305692559593e-05, + "loss": 7.4834, + "step": 48600 + }, + { + "epoch": 3.59, + "learning_rate": 5.163283141304457e-05, + "loss": 8.2864, + "step": 48650 + }, + { + "epoch": 3.59, + "learning_rate": 5.158260590049322e-05, + "loss": 7.234, + "step": 48700 + }, + { + "epoch": 3.6, + "learning_rate": 5.1532380387941856e-05, + "loss": 7.2513, + "step": 48750 + }, + { + "epoch": 3.6, + "learning_rate": 5.1482154875390506e-05, + "loss": 7.508, + "step": 48800 + }, + { + "epoch": 3.6, + "learning_rate": 5.143192936283915e-05, + "loss": 7.1513, + "step": 48850 + }, + { + "epoch": 3.61, + "learning_rate": 5.13817038502878e-05, + "loss": 7.8882, + "step": 48900 + }, + { + "epoch": 3.61, + "learning_rate": 5.133147833773644e-05, + "loss": 8.0859, + "step": 48950 + }, + { + "epoch": 3.61, + "learning_rate": 5.128125282518509e-05, + "loss": 7.5506, + "step": 49000 + }, + { + "epoch": 3.62, + "learning_rate": 5.123102731263373e-05, + "loss": 7.9777, + "step": 49050 + }, + { + "epoch": 3.62, + "learning_rate": 5.118080180008237e-05, + "loss": 8.3599, + "step": 49100 + }, + { + "epoch": 3.63, + "learning_rate": 
5.1130576287531015e-05, + "loss": 7.105, + "step": 49150 + }, + { + "epoch": 3.63, + "learning_rate": 5.108035077497966e-05, + "loss": 8.2692, + "step": 49200 + }, + { + "epoch": 3.63, + "learning_rate": 5.103012526242831e-05, + "loss": 7.9098, + "step": 49250 + }, + { + "epoch": 3.64, + "learning_rate": 5.097989974987695e-05, + "loss": 7.1698, + "step": 49300 + }, + { + "epoch": 3.64, + "learning_rate": 5.09296742373256e-05, + "loss": 7.406, + "step": 49350 + }, + { + "epoch": 3.64, + "learning_rate": 5.0879448724774235e-05, + "loss": 8.3276, + "step": 49400 + }, + { + "epoch": 3.65, + "learning_rate": 5.0829223212222886e-05, + "loss": 7.5714, + "step": 49450 + }, + { + "epoch": 3.65, + "learning_rate": 5.077899769967153e-05, + "loss": 7.0839, + "step": 49500 + }, + { + "epoch": 3.65, + "learning_rate": 5.0728772187120174e-05, + "loss": 7.0589, + "step": 49550 + }, + { + "epoch": 3.66, + "learning_rate": 5.067854667456882e-05, + "loss": 7.4998, + "step": 49600 + }, + { + "epoch": 3.66, + "learning_rate": 5.0628321162017455e-05, + "loss": 7.3495, + "step": 49650 + }, + { + "epoch": 3.67, + "learning_rate": 5.0578095649466106e-05, + "loss": 7.5101, + "step": 49700 + }, + { + "epoch": 3.67, + "learning_rate": 5.0527870136914743e-05, + "loss": 6.7707, + "step": 49750 + }, + { + "epoch": 3.67, + "learning_rate": 5.0477644624363394e-05, + "loss": 7.5822, + "step": 49800 + }, + { + "epoch": 3.68, + "learning_rate": 5.042741911181204e-05, + "loss": 6.5937, + "step": 49850 + }, + { + "epoch": 3.68, + "learning_rate": 5.037719359926069e-05, + "loss": 7.2497, + "step": 49900 + }, + { + "epoch": 3.68, + "learning_rate": 5.0326968086709326e-05, + "loss": 7.35, + "step": 49950 + }, + { + "epoch": 3.69, + "learning_rate": 5.027674257415798e-05, + "loss": 7.767, + "step": 50000 + }, + { + "epoch": 3.69, + "learning_rate": 5.0226517061606615e-05, + "loss": 8.3228, + "step": 50050 + }, + { + "epoch": 3.7, + "learning_rate": 5.0176291549055265e-05, + "loss": 7.6905, + "step": 50100 
+ }, + { + "epoch": 3.7, + "learning_rate": 5.01260660365039e-05, + "loss": 7.8275, + "step": 50150 + }, + { + "epoch": 3.7, + "learning_rate": 5.007584052395255e-05, + "loss": 8.0724, + "step": 50200 + }, + { + "epoch": 3.71, + "learning_rate": 5.00256150114012e-05, + "loss": 7.0501, + "step": 50250 + }, + { + "epoch": 3.71, + "learning_rate": 4.997538949884984e-05, + "loss": 7.4269, + "step": 50300 + }, + { + "epoch": 3.71, + "learning_rate": 4.9925163986298486e-05, + "loss": 7.5186, + "step": 50350 + }, + { + "epoch": 3.72, + "learning_rate": 4.987493847374713e-05, + "loss": 8.2606, + "step": 50400 + }, + { + "epoch": 3.72, + "learning_rate": 4.9824712961195774e-05, + "loss": 8.2097, + "step": 50450 + }, + { + "epoch": 3.73, + "learning_rate": 4.977448744864441e-05, + "loss": 7.468, + "step": 50500 + }, + { + "epoch": 3.73, + "learning_rate": 4.9724261936093055e-05, + "loss": 8.2075, + "step": 50550 + }, + { + "epoch": 3.73, + "learning_rate": 4.9674036423541706e-05, + "loss": 7.3928, + "step": 50600 + }, + { + "epoch": 3.74, + "learning_rate": 4.962381091099035e-05, + "loss": 7.2907, + "step": 50650 + }, + { + "epoch": 3.74, + "learning_rate": 4.9573585398438994e-05, + "loss": 7.706, + "step": 50700 + }, + { + "epoch": 3.74, + "learning_rate": 4.952335988588764e-05, + "loss": 7.301, + "step": 50750 + }, + { + "epoch": 3.75, + "learning_rate": 4.947313437333628e-05, + "loss": 6.9109, + "step": 50800 + }, + { + "epoch": 3.75, + "learning_rate": 4.9422908860784926e-05, + "loss": 6.6967, + "step": 50850 + }, + { + "epoch": 3.75, + "learning_rate": 4.937268334823357e-05, + "loss": 5.9484, + "step": 50900 + }, + { + "epoch": 3.76, + "learning_rate": 4.9322457835682214e-05, + "loss": 7.8288, + "step": 50950 + }, + { + "epoch": 3.76, + "learning_rate": 4.9272232323130865e-05, + "loss": 7.3987, + "step": 51000 + }, + { + "epoch": 3.77, + "learning_rate": 4.92220068105795e-05, + "loss": 7.3714, + "step": 51050 + }, + { + "epoch": 3.77, + "learning_rate": 
4.9171781298028147e-05, + "loss": 7.258, + "step": 51100 + }, + { + "epoch": 3.77, + "learning_rate": 4.912155578547679e-05, + "loss": 6.8541, + "step": 51150 + }, + { + "epoch": 3.78, + "learning_rate": 4.9071330272925435e-05, + "loss": 7.085, + "step": 51200 + }, + { + "epoch": 3.78, + "learning_rate": 4.902110476037408e-05, + "loss": 6.7827, + "step": 51250 + }, + { + "epoch": 3.78, + "learning_rate": 4.897087924782273e-05, + "loss": 6.6806, + "step": 51300 + }, + { + "epoch": 3.79, + "learning_rate": 4.8920653735271374e-05, + "loss": 7.2918, + "step": 51350 + }, + { + "epoch": 3.79, + "learning_rate": 4.887042822272002e-05, + "loss": 7.9022, + "step": 51400 + }, + { + "epoch": 3.8, + "learning_rate": 4.882020271016866e-05, + "loss": 7.6094, + "step": 51450 + }, + { + "epoch": 3.8, + "learning_rate": 4.8769977197617306e-05, + "loss": 8.1048, + "step": 51500 + }, + { + "epoch": 3.8, + "learning_rate": 4.871975168506595e-05, + "loss": 6.9056, + "step": 51550 + }, + { + "epoch": 3.81, + "learning_rate": 4.866952617251459e-05, + "loss": 6.4347, + "step": 51600 + }, + { + "epoch": 3.81, + "learning_rate": 4.861930065996324e-05, + "loss": 7.307, + "step": 51650 + }, + { + "epoch": 3.81, + "learning_rate": 4.856907514741188e-05, + "loss": 7.649, + "step": 51700 + }, + { + "epoch": 3.82, + "learning_rate": 4.8518849634860526e-05, + "loss": 6.7706, + "step": 51750 + }, + { + "epoch": 3.82, + "learning_rate": 4.846862412230917e-05, + "loss": 6.7943, + "step": 51800 + }, + { + "epoch": 3.82, + "learning_rate": 4.8418398609757814e-05, + "loss": 7.654, + "step": 51850 + }, + { + "epoch": 3.83, + "learning_rate": 4.836817309720646e-05, + "loss": 7.6245, + "step": 51900 + }, + { + "epoch": 3.83, + "learning_rate": 4.83179475846551e-05, + "loss": 7.8284, + "step": 51950 + }, + { + "epoch": 3.84, + "learning_rate": 4.8267722072103746e-05, + "loss": 6.9516, + "step": 52000 + }, + { + "epoch": 3.84, + "learning_rate": 4.82174965595524e-05, + "loss": 7.1367, + "step": 52050 + }, + 
{ + "epoch": 3.84, + "learning_rate": 4.816727104700104e-05, + "loss": 7.4153, + "step": 52100 + }, + { + "epoch": 3.85, + "learning_rate": 4.8117045534449685e-05, + "loss": 6.5358, + "step": 52150 + }, + { + "epoch": 3.85, + "learning_rate": 4.806682002189832e-05, + "loss": 7.5443, + "step": 52200 + }, + { + "epoch": 3.85, + "learning_rate": 4.8016594509346967e-05, + "loss": 7.8184, + "step": 52250 + }, + { + "epoch": 3.86, + "learning_rate": 4.796636899679561e-05, + "loss": 6.9702, + "step": 52300 + }, + { + "epoch": 3.86, + "learning_rate": 4.791614348424426e-05, + "loss": 8.3988, + "step": 52350 + }, + { + "epoch": 3.87, + "learning_rate": 4.7865917971692905e-05, + "loss": 8.1025, + "step": 52400 + }, + { + "epoch": 3.87, + "learning_rate": 4.781569245914155e-05, + "loss": 7.81, + "step": 52450 + }, + { + "epoch": 3.87, + "learning_rate": 4.7765466946590194e-05, + "loss": 6.6809, + "step": 52500 + }, + { + "epoch": 3.88, + "learning_rate": 4.771524143403884e-05, + "loss": 6.81, + "step": 52550 + }, + { + "epoch": 3.88, + "learning_rate": 4.766501592148748e-05, + "loss": 7.1717, + "step": 52600 + }, + { + "epoch": 3.88, + "learning_rate": 4.7614790408936126e-05, + "loss": 7.3114, + "step": 52650 + }, + { + "epoch": 3.89, + "learning_rate": 4.756456489638477e-05, + "loss": 7.2819, + "step": 52700 + }, + { + "epoch": 3.89, + "learning_rate": 4.7514339383833414e-05, + "loss": 6.6964, + "step": 52750 + }, + { + "epoch": 3.89, + "learning_rate": 4.746411387128206e-05, + "loss": 8.1118, + "step": 52800 + }, + { + "epoch": 3.9, + "learning_rate": 4.74138883587307e-05, + "loss": 8.1901, + "step": 52850 + }, + { + "epoch": 3.9, + "learning_rate": 4.7363662846179346e-05, + "loss": 6.8883, + "step": 52900 + }, + { + "epoch": 3.91, + "learning_rate": 4.731343733362799e-05, + "loss": 7.2554, + "step": 52950 + }, + { + "epoch": 3.91, + "learning_rate": 4.7263211821076634e-05, + "loss": 7.402, + "step": 53000 + }, + { + "epoch": 3.91, + "learning_rate": 4.721298630852528e-05, 
+ "loss": 8.8808, + "step": 53050 + }, + { + "epoch": 3.92, + "learning_rate": 4.716276079597393e-05, + "loss": 7.1652, + "step": 53100 + }, + { + "epoch": 3.92, + "learning_rate": 4.711253528342257e-05, + "loss": 6.884, + "step": 53150 + }, + { + "epoch": 3.92, + "learning_rate": 4.706230977087122e-05, + "loss": 7.4472, + "step": 53200 + }, + { + "epoch": 3.93, + "learning_rate": 4.701208425831986e-05, + "loss": 6.8787, + "step": 53250 + }, + { + "epoch": 3.93, + "learning_rate": 4.6961858745768505e-05, + "loss": 6.9316, + "step": 53300 + }, + { + "epoch": 3.94, + "learning_rate": 4.691163323321714e-05, + "loss": 7.1614, + "step": 53350 + }, + { + "epoch": 3.94, + "learning_rate": 4.6861407720665787e-05, + "loss": 7.193, + "step": 53400 + }, + { + "epoch": 3.94, + "learning_rate": 4.681118220811444e-05, + "loss": 7.5875, + "step": 53450 + }, + { + "epoch": 3.95, + "learning_rate": 4.676095669556308e-05, + "loss": 7.0836, + "step": 53500 + }, + { + "epoch": 3.95, + "learning_rate": 4.6710731183011725e-05, + "loss": 7.2054, + "step": 53550 + }, + { + "epoch": 3.95, + "learning_rate": 4.666050567046037e-05, + "loss": 6.95, + "step": 53600 + }, + { + "epoch": 3.96, + "learning_rate": 4.6610280157909014e-05, + "loss": 6.6366, + "step": 53650 + }, + { + "epoch": 3.96, + "learning_rate": 4.656005464535766e-05, + "loss": 6.7976, + "step": 53700 + }, + { + "epoch": 3.96, + "learning_rate": 4.65098291328063e-05, + "loss": 7.1371, + "step": 53750 + }, + { + "epoch": 3.97, + "learning_rate": 4.6459603620254946e-05, + "loss": 6.7457, + "step": 53800 + }, + { + "epoch": 3.97, + "learning_rate": 4.6409378107703597e-05, + "loss": 6.6139, + "step": 53850 + }, + { + "epoch": 3.98, + "learning_rate": 4.6359152595152234e-05, + "loss": 7.9291, + "step": 53900 + }, + { + "epoch": 3.98, + "learning_rate": 4.630892708260088e-05, + "loss": 7.4235, + "step": 53950 + }, + { + "epoch": 3.98, + "learning_rate": 4.625870157004952e-05, + "loss": 6.8609, + "step": 54000 + }, + { + "epoch": 3.99, 
+ "learning_rate": 4.6208476057498166e-05, + "loss": 6.6128, + "step": 54050 + }, + { + "epoch": 3.99, + "learning_rate": 4.615825054494681e-05, + "loss": 7.0313, + "step": 54100 + }, + { + "epoch": 3.99, + "learning_rate": 4.610802503239546e-05, + "loss": 7.3654, + "step": 54150 + }, + { + "epoch": 4.0, + "learning_rate": 4.6057799519844105e-05, + "loss": 8.266, + "step": 54200 + }, + { + "epoch": 4.0, + "learning_rate": 4.600757400729275e-05, + "loss": 7.9471, + "step": 54250 + }, + { + "epoch": 4.01, + "learning_rate": 4.595734849474139e-05, + "loss": 6.0877, + "step": 54300 + }, + { + "epoch": 4.01, + "learning_rate": 4.590712298219004e-05, + "loss": 6.7453, + "step": 54350 + }, + { + "epoch": 4.01, + "learning_rate": 4.585689746963868e-05, + "loss": 5.8985, + "step": 54400 + }, + { + "epoch": 4.02, + "learning_rate": 4.580667195708732e-05, + "loss": 7.4527, + "step": 54450 + }, + { + "epoch": 4.02, + "learning_rate": 4.575644644453597e-05, + "loss": 7.0419, + "step": 54500 + }, + { + "epoch": 4.02, + "learning_rate": 4.570622093198461e-05, + "loss": 6.281, + "step": 54550 + }, + { + "epoch": 4.03, + "learning_rate": 4.565599541943326e-05, + "loss": 6.6096, + "step": 54600 + }, + { + "epoch": 4.03, + "learning_rate": 4.56057699068819e-05, + "loss": 7.0341, + "step": 54650 + }, + { + "epoch": 4.03, + "learning_rate": 4.5555544394330545e-05, + "loss": 6.621, + "step": 54700 + }, + { + "epoch": 4.04, + "learning_rate": 4.550531888177919e-05, + "loss": 7.4405, + "step": 54750 + }, + { + "epoch": 4.04, + "learning_rate": 4.5455093369227834e-05, + "loss": 7.2506, + "step": 54800 + }, + { + "epoch": 4.05, + "learning_rate": 4.540486785667648e-05, + "loss": 7.1534, + "step": 54850 + }, + { + "epoch": 4.05, + "learning_rate": 4.535464234412513e-05, + "loss": 6.5829, + "step": 54900 + }, + { + "epoch": 4.05, + "learning_rate": 4.530441683157377e-05, + "loss": 7.0338, + "step": 54950 + }, + { + "epoch": 4.06, + "learning_rate": 4.5254191319022417e-05, + "loss": 6.6234, + 
"step": 55000 + }, + { + "epoch": 4.06, + "learning_rate": 4.5203965806471054e-05, + "loss": 6.2412, + "step": 55050 + }, + { + "epoch": 4.06, + "learning_rate": 4.51537402939197e-05, + "loss": 6.3439, + "step": 55100 + }, + { + "epoch": 4.07, + "learning_rate": 4.510351478136834e-05, + "loss": 6.8272, + "step": 55150 + }, + { + "epoch": 4.07, + "learning_rate": 4.5053289268816986e-05, + "loss": 6.4758, + "step": 55200 + }, + { + "epoch": 4.08, + "learning_rate": 4.500306375626564e-05, + "loss": 6.434, + "step": 55250 + }, + { + "epoch": 4.08, + "learning_rate": 4.495283824371428e-05, + "loss": 6.5471, + "step": 55300 + }, + { + "epoch": 4.08, + "learning_rate": 4.4902612731162925e-05, + "loss": 6.5088, + "step": 55350 + }, + { + "epoch": 4.09, + "learning_rate": 4.485238721861157e-05, + "loss": 6.6941, + "step": 55400 + }, + { + "epoch": 4.09, + "learning_rate": 4.480216170606021e-05, + "loss": 6.3248, + "step": 55450 + }, + { + "epoch": 4.09, + "learning_rate": 4.475193619350886e-05, + "loss": 7.2989, + "step": 55500 + }, + { + "epoch": 4.1, + "learning_rate": 4.47017106809575e-05, + "loss": 7.0947, + "step": 55550 + }, + { + "epoch": 4.1, + "learning_rate": 4.4651485168406145e-05, + "loss": 6.4896, + "step": 55600 + }, + { + "epoch": 4.1, + "learning_rate": 4.460125965585479e-05, + "loss": 5.9249, + "step": 55650 + }, + { + "epoch": 4.11, + "learning_rate": 4.455103414330343e-05, + "loss": 6.7801, + "step": 55700 + }, + { + "epoch": 4.11, + "learning_rate": 4.450080863075208e-05, + "loss": 6.2216, + "step": 55750 + }, + { + "epoch": 4.12, + "learning_rate": 4.445058311820072e-05, + "loss": 6.8346, + "step": 55800 + }, + { + "epoch": 4.12, + "learning_rate": 4.4400357605649366e-05, + "loss": 7.2863, + "step": 55850 + }, + { + "epoch": 4.12, + "learning_rate": 4.435013209309801e-05, + "loss": 7.406, + "step": 55900 + }, + { + "epoch": 4.13, + "learning_rate": 4.429990658054666e-05, + "loss": 6.0165, + "step": 55950 + }, + { + "epoch": 4.13, + "learning_rate": 
4.4249681067995304e-05, + "loss": 6.8052, + "step": 56000 + }, + { + "epoch": 4.13, + "learning_rate": 4.419945555544395e-05, + "loss": 6.679, + "step": 56050 + }, + { + "epoch": 4.14, + "learning_rate": 4.414923004289259e-05, + "loss": 6.2087, + "step": 56100 + }, + { + "epoch": 4.14, + "learning_rate": 4.4099004530341237e-05, + "loss": 6.5904, + "step": 56150 + }, + { + "epoch": 4.15, + "learning_rate": 4.4048779017789874e-05, + "loss": 6.4147, + "step": 56200 + }, + { + "epoch": 4.15, + "learning_rate": 4.399855350523852e-05, + "loss": 6.6465, + "step": 56250 + }, + { + "epoch": 4.15, + "learning_rate": 4.394832799268717e-05, + "loss": 7.11, + "step": 56300 + }, + { + "epoch": 4.16, + "learning_rate": 4.389810248013581e-05, + "loss": 7.0558, + "step": 56350 + }, + { + "epoch": 4.16, + "learning_rate": 4.384787696758446e-05, + "loss": 6.922, + "step": 56400 + }, + { + "epoch": 4.16, + "learning_rate": 4.37976514550331e-05, + "loss": 7.2125, + "step": 56450 + }, + { + "epoch": 4.17, + "learning_rate": 4.3747425942481745e-05, + "loss": 6.4719, + "step": 56500 + }, + { + "epoch": 4.17, + "learning_rate": 4.369720042993039e-05, + "loss": 7.204, + "step": 56550 + }, + { + "epoch": 4.17, + "learning_rate": 4.364697491737903e-05, + "loss": 7.2371, + "step": 56600 + }, + { + "epoch": 4.18, + "learning_rate": 4.359674940482768e-05, + "loss": 6.6131, + "step": 56650 + }, + { + "epoch": 4.18, + "learning_rate": 4.354652389227633e-05, + "loss": 6.6349, + "step": 56700 + }, + { + "epoch": 4.19, + "learning_rate": 4.3496298379724965e-05, + "loss": 5.9137, + "step": 56750 + }, + { + "epoch": 4.19, + "learning_rate": 4.344607286717361e-05, + "loss": 6.5402, + "step": 56800 + }, + { + "epoch": 4.19, + "learning_rate": 4.339584735462225e-05, + "loss": 7.3351, + "step": 56850 + }, + { + "epoch": 4.2, + "learning_rate": 4.33456218420709e-05, + "loss": 8.1387, + "step": 56900 + }, + { + "epoch": 4.2, + "learning_rate": 4.329539632951954e-05, + "loss": 7.0783, + "step": 56950 + }, + { 
+ "epoch": 4.2, + "learning_rate": 4.324517081696819e-05, + "loss": 6.947, + "step": 57000 + }, + { + "epoch": 4.21, + "learning_rate": 4.3194945304416836e-05, + "loss": 6.1526, + "step": 57050 + }, + { + "epoch": 4.21, + "learning_rate": 4.314471979186548e-05, + "loss": 7.273, + "step": 57100 + }, + { + "epoch": 4.22, + "learning_rate": 4.3094494279314124e-05, + "loss": 7.0958, + "step": 57150 + }, + { + "epoch": 4.22, + "learning_rate": 4.304426876676277e-05, + "loss": 6.4413, + "step": 57200 + }, + { + "epoch": 4.22, + "learning_rate": 4.299404325421141e-05, + "loss": 6.597, + "step": 57250 + }, + { + "epoch": 4.23, + "learning_rate": 4.294381774166006e-05, + "loss": 6.6893, + "step": 57300 + }, + { + "epoch": 4.23, + "learning_rate": 4.28935922291087e-05, + "loss": 6.4746, + "step": 57350 + }, + { + "epoch": 4.23, + "learning_rate": 4.2843366716557345e-05, + "loss": 7.376, + "step": 57400 + }, + { + "epoch": 4.24, + "learning_rate": 4.279314120400599e-05, + "loss": 7.2823, + "step": 57450 + }, + { + "epoch": 4.24, + "learning_rate": 4.274291569145463e-05, + "loss": 6.3184, + "step": 57500 + }, + { + "epoch": 4.25, + "learning_rate": 4.269269017890328e-05, + "loss": 6.4526, + "step": 57550 + }, + { + "epoch": 4.25, + "learning_rate": 4.264246466635192e-05, + "loss": 6.7892, + "step": 57600 + }, + { + "epoch": 4.25, + "learning_rate": 4.2592239153800565e-05, + "loss": 6.2082, + "step": 57650 + }, + { + "epoch": 4.26, + "learning_rate": 4.254201364124921e-05, + "loss": 7.1488, + "step": 57700 + }, + { + "epoch": 4.26, + "learning_rate": 4.249178812869786e-05, + "loss": 6.9399, + "step": 57750 + }, + { + "epoch": 4.26, + "learning_rate": 4.2441562616146504e-05, + "loss": 6.8596, + "step": 57800 + }, + { + "epoch": 4.27, + "learning_rate": 4.239133710359515e-05, + "loss": 6.8899, + "step": 57850 + }, + { + "epoch": 4.27, + "learning_rate": 4.2341111591043785e-05, + "loss": 6.8196, + "step": 57900 + }, + { + "epoch": 4.27, + "learning_rate": 4.229088607849243e-05, + 
"loss": 7.5114, + "step": 57950 + }, + { + "epoch": 4.28, + "learning_rate": 4.224066056594107e-05, + "loss": 6.4122, + "step": 58000 + }, + { + "epoch": 4.28, + "learning_rate": 4.219043505338972e-05, + "loss": 6.9228, + "step": 58050 + }, + { + "epoch": 4.29, + "learning_rate": 4.214020954083837e-05, + "loss": 6.3687, + "step": 58100 + }, + { + "epoch": 4.29, + "learning_rate": 4.208998402828701e-05, + "loss": 6.8616, + "step": 58150 + }, + { + "epoch": 4.29, + "learning_rate": 4.2039758515735656e-05, + "loss": 6.002, + "step": 58200 + }, + { + "epoch": 4.3, + "learning_rate": 4.19895330031843e-05, + "loss": 6.0985, + "step": 58250 + }, + { + "epoch": 4.3, + "learning_rate": 4.1939307490632944e-05, + "loss": 6.5857, + "step": 58300 + }, + { + "epoch": 4.3, + "learning_rate": 4.188908197808159e-05, + "loss": 6.257, + "step": 58350 + }, + { + "epoch": 4.31, + "learning_rate": 4.183885646553023e-05, + "loss": 6.9222, + "step": 58400 + }, + { + "epoch": 4.31, + "learning_rate": 4.178863095297888e-05, + "loss": 6.7801, + "step": 58450 + }, + { + "epoch": 4.32, + "learning_rate": 4.173840544042752e-05, + "loss": 6.3861, + "step": 58500 + }, + { + "epoch": 4.32, + "learning_rate": 4.1688179927876165e-05, + "loss": 6.8685, + "step": 58550 + }, + { + "epoch": 4.32, + "learning_rate": 4.163795441532481e-05, + "loss": 6.9948, + "step": 58600 + }, + { + "epoch": 4.33, + "learning_rate": 4.158772890277345e-05, + "loss": 6.0965, + "step": 58650 + }, + { + "epoch": 4.33, + "learning_rate": 4.15375033902221e-05, + "loss": 7.282, + "step": 58700 + }, + { + "epoch": 4.33, + "learning_rate": 4.148727787767074e-05, + "loss": 7.6165, + "step": 58750 + }, + { + "epoch": 4.34, + "learning_rate": 4.143705236511939e-05, + "loss": 6.734, + "step": 58800 + }, + { + "epoch": 4.34, + "learning_rate": 4.1386826852568036e-05, + "loss": 6.0334, + "step": 58850 + }, + { + "epoch": 4.34, + "learning_rate": 4.133660134001668e-05, + "loss": 6.5306, + "step": 58900 + }, + { + "epoch": 4.35, + 
"learning_rate": 4.1286375827465324e-05, + "loss": 7.4324, + "step": 58950 + }, + { + "epoch": 4.35, + "learning_rate": 4.123615031491397e-05, + "loss": 7.234, + "step": 59000 + }, + { + "epoch": 4.36, + "learning_rate": 4.1185924802362605e-05, + "loss": 6.7196, + "step": 59050 + }, + { + "epoch": 4.36, + "learning_rate": 4.113569928981125e-05, + "loss": 6.0641, + "step": 59100 + }, + { + "epoch": 4.36, + "learning_rate": 4.10854737772599e-05, + "loss": 5.9373, + "step": 59150 + }, + { + "epoch": 4.37, + "learning_rate": 4.1035248264708544e-05, + "loss": 6.4428, + "step": 59200 + }, + { + "epoch": 4.37, + "learning_rate": 4.098502275215719e-05, + "loss": 6.7303, + "step": 59250 + }, + { + "epoch": 4.37, + "learning_rate": 4.093479723960583e-05, + "loss": 6.5585, + "step": 59300 + }, + { + "epoch": 4.38, + "learning_rate": 4.0884571727054476e-05, + "loss": 5.837, + "step": 59350 + }, + { + "epoch": 4.38, + "learning_rate": 4.083434621450312e-05, + "loss": 6.574, + "step": 59400 + }, + { + "epoch": 4.39, + "learning_rate": 4.0784120701951764e-05, + "loss": 7.4319, + "step": 59450 + }, + { + "epoch": 4.39, + "learning_rate": 4.073389518940041e-05, + "loss": 6.1092, + "step": 59500 + }, + { + "epoch": 4.39, + "learning_rate": 4.068366967684906e-05, + "loss": 6.4683, + "step": 59550 + }, + { + "epoch": 4.4, + "learning_rate": 4.06334441642977e-05, + "loss": 7.1323, + "step": 59600 + }, + { + "epoch": 4.4, + "learning_rate": 4.058321865174634e-05, + "loss": 6.7011, + "step": 59650 + }, + { + "epoch": 4.4, + "learning_rate": 4.0532993139194985e-05, + "loss": 6.7281, + "step": 59700 + }, + { + "epoch": 4.41, + "learning_rate": 4.048276762664363e-05, + "loss": 6.0361, + "step": 59750 + }, + { + "epoch": 4.41, + "learning_rate": 4.043254211409227e-05, + "loss": 6.5359, + "step": 59800 + }, + { + "epoch": 4.41, + "learning_rate": 4.0382316601540924e-05, + "loss": 7.3576, + "step": 59850 + }, + { + "epoch": 4.42, + "learning_rate": 4.033209108898957e-05, + "loss": 6.7324, + 
"step": 59900 + }, + { + "epoch": 4.42, + "learning_rate": 4.028186557643821e-05, + "loss": 7.1445, + "step": 59950 + }, + { + "epoch": 4.43, + "learning_rate": 4.0231640063886856e-05, + "loss": 5.426, + "step": 60000 + }, + { + "epoch": 4.43, + "eval_loss": 7.799332618713379, + "eval_runtime": 963.4222, + "eval_samples_per_second": 13.595, + "eval_steps_per_second": 3.399, + "eval_wer": 0.20775061946159337, + "step": 60000 + }, + { + "epoch": 4.43, + "learning_rate": 4.01814145513355e-05, + "loss": 6.577, + "step": 60050 + }, + { + "epoch": 4.43, + "learning_rate": 4.0131189038784144e-05, + "loss": 7.1169, + "step": 60100 + }, + { + "epoch": 4.44, + "learning_rate": 4.008096352623279e-05, + "loss": 7.2535, + "step": 60150 + }, + { + "epoch": 4.44, + "learning_rate": 4.003073801368143e-05, + "loss": 6.2427, + "step": 60200 + }, + { + "epoch": 4.44, + "learning_rate": 3.9980512501130076e-05, + "loss": 6.3184, + "step": 60250 + }, + { + "epoch": 4.45, + "learning_rate": 3.993028698857872e-05, + "loss": 6.9348, + "step": 60300 + }, + { + "epoch": 4.45, + "learning_rate": 3.9880061476027364e-05, + "loss": 6.5074, + "step": 60350 + }, + { + "epoch": 4.46, + "learning_rate": 3.982983596347601e-05, + "loss": 7.2028, + "step": 60400 + }, + { + "epoch": 4.46, + "learning_rate": 3.977961045092465e-05, + "loss": 7.1185, + "step": 60450 + }, + { + "epoch": 4.46, + "learning_rate": 3.9729384938373296e-05, + "loss": 6.5089, + "step": 60500 + }, + { + "epoch": 4.47, + "learning_rate": 3.967915942582194e-05, + "loss": 5.8853, + "step": 60550 + }, + { + "epoch": 4.47, + "learning_rate": 3.962893391327059e-05, + "loss": 6.0402, + "step": 60600 + }, + { + "epoch": 4.47, + "learning_rate": 3.9578708400719235e-05, + "loss": 6.6078, + "step": 60650 + }, + { + "epoch": 4.48, + "learning_rate": 3.952848288816788e-05, + "loss": 6.4986, + "step": 60700 + }, + { + "epoch": 4.48, + "learning_rate": 3.947825737561652e-05, + "loss": 6.9922, + "step": 60750 + }, + { + "epoch": 4.48, + 
"learning_rate": 3.942803186306516e-05, + "loss": 6.8327, + "step": 60800 + }, + { + "epoch": 4.49, + "learning_rate": 3.9377806350513805e-05, + "loss": 6.7131, + "step": 60850 + }, + { + "epoch": 4.49, + "learning_rate": 3.932758083796245e-05, + "loss": 6.3372, + "step": 60900 + }, + { + "epoch": 4.5, + "learning_rate": 3.92773553254111e-05, + "loss": 6.5337, + "step": 60950 + }, + { + "epoch": 4.5, + "learning_rate": 3.9227129812859744e-05, + "loss": 7.5148, + "step": 61000 + }, + { + "epoch": 4.5, + "learning_rate": 3.917690430030839e-05, + "loss": 6.2416, + "step": 61050 + }, + { + "epoch": 4.51, + "learning_rate": 3.912667878775703e-05, + "loss": 6.3845, + "step": 61100 + }, + { + "epoch": 4.51, + "learning_rate": 3.9076453275205676e-05, + "loss": 7.0156, + "step": 61150 + }, + { + "epoch": 4.51, + "learning_rate": 3.902622776265432e-05, + "loss": 6.4239, + "step": 61200 + }, + { + "epoch": 4.52, + "learning_rate": 3.8976002250102964e-05, + "loss": 7.2111, + "step": 61250 + }, + { + "epoch": 4.52, + "learning_rate": 3.892577673755161e-05, + "loss": 6.5958, + "step": 61300 + }, + { + "epoch": 4.53, + "learning_rate": 3.887555122500025e-05, + "loss": 7.3125, + "step": 61350 + }, + { + "epoch": 4.53, + "learning_rate": 3.8825325712448896e-05, + "loss": 6.0597, + "step": 61400 + }, + { + "epoch": 4.53, + "learning_rate": 3.877510019989754e-05, + "loss": 6.3881, + "step": 61450 + }, + { + "epoch": 4.54, + "learning_rate": 3.8724874687346184e-05, + "loss": 6.2309, + "step": 61500 + }, + { + "epoch": 4.54, + "learning_rate": 3.867464917479483e-05, + "loss": 6.7603, + "step": 61550 + }, + { + "epoch": 4.54, + "learning_rate": 3.862442366224347e-05, + "loss": 7.386, + "step": 61600 + }, + { + "epoch": 4.55, + "learning_rate": 3.857419814969212e-05, + "loss": 7.1897, + "step": 61650 + }, + { + "epoch": 4.55, + "learning_rate": 3.852397263714077e-05, + "loss": 6.8813, + "step": 61700 + }, + { + "epoch": 4.55, + "learning_rate": 3.847374712458941e-05, + "loss": 7.9826, + 
"step": 61750 + }, + { + "epoch": 4.56, + "learning_rate": 3.8423521612038055e-05, + "loss": 6.4649, + "step": 61800 + }, + { + "epoch": 4.56, + "learning_rate": 3.83732960994867e-05, + "loss": 6.0005, + "step": 61850 + }, + { + "epoch": 4.57, + "learning_rate": 3.832307058693534e-05, + "loss": 6.7249, + "step": 61900 + }, + { + "epoch": 4.57, + "learning_rate": 3.827284507438398e-05, + "loss": 7.3703, + "step": 61950 + }, + { + "epoch": 4.57, + "learning_rate": 3.822261956183263e-05, + "loss": 6.3157, + "step": 62000 + }, + { + "epoch": 4.58, + "learning_rate": 3.8172394049281276e-05, + "loss": 6.3982, + "step": 62050 + }, + { + "epoch": 4.58, + "learning_rate": 3.812216853672992e-05, + "loss": 6.3995, + "step": 62100 + }, + { + "epoch": 4.58, + "learning_rate": 3.8071943024178564e-05, + "loss": 6.9792, + "step": 62150 + }, + { + "epoch": 4.59, + "learning_rate": 3.802171751162721e-05, + "loss": 7.7098, + "step": 62200 + }, + { + "epoch": 4.59, + "learning_rate": 3.797149199907585e-05, + "loss": 7.3092, + "step": 62250 + }, + { + "epoch": 4.6, + "learning_rate": 3.7921266486524496e-05, + "loss": 6.7666, + "step": 62300 + }, + { + "epoch": 4.6, + "learning_rate": 3.787104097397314e-05, + "loss": 6.1829, + "step": 62350 + }, + { + "epoch": 4.6, + "learning_rate": 3.782081546142179e-05, + "loss": 8.2604, + "step": 62400 + }, + { + "epoch": 4.61, + "learning_rate": 3.777058994887043e-05, + "loss": 6.7275, + "step": 62450 + }, + { + "epoch": 4.61, + "learning_rate": 3.772036443631907e-05, + "loss": 6.8682, + "step": 62500 + }, + { + "epoch": 4.61, + "learning_rate": 3.7670138923767716e-05, + "loss": 7.4531, + "step": 62550 + }, + { + "epoch": 4.62, + "learning_rate": 3.761991341121636e-05, + "loss": 7.4792, + "step": 62600 + }, + { + "epoch": 4.62, + "learning_rate": 3.7569687898665004e-05, + "loss": 6.3364, + "step": 62650 + }, + { + "epoch": 4.62, + "learning_rate": 3.7519462386113655e-05, + "loss": 6.395, + "step": 62700 + }, + { + "epoch": 4.63, + "learning_rate": 
3.74692368735623e-05, + "loss": 6.4644, + "step": 62750 + }, + { + "epoch": 4.63, + "learning_rate": 3.741901136101094e-05, + "loss": 7.6636, + "step": 62800 + }, + { + "epoch": 4.64, + "learning_rate": 3.736878584845959e-05, + "loss": 6.5346, + "step": 62850 + }, + { + "epoch": 4.64, + "learning_rate": 3.731856033590823e-05, + "loss": 7.7544, + "step": 62900 + }, + { + "epoch": 4.64, + "learning_rate": 3.7268334823356875e-05, + "loss": 7.1518, + "step": 62950 + }, + { + "epoch": 4.65, + "learning_rate": 3.721810931080552e-05, + "loss": 5.2845, + "step": 63000 + }, + { + "epoch": 4.65, + "learning_rate": 3.7167883798254163e-05, + "loss": 6.4635, + "step": 63050 + }, + { + "epoch": 4.65, + "learning_rate": 3.711765828570281e-05, + "loss": 6.7313, + "step": 63100 + }, + { + "epoch": 4.66, + "learning_rate": 3.706743277315145e-05, + "loss": 6.2767, + "step": 63150 + }, + { + "epoch": 4.66, + "learning_rate": 3.7017207260600096e-05, + "loss": 6.2349, + "step": 63200 + }, + { + "epoch": 4.67, + "learning_rate": 3.696698174804874e-05, + "loss": 7.2152, + "step": 63250 + }, + { + "epoch": 4.67, + "learning_rate": 3.6916756235497384e-05, + "loss": 6.4904, + "step": 63300 + }, + { + "epoch": 4.67, + "learning_rate": 3.686653072294603e-05, + "loss": 6.4779, + "step": 63350 + }, + { + "epoch": 4.68, + "learning_rate": 3.681630521039467e-05, + "loss": 7.0359, + "step": 63400 + }, + { + "epoch": 4.68, + "learning_rate": 3.676607969784332e-05, + "loss": 6.3846, + "step": 63450 + }, + { + "epoch": 4.68, + "learning_rate": 3.671585418529197e-05, + "loss": 6.3923, + "step": 63500 + }, + { + "epoch": 4.69, + "learning_rate": 3.666562867274061e-05, + "loss": 6.7271, + "step": 63550 + }, + { + "epoch": 4.69, + "learning_rate": 3.661540316018925e-05, + "loss": 6.6546, + "step": 63600 + }, + { + "epoch": 4.69, + "learning_rate": 3.656517764763789e-05, + "loss": 7.1482, + "step": 63650 + }, + { + "epoch": 4.7, + "learning_rate": 3.6514952135086536e-05, + "loss": 6.4931, + "step": 63700 + 
}, + { + "epoch": 4.7, + "learning_rate": 3.646472662253518e-05, + "loss": 5.1346, + "step": 63750 + }, + { + "epoch": 4.71, + "learning_rate": 3.641450110998383e-05, + "loss": 6.25, + "step": 63800 + }, + { + "epoch": 4.71, + "learning_rate": 3.6364275597432475e-05, + "loss": 6.5812, + "step": 63850 + }, + { + "epoch": 4.71, + "learning_rate": 3.631405008488112e-05, + "loss": 6.4797, + "step": 63900 + }, + { + "epoch": 4.72, + "learning_rate": 3.626382457232976e-05, + "loss": 6.2886, + "step": 63950 + }, + { + "epoch": 4.72, + "learning_rate": 3.621359905977841e-05, + "loss": 6.4452, + "step": 64000 + }, + { + "epoch": 4.72, + "learning_rate": 3.616337354722705e-05, + "loss": 6.5534, + "step": 64050 + }, + { + "epoch": 4.73, + "learning_rate": 3.6113148034675695e-05, + "loss": 6.9353, + "step": 64100 + }, + { + "epoch": 4.73, + "learning_rate": 3.606292252212434e-05, + "loss": 6.1219, + "step": 64150 + }, + { + "epoch": 4.74, + "learning_rate": 3.6012697009572984e-05, + "loss": 6.8233, + "step": 64200 + }, + { + "epoch": 4.74, + "learning_rate": 3.596247149702163e-05, + "loss": 7.1924, + "step": 64250 + }, + { + "epoch": 4.74, + "learning_rate": 3.591224598447027e-05, + "loss": 7.1116, + "step": 64300 + }, + { + "epoch": 4.75, + "learning_rate": 3.5862020471918916e-05, + "loss": 7.2818, + "step": 64350 + }, + { + "epoch": 4.75, + "learning_rate": 3.581179495936756e-05, + "loss": 6.3182, + "step": 64400 + }, + { + "epoch": 4.75, + "learning_rate": 3.5761569446816204e-05, + "loss": 6.7712, + "step": 64450 + }, + { + "epoch": 4.76, + "learning_rate": 3.5711343934264855e-05, + "loss": 6.7902, + "step": 64500 + }, + { + "epoch": 4.76, + "learning_rate": 3.56611184217135e-05, + "loss": 6.5504, + "step": 64550 + }, + { + "epoch": 4.77, + "learning_rate": 3.561089290916214e-05, + "loss": 6.3599, + "step": 64600 + }, + { + "epoch": 4.77, + "learning_rate": 3.556066739661079e-05, + "loss": 6.4758, + "step": 64650 + }, + { + "epoch": 4.77, + "learning_rate": 
3.551044188405943e-05, + "loss": 7.2899, + "step": 64700 + }, + { + "epoch": 4.78, + "learning_rate": 3.546021637150807e-05, + "loss": 6.6164, + "step": 64750 + }, + { + "epoch": 4.78, + "learning_rate": 3.540999085895671e-05, + "loss": 6.0466, + "step": 64800 + }, + { + "epoch": 4.78, + "learning_rate": 3.535976534640536e-05, + "loss": 6.2209, + "step": 64850 + }, + { + "epoch": 4.79, + "learning_rate": 3.530953983385401e-05, + "loss": 6.6098, + "step": 64900 + }, + { + "epoch": 4.79, + "learning_rate": 3.525931432130265e-05, + "loss": 5.959, + "step": 64950 + }, + { + "epoch": 4.79, + "learning_rate": 3.5209088808751295e-05, + "loss": 6.6942, + "step": 65000 + }, + { + "epoch": 4.8, + "learning_rate": 3.515886329619994e-05, + "loss": 7.7404, + "step": 65050 + }, + { + "epoch": 4.8, + "learning_rate": 3.510863778364858e-05, + "loss": 6.5342, + "step": 65100 + }, + { + "epoch": 4.81, + "learning_rate": 3.505841227109723e-05, + "loss": 6.1434, + "step": 65150 + }, + { + "epoch": 4.81, + "learning_rate": 3.500818675854587e-05, + "loss": 5.8523, + "step": 65200 + }, + { + "epoch": 4.81, + "learning_rate": 3.495796124599452e-05, + "loss": 5.7111, + "step": 65250 + }, + { + "epoch": 4.82, + "learning_rate": 3.490773573344316e-05, + "loss": 6.4092, + "step": 65300 + }, + { + "epoch": 4.82, + "learning_rate": 3.4857510220891804e-05, + "loss": 6.5057, + "step": 65350 + }, + { + "epoch": 4.82, + "learning_rate": 3.480728470834045e-05, + "loss": 5.9863, + "step": 65400 + }, + { + "epoch": 4.83, + "learning_rate": 3.475705919578909e-05, + "loss": 6.0978, + "step": 65450 + }, + { + "epoch": 4.83, + "learning_rate": 3.4706833683237736e-05, + "loss": 6.6325, + "step": 65500 + }, + { + "epoch": 4.84, + "learning_rate": 3.4656608170686387e-05, + "loss": 5.9716, + "step": 65550 + }, + { + "epoch": 4.84, + "learning_rate": 3.460638265813503e-05, + "loss": 5.9812, + "step": 65600 + }, + { + "epoch": 4.84, + "learning_rate": 3.4556157145583675e-05, + "loss": 5.6784, + "step": 65650 + 
}, + { + "epoch": 4.85, + "learning_rate": 3.450593163303232e-05, + "loss": 6.7303, + "step": 65700 + }, + { + "epoch": 4.85, + "learning_rate": 3.445570612048096e-05, + "loss": 7.527, + "step": 65750 + }, + { + "epoch": 4.85, + "learning_rate": 3.440548060792961e-05, + "loss": 6.3649, + "step": 65800 + }, + { + "epoch": 4.86, + "learning_rate": 3.435525509537825e-05, + "loss": 6.6456, + "step": 65850 + }, + { + "epoch": 4.86, + "learning_rate": 3.4305029582826895e-05, + "loss": 6.2518, + "step": 65900 + }, + { + "epoch": 4.86, + "learning_rate": 3.425480407027554e-05, + "loss": 6.0104, + "step": 65950 + }, + { + "epoch": 4.87, + "learning_rate": 3.420457855772418e-05, + "loss": 7.0261, + "step": 66000 + }, + { + "epoch": 4.87, + "learning_rate": 3.415435304517283e-05, + "loss": 6.4373, + "step": 66050 + }, + { + "epoch": 4.88, + "learning_rate": 3.410412753262147e-05, + "loss": 6.3931, + "step": 66100 + }, + { + "epoch": 4.88, + "learning_rate": 3.4053902020070115e-05, + "loss": 7.0678, + "step": 66150 + }, + { + "epoch": 4.88, + "learning_rate": 3.400367650751876e-05, + "loss": 6.9086, + "step": 66200 + }, + { + "epoch": 4.89, + "learning_rate": 3.39534509949674e-05, + "loss": 6.3431, + "step": 66250 + }, + { + "epoch": 4.89, + "learning_rate": 3.3903225482416054e-05, + "loss": 7.298, + "step": 66300 + }, + { + "epoch": 4.89, + "learning_rate": 3.38529999698647e-05, + "loss": 6.4188, + "step": 66350 + }, + { + "epoch": 4.9, + "learning_rate": 3.380277445731334e-05, + "loss": 6.1998, + "step": 66400 + }, + { + "epoch": 4.9, + "learning_rate": 3.375254894476198e-05, + "loss": 6.7306, + "step": 66450 + }, + { + "epoch": 4.91, + "learning_rate": 3.3702323432210624e-05, + "loss": 6.0333, + "step": 66500 + }, + { + "epoch": 4.91, + "learning_rate": 3.365209791965927e-05, + "loss": 6.9375, + "step": 66550 + }, + { + "epoch": 4.91, + "learning_rate": 3.360187240710791e-05, + "loss": 6.4168, + "step": 66600 + }, + { + "epoch": 4.92, + "learning_rate": 
3.355164689455656e-05, + "loss": 6.6486, + "step": 66650 + }, + { + "epoch": 4.92, + "learning_rate": 3.3501421382005207e-05, + "loss": 5.6693, + "step": 66700 + }, + { + "epoch": 4.92, + "learning_rate": 3.345119586945385e-05, + "loss": 6.7548, + "step": 66750 + }, + { + "epoch": 4.93, + "learning_rate": 3.3400970356902495e-05, + "loss": 8.7782, + "step": 66800 + }, + { + "epoch": 4.93, + "learning_rate": 3.335074484435114e-05, + "loss": 6.9741, + "step": 66850 + }, + { + "epoch": 4.93, + "learning_rate": 3.330051933179978e-05, + "loss": 6.423, + "step": 66900 + }, + { + "epoch": 4.94, + "learning_rate": 3.325029381924843e-05, + "loss": 6.0077, + "step": 66950 + }, + { + "epoch": 4.94, + "learning_rate": 3.320006830669707e-05, + "loss": 6.4085, + "step": 67000 + }, + { + "epoch": 4.95, + "learning_rate": 3.3149842794145715e-05, + "loss": 6.383, + "step": 67050 + }, + { + "epoch": 4.95, + "learning_rate": 3.309961728159436e-05, + "loss": 5.8758, + "step": 67100 + }, + { + "epoch": 4.95, + "learning_rate": 3.3049391769043e-05, + "loss": 7.1169, + "step": 67150 + }, + { + "epoch": 4.96, + "learning_rate": 3.299916625649165e-05, + "loss": 7.5655, + "step": 67200 + }, + { + "epoch": 4.96, + "learning_rate": 3.294894074394029e-05, + "loss": 6.1727, + "step": 67250 + }, + { + "epoch": 4.96, + "learning_rate": 3.2898715231388935e-05, + "loss": 6.0568, + "step": 67300 + }, + { + "epoch": 4.97, + "learning_rate": 3.2848489718837586e-05, + "loss": 6.9697, + "step": 67350 + }, + { + "epoch": 4.97, + "learning_rate": 3.279826420628623e-05, + "loss": 6.7938, + "step": 67400 + }, + { + "epoch": 4.98, + "learning_rate": 3.2748038693734874e-05, + "loss": 6.3006, + "step": 67450 + }, + { + "epoch": 4.98, + "learning_rate": 3.269781318118352e-05, + "loss": 6.3299, + "step": 67500 + }, + { + "epoch": 4.98, + "learning_rate": 3.264758766863216e-05, + "loss": 6.3615, + "step": 67550 + }, + { + "epoch": 4.99, + "learning_rate": 3.25973621560808e-05, + "loss": 6.1106, + "step": 67600 + 
}, + { + "epoch": 4.99, + "learning_rate": 3.2547136643529444e-05, + "loss": 5.571, + "step": 67650 + }, + { + "epoch": 4.99, + "learning_rate": 3.2496911130978094e-05, + "loss": 6.5922, + "step": 67700 + }, + { + "epoch": 5.0, + "learning_rate": 3.244668561842674e-05, + "loss": 5.663, + "step": 67750 + }, + { + "epoch": 5.0, + "learning_rate": 3.239646010587538e-05, + "loss": 7.3669, + "step": 67800 + }, + { + "epoch": 5.0, + "learning_rate": 3.2346234593324027e-05, + "loss": 6.015, + "step": 67850 + }, + { + "epoch": 5.01, + "learning_rate": 3.229600908077267e-05, + "loss": 5.8678, + "step": 67900 + }, + { + "epoch": 5.01, + "learning_rate": 3.2245783568221315e-05, + "loss": 5.5537, + "step": 67950 + }, + { + "epoch": 5.02, + "learning_rate": 3.219555805566996e-05, + "loss": 6.175, + "step": 68000 + }, + { + "epoch": 5.02, + "learning_rate": 3.21453325431186e-05, + "loss": 5.9018, + "step": 68050 + }, + { + "epoch": 5.02, + "learning_rate": 3.2095107030567254e-05, + "loss": 6.9064, + "step": 68100 + }, + { + "epoch": 5.03, + "learning_rate": 3.204488151801589e-05, + "loss": 6.8775, + "step": 68150 + }, + { + "epoch": 5.03, + "learning_rate": 3.1994656005464535e-05, + "loss": 5.6397, + "step": 68200 + }, + { + "epoch": 5.03, + "learning_rate": 3.194443049291318e-05, + "loss": 5.815, + "step": 68250 + }, + { + "epoch": 5.04, + "learning_rate": 3.189420498036182e-05, + "loss": 6.0795, + "step": 68300 + }, + { + "epoch": 5.04, + "learning_rate": 3.184397946781047e-05, + "loss": 6.8721, + "step": 68350 + }, + { + "epoch": 5.05, + "learning_rate": 3.179375395525912e-05, + "loss": 6.4936, + "step": 68400 + }, + { + "epoch": 5.05, + "learning_rate": 3.174352844270776e-05, + "loss": 5.7195, + "step": 68450 + }, + { + "epoch": 5.05, + "learning_rate": 3.1693302930156406e-05, + "loss": 5.6897, + "step": 68500 + }, + { + "epoch": 5.06, + "learning_rate": 3.164307741760505e-05, + "loss": 6.2271, + "step": 68550 + }, + { + "epoch": 5.06, + "learning_rate": 
3.1592851905053694e-05, + "loss": 6.3731, + "step": 68600 + }, + { + "epoch": 5.06, + "learning_rate": 3.154262639250234e-05, + "loss": 5.8502, + "step": 68650 + }, + { + "epoch": 5.07, + "learning_rate": 3.149240087995098e-05, + "loss": 5.8768, + "step": 68700 + }, + { + "epoch": 5.07, + "learning_rate": 3.1442175367399626e-05, + "loss": 6.4265, + "step": 68750 + }, + { + "epoch": 5.07, + "learning_rate": 3.139194985484827e-05, + "loss": 5.8952, + "step": 68800 + }, + { + "epoch": 5.08, + "learning_rate": 3.1341724342296914e-05, + "loss": 5.3739, + "step": 68850 + }, + { + "epoch": 5.08, + "learning_rate": 3.129149882974556e-05, + "loss": 6.4317, + "step": 68900 + }, + { + "epoch": 5.09, + "learning_rate": 3.12412733171942e-05, + "loss": 5.6296, + "step": 68950 + }, + { + "epoch": 5.09, + "learning_rate": 3.1191047804642847e-05, + "loss": 6.2448, + "step": 69000 + }, + { + "epoch": 5.09, + "learning_rate": 3.114082229209149e-05, + "loss": 6.116, + "step": 69050 + }, + { + "epoch": 5.1, + "learning_rate": 3.1090596779540135e-05, + "loss": 6.4299, + "step": 69100 + }, + { + "epoch": 5.1, + "learning_rate": 3.1040371266988785e-05, + "loss": 6.7337, + "step": 69150 + }, + { + "epoch": 5.1, + "learning_rate": 3.099014575443743e-05, + "loss": 6.6103, + "step": 69200 + }, + { + "epoch": 5.11, + "learning_rate": 3.0939920241886074e-05, + "loss": 5.6155, + "step": 69250 + }, + { + "epoch": 5.11, + "learning_rate": 3.088969472933471e-05, + "loss": 5.7131, + "step": 69300 + }, + { + "epoch": 5.12, + "learning_rate": 3.0839469216783355e-05, + "loss": 5.6799, + "step": 69350 + }, + { + "epoch": 5.12, + "learning_rate": 3.0789243704232e-05, + "loss": 5.9907, + "step": 69400 + }, + { + "epoch": 5.12, + "learning_rate": 3.073901819168064e-05, + "loss": 5.7125, + "step": 69450 + }, + { + "epoch": 5.13, + "learning_rate": 3.0688792679129294e-05, + "loss": 6.3093, + "step": 69500 + }, + { + "epoch": 5.13, + "learning_rate": 3.063856716657794e-05, + "loss": 6.1113, + "step": 69550 + 
}, + { + "epoch": 5.13, + "learning_rate": 3.058834165402658e-05, + "loss": 5.5845, + "step": 69600 + }, + { + "epoch": 5.14, + "learning_rate": 3.0538116141475226e-05, + "loss": 5.9267, + "step": 69650 + }, + { + "epoch": 5.14, + "learning_rate": 3.048789062892387e-05, + "loss": 6.0062, + "step": 69700 + }, + { + "epoch": 5.14, + "learning_rate": 3.0437665116372514e-05, + "loss": 6.005, + "step": 69750 + }, + { + "epoch": 5.15, + "learning_rate": 3.038743960382116e-05, + "loss": 5.854, + "step": 69800 + }, + { + "epoch": 5.15, + "learning_rate": 3.0337214091269806e-05, + "loss": 6.3468, + "step": 69850 + }, + { + "epoch": 5.16, + "learning_rate": 3.0286988578718446e-05, + "loss": 5.9127, + "step": 69900 + }, + { + "epoch": 5.16, + "learning_rate": 3.023676306616709e-05, + "loss": 6.5192, + "step": 69950 + }, + { + "epoch": 5.16, + "learning_rate": 3.0186537553615734e-05, + "loss": 5.8418, + "step": 70000 + }, + { + "epoch": 5.17, + "learning_rate": 3.013631204106438e-05, + "loss": 6.0775, + "step": 70050 + }, + { + "epoch": 5.17, + "learning_rate": 3.0086086528513026e-05, + "loss": 5.926, + "step": 70100 + }, + { + "epoch": 5.17, + "learning_rate": 3.003586101596167e-05, + "loss": 5.8467, + "step": 70150 + }, + { + "epoch": 5.18, + "learning_rate": 2.9985635503410314e-05, + "loss": 6.441, + "step": 70200 + }, + { + "epoch": 5.18, + "learning_rate": 2.9935409990858958e-05, + "loss": 5.6337, + "step": 70250 + }, + { + "epoch": 5.19, + "learning_rate": 2.9885184478307606e-05, + "loss": 6.3408, + "step": 70300 + }, + { + "epoch": 5.19, + "learning_rate": 2.983495896575625e-05, + "loss": 6.0077, + "step": 70350 + }, + { + "epoch": 5.19, + "learning_rate": 2.9784733453204894e-05, + "loss": 5.7263, + "step": 70400 + }, + { + "epoch": 5.2, + "learning_rate": 2.9734507940653534e-05, + "loss": 6.8161, + "step": 70450 + }, + { + "epoch": 5.2, + "learning_rate": 2.968428242810218e-05, + "loss": 6.4292, + "step": 70500 + }, + { + "epoch": 5.2, + "learning_rate": 
2.9634056915550822e-05, + "loss": 6.0751, + "step": 70550 + }, + { + "epoch": 5.21, + "learning_rate": 2.9583831402999466e-05, + "loss": 6.2439, + "step": 70600 + }, + { + "epoch": 5.21, + "learning_rate": 2.9533605890448114e-05, + "loss": 5.6406, + "step": 70650 + }, + { + "epoch": 5.22, + "learning_rate": 2.9483380377896758e-05, + "loss": 5.5623, + "step": 70700 + }, + { + "epoch": 5.22, + "learning_rate": 2.9433154865345402e-05, + "loss": 6.3392, + "step": 70750 + }, + { + "epoch": 5.22, + "learning_rate": 2.9382929352794046e-05, + "loss": 7.3752, + "step": 70800 + }, + { + "epoch": 5.23, + "learning_rate": 2.9332703840242693e-05, + "loss": 6.2126, + "step": 70850 + }, + { + "epoch": 5.23, + "learning_rate": 2.9282478327691338e-05, + "loss": 5.3583, + "step": 70900 + }, + { + "epoch": 5.23, + "learning_rate": 2.923225281513998e-05, + "loss": 5.4659, + "step": 70950 + }, + { + "epoch": 5.24, + "learning_rate": 2.9182027302588626e-05, + "loss": 6.1876, + "step": 71000 + }, + { + "epoch": 5.24, + "learning_rate": 2.9131801790037266e-05, + "loss": 5.8878, + "step": 71050 + }, + { + "epoch": 5.24, + "learning_rate": 2.908157627748591e-05, + "loss": 6.2974, + "step": 71100 + }, + { + "epoch": 5.25, + "learning_rate": 2.9031350764934558e-05, + "loss": 6.348, + "step": 71150 + }, + { + "epoch": 5.25, + "learning_rate": 2.8981125252383202e-05, + "loss": 5.9929, + "step": 71200 + }, + { + "epoch": 5.26, + "learning_rate": 2.8930899739831846e-05, + "loss": 5.9609, + "step": 71250 + }, + { + "epoch": 5.26, + "learning_rate": 2.888067422728049e-05, + "loss": 5.4301, + "step": 71300 + }, + { + "epoch": 5.26, + "learning_rate": 2.8830448714729137e-05, + "loss": 5.4559, + "step": 71350 + }, + { + "epoch": 5.27, + "learning_rate": 2.878022320217778e-05, + "loss": 7.1806, + "step": 71400 + }, + { + "epoch": 5.27, + "learning_rate": 2.8729997689626426e-05, + "loss": 5.6962, + "step": 71450 + }, + { + "epoch": 5.27, + "learning_rate": 2.867977217707507e-05, + "loss": 5.2751, + 
"step": 71500 + }, + { + "epoch": 5.28, + "learning_rate": 2.8629546664523717e-05, + "loss": 5.8732, + "step": 71550 + }, + { + "epoch": 5.28, + "learning_rate": 2.8579321151972354e-05, + "loss": 5.3111, + "step": 71600 + }, + { + "epoch": 5.29, + "learning_rate": 2.8529095639421e-05, + "loss": 6.269, + "step": 71650 + }, + { + "epoch": 5.29, + "learning_rate": 2.8478870126869646e-05, + "loss": 4.7494, + "step": 71700 + }, + { + "epoch": 5.29, + "learning_rate": 2.842864461431829e-05, + "loss": 6.2853, + "step": 71750 + }, + { + "epoch": 5.3, + "learning_rate": 2.8378419101766934e-05, + "loss": 6.7802, + "step": 71800 + }, + { + "epoch": 5.3, + "learning_rate": 2.8328193589215578e-05, + "loss": 7.7665, + "step": 71850 + }, + { + "epoch": 5.3, + "learning_rate": 2.8277968076664225e-05, + "loss": 5.7143, + "step": 71900 + }, + { + "epoch": 5.31, + "learning_rate": 2.822774256411287e-05, + "loss": 6.1485, + "step": 71950 + }, + { + "epoch": 5.31, + "learning_rate": 2.8177517051561514e-05, + "loss": 5.9611, + "step": 72000 + }, + { + "epoch": 5.31, + "learning_rate": 2.8127291539010158e-05, + "loss": 6.9089, + "step": 72050 + }, + { + "epoch": 5.32, + "learning_rate": 2.8077066026458805e-05, + "loss": 5.3067, + "step": 72100 + }, + { + "epoch": 5.32, + "learning_rate": 2.8026840513907442e-05, + "loss": 5.7255, + "step": 72150 + }, + { + "epoch": 5.33, + "learning_rate": 2.7976615001356086e-05, + "loss": 6.6141, + "step": 72200 + }, + { + "epoch": 5.33, + "learning_rate": 2.7926389488804734e-05, + "loss": 5.1587, + "step": 72250 + }, + { + "epoch": 5.33, + "learning_rate": 2.7876163976253378e-05, + "loss": 6.2063, + "step": 72300 + }, + { + "epoch": 5.34, + "learning_rate": 2.7825938463702022e-05, + "loss": 5.9984, + "step": 72350 + }, + { + "epoch": 5.34, + "learning_rate": 2.7775712951150666e-05, + "loss": 6.2301, + "step": 72400 + }, + { + "epoch": 5.34, + "learning_rate": 2.7725487438599313e-05, + "loss": 6.0619, + "step": 72450 + }, + { + "epoch": 5.35, + 
"learning_rate": 2.7675261926047957e-05, + "loss": 7.1879, + "step": 72500 + }, + { + "epoch": 5.35, + "learning_rate": 2.76250364134966e-05, + "loss": 6.8024, + "step": 72550 + }, + { + "epoch": 5.36, + "learning_rate": 2.7574810900945246e-05, + "loss": 7.412, + "step": 72600 + }, + { + "epoch": 5.36, + "learning_rate": 2.7524585388393893e-05, + "loss": 6.172, + "step": 72650 + }, + { + "epoch": 5.36, + "learning_rate": 2.7474359875842537e-05, + "loss": 5.9536, + "step": 72700 + }, + { + "epoch": 5.37, + "learning_rate": 2.7424134363291178e-05, + "loss": 6.4215, + "step": 72750 + }, + { + "epoch": 5.37, + "learning_rate": 2.7373908850739822e-05, + "loss": 5.6326, + "step": 72800 + }, + { + "epoch": 5.37, + "learning_rate": 2.7323683338188466e-05, + "loss": 5.8943, + "step": 72850 + }, + { + "epoch": 5.38, + "learning_rate": 2.727345782563711e-05, + "loss": 6.8689, + "step": 72900 + }, + { + "epoch": 5.38, + "learning_rate": 2.7223232313085757e-05, + "loss": 6.2079, + "step": 72950 + }, + { + "epoch": 5.38, + "learning_rate": 2.71730068005344e-05, + "loss": 6.4607, + "step": 73000 + }, + { + "epoch": 5.39, + "learning_rate": 2.7122781287983045e-05, + "loss": 6.0781, + "step": 73050 + }, + { + "epoch": 5.39, + "learning_rate": 2.707255577543169e-05, + "loss": 5.7624, + "step": 73100 + }, + { + "epoch": 5.4, + "learning_rate": 2.7022330262880337e-05, + "loss": 6.0385, + "step": 73150 + }, + { + "epoch": 5.4, + "learning_rate": 2.697210475032898e-05, + "loss": 5.9751, + "step": 73200 + }, + { + "epoch": 5.4, + "learning_rate": 2.6921879237777625e-05, + "loss": 6.3938, + "step": 73250 + }, + { + "epoch": 5.41, + "learning_rate": 2.6871653725226266e-05, + "loss": 5.9229, + "step": 73300 + }, + { + "epoch": 5.41, + "learning_rate": 2.682142821267491e-05, + "loss": 6.0674, + "step": 73350 + }, + { + "epoch": 5.41, + "learning_rate": 2.6771202700123554e-05, + "loss": 6.7223, + "step": 73400 + }, + { + "epoch": 5.42, + "learning_rate": 2.6720977187572198e-05, + "loss": 
5.7889, + "step": 73450 + }, + { + "epoch": 5.42, + "learning_rate": 2.6670751675020845e-05, + "loss": 7.1486, + "step": 73500 + }, + { + "epoch": 5.43, + "learning_rate": 2.662052616246949e-05, + "loss": 6.1844, + "step": 73550 + }, + { + "epoch": 5.43, + "learning_rate": 2.6570300649918133e-05, + "loss": 6.198, + "step": 73600 + }, + { + "epoch": 5.43, + "learning_rate": 2.6520075137366777e-05, + "loss": 6.6778, + "step": 73650 + }, + { + "epoch": 5.44, + "learning_rate": 2.6469849624815425e-05, + "loss": 5.9788, + "step": 73700 + }, + { + "epoch": 5.44, + "learning_rate": 2.641962411226407e-05, + "loss": 6.3568, + "step": 73750 + }, + { + "epoch": 5.44, + "learning_rate": 2.6369398599712713e-05, + "loss": 5.9383, + "step": 73800 + }, + { + "epoch": 5.45, + "learning_rate": 2.6319173087161357e-05, + "loss": 6.4832, + "step": 73850 + }, + { + "epoch": 5.45, + "learning_rate": 2.6268947574609998e-05, + "loss": 5.883, + "step": 73900 + }, + { + "epoch": 5.45, + "learning_rate": 2.6218722062058642e-05, + "loss": 6.213, + "step": 73950 + }, + { + "epoch": 5.46, + "learning_rate": 2.616849654950729e-05, + "loss": 6.5404, + "step": 74000 + }, + { + "epoch": 5.46, + "learning_rate": 2.6118271036955933e-05, + "loss": 6.1246, + "step": 74050 + }, + { + "epoch": 5.47, + "learning_rate": 2.6068045524404577e-05, + "loss": 6.0739, + "step": 74100 + }, + { + "epoch": 5.47, + "learning_rate": 2.601782001185322e-05, + "loss": 6.2085, + "step": 74150 + }, + { + "epoch": 5.47, + "learning_rate": 2.596759449930187e-05, + "loss": 6.7059, + "step": 74200 + }, + { + "epoch": 5.48, + "learning_rate": 2.5917368986750513e-05, + "loss": 6.231, + "step": 74250 + }, + { + "epoch": 5.48, + "learning_rate": 2.5867143474199157e-05, + "loss": 6.1287, + "step": 74300 + }, + { + "epoch": 5.48, + "learning_rate": 2.58169179616478e-05, + "loss": 6.0583, + "step": 74350 + }, + { + "epoch": 5.49, + "learning_rate": 2.576669244909645e-05, + "loss": 6.1552, + "step": 74400 + }, + { + "epoch": 5.49, + 
"learning_rate": 2.5716466936545086e-05, + "loss": 6.4191, + "step": 74450 + }, + { + "epoch": 5.5, + "learning_rate": 2.566624142399373e-05, + "loss": 7.2899, + "step": 74500 + }, + { + "epoch": 5.5, + "learning_rate": 2.5616015911442377e-05, + "loss": 6.3234, + "step": 74550 + }, + { + "epoch": 5.5, + "learning_rate": 2.556579039889102e-05, + "loss": 5.6938, + "step": 74600 + }, + { + "epoch": 5.51, + "learning_rate": 2.5515564886339665e-05, + "loss": 5.1418, + "step": 74650 + }, + { + "epoch": 5.51, + "learning_rate": 2.546533937378831e-05, + "loss": 5.7747, + "step": 74700 + }, + { + "epoch": 5.51, + "learning_rate": 2.5415113861236957e-05, + "loss": 5.7191, + "step": 74750 + }, + { + "epoch": 5.52, + "learning_rate": 2.53648883486856e-05, + "loss": 6.0435, + "step": 74800 + }, + { + "epoch": 5.52, + "learning_rate": 2.5314662836134245e-05, + "loss": 6.1592, + "step": 74850 + }, + { + "epoch": 5.52, + "learning_rate": 2.526443732358289e-05, + "loss": 6.1625, + "step": 74900 + }, + { + "epoch": 5.53, + "learning_rate": 2.5214211811031536e-05, + "loss": 4.7958, + "step": 74950 + }, + { + "epoch": 5.53, + "learning_rate": 2.516398629848018e-05, + "loss": 6.0618, + "step": 75000 + }, + { + "epoch": 5.54, + "learning_rate": 2.5113760785928818e-05, + "loss": 5.754, + "step": 75050 + }, + { + "epoch": 5.54, + "learning_rate": 2.5063535273377465e-05, + "loss": 5.6346, + "step": 75100 + }, + { + "epoch": 5.54, + "learning_rate": 2.501330976082611e-05, + "loss": 5.6922, + "step": 75150 + }, + { + "epoch": 5.55, + "learning_rate": 2.4963084248274753e-05, + "loss": 6.6754, + "step": 75200 + }, + { + "epoch": 5.55, + "learning_rate": 2.4912858735723397e-05, + "loss": 5.2828, + "step": 75250 + }, + { + "epoch": 5.55, + "learning_rate": 2.4862633223172045e-05, + "loss": 5.799, + "step": 75300 + }, + { + "epoch": 5.56, + "learning_rate": 2.481240771062069e-05, + "loss": 5.8229, + "step": 75350 + }, + { + "epoch": 5.56, + "learning_rate": 2.4762182198069333e-05, + "loss": 
5.1759, + "step": 75400 + }, + { + "epoch": 5.57, + "learning_rate": 2.4711956685517977e-05, + "loss": 5.9411, + "step": 75450 + }, + { + "epoch": 5.57, + "learning_rate": 2.466173117296662e-05, + "loss": 5.4522, + "step": 75500 + }, + { + "epoch": 5.57, + "learning_rate": 2.4611505660415265e-05, + "loss": 6.0731, + "step": 75550 + }, + { + "epoch": 5.58, + "learning_rate": 2.456128014786391e-05, + "loss": 5.9288, + "step": 75600 + }, + { + "epoch": 5.58, + "learning_rate": 2.4511054635312557e-05, + "loss": 5.7434, + "step": 75650 + }, + { + "epoch": 5.58, + "learning_rate": 2.44608291227612e-05, + "loss": 5.5638, + "step": 75700 + }, + { + "epoch": 5.59, + "learning_rate": 2.441060361020984e-05, + "loss": 6.423, + "step": 75750 + }, + { + "epoch": 5.59, + "learning_rate": 2.436037809765849e-05, + "loss": 5.4612, + "step": 75800 + }, + { + "epoch": 5.59, + "learning_rate": 2.4310152585107133e-05, + "loss": 7.1213, + "step": 75850 + }, + { + "epoch": 5.6, + "learning_rate": 2.4259927072555777e-05, + "loss": 6.366, + "step": 75900 + }, + { + "epoch": 5.6, + "learning_rate": 2.420970156000442e-05, + "loss": 5.8278, + "step": 75950 + }, + { + "epoch": 5.61, + "learning_rate": 2.4159476047453065e-05, + "loss": 6.1465, + "step": 76000 + }, + { + "epoch": 5.61, + "learning_rate": 2.410925053490171e-05, + "loss": 5.7868, + "step": 76050 + }, + { + "epoch": 5.61, + "learning_rate": 2.4059025022350353e-05, + "loss": 5.8116, + "step": 76100 + }, + { + "epoch": 5.62, + "learning_rate": 2.4008799509798997e-05, + "loss": 5.7459, + "step": 76150 + }, + { + "epoch": 5.62, + "learning_rate": 2.3958573997247645e-05, + "loss": 6.2053, + "step": 76200 + }, + { + "epoch": 5.62, + "learning_rate": 2.390834848469629e-05, + "loss": 5.768, + "step": 76250 + }, + { + "epoch": 5.63, + "learning_rate": 2.385812297214493e-05, + "loss": 5.9021, + "step": 76300 + }, + { + "epoch": 5.63, + "learning_rate": 2.3807897459593577e-05, + "loss": 6.2206, + "step": 76350 + }, + { + "epoch": 5.64, + 
"learning_rate": 2.375767194704222e-05, + "loss": 6.3088, + "step": 76400 + }, + { + "epoch": 5.64, + "learning_rate": 2.3707446434490865e-05, + "loss": 6.0105, + "step": 76450 + }, + { + "epoch": 5.64, + "learning_rate": 2.365722092193951e-05, + "loss": 5.738, + "step": 76500 + }, + { + "epoch": 5.65, + "learning_rate": 2.3606995409388156e-05, + "loss": 6.2838, + "step": 76550 + }, + { + "epoch": 5.65, + "learning_rate": 2.3556769896836797e-05, + "loss": 5.7041, + "step": 76600 + }, + { + "epoch": 5.65, + "learning_rate": 2.350654438428544e-05, + "loss": 6.7796, + "step": 76650 + }, + { + "epoch": 5.66, + "learning_rate": 2.345631887173409e-05, + "loss": 6.4432, + "step": 76700 + }, + { + "epoch": 5.66, + "learning_rate": 2.3406093359182733e-05, + "loss": 5.5805, + "step": 76750 + }, + { + "epoch": 5.66, + "learning_rate": 2.3355867846631377e-05, + "loss": 5.4049, + "step": 76800 + }, + { + "epoch": 5.67, + "learning_rate": 2.330564233408002e-05, + "loss": 5.0643, + "step": 76850 + }, + { + "epoch": 5.67, + "learning_rate": 2.3255416821528665e-05, + "loss": 5.4007, + "step": 76900 + }, + { + "epoch": 5.68, + "learning_rate": 2.320519130897731e-05, + "loss": 5.3506, + "step": 76950 + }, + { + "epoch": 5.68, + "learning_rate": 2.3154965796425953e-05, + "loss": 6.5889, + "step": 77000 + }, + { + "epoch": 5.68, + "learning_rate": 2.31047402838746e-05, + "loss": 6.206, + "step": 77050 + }, + { + "epoch": 5.69, + "learning_rate": 2.3054514771323244e-05, + "loss": 6.03, + "step": 77100 + }, + { + "epoch": 5.69, + "learning_rate": 2.3004289258771885e-05, + "loss": 5.6658, + "step": 77150 + }, + { + "epoch": 5.69, + "learning_rate": 2.295406374622053e-05, + "loss": 6.5901, + "step": 77200 + }, + { + "epoch": 5.7, + "learning_rate": 2.2903838233669176e-05, + "loss": 6.3019, + "step": 77250 + }, + { + "epoch": 5.7, + "learning_rate": 2.285361272111782e-05, + "loss": 5.5744, + "step": 77300 + }, + { + "epoch": 5.71, + "learning_rate": 2.2803387208566465e-05, + "loss": 5.8269, 
+ "step": 77350 + }, + { + "epoch": 5.71, + "learning_rate": 2.275316169601511e-05, + "loss": 6.1005, + "step": 77400 + }, + { + "epoch": 5.71, + "learning_rate": 2.2702936183463753e-05, + "loss": 6.0196, + "step": 77450 + }, + { + "epoch": 5.72, + "learning_rate": 2.2652710670912397e-05, + "loss": 5.8475, + "step": 77500 + }, + { + "epoch": 5.72, + "learning_rate": 2.260248515836104e-05, + "loss": 6.4338, + "step": 77550 + }, + { + "epoch": 5.72, + "learning_rate": 2.2552259645809688e-05, + "loss": 4.958, + "step": 77600 + }, + { + "epoch": 5.73, + "learning_rate": 2.2502034133258332e-05, + "loss": 6.4737, + "step": 77650 + }, + { + "epoch": 5.73, + "learning_rate": 2.2451808620706976e-05, + "loss": 6.3223, + "step": 77700 + }, + { + "epoch": 5.74, + "learning_rate": 2.240158310815562e-05, + "loss": 7.171, + "step": 77750 + }, + { + "epoch": 5.74, + "learning_rate": 2.2351357595604264e-05, + "loss": 6.5725, + "step": 77800 + }, + { + "epoch": 5.74, + "learning_rate": 2.230113208305291e-05, + "loss": 5.7644, + "step": 77850 + }, + { + "epoch": 5.75, + "learning_rate": 2.2250906570501553e-05, + "loss": 5.6257, + "step": 77900 + }, + { + "epoch": 5.75, + "learning_rate": 2.22006810579502e-05, + "loss": 6.2325, + "step": 77950 + }, + { + "epoch": 5.75, + "learning_rate": 2.215045554539884e-05, + "loss": 6.7106, + "step": 78000 + }, + { + "epoch": 5.76, + "learning_rate": 2.2100230032847485e-05, + "loss": 5.0113, + "step": 78050 + }, + { + "epoch": 5.76, + "learning_rate": 2.205000452029613e-05, + "loss": 6.1309, + "step": 78100 + }, + { + "epoch": 5.76, + "learning_rate": 2.1999779007744776e-05, + "loss": 5.5098, + "step": 78150 + }, + { + "epoch": 5.77, + "learning_rate": 2.194955349519342e-05, + "loss": 6.6709, + "step": 78200 + }, + { + "epoch": 5.77, + "learning_rate": 2.1899327982642064e-05, + "loss": 5.8039, + "step": 78250 + }, + { + "epoch": 5.78, + "learning_rate": 2.184910247009071e-05, + "loss": 5.0271, + "step": 78300 + }, + { + "epoch": 5.78, + 
"learning_rate": 2.1798876957539352e-05, + "loss": 6.1648, + "step": 78350 + }, + { + "epoch": 5.78, + "learning_rate": 2.1748651444987996e-05, + "loss": 5.1838, + "step": 78400 + }, + { + "epoch": 5.79, + "learning_rate": 2.169842593243664e-05, + "loss": 7.1149, + "step": 78450 + }, + { + "epoch": 5.79, + "learning_rate": 2.1648200419885288e-05, + "loss": 5.9544, + "step": 78500 + }, + { + "epoch": 5.79, + "learning_rate": 2.1597974907333932e-05, + "loss": 6.4747, + "step": 78550 + }, + { + "epoch": 5.8, + "learning_rate": 2.1547749394782573e-05, + "loss": 5.6367, + "step": 78600 + }, + { + "epoch": 5.8, + "learning_rate": 2.149752388223122e-05, + "loss": 5.8395, + "step": 78650 + }, + { + "epoch": 5.81, + "learning_rate": 2.1447298369679864e-05, + "loss": 6.8058, + "step": 78700 + }, + { + "epoch": 5.81, + "learning_rate": 2.1397072857128508e-05, + "loss": 6.4977, + "step": 78750 + }, + { + "epoch": 5.81, + "learning_rate": 2.1346847344577152e-05, + "loss": 7.0943, + "step": 78800 + }, + { + "epoch": 5.82, + "learning_rate": 2.12966218320258e-05, + "loss": 6.0009, + "step": 78850 + }, + { + "epoch": 5.82, + "learning_rate": 2.124639631947444e-05, + "loss": 5.8074, + "step": 78900 + }, + { + "epoch": 5.82, + "learning_rate": 2.1196170806923084e-05, + "loss": 6.277, + "step": 78950 + }, + { + "epoch": 5.83, + "learning_rate": 2.114594529437173e-05, + "loss": 5.61, + "step": 79000 + }, + { + "epoch": 5.83, + "learning_rate": 2.1095719781820376e-05, + "loss": 5.6585, + "step": 79050 + }, + { + "epoch": 5.83, + "learning_rate": 2.104549426926902e-05, + "loss": 4.9836, + "step": 79100 + }, + { + "epoch": 5.84, + "learning_rate": 2.099526875671766e-05, + "loss": 6.1327, + "step": 79150 + }, + { + "epoch": 5.84, + "learning_rate": 2.0945043244166308e-05, + "loss": 6.2281, + "step": 79200 + }, + { + "epoch": 5.85, + "learning_rate": 2.0894817731614952e-05, + "loss": 5.9593, + "step": 79250 + }, + { + "epoch": 5.85, + "learning_rate": 2.0844592219063596e-05, + "loss": 
5.1415, + "step": 79300 + }, + { + "epoch": 5.85, + "learning_rate": 2.079436670651224e-05, + "loss": 5.719, + "step": 79350 + }, + { + "epoch": 5.86, + "learning_rate": 2.0744141193960888e-05, + "loss": 6.1617, + "step": 79400 + }, + { + "epoch": 5.86, + "learning_rate": 2.069391568140953e-05, + "loss": 6.3103, + "step": 79450 + }, + { + "epoch": 5.86, + "learning_rate": 2.0643690168858172e-05, + "loss": 5.2091, + "step": 79500 + }, + { + "epoch": 5.87, + "learning_rate": 2.059346465630682e-05, + "loss": 6.1573, + "step": 79550 + }, + { + "epoch": 5.87, + "learning_rate": 2.0543239143755464e-05, + "loss": 6.3729, + "step": 79600 + }, + { + "epoch": 5.88, + "learning_rate": 2.0493013631204108e-05, + "loss": 6.1918, + "step": 79650 + }, + { + "epoch": 5.88, + "learning_rate": 2.0442788118652752e-05, + "loss": 6.7536, + "step": 79700 + }, + { + "epoch": 5.88, + "learning_rate": 2.0392562606101396e-05, + "loss": 5.7906, + "step": 79750 + }, + { + "epoch": 5.89, + "learning_rate": 2.034233709355004e-05, + "loss": 6.3394, + "step": 79800 + }, + { + "epoch": 5.89, + "learning_rate": 2.0292111580998684e-05, + "loss": 5.8182, + "step": 79850 + }, + { + "epoch": 5.89, + "learning_rate": 2.0241886068447328e-05, + "loss": 5.8381, + "step": 79900 + }, + { + "epoch": 5.9, + "learning_rate": 2.0191660555895976e-05, + "loss": 6.4559, + "step": 79950 + }, + { + "epoch": 5.9, + "learning_rate": 2.0141435043344616e-05, + "loss": 5.5812, + "step": 80000 + }, + { + "epoch": 5.9, + "eval_loss": 7.74003791809082, + "eval_runtime": 967.3759, + "eval_samples_per_second": 13.54, + "eval_steps_per_second": 3.385, + "eval_wer": 0.19946663306874973, + "step": 80000 + } + ], + "max_steps": 100051, + "num_train_epochs": 8, + "total_flos": 0.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-80000/training_args.bin b/checkpoint-80000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..dbc064046a36220dd960e955c565bc3e2c9e3abd --- 
/dev/null +++ b/checkpoint-80000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b64c669f66dd7a2e54d3001ce7e31c26cc60dd58136e8ce90e6055bd0ae15eb +size 3503 diff --git a/conf/conformer_transducer_bpe_dummy.yaml b/conf/conformer_transducer_bpe_dummy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b37fb056b27d8c20d0bd0cded3eb664504f7dda6 --- /dev/null +++ b/conf/conformer_transducer_bpe_dummy.yaml @@ -0,0 +1,192 @@ +# It contains the default values for training a Conformer-Transducer ASR model, dummy size, with Transducer loss and sub-word encoding. + +name: "Conformer-Transducer-BPE" + +model: + sample_rate: 16000 + compute_eval_loss: false # eval samples can be very long and exhaust memory. Disable computation of transducer loss during validation/testing with this flag. + log_prediction: true # enables logging sample predictions in the output during training + skip_nan_grad: false + + model_defaults: + enc_hidden: ${model.encoder.d_model} + pred_hidden: 64 + joint_hidden: 64 + + train_ds: + manifest_filepath: ??? + sample_rate: ${model.sample_rate} + batch_size: 16 # you may increase batch_size if your memory allows + shuffle: true + num_workers: 8 + pin_memory: true + use_start_end_token: false + trim_silence: false + max_duration: 16.7 # it is set for LibriSpeech, you may need to update it for your dataset + min_duration: 0.1 + # tarred datasets + is_tarred: false + tarred_audio_filepaths: null + shuffle_n: 2048 + # bucketing params + bucketing_strategy: "synced_randomized" + bucketing_batch_size: null + + validation_ds: + manifest_filepath: ??? 
+ sample_rate: ${model.sample_rate} + batch_size: 16 + shuffle: false + num_workers: 8 + pin_memory: true + use_start_end_token: false + + test_ds: + manifest_filepath: null + sample_rate: ${model.sample_rate} + batch_size: 16 + shuffle: false + num_workers: 8 + pin_memory: true + use_start_end_token: false + + # You may find more detail on how to train a tokenizer at: /scripts/tokenizers/process_asr_text_tokenizer.py + tokenizer: + dir: ??? # path to directory which contains either tokenizer.model (bpe) or vocab.txt (for wpe) + type: bpe # Can be either bpe (SentencePiece tokenizer) or wpe (WordPiece tokenizer) + + preprocessor: + _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor + sample_rate: ${model.sample_rate} + normalize: "per_feature" + window_size: 0.025 + window_stride: 0.01 + window: "hann" + features: 80 + n_fft: 512 + frame_splicing: 1 + dither: 0.00001 + pad_to: 0 + + spec_augment: + _target_: nemo.collections.asr.modules.SpectrogramAugmentation + freq_masks: 2 # set to zero to disable it + time_masks: 10 # set to zero to disable it + freq_width: 27 + time_width: 0.05 + + encoder: + _target_: nemo.collections.asr.modules.ConformerEncoder + feat_in: ${model.preprocessor.features} + feat_out: -1 # you may set it if you need different output size other than the default d_model + n_layers: 2 + d_model: 64 + + # Sub-sampling params + subsampling: striding # vggnet, striding, stacking or stacking_norm, dw_striding + subsampling_factor: 4 # must be power of 2 for striding and vggnet + subsampling_conv_channels: -1 # set to -1 to make it equal to the d_model + causal_downsampling: false + + # Feed forward module's params + ff_expansion_factor: 4 + + # Multi-headed Attention Module's params + self_attention_model: rel_pos # rel_pos or abs_pos + n_heads: 8 # may need to be lower for smaller d_models + # [left, right] specifies the number of steps to be seen from left and right of each step in self-attention + att_context_size: [-1, -1] # 
-1 means unlimited context + att_context_style: regular # regular or chunked_limited + xscaling: true # scales up the input embeddings by sqrt(d_model) + untie_biases: true # unties the biases of the TransformerXL layers + pos_emb_max_len: 5000 + + # Convolution module's params + conv_kernel_size: 5 + conv_norm_type: 'batch_norm' # batch_norm or layer_norm or groupnormN (N specifies the number of groups) + # conv_context_size can be"causal" or a list of two integers while conv_context_size[0]+conv_context_size[1]+1==conv_kernel_size + # null means [(kernel_size-1)//2, (kernel_size-1)//2], and 'causal' means [(kernel_size-1), 0] + conv_context_size: null + + ### regularization + dropout: 0.1 # The dropout used in most of the Conformer Modules + dropout_emb: 0.0 # The dropout used for embeddings + dropout_att: 0.1 # The dropout for multi-headed attention modules + + decoder: + _target_: nemo.collections.asr.modules.RNNTDecoder + normalization_mode: null # Currently only null is supported for export. + random_state_sampling: false # Random state sampling: https://arxiv.org/pdf/1910.11455.pdf + blank_as_pad: true # This flag must be set in order to support exporting of RNNT models + efficient inference. + + prednet: + pred_hidden: ${model.model_defaults.pred_hidden} + pred_rnn_layers: 1 + t_max: null + dropout: 0.2 + + joint: + _target_: nemo.collections.asr.modules.RNNTJoint + log_softmax: null # 'null' would set it automatically according to CPU/GPU device + preserve_memory: false # dramatically slows down training, but might preserve some memory + + # Fuses the computation of prediction net + joint net + loss + WER calculation + # to be run on sub-batches of size `fused_batch_size`. + # When this flag is set to true, consider the `batch_size` of *_ds to be just `encoder` batch size. + # `fused_batch_size` is the actual batch size of the prediction net, joint net and transducer loss. 
+ # Using small values here will preserve a lot of memory during training, but will make training slower as well. + # An optimal ratio of fused_batch_size : *_ds.batch_size is 1:1. + # However, to preserve memory, this ratio can be 1:8 or even 1:16. + # Extreme case of 1:B (i.e. fused_batch_size=1) should be avoided as training speed would be very slow. + fuse_loss_wer: true + fused_batch_size: 16 + + jointnet: + joint_hidden: ${model.model_defaults.joint_hidden} + activation: "relu" + dropout: 0.2 + + decoding: + strategy: "greedy_batch" # can be greedy, greedy_batch, beam, tsd, alsd. + + # greedy strategy config + greedy: + max_symbols: 10 + + # beam strategy config + beam: + beam_size: 2 + return_best_hypothesis: False + score_norm: true + tsd_max_sym_exp: 50 # for Time Synchronous Decoding + alsd_max_target_len: 2.0 # for Alignment-Length Synchronous Decoding + + loss: + loss_name: "default" + + warprnnt_numba_kwargs: + # FastEmit regularization: https://arxiv.org/abs/2010.11148 + # You may enable FastEmit to reduce the latency of the model for streaming + fastemit_lambda: 0.0 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. + clamp: -1.0 # if > 0, applies gradient clamping in range [-clamp, clamp] for the joint tensor only. 
+ + # Adds Gaussian noise to the gradients of the decoder to avoid overfitting + variational_noise: + start_step: 0 + std: 0.0 + + optim: + name: adamw + lr: 5.0 + # optimizer arguments + betas: [0.9, 0.98] + weight_decay: 1e-3 + + # scheduler setup + sched: + name: NoamAnnealing + d_model: ${model.encoder.d_model} + # scheduler config override + warmup_steps: 10000 + warmup_ratio: null + min_lr: 1e-6 diff --git a/conf/conformer_transducer_bpe_large.yaml b/conf/conformer_transducer_bpe_large.yaml new file mode 100644 index 0000000000000000000000000000000000000000..878f71cb8f3a42dc76a4c172926ae72542bbd541 --- /dev/null +++ b/conf/conformer_transducer_bpe_large.yaml @@ -0,0 +1,212 @@ +# It contains the default values for training a Conformer-Transducer ASR model, large size (~120M) with Transducer loss and sub-word encoding. + +# Architecture and training config: +# Default learning parameters in this config are set for effective batch size of 2K. To train it with smaller effective +# batch sizes, you may need to re-tune the learning parameters or use higher accumulate_grad_batches. +# Here are the recommended configs for different variants of Conformer-Transducer, other parameters are the same as in this config file. 
+# +# +-------------+---------+---------+----------+--------------+--------------------------+ +# | Model | d_model | n_heads | n_layers | weight_decay | pred_hidden/joint_hidden | +# +=============+=========+========+===========+==============+==========================+ +# | Small (14M)| 176 | 4 | 16 | 0.0 | 320 | +# +-------------+---------+--------+-----------+--------------+--------------------------+ +# | Medium (32M)| 256 | 4 | 16 | 1e-3 | 640 | +# +-------------+---------+--------+-----------+--------------+--------------------------+ +# | Large (120M)| 512 | 8 | 17 | 1e-3 | 640 | +# +-----------------------------------------------------------+--------------------------+ +# + +# You may find more info about Conformer-Transducer here: https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/asr/models.html#conformer-transducer +# Pre-trained models of Conformer-Transducer can be found here: https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/asr/results.html +# The checkpoint of the large model trained on NeMo ASRSET with this recipe can be found here: https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_transducer_large + +name: "Conformer-Transducer-BPE" + +model: + sample_rate: 16000 + compute_eval_loss: false # eval samples can be very long and exhaust memory. Disable computation of transducer loss during validation/testing with this flag. + log_prediction: true # enables logging sample predictions in the output during training + skip_nan_grad: false + + model_defaults: + enc_hidden: ${model.encoder.d_model} + pred_hidden: 640 + joint_hidden: 640 + + train_ds: + manifest_filepath: ??? 
+ sample_rate: ${model.sample_rate} + batch_size: 16 # you may increase batch_size if your memory allows + shuffle: true + num_workers: 8 + pin_memory: true + use_start_end_token: false + trim_silence: false + max_duration: 16.7 # it is set for LibriSpeech, you may need to update it for your dataset + min_duration: 0.1 + # tarred datasets + is_tarred: false + tarred_audio_filepaths: null + shuffle_n: 2048 + # bucketing params + bucketing_strategy: "synced_randomized" + bucketing_batch_size: null + + validation_ds: + manifest_filepath: ??? + sample_rate: ${model.sample_rate} + batch_size: 16 + shuffle: false + num_workers: 8 + pin_memory: true + use_start_end_token: false + + test_ds: + manifest_filepath: null + sample_rate: ${model.sample_rate} + batch_size: 16 + shuffle: false + num_workers: 8 + pin_memory: true + use_start_end_token: false + + # You may find more detail on how to train a tokenizer at: /scripts/tokenizers/process_asr_text_tokenizer.py + tokenizer: + dir: ??? # path to directory which contains either tokenizer.model (bpe) or vocab.txt (for wpe) + type: bpe # Can be either bpe (SentencePiece tokenizer) or wpe (WordPiece tokenizer) + + preprocessor: + _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor + sample_rate: ${model.sample_rate} + normalize: "per_feature" + window_size: 0.025 + window_stride: 0.01 + window: "hann" + features: 80 + n_fft: 512 + frame_splicing: 1 + dither: 0.00001 + pad_to: 0 + + spec_augment: + _target_: nemo.collections.asr.modules.SpectrogramAugmentation + freq_masks: 2 # set to zero to disable it + time_masks: 10 # set to zero to disable it + freq_width: 27 + time_width: 0.05 + + encoder: + _target_: nemo.collections.asr.modules.ConformerEncoder + feat_in: ${model.preprocessor.features} + feat_out: -1 # you may set it if you need different output size other than the default d_model + n_layers: 17 + d_model: 512 + + # Sub-sampling params + subsampling: striding # vggnet, striding, stacking or 
stacking_norm, dw_striding + subsampling_factor: 4 # must be power of 2 for striding and vggnet + subsampling_conv_channels: -1 # set to -1 to make it equal to the d_model + causal_downsampling: false + + # Feed forward module's params + ff_expansion_factor: 4 + + # Multi-headed Attention Module's params + self_attention_model: rel_pos # rel_pos or abs_pos + n_heads: 8 # may need to be lower for smaller d_models + # [left, right] specifies the number of steps to be seen from left and right of each step in self-attention + att_context_size: [-1, -1] # -1 means unlimited context + att_context_style: regular # regular or chunked_limited + xscaling: true # scales up the input embeddings by sqrt(d_model) + untie_biases: true # unties the biases of the TransformerXL layers + pos_emb_max_len: 5000 + + # Convolution module's params + conv_kernel_size: 31 + conv_norm_type: 'batch_norm' # batch_norm or layer_norm or groupnormN (N specifies the number of groups) + # conv_context_size can be"causal" or a list of two integers while conv_context_size[0]+conv_context_size[1]+1==conv_kernel_size + # null means [(kernel_size-1)//2, (kernel_size-1)//2], and 'causal' means [(kernel_size-1), 0] + conv_context_size: null + + ### regularization + dropout: 0.1 # The dropout used in most of the Conformer Modules + dropout_emb: 0.0 # The dropout used for embeddings + dropout_att: 0.1 # The dropout for multi-headed attention modules + + decoder: + _target_: nemo.collections.asr.modules.RNNTDecoder + normalization_mode: null # Currently only null is supported for export. + random_state_sampling: false # Random state sampling: https://arxiv.org/pdf/1910.11455.pdf + blank_as_pad: true # This flag must be set in order to support exporting of RNNT models + efficient inference. 
+ + prednet: + pred_hidden: ${model.model_defaults.pred_hidden} + pred_rnn_layers: 1 + t_max: null + dropout: 0.2 + + joint: + _target_: nemo.collections.asr.modules.RNNTJoint + log_softmax: null # 'null' would set it automatically according to CPU/GPU device + preserve_memory: false # dramatically slows down training, but might preserve some memory + + # Fuses the computation of prediction net + joint net + loss + WER calculation + # to be run on sub-batches of size `fused_batch_size`. + # When this flag is set to true, consider the `batch_size` of *_ds to be just `encoder` batch size. + # `fused_batch_size` is the actual batch size of the prediction net, joint net and transducer loss. + # Using small values here will preserve a lot of memory during training, but will make training slower as well. + # An optimal ratio of fused_batch_size : *_ds.batch_size is 1:1. + # However, to preserve memory, this ratio can be 1:8 or even 1:16. + # Extreme case of 1:B (i.e. fused_batch_size=1) should be avoided as training speed would be very slow. + fuse_loss_wer: true + fused_batch_size: 16 + + jointnet: + joint_hidden: ${model.model_defaults.joint_hidden} + activation: "relu" + dropout: 0.2 + + decoding: + strategy: "greedy_batch" # can be greedy, greedy_batch, beam, tsd, alsd. + + # greedy strategy config + greedy: + max_symbols: 10 + + # beam strategy config + beam: + beam_size: 2 + return_best_hypothesis: False + score_norm: true + tsd_max_sym_exp: 50 # for Time Synchronous Decoding + alsd_max_target_len: 2.0 # for Alignment-Length Synchronous Decoding + + loss: + loss_name: "default" + + warprnnt_numba_kwargs: + # FastEmit regularization: https://arxiv.org/abs/2010.11148 + # You may enable FastEmit to reduce the latency of the model for streaming + fastemit_lambda: 0.0 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. + clamp: -1.0 # if > 0, applies gradient clamping in range [-clamp, clamp] for the joint tensor only. 
+ + # Adds Gaussian noise to the gradients of the decoder to avoid overfitting + variational_noise: + start_step: 0 + std: 0.0 + + optim: + name: adamw + lr: 5.0 + # optimizer arguments + betas: [0.9, 0.98] + weight_decay: 1e-3 + + # scheduler setup + sched: + name: NoamAnnealing + d_model: ${model.encoder.d_model} + # scheduler config override + warmup_steps: 10000 + warmup_ratio: null + min_lr: 1e-6 diff --git a/conf/conformer_transducer_bpe_xlarge.yaml b/conf/conformer_transducer_bpe_xlarge.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fa67ef678c7a9f8d4f5152e87e8c430d8f4fcefb --- /dev/null +++ b/conf/conformer_transducer_bpe_xlarge.yaml @@ -0,0 +1,196 @@ +# It contains the default values for training a Conformer-Transducer ASR model, XL size (~0.6B) with Transducer loss and sub-word encoding. + +# You may find more info about Conformer-Transducer here: https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/asr/models.html#conformer-transducer +# Pre-trained models of Conformer-Transducer can be found here: https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/asr/results.html +# The checkpoint of the xlarge model trained on NeMo ASRSET with this recipe can be found here: https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_en_conformer_transducer_xlarge + +name: "Conformer-Transducer-BPE" + +model: + sample_rate: 16000 + compute_eval_loss: false # eval samples can be very long and exhaust memory. Disable computation of transducer loss during validation/testing with this flag. + log_prediction: true # enables logging sample predictions in the output during training + skip_nan_grad: false + + model_defaults: + enc_hidden: ${model.encoder.d_model} + pred_hidden: 640 + joint_hidden: 640 + + train_ds: + manifest_filepath: ??? 
+ sample_rate: ${model.sample_rate} + batch_size: 16 # you may increase batch_size if your memory allows + shuffle: true + num_workers: 8 + pin_memory: true + use_start_end_token: false + trim_silence: false + max_duration: 16.7 # it is set for LibriSpeech, you may need to update it for your dataset + min_duration: 0.1 + # tarred datasets + is_tarred: false + tarred_audio_filepaths: null + shuffle_n: 2048 + # bucketing params + bucketing_strategy: "synced_randomized" + bucketing_batch_size: null + + validation_ds: + manifest_filepath: ??? + sample_rate: ${model.sample_rate} + batch_size: 16 + shuffle: false + num_workers: 8 + pin_memory: true + use_start_end_token: false + + test_ds: + manifest_filepath: null + sample_rate: ${model.sample_rate} + batch_size: 16 + shuffle: false + num_workers: 8 + pin_memory: true + use_start_end_token: false + + # You may find more detail on how to train a tokenizer at: /scripts/tokenizers/process_asr_text_tokenizer.py + tokenizer: + dir: ??? # path to directory which contains either tokenizer.model (bpe) or vocab.txt (for wpe) + type: bpe # Can be either bpe (SentencePiece tokenizer) or wpe (WordPiece tokenizer) + + preprocessor: + _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor + sample_rate: ${model.sample_rate} + normalize: "per_feature" + window_size: 0.025 + window_stride: 0.01 + window: "hann" + features: 80 + n_fft: 512 + frame_splicing: 1 + dither: 0.00001 + pad_to: 0 + + spec_augment: + _target_: nemo.collections.asr.modules.SpectrogramAugmentation + freq_masks: 2 # set to zero to disable it + time_masks: 10 # set to zero to disable it + freq_width: 27 + time_width: 0.05 + + encoder: + _target_: nemo.collections.asr.modules.ConformerEncoder + feat_in: ${model.preprocessor.features} + feat_out: -1 # you may set it if you need different output size other than the default d_model + n_layers: 24 + d_model: 1024 + + # Sub-sampling params + subsampling: striding # vggnet, striding, stacking or 
stacking_norm, dw_striding + subsampling_factor: 4 # must be power of 2 for striding and vggnet + subsampling_conv_channels: -1 # set to -1 to make it equal to the d_model + causal_downsampling: false + + # Feed forward module's params + ff_expansion_factor: 4 + + # Multi-headed Attention Module's params + self_attention_model: rel_pos # rel_pos or abs_pos + n_heads: 8 # may need to be lower for smaller d_models + # [left, right] specifies the number of steps to be seen from left and right of each step in self-attention + att_context_size: [-1, -1] # -1 means unlimited context + att_context_style: regular # regular or chunked_limited + xscaling: true # scales up the input embeddings by sqrt(d_model) + untie_biases: true # unties the biases of the TransformerXL layers + pos_emb_max_len: 5000 + + # Convolution module's params + conv_kernel_size: 5 + conv_norm_type: 'batch_norm' # batch_norm or layer_norm or groupnormN (N specifies the number of groups) + # conv_context_size can be"causal" or a list of two integers while conv_context_size[0]+conv_context_size[1]+1==conv_kernel_size + # null means [(kernel_size-1)//2, (kernel_size-1)//2], and 'causal' means [(kernel_size-1), 0] + conv_context_size: null + + ### regularization + dropout: 0.1 # The dropout used in most of the Conformer Modules + dropout_emb: 0.0 # The dropout used for embeddings + dropout_att: 0.1 # The dropout for multi-headed attention modules + + decoder: + _target_: nemo.collections.asr.modules.RNNTDecoder + normalization_mode: null # Currently only null is supported for export. + random_state_sampling: false # Random state sampling: https://arxiv.org/pdf/1910.11455.pdf + blank_as_pad: true # This flag must be set in order to support exporting of RNNT models + efficient inference. 
+ + prednet: + pred_hidden: ${model.model_defaults.pred_hidden} + pred_rnn_layers: 2 + t_max: null + dropout: 0.1 + + joint: + _target_: nemo.collections.asr.modules.RNNTJoint + log_softmax: null # 'null' would set it automatically according to CPU/GPU device + preserve_memory: false # dramatically slows down training, but might preserve some memory + + # Fuses the computation of prediction net + joint net + loss + WER calculation + # to be run on sub-batches of size `fused_batch_size`. + # When this flag is set to true, consider the `batch_size` of *_ds to be just `encoder` batch size. + # `fused_batch_size` is the actual batch size of the prediction net, joint net and transducer loss. + # Using small values here will preserve a lot of memory during training, but will make training slower as well. + # An optimal ratio of fused_batch_size : *_ds.batch_size is 1:1. + # However, to preserve memory, this ratio can be 1:8 or even 1:16. + # Extreme case of 1:B (i.e. fused_batch_size=1) should be avoided as training speed would be very slow. + fuse_loss_wer: true + fused_batch_size: 16 + + jointnet: + joint_hidden: ${model.model_defaults.joint_hidden} + activation: "relu" + dropout: 0.1 + + decoding: + strategy: "greedy_batch" # can be greedy, greedy_batch, beam, tsd, alsd. + + # greedy strategy config + greedy: + max_symbols: 10 + + # beam strategy config + beam: + beam_size: 2 + return_best_hypothesis: False + score_norm: true + tsd_max_sym_exp: 50 # for Time Synchronous Decoding + alsd_max_target_len: 2.0 # for Alignment-Length Synchronous Decoding + + loss: + loss_name: "default" + + warprnnt_numba_kwargs: + # FastEmit regularization: https://arxiv.org/abs/2010.11148 + # You may enable FastEmit to reduce the latency of the model for streaming + fastemit_lambda: 0.0 # Recommended values to be in range [1e-4, 1e-2], 0.001 is a good start. + clamp: -1.0 # if > 0, applies gradient clamping in range [-clamp, clamp] for the joint tensor only. 
+ + # Adds Gaussian noise to the gradients of the decoder to avoid overfitting + variational_noise: + start_step: 0 + std: 0.0 + + optim: + name: adamw + lr: 5.0 + # optimizer arguments + betas: [0.9, 0.98] + weight_decay: 1e-3 + + # scheduler setup + sched: + name: NoamAnnealing + d_model: ${model.encoder.d_model} + # scheduler config override + warmup_steps: 10000 + warmup_ratio: null + min_lr: 1e-6 diff --git a/conf/contextnet_rnnt.yaml b/conf/contextnet_rnnt.yaml new file mode 100644 index 0000000000000000000000000000000000000000..af00f716d851d817d879365579c90223f3463155 --- /dev/null +++ b/conf/contextnet_rnnt.yaml @@ -0,0 +1,472 @@ +# This config contains the default values for training a modified ContextNet model with Transducer loss and BPE-based vocabulary. +# In contrast to original ContextNet, the same number of filters is used throughout the model. +# Default learning parameters in this config are set for effective batch size of 1k on 32 GPUs. +# To train it with smaller batch sizes, you may need to re-tune the learning parameters or use higher accumulate_grad_batches. + +# It contains the default values for training a ContextNet ASR model, large size (~144M) with Transducer loss and sub-word encoding. + +# Architecture and training config: +# Default learning parameters in this config are set for effective batch size of 1K. To train it with smaller effective +# batch sizes, you may need to re-tune the learning parameters or use higher accumulate_grad_batches. +# Here are the recommended configs for different variants of ContextNet, other parameters are the same as in this config file. 
+# +# +-------------+---------+------------+ +# | Model | filters | time_masks | +# +=============+=========+============+ +# | Small (14M)| 256 | 2 | +# +-------------+---------+------------+ +# | Medium (40M)| 512 | 5 | +# +-------------+---------+------------+ +# | Large (145M)| 1024 | 10 | +# +------------------------------------- + +name: &name "ContextNet-8x-Stride-RNNT" + +model: + sample_rate: 16000 + compute_eval_loss: false # eval samples can be very long and exhaust memory. Disable computation of transducer loss during validation/testing with this flag. + + train_ds: + manifest_filepath: ??? + sample_rate: ${model.sample_rate} + batch_size: 16 # Can be increased if memory allows or when using smaller model + trim_silence: false + max_duration: 16.7 + shuffle: true + use_start_end_token: false + num_workers: 16 + pin_memory: true + # tarred datasets + is_tarred: false + tarred_audio_filepaths: null + tarred_shard_strategy: "scatter" + shuffle_n: 2048 + # bucketing params + bucketing_strategy: "synced_randomized" + bucketing_batch_size: null + validation_ds: + manifest_filepath: ??? + sample_rate: ${model.sample_rate} + batch_size: 8 + shuffle: false + use_start_end_token: false + num_workers: 16 + pin_memory: true + + test_ds: + manifest_filepath: null + sample_rate: ${model.sample_rate} + batch_size: 8 + shuffle: false + use_start_end_token: false + num_workers: 16 + pin_memory: true + + model_defaults: + filters: 1024 + repeat: 5 + dropout: 0.1 + separable: true + se: true + se_context_size: -1 + kernel_size_factor: 1.0 + # encoder / decoder / joint values + enc_hidden: 640 + pred_hidden: 640 + joint_hidden: 640 + + tokenizer: + dir: ??? # path to directory which contains either tokenizer.model (bpe) or vocab.txt (for wpe) + type: ??? 
# Can be either bpe or wpe + + preprocessor: + _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor + sample_rate: ${model.sample_rate} + normalize: "per_feature" + window_size: 0.025 + window_stride: 0.01 + window: "hann" + features: &n_mels 80 + n_fft: 512 + frame_splicing: 1 + dither: 0.00001 + pad_to: 16 + stft_conv: false + + spec_augment: + _target_: nemo.collections.asr.modules.SpectrogramAugmentation + freq_masks: 2 # should be kept at 2 + time_masks: 10 # can be 5 for small-med models, 10 for larger models. + freq_width: 27 + time_width: 0.05 + + encoder: + _target_: nemo.collections.asr.modules.ConvASREncoder + feat_in: *n_mels + activation: swish + conv_mask: true + init_mode: "tds_uniform" + + jasper: + - filters: ${model.model_defaults.filters} + repeat: 1 + kernel: [5] + stride: [1] + dilation: [1] + dropout: 0.0 + residual: false + separable: ${model.model_defaults.separable} + se: ${model.model_defaults.se} + se_context_size: ${model.model_defaults.se_context_size} + + - filters: ${model.model_defaults.filters} + repeat: ${model.model_defaults.repeat} + kernel: [5] + stride: [1] + dilation: [1] + dropout: ${model.model_defaults.dropout} + residual: true + separable: ${model.model_defaults.separable} + se: ${model.model_defaults.se} + se_context_size: ${model.model_defaults.se_context_size} + kernel_size_factor: ${model.model_defaults.kernel_size_factor} + + - filters: ${model.model_defaults.filters} + repeat: ${model.model_defaults.repeat} + kernel: [5] + stride: [1] + dilation: [1] + dropout: ${model.model_defaults.dropout} + residual: true + separable: ${model.model_defaults.separable} + se: ${model.model_defaults.se} + se_context_size: ${model.model_defaults.se_context_size} + kernel_size_factor: ${model.model_defaults.kernel_size_factor} + + - filters: ${model.model_defaults.filters} + repeat: ${model.model_defaults.repeat} + kernel: [5] + stride: [2] + dilation: [1] + dropout: ${model.model_defaults.dropout} + residual: true 
+ separable: ${model.model_defaults.separable} + se: ${model.model_defaults.se} + se_context_size: ${model.model_defaults.se_context_size} + stride_last: true + residual_mode: "stride_add" + kernel_size_factor: ${model.model_defaults.kernel_size_factor} + + - filters: ${model.model_defaults.filters} + repeat: ${model.model_defaults.repeat} + kernel: [5] + stride: [1] + dilation: [1] + dropout: ${model.model_defaults.dropout} + residual: true + separable: ${model.model_defaults.separable} + se: ${model.model_defaults.se} + se_context_size: ${model.model_defaults.se_context_size} + kernel_size_factor: ${model.model_defaults.kernel_size_factor} + + - filters: ${model.model_defaults.filters} + repeat: ${model.model_defaults.repeat} + kernel: [5] + stride: [1] + dilation: [1] + dropout: ${model.model_defaults.dropout} + residual: true + separable: ${model.model_defaults.separable} + se: ${model.model_defaults.se} + se_context_size: ${model.model_defaults.se_context_size} + kernel_size_factor: ${model.model_defaults.kernel_size_factor} + + - filters: ${model.model_defaults.filters} + repeat: ${model.model_defaults.repeat} + kernel: [5] + stride: [1] + dilation: [1] + dropout: ${model.model_defaults.dropout} + residual: true + separable: ${model.model_defaults.separable} + se: ${model.model_defaults.se} + se_context_size: ${model.model_defaults.se_context_size} + kernel_size_factor: ${model.model_defaults.kernel_size_factor} + + - filters: ${model.model_defaults.filters} + repeat: ${model.model_defaults.repeat} + kernel: [5] + stride: [2] # *stride + dilation: [1] + dropout: ${model.model_defaults.dropout} + residual: true + separable: ${model.model_defaults.separable} + se: ${model.model_defaults.se} + se_context_size: ${model.model_defaults.se_context_size} + stride_last: true + residual_mode: "stride_add" + kernel_size_factor: ${model.model_defaults.kernel_size_factor} + + - filters: ${model.model_defaults.filters} + repeat: ${model.model_defaults.repeat} + kernel: [5] 
+ stride: [1] + dilation: [1] + dropout: ${model.model_defaults.dropout} + residual: true + separable: ${model.model_defaults.separable} + se: ${model.model_defaults.se} + se_context_size: ${model.model_defaults.se_context_size} + kernel_size_factor: ${model.model_defaults.kernel_size_factor} + + - filters: ${model.model_defaults.filters} + repeat: ${model.model_defaults.repeat} + kernel: [5] + stride: [1] + dilation: [1] + dropout: ${model.model_defaults.dropout} + residual: true + separable: ${model.model_defaults.separable} + se: ${model.model_defaults.se} + se_context_size: ${model.model_defaults.se_context_size} + kernel_size_factor: ${model.model_defaults.kernel_size_factor} + + - filters: ${model.model_defaults.filters} + repeat: ${model.model_defaults.repeat} + kernel: [5] + stride: [1] + dilation: [1] + dropout: ${model.model_defaults.dropout} + residual: true + separable: ${model.model_defaults.separable} + se: ${model.model_defaults.se} + se_context_size: ${model.model_defaults.se_context_size} + kernel_size_factor: ${model.model_defaults.kernel_size_factor} + + - filters: ${model.model_defaults.filters} + repeat: ${model.model_defaults.repeat} + kernel: [5] + stride: [1] + dilation: [1] + dropout: ${model.model_defaults.dropout} + residual: true + separable: ${model.model_defaults.separable} + se: ${model.model_defaults.se} + se_context_size: ${model.model_defaults.se_context_size} + kernel_size_factor: ${model.model_defaults.kernel_size_factor} + + - filters: ${model.model_defaults.filters} + repeat: ${model.model_defaults.repeat} + kernel: [5] + stride: [1] + dilation: [1] + dropout: ${model.model_defaults.dropout} + residual: true + separable: ${model.model_defaults.separable} + se: ${model.model_defaults.se} + se_context_size: ${model.model_defaults.se_context_size} + kernel_size_factor: ${model.model_defaults.kernel_size_factor} + + - filters: ${model.model_defaults.filters} + repeat: ${model.model_defaults.repeat} + kernel: [5] + stride: [1] + 
dilation: [1] + dropout: ${model.model_defaults.dropout} + residual: true + separable: ${model.model_defaults.separable} + se: ${model.model_defaults.se} + se_context_size: ${model.model_defaults.se_context_size} + kernel_size_factor: ${model.model_defaults.kernel_size_factor} + + - filters: ${model.model_defaults.filters} + repeat: ${model.model_defaults.repeat} + kernel: [5] + stride: [2] # stride + dilation: [1] + dropout: ${model.model_defaults.dropout} + residual: true + separable: ${model.model_defaults.separable} + se: ${model.model_defaults.se} + se_context_size: ${model.model_defaults.se_context_size} + stride_last: true + residual_mode: "stride_add" + kernel_size_factor: ${model.model_defaults.kernel_size_factor} + + - filters: ${model.model_defaults.filters} + repeat: ${model.model_defaults.repeat} + kernel: [5] + stride: [1] + dilation: [1] + dropout: ${model.model_defaults.dropout} + residual: true + separable: ${model.model_defaults.separable} + se: ${model.model_defaults.se} + se_context_size: ${model.model_defaults.se_context_size} + kernel_size_factor: ${model.model_defaults.kernel_size_factor} + + - filters: ${model.model_defaults.filters} + repeat: ${model.model_defaults.repeat} + kernel: [5] + stride: [1] + dilation: [1] + dropout: ${model.model_defaults.dropout} + residual: true + separable: ${model.model_defaults.separable} + se: ${model.model_defaults.se} + se_context_size: ${model.model_defaults.se_context_size} + kernel_size_factor: ${model.model_defaults.kernel_size_factor} + + - filters: ${model.model_defaults.filters} + repeat: ${model.model_defaults.repeat} + kernel: [5] + stride: [1] + dilation: [1] + dropout: ${model.model_defaults.dropout} + residual: true + separable: ${model.model_defaults.separable} + se: ${model.model_defaults.se} + se_context_size: ${model.model_defaults.se_context_size} + kernel_size_factor: ${model.model_defaults.kernel_size_factor} + + - filters: ${model.model_defaults.filters} + repeat: 
${model.model_defaults.repeat} + kernel: [5] + stride: [1] + dilation: [1] + dropout: ${model.model_defaults.dropout} + residual: true + separable: ${model.model_defaults.separable} + se: ${model.model_defaults.se} + se_context_size: ${model.model_defaults.se_context_size} + kernel_size_factor: ${model.model_defaults.kernel_size_factor} + + - filters: ${model.model_defaults.filters} + repeat: ${model.model_defaults.repeat} + kernel: [5] + stride: [1] + dilation: [1] + dropout: ${model.model_defaults.dropout} + residual: true + separable: ${model.model_defaults.separable} + se: ${model.model_defaults.se} + se_context_size: ${model.model_defaults.se_context_size} + kernel_size_factor: ${model.model_defaults.kernel_size_factor} + + - filters: ${model.model_defaults.filters} + repeat: ${model.model_defaults.repeat} + kernel: [5] + stride: [1] + dilation: [1] + dropout: ${model.model_defaults.dropout} + residual: true + separable: ${model.model_defaults.separable} + se: ${model.model_defaults.se} + se_context_size: ${model.model_defaults.se_context_size} + kernel_size_factor: ${model.model_defaults.kernel_size_factor} + + - filters: ${model.model_defaults.filters} + repeat: ${model.model_defaults.repeat} + kernel: [5] + stride: [1] + dilation: [1] + dropout: ${model.model_defaults.dropout} + residual: true + separable: ${model.model_defaults.separable} + se: ${model.model_defaults.se} + se_context_size: ${model.model_defaults.se_context_size} + kernel_size_factor: ${model.model_defaults.kernel_size_factor} + + - filters: ${model.model_defaults.enc_hidden} + repeat: 1 + kernel: [5] + stride: [1] + dilation: [1] + dropout: 0.0 + residual: false + separable: ${model.model_defaults.separable} + se: ${model.model_defaults.se} + se_context_size: ${model.model_defaults.se_context_size} + kernel_size_factor: ${model.model_defaults.kernel_size_factor} + + + decoder: + _target_: nemo.collections.asr.modules.RNNTDecoder + normalization_mode: null # Currently only null is supported 
for export. + random_state_sampling: false # Random state sampling: https://arxiv.org/pdf/1910.11455.pdf + blank_as_pad: true # This flag must be set in order to support exporting of RNNT models + efficient inference. + + prednet: + pred_hidden: ${model.model_defaults.pred_hidden} + pred_rnn_layers: 1 # only 1 layer LSTM networks are exportable. + t_max: null # Maximum possible target seq length used for Chrono Initialization - https://arxiv.org/abs/1804.11188. Disabled by default. + dropout: 0.1 + + joint: + _target_: nemo.collections.asr.modules.RNNTJoint + log_softmax: null # sets it according to cpu/gpu device + preserve_memory: false # dramatically slows down training, but might preserve some memory + + # Fuses the computation of prediction net + joint net + loss + WER calculation + # to be run on sub-batches of size `fused_batch_size`. + # When this flag is set to true, consider the `batch_size` of *_ds to be just `encoder` batch size. + # `fused_batch_size` is the actual batch size of the prediction net, joint net and transducer loss. + # Using small values here will preserve a lot of memory during training, but will make training slower as well. + # An optimal ratio of fused_batch_size : *_ds.batch_size is 1:1. + # However, to preserve memory, this ratio can be 1:8 or even 1:16. + # Extreme case of 1:B (i.e. fused_batch_size=1) should be avoided as training speed would be very slow. + fuse_loss_wer: true + fused_batch_size: 16 + + jointnet: + joint_hidden: ${model.model_defaults.joint_hidden} + activation: "relu" + dropout: 0.1 + + # RNNT decoding strategy + decoding: + strategy: "greedy_batch" # can be greedy, greedy_batch, beam, tsd, alsd. 
+ + # greedy strategy config + greedy: + max_symbols: 10 + + # beam strategy config + beam: + beam_size: 4 + score_norm: true + return_best_hypothesis: False + softmax_temperature: 1.0 # scale the logits by some temperature prior to softmax + tsd_max_sym_exp: 10 # for Time Synchronous Decoding, int > 0 + alsd_max_target_len: 5.0 # for Alignment-Length Synchronous Decoding, float > 1.0 + maes_num_steps: 2 # for modified Adaptive Expansion Search, int > 0 + maes_prefix_alpha: 1 # for modified Adaptive Expansion Search, int > 0 + maes_expansion_beta: 2 # for modified Adaptive Expansion Search, int >= 0 + maes_expansion_gamma: 2.3 # for modified Adaptive Expansion Search, float >= 0 + + # RNNT loss config + loss: + loss_name: "default" + + warprnnt_numba_kwargs: + # FastEmit regularization: https://arxiv.org/abs/2010.11148 + fastemit_lambda: 0.001 # Values can be in range [1e-4, 1e-2]. Generally, 0.001 is good start. + clamp: -1.0 # if > 0, applies gradient clamping in range [-clamp, clamp] for the joint tensor only. + + optim: + name: novograd + lr: 0.05 + + # optimizer arguments + betas: [0.9, 0.0] + weight_decay: 0.001 + + # scheduler setup + sched: + name: CosineAnnealing + + # scheduler config override + warmup_steps: 5000 + warmup_ratio: null + min_lr: 1e-6 + last_epoch: -1 diff --git a/conf/contextnet_rnnt_dummy.yaml b/conf/contextnet_rnnt_dummy.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d3bc674cb5e83f180d76914bfc5017cda9c02978 --- /dev/null +++ b/conf/contextnet_rnnt_dummy.yaml @@ -0,0 +1,197 @@ +# This config contains the values for training a dummy ContextNet model with Transducer loss and BPE-based vocabulary. +# In contrast to original ContextNet, the same number of filters is used throughout the model. +# To train it with smaller batch sizes, you may need to re-tune the learning parameters or use higher accumulate_grad_batches. 
+ +# It contains the default values for training a ContextNet ASR model, dummy size, with Transducer loss and sub-word encoding. + +name: &name "ContextNet-8x-Stride-RNNT" + +model: + sample_rate: 16000 + compute_eval_loss: false # eval samples can be very long and exhaust memory. Disable computation of transducer loss during validation/testing with this flag. + + train_ds: + manifest_filepath: ??? + sample_rate: ${model.sample_rate} + batch_size: 4 # Can be increased if memory allows or when using smaller model + trim_silence: false + max_duration: 16.7 + shuffle: true + use_start_end_token: false + num_workers: 16 + pin_memory: true + # tarred datasets + is_tarred: false + tarred_audio_filepaths: null + tarred_shard_strategy: "scatter" + shuffle_n: 2048 + # bucketing params + bucketing_strategy: "synced_randomized" + bucketing_batch_size: null + validation_ds: + manifest_filepath: ??? + sample_rate: ${model.sample_rate} + batch_size: 8 + shuffle: false + use_start_end_token: false + num_workers: 16 + pin_memory: true + + test_ds: + manifest_filepath: null + sample_rate: ${model.sample_rate} + batch_size: 8 + shuffle: false + use_start_end_token: false + num_workers: 16 + pin_memory: true + + model_defaults: + filters: 64 + repeat: 1 + dropout: 0.1 + separable: true + se: true + se_context_size: -1 + kernel_size_factor: 1.0 + # encoder / decoder / joint values + enc_hidden: 64 + pred_hidden: 64 + joint_hidden: 64 + + tokenizer: + dir: ??? # path to directory which contains either tokenizer.model (bpe) or vocab.txt (for wpe) + type: ??? 
# Can be either bpe or wpe + + preprocessor: + _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor + sample_rate: ${model.sample_rate} + normalize: "per_feature" + window_size: 0.025 + window_stride: 0.01 + window: "hann" + features: &n_mels 80 + n_fft: 512 + frame_splicing: 1 + dither: 0.00001 + pad_to: 16 + stft_conv: false + + spec_augment: + _target_: nemo.collections.asr.modules.SpectrogramAugmentation + freq_masks: 2 # should be kept at 2 + time_masks: 10 # can be 5 for small-med models, 10 for larger models. + freq_width: 27 + time_width: 0.05 + + encoder: + _target_: nemo.collections.asr.modules.ConvASREncoder + feat_in: *n_mels + activation: swish + conv_mask: true + init_mode: "tds_uniform" + + jasper: + - filters: ${model.model_defaults.filters} + repeat: 1 + kernel: [5] + stride: [1] + dilation: [1] + dropout: 0.0 + residual: false + separable: ${model.model_defaults.separable} + se: ${model.model_defaults.se} + se_context_size: ${model.model_defaults.se_context_size} + + - filters: ${model.model_defaults.filters} + repeat: ${model.model_defaults.repeat} + kernel: [5] + stride: [1] + dilation: [1] + dropout: 0.0 + residual: true + separable: ${model.model_defaults.separable} + se: ${model.model_defaults.se} + se_context_size: ${model.model_defaults.se_context_size} + kernel_size_factor: ${model.model_defaults.kernel_size_factor} + + decoder: + _target_: nemo.collections.asr.modules.RNNTDecoder + normalization_mode: null # Currently only null is supported for export. + random_state_sampling: false # Random state sampling: https://arxiv.org/pdf/1910.11455.pdf + blank_as_pad: true # This flag must be set in order to support exporting of RNNT models + efficient inference. + + prednet: + pred_hidden: ${model.model_defaults.pred_hidden} + pred_rnn_layers: 1 # only 1 layer LSTM networks are exportable. + t_max: null # Maximum possible target seq length used for Chrono Initialization - https://arxiv.org/abs/1804.11188. Disabled by default. 
+ dropout: 0.1 + + joint: + _target_: nemo.collections.asr.modules.RNNTJoint + log_softmax: null # sets it according to cpu/gpu device + preserve_memory: false # dramatically slows down training, but might preserve some memory + + # Fuses the computation of prediction net + joint net + loss + WER calculation + # to be run on sub-batches of size `fused_batch_size`. + # When this flag is set to true, consider the `batch_size` of *_ds to be just `encoder` batch size. + # `fused_batch_size` is the actual batch size of the prediction net, joint net and transducer loss. + # Using small values here will preserve a lot of memory during training, but will make training slower as well. + # An optimal ratio of fused_batch_size : *_ds.batch_size is 1:1. + # However, to preserve memory, this ratio can be 1:8 or even 1:16. + # Extreme case of 1:B (i.e. fused_batch_size=1) should be avoided as training speed would be very slow. + fuse_loss_wer: true + fused_batch_size: 16 + + jointnet: + joint_hidden: ${model.model_defaults.joint_hidden} + activation: "relu" + dropout: 0.1 + + # RNNT decoding strategy + decoding: + strategy: "greedy_batch" # can be greedy, greedy_batch, beam, tsd, alsd. 
+ + # greedy strategy config + greedy: + max_symbols: 10 + + # beam strategy config + beam: + beam_size: 4 + score_norm: true + return_best_hypothesis: False + softmax_temperature: 1.0 # scale the logits by some temperature prior to softmax + tsd_max_sym_exp: 10 # for Time Synchronous Decoding, int > 0 + alsd_max_target_len: 5.0 # for Alignment-Length Synchronous Decoding, float > 1.0 + maes_num_steps: 2 # for modified Adaptive Expansion Search, int > 0 + maes_prefix_alpha: 1 # for modified Adaptive Expansion Search, int > 0 + maes_expansion_beta: 2 # for modified Adaptive Expansion Search, int >= 0 + maes_expansion_gamma: 2.3 # for modified Adaptive Expansion Search, float >= 0 + + # RNNT loss config + loss: + loss_name: "default" + + warprnnt_numba_kwargs: + # FastEmit regularization: https://arxiv.org/abs/2010.11148 + fastemit_lambda: 0.001 # Values can be in range [1e-4, 1e-2]. Generally, 0.001 is good start. + clamp: -1.0 # if > 0, applies gradient clamping in range [-clamp, clamp] for the joint tensor only. 
+ + optim: + name: novograd + lr: 0.05 + + # optimizer arguments + betas: [0.9, 0.0] + weight_decay: 0.001 + + # scheduler setup + sched: + name: CosineAnnealing + + # scheduler config override + warmup_steps: 5000 + warmup_ratio: null + min_lr: 1e-6 + last_epoch: -1 diff --git a/eval_results.json b/eval_results.json new file mode 100644 index 0000000000000000000000000000000000000000..511fc1b2e1bdcd55e5ca4cd7045f3bad969bb057 --- /dev/null +++ b/eval_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 7.38, + "eval_loss": 8.706663131713867, + "eval_runtime": 970.2156, + "eval_samples": 13098, + "eval_samples_per_second": 13.5, + "eval_steps_per_second": 3.376, + "eval_wer": 0.20430683297635546 +} \ No newline at end of file diff --git a/models/__init__.py b/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..9d0171dd81bc85ee01f5350f1be2ac821d0ae725 --- /dev/null +++ b/models/__init__.py @@ -0,0 +1 @@ +from .modeling_rnnt import RNNTBPEModel \ No newline at end of file diff --git a/models/__pycache__/__init__.cpython-39.pyc b/models/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d12ccd384dae700e115654d53154f7c3f01e65d8 Binary files /dev/null and b/models/__pycache__/__init__.cpython-39.pyc differ diff --git a/models/__pycache__/modeling_rnnt.cpython-39.pyc b/models/__pycache__/modeling_rnnt.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..41eb00910b2d6b6e8cbac3a21fa8812f30d3ed09 Binary files /dev/null and b/models/__pycache__/modeling_rnnt.cpython-39.pyc differ diff --git a/models/modeling_rnnt.py b/models/modeling_rnnt.py new file mode 100644 index 0000000000000000000000000000000000000000..c91a6907d47fb8a85a9df16a116aa857117be5c5 --- /dev/null +++ b/models/modeling_rnnt.py @@ -0,0 +1,115 @@ +from dataclasses import dataclass +from typing import Optional + +import torch +from nemo.collections.asr.models import EncDecRNNTBPEModel +from omegaconf 
import DictConfig
from transformers.utils import ModelOutput


@dataclass
class RNNTOutput(ModelOutput):
    """
    Base class for RNNT outputs.
    """

    # Transducer loss value (None when loss is not computed)
    loss: Optional[torch.FloatTensor] = None
    # Word error rate and its numerator/denominator counts; populated only
    # during evaluation (see `forward` below)
    wer: Optional[float] = None
    wer_num: Optional[float] = None
    wer_denom: Optional[float] = None


# Adapted from https://github.com/NVIDIA/NeMo/blob/66c7677cd4a68d78965d4905dd1febbf5385dff3/nemo/collections/asr/models/rnnt_bpe_models.py#L33
class RNNTBPEModel(EncDecRNNTBPEModel):
    def __init__(self, cfg: DictConfig):
        # trainer=None: this model is driven by the HF Trainer, not a PyTorch Lightning trainer
        super().__init__(cfg=cfg, trainer=None)

    def encoding(
        self, input_signal=None, input_signal_length=None, processed_signal=None, processed_signal_length=None
    ):
        """
        Forward pass of the acoustic model. Note that for RNNT Models, the forward pass of the model is a 3 step process,
        and this method only performs the first step - forward of the acoustic model.

        Please refer to the `forward` in order to see the full `forward` step for training - which
        performs the forward of the acoustic model, the prediction network and then the joint network.
        Finally, it computes the loss and possibly compute the detokenized text via the `decoding` step.

        Please refer to the `validation_step` in order to see the full `forward` step for inference - which
        performs the forward of the acoustic model, the prediction network and then the joint network.
        Finally, it computes the decoded tokens via the `decoding` step and possibly compute the batch metrics.

        Args:
            input_signal: Tensor that represents a batch of raw audio signals,
                of shape [B, T]. T here represents timesteps, with 1 second of audio represented as
                `self.sample_rate` number of floating point values.
            input_signal_length: Vector of length B, that contains the individual lengths of the audio
                sequences.
            processed_signal: Tensor that represents a batch of processed audio signals,
                of shape (B, D, T) that has undergone processing via some DALI preprocessor.
            processed_signal_length: Vector of length B, that contains the individual lengths of the
                processed audio sequences.

        Returns:
            A tuple of 2 elements -
            1) The log probabilities tensor of shape [B, T, D].
            2) The lengths of the acoustic sequence after propagation through the encoder, of shape [B].
        """
        has_input_signal = input_signal is not None and input_signal_length is not None
        has_processed_signal = processed_signal is not None and processed_signal_length is not None
        # XOR: exactly one of (raw signal, processed signal) must be provided
        if (has_input_signal ^ has_processed_signal) is False:
            raise ValueError(
                f"{self} Arguments ``input_signal`` and ``input_signal_length`` are mutually exclusive "
                " with ``processed_signal`` and ``processed_signal_len`` arguments."
            )

        if not has_processed_signal:
            processed_signal, processed_signal_length = self.preprocessor(
                input_signal=input_signal, length=input_signal_length,
            )

        # Spec augment is not applied during evaluation/testing
        if self.spec_augmentation is not None and self.training:
            processed_signal = self.spec_augmentation(input_spec=processed_signal, length=processed_signal_length)

        encoded, encoded_len = self.encoder(audio_signal=processed_signal, length=processed_signal_length)
        return encoded, encoded_len

    def forward(self, input_ids, input_lengths=None, labels=None, label_lengths=None):
        """
        Full RNNT forward: acoustic encoder -> prediction network -> joint network -> loss.
        WER is additionally computed when the model is in eval mode.

        Args:
            input_ids: raw audio batch, shape [B, T] (fed to `encoding` as `input_signal`).
            input_lengths: per-sample audio lengths, shape [B].
            labels: tokenized target transcripts.
            label_lengths: per-sample target lengths.

        Returns:
            RNNTOutput with `loss` and, during evaluation, `wer`/`wer_num`/`wer_denom`.
        """
        # encoding() only performs encoder forward
        encoded, encoded_len = self.encoding(input_signal=input_ids, input_signal_length=input_lengths)
        # free the raw audio reference early to reduce peak memory
        del input_ids

        # During training, loss must be computed, so decoder forward is necessary
        # NOTE(review): `states` from the prediction network is unused here
        decoder, target_length, states = self.decoder(targets=labels, target_length=label_lengths)

        # If experimental fused Joint-Loss-WER is not used
        if not self.joint.fuse_loss_wer:
            # Compute full joint and loss
            joint = self.joint(encoder_outputs=encoded, decoder_outputs=decoder)
            loss_value = self.loss(
                log_probs=joint, targets=labels, input_lengths=encoded_len, target_lengths=target_length
            )
            # Add auxiliary losses, if registered
            loss_value = self.add_auxiliary_losses(loss_value)
            wer = wer_num = wer_denom = None
            if not self.training:
                # presumably NeMo's WER metric decodes from encoder outputs here — TODO confirm
                # against the pinned NeMo version's WER.update signature
                self.wer.update(encoded, encoded_len, labels, target_length)
                wer, wer_num, wer_denom = self.wer.compute()
                self.wer.reset()

        else:
            # If experimental fused Joint-Loss-WER is used
            # Fused joint step
            loss_value, wer, wer_num, wer_denom = self.joint(
                encoder_outputs=encoded,
                decoder_outputs=decoder,
                encoder_lengths=encoded_len,
                transcripts=labels,
                transcript_lengths=label_lengths,
                compute_wer=not self.training,
            )
            # Add auxiliary losses, if registered
            loss_value = self.add_auxiliary_losses(loss_value)

        return RNNTOutput(loss=loss_value, wer=wer, wer_num=wer_num, wer_denom=wer_denom)
diff --git a/process_asr_text_tokenizer.py b/process_asr_text_tokenizer.py new file mode 100644 index 0000000000000000000000000000000000000000..79d0ed64ddbdc7d29c6cf839f24fdd636fd1ce9a --- /dev/null +++ b/process_asr_text_tokenizer.py @@ -0,0 +1,221 @@
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# USAGE: python process_asr_text_tokenizer.py --manifest=<path to train manifest files, separated by commas> \
#         --data_root="<output directory>" \
#         --vocab_size=<vocabulary size> \
#         --tokenizer=<"spe" or "wpe"> \
#         --log
# where <manifest> can be: train_clean_100, train_clean_360, train_other_500
# You can also put more than one data_set comma-separated:
# --manifest="train_clean_100,train_clean_360,train_other_500"
# or
# python process_asr_text_tokenizer.py --data_file=<path to text data file> \
#         --data_root="<output directory>" \
#         --vocab_size=<vocabulary size> \
#         --tokenizer=<"spe" or "wpe"> \
#         --log
# where <data_file> can be: train_clean_100, train_clean_360, train_other_500
# You can also put more than one data_set comma-separated:
# --manifest="train_clean_100,train_clean_360,train_other_500"
#
# Args:
# --manifest or --data_file: If your text data lies inside of an ASR manifest file,
#     then use the --manifest path. If instead the text data is inside a file with separate lines
#     corresponding to different text lines, then use --data_file.
#     In either case, you can add commas to concatenate different manifests or different data files.
#
# --data_root: The output directory (whose subdirectories will be created if not present) where
#     the tokenizers will be placed.
#
# --vocab_size: The size of the tokenizer vocabulary. Larger vocabularies can accommodate almost entire
#     words, but the decoder size of any model will grow proportionally.
#
# --tokenizer: Can be either spe or wpe. spe refers to the Google sentencepiece library tokenizer.
#     wpe refers to the HuggingFace BERT Word Piece tokenizer.
#
# --no_lower_case: When this flag is passed, it will force the tokenizer to create separate tokens for
#     upper and lower case characters. By default, the script will turn all the text to lower case
#     before tokenization (and if upper case characters are passed during training/inference, the
#     tokenizer will emit a token equivalent to Out-Of-Vocabulary). Used primarily for the
#     English language.
#
# --spe_type: The sentencepiece library has a few implementations of the tokenization technique, and
#     spe_type refers to these implementations. Currently supported types are unigram, bpe, char, word.
#     Defaults to bpe.
#
# --spe_character_coverage: The sentencepiece library considers how much of the original vocabulary it
#     should cover in its "base set" of tokens (akin to the lower and upper case characters of the
#     English language). For almost all languages with small base token sets (<1000 tokens), this
#     should be kept at its default of 1.0. For languages with larger vocabularies (say Japanese,
#     Mandarin, Korean etc), the suggested value is 0.9995.
#
# --spe_sample_size: If the dataset is too large, consider using a sampled dataset indicated by a
#     positive integer. By default, any negative value (default = -1) will use the entire dataset.
#
# --spe_train_extremely_large_corpus: When training a sentencepiece tokenizer on very large amounts of text,
#     sometimes the tokenizer will run out of memory or won't be able to process so much data on RAM.
#     At some point you might receive the following error - "Input corpus too large, try with
#     train_extremely_large_corpus=true". If your machine has large amounts of RAM, it might still be possible
#     to build the tokenizer using the above flag. Will silently fail if it runs out of RAM.
#
# --spe_max_sentencepiece_length: Limits the maximum length that any SentencePiece subword can be.
#     Using this will change the subword tokens generated.
#
# --spe_pad: Adds `<pad>` as a special token.
#
# --spe_bos: Adds `<bos>` as a Beginning-of-Sentence special token.
#
# --spe_eos: Adds `<eos>` as an End-of-Sentence special token.
+# +# --log: Whether the script should display log messages + +import json +import logging +import os + +import tokenizers + +from nemo.collections.common.tokenizers.sentencepiece_tokenizer import create_spt_model + + +def __build_document_from_manifests( + data_root: str, manifests: str, +): + if ',' in manifests: + manifests = manifests.split(',') + else: + manifests = [manifests] + + document_dir = os.path.join(data_root, 'text_corpus') + if not os.path.exists(document_dir): + os.makedirs(document_dir) + + document_path = os.path.join(document_dir, 'document.txt') + + if os.path.exists(document_path): + logging.info('Corpus already exists at path : %s', document_path) + return document_path + + num_lines = 0 + with open(document_path, 'w') as out_writer: + for manifest in manifests: + with open(manifest, 'r') as in_reader: + for line in in_reader: + item = json.loads(line) + text = item['text'] + + out_writer.write(text + '\n') + out_writer.flush() + + num_lines += 1 + + logging.info(f"Finished extracting manifest : {manifest}") + + logging.info("Finished extracting all manifests ! Number of sentences : {}".format(num_lines)) + return document_path + + +def __process_data( + text_path: str, + dst_folder: str, + vocab_size: int, + tokenizer_type: str, + spe_type: str, + spe_character_coverage: float, + spe_train_extremely_large_corpus: bool, + spe_sample_size: int, + spe_max_sentencepiece_length: int, + spe_bos: bool, + spe_eos: bool, + spe_pad: bool, + lower_case: bool, +): + """ + Converts flac to wav and build manifests's json + Args: + text_path: source with text lines + dst_folder: where wav files will be stored + vocab_size: vocabular size used in encoding the text + tokenizer_type: type of tokenization to perform - wpe or spe + spe_type: type of tokenization model used for spe. + spe_character_coverage: float value between 0 and 1 (as a percentage). 
For languages with a vast charset, + can be < 1.0, but for all other languages, it should be set as 1.0 + spe_sample_size: int, default of -1. If positive integer is used, samples the dataset + by given sample size. + spe_train_extremely_large_corpus: bool. If dataset is too large, and user has sufficient RAM, + this flag can be set to try to trained the tokenizer. Will silently fail if it runs out of RAM. + spe_max_sentencepiece_length: Limits the maximum length of the SentencePiece subword that can be constructed. + By default, no limit is placed. + spe_bos: Bool flag, whether to add to SentencePiece tokenizer vocabulary. + spe_eos: Bool flag, whether to add to SentencePiece tokenizer vocabulary. + spe_pad: Bool flag, whether to add to SentencePiece tokenizer vocabulary. + lower_case: whether to tokenize with lower case character set only (for english) + + Returns: + """ + if tokenizer_type == 'spe': + + # Prepare directory of tokenizer + if spe_max_sentencepiece_length > 0: + tokenizer_dir = os.path.join(dst_folder, 'tokenizer_{}_{}_v{}_max_{}').format( + tokenizer_type, spe_type, vocab_size, spe_max_sentencepiece_length + ) + else: + tokenizer_dir = os.path.join(dst_folder, 'tokenizer_{}_{}_v{}').format( + tokenizer_type, spe_type, vocab_size + ) + + if spe_pad: + tokenizer_dir = f'{tokenizer_dir}_pad' + if spe_bos: + tokenizer_dir = f'{tokenizer_dir}_bos' + if spe_eos: + tokenizer_dir = f'{tokenizer_dir}_eos' + + if not os.path.exists(tokenizer_dir): + os.makedirs(tokenizer_dir) + + if os.path.exists(os.path.join(tokenizer_dir, 'tokenizer.model')): + logging.warning("Model file already exists, overriding old model file !") + os.remove(os.path.join(tokenizer_dir, 'tokenizer.model')) + + # Build tokenizer + tokenizer_path, vocab_path = create_spt_model( + data_file=text_path, + vocab_size=vocab_size, + sample_size=spe_sample_size, + do_lower_case=lower_case, + output_dir=tokenizer_dir, + tokenizer_type=spe_type, + character_coverage=spe_character_coverage, + 
train_extremely_large_corpus=spe_train_extremely_large_corpus, + max_sentencepiece_length=spe_max_sentencepiece_length, + bos=spe_bos, + eos=spe_eos, + pad=spe_pad, + ) + + else: + tokenizer_dir = os.path.join(dst_folder, 'tokenizer_{}_v{}').format(tokenizer_type, vocab_size) + + if not os.path.exists(tokenizer_dir): + os.makedirs(tokenizer_dir) + + tokenizer = tokenizers.BertWordPieceTokenizer(lowercase=lower_case) + + tokenizer.train(text_path, vocab_size=vocab_size) + tokenizer.save_model(tokenizer_dir) + + return tokenizer_dir diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..fc72682c271fd65eaa048ba560b3b345f38c1588 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +pip install transformers +pip install datasets +pip install jiwer +pip install wandb +pip install soundfile +pip install librosa +pip install bitsandbytes diff --git a/run_ami.sh b/run_ami.sh new file mode 100644 index 0000000000000000000000000000000000000000..cddeaddfd58fdf3765665cdf9cfc22cdb6ea8eba --- /dev/null +++ b/run_ami.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +CUDA_VISIBLE_DEVICES=0 python run_speech_recognition_rnnt.py \ + --config_path="conf/conformer_transducer_bpe_xlarge.yaml" \ + --model_name_or_path="stt_en_conformer_transducer_xlarge" \ + --dataset_name="speech-seq2seq/ami" \ + --tokenizer_path="tokenizer" \ + --vocab_size="1024" \ + --num_train_epochs="7.38" \ + --dataset_config_name="ihm" \ + --train_split_name="train" \ + --eval_split_name="validation" \ + --test_split_name="test" \ + --text_column_name="text" \ + --output_dir="./" \ + --run_name="rnnt-ami-baseline" \ + --wandb_project="rnnt" \ + --per_device_train_batch_size="8" \ + --per_device_eval_batch_size="4" \ + --logging_steps="50" \ + --learning_rate="1e-4" \ + --warmup_steps="500" \ + --save_strategy="steps" \ + --save_steps="20000" \ + --evaluation_strategy="steps" \ + --eval_steps="20000" \ + --report_to="wandb" \ + --preprocessing_num_workers="4" 
\ + --fused_batch_size="8" \ + --length_column_name="input_lengths" \ + --do_lower_case="False" \ + --fuse_loss_wer \ + --group_by_length \ + --overwrite_output_dir \ + --do_train \ + --do_eval \ + --do_predict \ + --push_to_hub \ + --use_auth_token diff --git a/run_speech_recognition_rnnt.py b/run_speech_recognition_rnnt.py new file mode 100644 index 0000000000000000000000000000000000000000..37bf7e9aa6927633be720b2217f23235f8c57f47 --- /dev/null +++ b/run_speech_recognition_rnnt.py @@ -0,0 +1,935 @@ +#!/usr/bin/env python +# coding=utf-8 +# Copyright 2022 The HuggingFace Team All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +Fine-tuning NVIDIA RNN-T models for speech recognition. +""" +# You can also adapt this script on your own sequence to sequence task. Pointers for this are left as comments. 
+import copy +import logging +import os +import re +import sys +from dataclasses import dataclass, field + +from tqdm import tqdm +import json +from typing import Optional, Dict, Union, List + +import numpy as np +import torch +import torch.nn as nn + +from omegaconf import OmegaConf, open_dict +from models import RNNTBPEModel +from nemo.core import adapter_mixins +from nemo.collections.common.parts.adapter_modules import LinearAdapterConfig + +import datasets +from datasets import DatasetDict, load_dataset +import transformers +from transformers import ( + HfArgumentParser, + Seq2SeqTrainingArguments, + set_seed, + Trainer, + TrainerCallback, + TrainingArguments, + TrainerState, + TrainerControl, +) +from transformers.trainer_pt_utils import get_parameter_names +from transformers.trainer_utils import get_last_checkpoint, is_main_process +from transformers.utils import check_min_version +from transformers.utils.versions import require_version + +from process_asr_text_tokenizer import __process_data as nemo_process_data, \ + __build_document_from_manifests as nemo_build_document_from_manifests + +import bitsandbytes as bnb + +# Will error if the minimal version of Transformers is not installed. Remove at your own risks. +check_min_version("4.17.0.dev0") + +require_version("datasets>=1.18.0", "To fix: pip install -r examples/pytorch/speech-recognition/requirements.txt") + +logger = logging.getLogger(__name__) + + +@dataclass +class ModelArguments: + """ + Arguments pertaining to which model/config/tokenizer we are going to fine-tune from. 
+ """ + + config_path: str = field( + metadata={"help": "Path to pretrained model or model identifier from huggingface.co/models."}, + ) + model_name_or_path: Optional[str] = field( + default=None, + metadata={"help": "Path to pretrained model or model identifier from NVIDIA NeMo NGC."} + ) + cache_dir: Optional[str] = field( + default=None, + metadata={"help": "Where to store the pretrained models downloaded from huggingface.co or NVIDIA NeMo NGC."}, + ) + use_auth_token: bool = field( + default=False, + metadata={ + "help": "Will use the token generated when running `transformers-cli login` (necessary to use this script " + "with private models)." + }, + ) + manifest_path: str = field( + default="data", + metadata={ + "help": "Manifest path." + }, + ) + tokenizer_path: str = field( + default="tokenizers", + metadata={ + "help": "Tokenizer path." + }, + ) + vocab_size: int = field( + default=1024, + metadata={"help": "Tokenizer vocab size."} + ) + tokenizer_type: str = field( + default="spe", + metadata={ + "help": "Can be either spe or wpe. spe refers to the Google sentencepiece library tokenizer." + "wpe refers to the HuggingFace BERT Word Piece tokenizer." + }, + ) + spe_type: str = field( + default="bpe", + metadata={ + "help": "Type of the SentencePiece model. Can be `bpe`, `unigram`, `char` or `word`." + "Used only if `tokenizer_type` == `spe`" + }, + ) + cutoff_freq: str = field( + default=0.001, + metadata={"help": "Drop the least frequent chars from the train set when building the tokenizer."} + ) + fuse_loss_wer: bool = field( + default=True, + metadata={ + "help": "Whether to fuse the computation of prediction net + joint net + loss + WER calculation to be run " + "on sub-batches of size `fused_batch_size`" + } + ) + fused_batch_size: int = field( + default=8, + metadata={ + "help": "`fused_batch_size` is the actual batch size of the prediction net, joint net and transducer loss." 
+ "Using small values here will preserve a lot of memory during training, but will make training slower as well." + "An optimal ratio of fused_batch_size : per_device_train_batch_size is 1:1." + "However, to preserve memory, this ratio can be 1:8 or even 1:16." + } + ) + final_decoding_strategy: str = field( + default="greedy_batch", + metadata={ + "help": "Decoding strategy for final eval/prediction steps. One of: [`greedy`, `greedy_batch`, `beam`, " + "`tsd`, `alsd`]." + } + ) + final_num_beams: int = field( + default=1, + metadata={ + "help": "Number of beams for final eval/prediction steps. Increase beam size for better scores, " + "but it will take much longer for transcription!" + } + ) + freeze_encoder: bool = field( + default=False, + metadata={"help": "Freeze the acoustic encoder of the model. Recommend when fine-tuning on small datasets."} + ) + unfreeze_encoder: bool = field( + default=False, + metadata={"help": "Unfreeze the acoustic encoder of the model after first evaluation step."} + ) + add_adapter: bool = field( + default=False, + metadata={"help": "Add an adapter layer to the encoder of the model."} + ) + use_adam8bit: bool = field( + default=False, + metadata={"help": "Whether to use bitsandbytes 8bit AdamW optimiser."} + ) + + +@dataclass +class DataTrainingArguments: + """ + Arguments pertaining to what data we are going to input our model for training and eval. 
+ """ + + dataset_name: str = field( + default=None, metadata={"help": "The name of the dataset to use (via the datasets library)."} + ) + dataset_config_name: Optional[str] = field( + default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."} + ) + text_column: Optional[str] = field( + default=None, + metadata={"help": "The name of the column in the datasets containing the full texts (for summarization)."}, + ) + dataset_cache_dir: Optional[str] = field( + default=None, metadata={"help": "Path to cache directory for saving and loading datasets"} + ) + overwrite_cache: bool = field( + default=False, metadata={"help": "Overwrite the cached training and evaluation sets"} + ) + preprocessing_num_workers: Optional[int] = field( + default=None, + metadata={"help": "The number of processes to use for the preprocessing."}, + ) + max_train_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of training examples to this " + "value if set." + }, + ) + max_eval_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of evaluation examples to this " + "value if set." + }, + ) + max_predict_samples: Optional[int] = field( + default=None, + metadata={ + "help": "For debugging purposes or quicker training, truncate the number of test examples to this " + "value if set." + }, + ) + audio_column_name: str = field( + default="audio", + metadata={"help": "The name of the dataset column containing the audio data. Defaults to 'audio'"}, + ) + text_column_name: str = field( + default="text", + metadata={"help": "The name of the dataset column containing the text data. 
Defaults to 'text'"}, + ) + max_duration_in_seconds: float = field( + default=20.0, + metadata={ + "help": "Truncate training audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + min_duration_in_seconds: float = field( + default=0.0, metadata={"help": "Filter audio files that are shorter than `min_duration_in_seconds` seconds"} + ) + max_eval_duration_in_seconds: float = field( + default=None, + metadata={ + "help": "Truncate eval/test audio files that are longer than `max_duration_in_seconds` seconds to 'max_duration_in_seconds`" + }, + ) + max_target_length: Optional[int] = field( + default=128, + metadata={ + "help": "The maximum total sequence length for target text after tokenization. Sequences longer " + "than this will be truncated, sequences shorter will be padded." + }, + ) + min_target_length: Optional[int] = field( + default=2, + metadata={ + "help": "The minimum total sequence length for target text after tokenization. Sequences shorter " + "than this will be filtered." + }, + ) + preprocessing_only: bool = field( + default=False, + metadata={ + "help": "Whether to only do data preprocessing and skip training. " + "This is especially useful when data preprocessing errors out in distributed training due to timeout. " + "In this case, one should run the preprocessing in a non-distributed setup with `preprocessing_only=True` " + "so that the cached datasets can consequently be loaded in distributed training" + }, + ) + train_split_name: str = field( + default="train", + metadata={ + "help": "The name of the training data set split to use (via the datasets library). Defaults to 'train'" + }, + ) + eval_split_name: str = field( + default="validation", + metadata={ + "help": "The name of the evaluation data set split to use (via the datasets library). 
Defaults to 'validation'" + }, + ) + test_split_name: str = field( + default="test", + metadata={"help": "The name of the test data set split to use (via the datasets library). Defaults to 'test'"}, + ) + do_lower_case: bool = field( + default=True, + metadata={"help": "Whether the target text should be lower cased."}, + ) + wandb_project: str = field( + default="speech-recognition-rnnt", + metadata={"help": "The name of the wandb project."}, + ) + + +def build_tokenizer(model_args, data_args, manifests): + """ + Function to build a NeMo tokenizer from manifest file(s). + Copied from https://github.com/NVIDIA/NeMo/blob/66c7677cd4a68d78965d4905dd1febbf5385dff3/scripts/tokenizers/process_asr_text_tokenizer.py#L268 + """ + data_root = model_args.tokenizer_path + if isinstance(manifests, list): + joint_manifests = ",".join(manifests) + else: + joint_manifests = manifests + vocab_size = model_args.vocab_size + tokenizer = model_args.tokenizer_type + spe_type = model_args.spe_type + if not 0 <= model_args.cutoff_freq < 1: + raise ValueError(f"`cutoff_freq` must be between zero and one, got {model_args.cutoff_freq}") + spe_character_coverage = 1 - model_args.cutoff_freq + + logger.info("Building tokenizer...") + if not os.path.exists(data_root): + os.makedirs(data_root) + + text_corpus_path = nemo_build_document_from_manifests(data_root, joint_manifests) + + tokenizer_path = nemo_process_data( + text_corpus_path, + data_root, + vocab_size, + tokenizer, + spe_type, + lower_case=data_args.do_lower_case, + spe_character_coverage=spe_character_coverage, + spe_sample_size=-1, + spe_train_extremely_large_corpus=False, + spe_max_sentencepiece_length=-1, + spe_bos=False, + spe_eos=False, + spe_pad=False, + ) + + print("Serialized tokenizer at location :", tokenizer_path) + logger.info('Done!') + + # Tokenizer path + if tokenizer == 'spe': + tokenizer_dir = os.path.join(data_root, f"tokenizer_spe_{spe_type}_v{vocab_size}") + tokenizer_type_cfg = "bpe" + else: + tokenizer_dir = 
os.path.join(data_root, f"tokenizer_wpe_v{vocab_size}")
        tokenizer_type_cfg = "wpe"

    return tokenizer_dir, tokenizer_type_cfg


def NeMoDataCollator(features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:
    """
    Data collator that will dynamically pad the inputs received.
    Since NeMo models don't have a HF processor defined (feature extractor + tokenizer), we'll pad by hand...
    The padding idx is arbitrary: we provide the model with the input lengths and label lengths, from which
    all the relevant padding information is inferred. Thus, we'll use the default np.pad padding idx (0).

    Returns a dict with keys: "input_ids" (float32 audio, zero-padded), "input_lengths",
    "labels" (zero-padded token ids) and "label_lengths".
    """
    # split inputs and labels since they have to be of different lengths
    # and need different padding methods
    input_ids = [feature["input_ids"] for feature in features]
    labels = [feature["labels"] for feature in features]

    # first, pad the audio inputs to max_len
    input_lengths = [feature["input_lengths"] for feature in features]
    max_input_len = max(input_lengths)
    input_ids = [np.pad(input_val, (0, max_input_len - input_len), 'constant') for input_val, input_len in
                 zip(input_ids, input_lengths)]

    # next, pad the target labels to max_len
    label_lengths = [len(lab) for lab in labels]
    max_label_len = max(label_lengths)
    labels = [np.pad(lab, (0, max_label_len - lab_len), 'constant') for lab, lab_len in zip(labels, label_lengths)]

    batch = {"input_lengths": input_lengths, "labels": labels, "label_lengths": label_lengths}

    # return batch as a pt tensor (list -> np.array -> torch.tensor)
    batch = {k: torch.tensor(np.array(v), requires_grad=False) for k, v in batch.items()}

    # leave all ints as are, convert float64 to pt float
    batch["input_ids"] = torch.tensor(np.array(input_ids, dtype=np.float32), requires_grad=False)

    return batch


def main():
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
+ # We now keep distinct sets of args, for a cleaner separation of concerns. + + parser = HfArgumentParser((ModelArguments, DataTrainingArguments, Seq2SeqTrainingArguments)) + if len(sys.argv) == 2 and sys.argv[1].endswith(".json"): + # If we pass only one argument to the script and it's the path to a json file, + # let's parse it to get our arguments. + model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) + else: + model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # Set wandb project ID before instantiating the Trainer + os.environ["WANDB_PROJECT"] = data_args.wandb_project + + # Detecting last checkpoint. + last_checkpoint = None + if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir: + last_checkpoint = get_last_checkpoint(training_args.output_dir) + if last_checkpoint is None and len(os.listdir(training_args.output_dir)) > 0: + raise ValueError( + f"Output directory ({training_args.output_dir}) already exists and is not empty. " + "Use --overwrite_output_dir to overcome." + ) + elif last_checkpoint is not None: + logger.info( + f"Checkpoint detected, resuming training at {last_checkpoint}. To avoid this behavior, change " + "the `--output_dir` or add `--overwrite_output_dir` to train from scratch." 
+ ) + + # Setup logging + logging.basicConfig( + format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", + datefmt="%m/%d/%Y %H:%M:%S", + handlers=[logging.StreamHandler(sys.stdout)], + ) + logger.setLevel(logging.INFO if is_main_process(training_args.local_rank) else logging.WARN) + + # Log on each process the small summary: + logger.warning( + f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}" + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}" + ) + # Set the verbosity to info of the Transformers logger (on main process only): + if is_main_process(training_args.local_rank): + transformers.utils.logging.set_verbosity_info() + logger.info("Training/evaluation parameters %s", training_args) + + # Set seed before initializing model. + set_seed(training_args.seed) + + # load the model config (discarding optimiser and trainer attributes) + config = OmegaConf.load(model_args.config_path).model + + # 4. 
Load dataset + raw_datasets = DatasetDict() + + if training_args.do_train: + raw_datasets["train"] = load_dataset( + data_args.dataset_name, + data_args.dataset_config_name, + split=data_args.train_split_name, + cache_dir=data_args.dataset_cache_dir, + use_auth_token=True if model_args.use_auth_token else None, + ) + + if training_args.do_eval: + raw_datasets["eval"] = load_dataset( + data_args.dataset_name, + data_args.dataset_config_name, + split=data_args.eval_split_name, + cache_dir=data_args.dataset_cache_dir, + use_auth_token=True if model_args.use_auth_token else None, + ) + + if training_args.do_predict: + test_split = data_args.test_split_name.split("+") + for split in test_split: + raw_datasets[split] = load_dataset( + data_args.dataset_name, + data_args.dataset_config_name, + split=split, + cache_dir=data_args.dataset_cache_dir, + use_auth_token=True if model_args.use_auth_token else None, + ) + + if not training_args.do_train and not training_args.do_eval and not training_args.do_predict: + raise ValueError( + "Cannot not train, not do evaluation and not do prediction. At least one of " + "training, evaluation or prediction has to be done." + ) + + # if not training, there is no need to run multiple epochs + if not training_args.do_train: + training_args.num_train_epochs = 1 + + if data_args.audio_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--audio_column_name '{data_args.audio_column_name}' not found in dataset '{data_args.dataset_name}'. " + "Make sure to set `--audio_column_name` to the correct audio column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + if data_args.text_column_name not in next(iter(raw_datasets.values())).column_names: + raise ValueError( + f"--text_column_name {data_args.text_column_name} not found in dataset '{data_args.dataset_name}'. 
" + "Make sure to set `--text_column_name` to the correct text column - one of " + f"{', '.join(next(iter(raw_datasets.values())).column_names)}." + ) + + # 6. Resample speech dataset ALWAYS + raw_datasets = raw_datasets.cast_column( + data_args.audio_column_name, datasets.features.Audio(sampling_rate=config.sample_rate) + ) + + # 7. Preprocessing the datasets. + # We need to read the audio files as arrays and tokenize the targets. + max_input_length = int(data_args.max_duration_in_seconds * config.sample_rate) + min_input_length = min(int(data_args.min_duration_in_seconds * config.sample_rate), 1) + max_eval_input_length = int(data_args.max_eval_duration_in_seconds * config.sample_rate) if data_args.max_eval_duration_in_seconds else None + max_target_length = data_args.max_target_length + min_target_length = data_args.min_target_length + audio_column_name = data_args.audio_column_name + num_workers = data_args.preprocessing_num_workers + text_column_name = data_args.text_column_name + do_lower_case = data_args.do_lower_case + dataset_name = data_args.dataset_name + + # Define tokens to ignore/replace + tedlium_contractions = [" 's", " 't", " 're", " 've", " 'm", " 'll", " 'd", " 'clock", " 'all"] + gigaspeech_punctuation = {" ": ",", " ": ".", " ": "?", " ": "!"} + gigaspeech_disfluencies = ["", ""] + swb_disfluencies = ["[noise]", "[laughter]", "[silence]", "", "", "", "[laughter-", + "[vocalized-noise]", "_1"] + swb_punctuations = ["{", "}", "[", "]-", "]"] + earnings_disfluencies = ["", "", "", "inaudible", ""] + ignore_segments = ["ignore_time_segment_in_scoring", "", "", "[noise]", "[laughter]", "[silence]", + "[vocalized-noise]", "", "", "", "", "", + "", ""] + + if training_args.do_train and data_args.max_train_samples is not None: + raw_datasets["train"] = raw_datasets["train"].select(range(data_args.max_train_samples)) + + if training_args.do_eval and data_args.max_eval_samples is not None: + raw_datasets["eval"] = 
raw_datasets["eval"].select(range(data_args.max_eval_samples)) + + if training_args.do_predict and data_args.max_predict_samples is not None: + for split in test_split: + raw_datasets[split] = raw_datasets[split].select(range(data_args.max_predict_samples)) + + # filter data where the targets are ignored in scoring + def is_target_labels(input_str): + return input_str.lower() not in ignore_segments + + raw_datasets = raw_datasets.filter( + is_target_labels, + num_proc=num_workers, + input_columns=[text_column_name], + desc="filtering data where the targets are ignored in scoring", + ) + + def prepare_dataset(batch): + # pre-process audio + try: + sample = batch[audio_column_name] + except ValueError: + # E22: some samples are empty (no audio). Reading the empty audio array will trigger + # a soundfile ValueError. For now, we'll manually set these arrays to a zero array. + # They will be filtered in the subsequent filtering stage and so are + # explicitly ignored during training. + sample = {"array": np.array([0.]), "sampling_rate": config.sample_rate} + + # NeMo RNNT model performs the audio preprocessing in the `.forward()` call + # => we only need to supply it with the raw audio values + batch["input_ids"] = sample["array"] + batch["input_lengths"] = len(sample["array"]) + + # 'Error correction' of targets + input_str = batch[text_column_name].lower() if do_lower_case else batch[text_column_name] + + # LibriSpeech ASR + if dataset_name == "librispeech_asr": + pass # no error correction necessary + + # VoxPopuli + if dataset_name == "google/xtreme_s": + pass # no error correction necessary + + # Common Voice 9 + if dataset_name == "mozilla-foundation/common_voice_9_0": + if input_str.startswith('"') and input_str.endswith('"'): + # we can remove trailing quotation marks as they do not affect the transcription + input_str = input_str[1:-1] + # replace double quotation marks with single + input_str = input_str.replace('""', '"') + + # TED-LIUM (Release 3) + if 
dataset_name == "LIUM/tedlium": + # delete the token from the text + input_str = input_str.replace("", "") + # replace spaced apostrophes with un-spaced (it 's -> it's) + for contraction in tedlium_contractions: + input_str = input_str.replace(contraction, contraction[1:]) + + # GigaSpeech + if dataset_name == "speechcolab/gigaspeech": + for disfluency in gigaspeech_disfluencies: + input_str = input_str.replace(disfluency, "") + # convert spelled out punctuation to symbolic form + for punctuation, replacement in gigaspeech_punctuation.items(): + input_str = input_str.replace(punctuation, replacement) + + # SWB: hide the path to the private HF dataset + if "switchboard" in dataset_name: + for disfluency in swb_disfluencies: + input_str = input_str.replace(disfluency, "") + # remove parenthesised text (test data only) + input_str = re.sub("[\(].*?[\)]", "", input_str) + for punctuation in swb_punctuations: + input_str = input_str.replace(punctuation, "") + # replace anomalous words with their correct transcriptions + split_str = input_str.split("/") + if len(split_str) > 1: + input_str = " ".join( + [" ".join([" ".join(i.split(" ")[:-1]) for i in split_str])] + [split_str[-1].split(" ")[-1]]) + + # Earnings 22: still figuring out best segmenting method. Thus, dataset name subject to change + if "earnings22" in dataset_name: + for disfluency in earnings_disfluencies: + input_str = input_str.replace(disfluency, "") + + # SPGISpeech + if dataset_name == "kensho/spgispeech": + pass # no error correction necessary + + # JIWER compliance (for WER/CER calc.) + # remove multiple spaces + input_str = re.sub(r"\s\s+", " ", input_str) + # strip trailing spaces + input_str = input_str.strip() + + # We can't currently tokenize the dataset... we need the pre-processed text data in order to + # build our SPE tokenizer. Once we've defined our tokenizer, we can come back and + # tokenize the text. 
For now, just return the pre-processed text data + batch[text_column_name] = input_str + return batch + + vectorized_datasets = raw_datasets.map( + prepare_dataset, + num_proc=num_workers, + desc="preprocess train dataset", + ) + + # filter training data with inputs shorter than min_input_length or longer than max_input_length + def is_audio_in_length_range(length): + return length > min_input_length and length < max_input_length + + if training_args.do_train: + vectorized_datasets["train"] = vectorized_datasets["train"].filter( + is_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_lengths"], + ) + + if max_eval_input_length is not None: + # filter training data with inputs longer than max_input_length + def is_eval_audio_in_length_range(length): + return min_input_length < length < max_eval_input_length + + vectorized_datasets = vectorized_datasets.filter( + is_eval_audio_in_length_range, + num_proc=num_workers, + input_columns=["input_lengths"], + ) + + def is_labels_non_zero(transcription): + return len(transcription) > 0 + + vectorized_datasets = vectorized_datasets.filter( + is_labels_non_zero, + num_proc=num_workers, + input_columns=[text_column_name], + ) + + # for large datasets it is advised to run the preprocessing on a + # single machine first with `args.preprocessing_only` since there will mostly likely + # be a timeout when running the script in distributed mode. + # In a second step `args.preprocessing_only` can then be set to `False` to load the + # cached dataset + if data_args.preprocessing_only: + cache = {k: v.cache_files for k, v in vectorized_datasets.items()} + logger.info(f"Data preprocessing finished. 
Files cached at {cache}.") + return + + # Function to build a NeMo tokenizer manifest from a HF dataset + # TODO: with a bit of hacking around we can probably bypass this step entirely + def build_manifest(ds, manifest_path): + with open(manifest_path, 'w') as fout: + for sample in tqdm(ds[text_column_name]): + # Write the metadata to the manifest + metadata = { + "text": sample + } + json.dump(metadata, fout) + fout.write('\n') + + config.train_ds = config.validation_ds = config.test_ds = None + + if not os.path.exists(model_args.manifest_path) and training_args.do_train: + os.makedirs(model_args.manifest_path) + manifest = os.path.join(model_args.manifest_path, "train.json") + logger.info(f"Building training manifest at {manifest}") + build_manifest(vectorized_datasets["train"], manifest) + else: + manifest = os.path.join(model_args.manifest_path, "train.json") + logger.info(f"Re-using training manifest at {manifest}") + + tokenizer_dir, tokenizer_type_cfg = build_tokenizer(model_args, data_args, manifest) + + # generalise the script later to load a pre-built tokenizer for eval only + config.tokenizer.dir = tokenizer_dir + config.tokenizer.type = tokenizer_type_cfg + + if model_args.add_adapter: + # Utility method to check and update the model config + def update_model_config_to_support_adapter(model_cfg): + with open_dict(model_cfg): + adapter_metadata = adapter_mixins.get_registered_adapter(model_cfg.encoder._target_) + if adapter_metadata is not None: + model_cfg.encoder._target_ = adapter_metadata.adapter_class_path + + logging.info("Updated encoder _target_ model: %s", model_cfg.encoder._target_) + return model_cfg + + config = update_model_config_to_support_adapter(config) + + # possibly fused-computation of prediction net + joint net + loss + WER calculation + config.joint.fuse_loss_wer = model_args.fuse_loss_wer + if model_args.fuse_loss_wer: + config.joint.fused_batch_size = model_args.fused_batch_size + + if model_args.model_name_or_path is not None: + # 
load pre-trained model weights + model = RNNTBPEModel.from_pretrained(model_args.model_name_or_path, override_config_path=config, map_location="cpu") + model.save_name = model_args.model_name_or_path + + pretrained_decoder = model.decoder.state_dict() + pretrained_joint = model.joint.state_dict() + model.change_vocabulary(new_tokenizer_dir=tokenizer_dir, new_tokenizer_type=tokenizer_type_cfg) + + # TODO: add checks for loading decoder/joint state dict + model.decoder.load_state_dict(pretrained_decoder) + model.joint.load_state_dict(pretrained_joint) + + else: + model = RNNTBPEModel(cfg=config) + model.save_name = model_args.config_path.split("/")[-1].split(".")[0] + model.change_vocabulary(new_tokenizer_dir=tokenizer_dir, new_tokenizer_type=tokenizer_type_cfg) + + if model_args.add_adapter: + adapter_name = model_args.config_path.split("/")[-1].split(".")[0] + adapter_dim = model.cfg.encoder.d_model + adapter_activation = "swish" + adapter_norm_position = "post" + adapter_cfg = LinearAdapterConfig( + in_features=model.cfg.encoder.d_model, + # conformer specific model dim. Every layer emits this dim at its output. 
+ dim=adapter_dim, # the bottleneck dimension of the adapter + activation=adapter_activation, # activation used in bottleneck block + norm_position=adapter_norm_position, # whether to use LayerNorm at the beginning or the end of the adapter + ) + logger.info("Adapter config: %s", adapter_cfg) + model.add_adapter(name=adapter_name, cfg=adapter_cfg) + model.set_enabled_adapters(enabled=False) # disable all adapters + model.set_enabled_adapters(name=adapter_name, enabled=True) # enable only the current adapter we want to train + + def enable_bn(m): + if type(m) == nn.BatchNorm1d: + m.train() + for param in m.parameters(): + param.requires_grad_(True) + + if model_args.freeze_encoder: + model.encoder.freeze() + model.encoder.apply(enable_bn) + logging.info("Model encoder has been frozen, and batch normalization has been unfrozen") + + if model_args.add_adapter: + model.unfreeze_enabled_adapters() + logging.info("Model adapter has been unfrozen") + + # now that we have our model and tokenizer defined, we can tokenize the text data + tokenizer = model.tokenizer.tokenizer.encode_as_ids + + def tokenize_transcripts(batch): + batch["labels"] = tokenizer(batch[text_column_name]) + return batch + + vectorized_datasets = vectorized_datasets.map(tokenize_transcripts, num_proc=num_workers, + desc="Tokenizing datasets...", + remove_columns=next(iter(raw_datasets.values())).column_names) + + def compute_metrics(pred): + # Tuple of WERs returned by the model during eval: (wer, wer_num, wer_denom) + wer_num = pred.predictions[1] + wer_denom = pred.predictions[2] + # compute WERs over concat batches + wer = sum(wer_num) / sum(wer_denom) + return {"wer": wer} + + class UnfreezeEncoderCallback(TrainerCallback): + def on_evaluate(self, args: TrainingArguments, state: TrainerState, control: TrainerControl, **kwargs): + model.encoder.unfreeze() + print("Model encoder has been unfrozen") + + class NeMoTrainer(Trainer): + def _save(self, output_dir: Optional[str] = None, state_dict=None): + # 
If we are executing this function, we are the process zero, so we don't check for that. + output_dir = output_dir if output_dir is not None else self.args.output_dir + os.makedirs(output_dir, exist_ok=True) + logger.info(f"Saving model checkpoint to {output_dir}") + # Save a trained model and configuration using `save_pretrained()`. + # They can then be reloaded using `from_pretrained()` + self.model.save_to(save_path=os.path.join(output_dir, model.save_name + ".nemo")) + # Good practice: save your training arguments together with the trained model + torch.save(self.args, os.path.join(output_dir, "training_args.bin")) + + # Initialize Trainer + trainer = NeMoTrainer( + model=model, + args=training_args, + compute_metrics=compute_metrics, + train_dataset=vectorized_datasets['train'] if training_args.do_train else None, + eval_dataset=vectorized_datasets['eval'] if training_args.do_eval else None, + data_collator=NeMoDataCollator, + callbacks=[UnfreezeEncoderCallback] if model_args.unfreeze_encoder else None, + ) + + # 8. 
Finally, we can start training + + # Training + if training_args.do_train: + + # use last checkpoint if exist + if last_checkpoint is not None: + checkpoint = last_checkpoint + elif model_args.model_name_or_path is not None and os.path.isdir(model_args.model_name_or_path): + checkpoint = model_args.model_name_or_path + else: + checkpoint = None + + train_result = trainer.train(resume_from_checkpoint=checkpoint) + trainer.save_model() + + metrics = train_result.metrics + max_train_samples = ( + data_args.max_train_samples + if data_args.max_train_samples is not None + else len(vectorized_datasets["train"]) + ) + metrics["train_samples"] = min(max_train_samples, len(vectorized_datasets["train"])) + + trainer.log_metrics("train", metrics) + trainer.save_metrics("train", metrics) + trainer.save_state() + + # Change decoding strategy for final eval/predict + if training_args.do_eval or training_args.do_predict: + # set beam search decoding config + beam_decoding_config = copy.deepcopy(trainer.model.cfg.decoding) + beam_decoding_config.strategy = model_args.final_decoding_strategy + beam_decoding_config.beam.beam_size = model_args.final_num_beams + + trainer.model.change_decoding_strategy(beam_decoding_config) + + results = {} + if training_args.do_eval: + logger.info(f"*** Running Final Evaluation ({model_args.final_decoding_strategy}) ***") + + metrics = trainer.evaluate() + max_eval_samples = ( + data_args.max_eval_samples if data_args.max_eval_samples is not None else len(vectorized_datasets["eval"]) + ) + metrics["eval_samples"] = min(max_eval_samples, len(vectorized_datasets["eval"])) + + trainer.log_metrics("eval", metrics) + trainer.save_metrics("eval", metrics) + + if training_args.do_predict: + logger.info(f"*** Running Final Prediction ({model_args.final_decoding_strategy}) ***") + + for split in test_split: + predict_results = trainer.predict( + vectorized_datasets[split], metric_key_prefix=split, ) + metrics = predict_results.metrics + max_predict_samples = 
( + data_args.max_predict_samples if data_args.max_predict_samples is not None else len(vectorized_datasets[split]) + ) + metrics[f"{split}_samples"] = min(max_predict_samples, len(vectorized_datasets[split])) + + trainer.log_metrics(split, metrics) + trainer.save_metrics(split, metrics) + + if "wandb" in training_args.report_to: + import wandb + metrics = {os.path.join(split, k[len(split)+1:]): v for k, v in metrics.items()} + wandb.log(metrics) + + # re-evaluate on the test set, this time computing the CER + # this is pretty wasteful to run eval twice, but very fast to implement + trainer.model.wer.use_cer = True + trainer.model.change_decoding_strategy(trainer.model.cfg.decoding) + + for split in test_split: + predict_results = trainer.predict( + vectorized_datasets[split], metric_key_prefix=split, ) + metrics = predict_results.metrics + # the returned metric is the CER, but under an erroneous key; we swap them here + metrics = {f"{split}_cer": metrics[f"{split}_wer"]} + + trainer.log_metrics(split, metrics) + trainer.save_metrics(split, metrics) + + if "wandb" in training_args.report_to: + metrics = {os.path.join(split, k[len(split) + 1:]): v for k, v in metrics.items()} + wandb.log(metrics) + + # Write model card and (optionally) push to hub + config_name = data_args.dataset_config_name if data_args.dataset_config_name is not None else "na" + kwargs = { + "finetuned_from": model_args.model_name_or_path, + "tasks": "speech-recognition", + "tags": ["automatic-speech-recognition", data_args.dataset_name], + "dataset_args": ( + f"Config: {config_name}, Training split: {data_args.train_split_name}, Eval split:" + f" {data_args.eval_split_name}" + ), + "dataset": f"{data_args.dataset_name.upper()} - {config_name.upper()}", + } + if "common_voice" in data_args.dataset_name: + kwargs["language"] = config_name + + if training_args.push_to_hub: + trainer.push_to_hub(**kwargs) + #else: + #trainer.create_model_card(**kwargs) + + return results + + +if __name__ == 
"__main__": + main() diff --git a/scripts/run_batch_size_sweep.yaml b/scripts/run_batch_size_sweep.yaml new file mode 100644 index 0000000000000000000000000000000000000000..98c5961c10a2df8e57f3e53ea72b4feda6c3278c --- /dev/null +++ b/scripts/run_batch_size_sweep.yaml @@ -0,0 +1,61 @@ +command: + - python3 + - ${program} + - --use_auth_token + - --do_eval + - --group_by_length + - --overwrite_output_dir + - --fp16 + - --do_lower_case + - --do_eval + - --do_train + - --fuse_loss_wer + - ${args} +method: grid +metric: + goal: minimize + name: train/train_loss +parameters: + config_path: + value: conf/conformer_transducer_bpe_xlarge.yaml + dataset_config_name: + value: clean + dataset_name: + value: librispeech_asr + max_steps: + value: 50 + model_name_or_path: + value: stt_en_conformer_transducer_xlarge + output_dir: + value: ./sweep_output_dir + gradient_accumulation_steps: + values: + - 1 + - 2 + per_device_train_batch_size: + values: + - 8 + - 16 + fused_batch_size: + values: + - 4 + - 8 + - 16 + per_device_eval_batch_size: + value: 4 + preprocessing_num_workers: + value: 1 + train_split_name: + value: train.100[:500] + eval_split_name: + value: validation[:100] + tokenizer_path: + value: tokenizer + vocab_size: + value: 1024 + wandb_project: + value: rnnt-debug + logging_steps: + value: 5 +program: run_speech_recognition_rnnt.py +project: rnnt-debug \ No newline at end of file diff --git a/scripts/run_common_voice_9.sh b/scripts/run_common_voice_9.sh new file mode 100644 index 0000000000000000000000000000000000000000..a6c590f24e5a1db1a1652dfb0d1382dc9111218c --- /dev/null +++ b/scripts/run_common_voice_9.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +CUDA_VISIBLE_DEVICES=1 python run_speech_recognition_rnnt.py \ + --config_path="conf/conformer_transducer_bpe_xlarge.yaml" \ + --model_name_or_path="stt_en_conformer_transducer_xlarge" \ + --dataset_name="mozilla-foundation/common_voice_9_0" \ + --tokenizer_path="tokenizer" \ + --vocab_size="1024" \ + 
--num_train_epochs="0.90" \ + --dataset_config_name="en" \ + --train_split_name="train" \ + --eval_split_name="validation" \ + --test_split_name="test" \ + --text_column_name="sentence" \ + --output_dir="./conformer-transducer-xl-cv9" \ + --run_name="rnnt-cv9-baseline" \ + --wandb_project="rnnt" \ + --per_device_train_batch_size="8" \ + --per_device_eval_batch_size="4" \ + --logging_steps="50" \ + --learning_rate="1e-4" \ + --warmup_steps="500" \ + --save_strategy="steps" \ + --save_steps="20000" \ + --evaluation_strategy="steps" \ + --eval_steps="20000" \ + --report_to="wandb" \ + --preprocessing_num_workers="4" \ + --fused_batch_size="8" \ + --length_column_name="input_lengths" \ + --do_lower_case="False" \ + --fuse_loss_wer \ + --group_by_length \ + --overwrite_output_dir \ + --do_train \ + --do_eval \ + --do_predict \ + --push_to_hub \ + --use_auth_token diff --git a/scripts/run_dummy.sh b/scripts/run_dummy.sh new file mode 100644 index 0000000000000000000000000000000000000000..dde05ef9ede8d43f94652400096d8fa44ea9a5f9 --- /dev/null +++ b/scripts/run_dummy.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +CUDA_VISIBLE_DEVICES=1 python run_speech_recognition_rnnt.py \ + --config_path="conf/conformer_transducer_bpe_dummy.yaml" \ + --dataset_name="hf-internal-testing/librispeech_asr_dummy" \ + --tokenizer_path="tokenizer" \ + --vocab_size="1024" \ + --num_train_epochs="1" \ + --evaluation_strategy="epoch" \ + --dataset_config_name="clean" \ + --train_split_name="validation[:32]" \ + --eval_split_name="validation" \ + --test_split_name="validation[:90%]" \ + --text_column_name="text" \ + --output_dir="./output_dir" \ + --run_name="rnnt-ls-dummy" \ + --wandb_project="rnnt-dummy" \ + --per_device_train_batch_size="8" \ + --per_device_eval_batch_size="4" \ + --logging_steps="1" \ + --learning_rate="1e-4" \ + --warmup_steps="3" \ + --report_to="wandb" \ + --push_to_hub="False" \ + --preprocessing_num_workers="4" \ + --evaluation_strategy="epoch" \ + --max_eval_samples="8" \ + 
--max_predict_samples="8" \ + --final_num_beams="1" \ + --length_column_name="input_lengths" \ + --save_strategy="epoch" \ + --group_by_length \ + --overwrite_output_dir \ + --fp16 \ + --freeze_encoder \ + --do_lower_case \ + --do_train \ + --do_eval \ + --do_predict \ diff --git a/scripts/run_earnings22.sh b/scripts/run_earnings22.sh new file mode 100644 index 0000000000000000000000000000000000000000..ebca9b7a02f71e9f9d5a34b4aa778a9a2712905f --- /dev/null +++ b/scripts/run_earnings22.sh @@ -0,0 +1,39 @@ +#!/usr/bin/env bash +CUDA_VISIBLE_DEVICES=1 python run_speech_recognition_rnnt.py \ + --config_path="conf/conformer_transducer_bpe_xlarge.yaml" \ + --model_name_or_path="stt_en_conformer_transducer_xlarge" \ + --dataset_name="sanchit-gandhi/earnings22_robust_split" \ + --tokenizer_path="tokenizer" \ + --vocab_size="1024" \ + --max_steps="10000" \ + --dataset_config_name="all" \ + --train_split_name="train" \ + --eval_split_name="validation" \ + --test_split_name="test" \ + --text_column_name="sentence" \ + --output_dir="./" \ + --run_name="rnnt-earnings22-1e-2-freeze-encoder-weight-decay" \ + --wandb_project="rnnt-debug-earnings22" \ + --per_device_train_batch_size="8" \ + --per_device_eval_batch_size="4" \ + --logging_steps="25" \ + --learning_rate="1e-2" \ + --warmup_steps="500" \ + --save_strategy="no" \ + --evaluation_strategy="steps" \ + --eval_steps="2000" \ + --report_to="wandb" \ + --preprocessing_num_workers="4" \ + --fused_batch_size="8" \ + --length_column_name="input_lengths" \ + --final_decoding_strategy="beam" \ + --final_num_beams="4" \ + --weight_decay="1e-3" \ + --freeze_encoder \ + --fuse_loss_wer \ + --group_by_length \ + --overwrite_output_dir \ + --do_lower_case \ + --do_train \ + --do_eval \ + --use_auth_token diff --git a/scripts/run_learning_rate_sweep.yaml b/scripts/run_learning_rate_sweep.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6297a39a7e1ef650efb2035dd39e19a2c944a88f --- /dev/null +++ 
b/scripts/run_learning_rate_sweep.yaml @@ -0,0 +1,58 @@ +command: + - python3 + - ${program} + - --use_auth_token + - --do_eval + - --group_by_length + - --overwrite_output_dir + - --fp16 + - --do_lower_case + - --do_eval + - --do_train + - --fuse_loss_wer + - ${args} +method: grid +metric: + goal: minimize + name: train/loss +parameters: + config_path: + value: conf/conformer_transducer_bpe_xlarge.yaml + dataset_config_name: + value: clean + dataset_name: + value: librispeech_asr + max_steps: + value: 50 + model_name_or_path: + value: stt_en_conformer_transducer_xlarge + output_dir: + value: ./sweep_output_dir + per_device_train_batch_size: + value: 8 + fused_batch_size: + value: 8 + per_device_eval_batch_size: + value: 4 + gradient_accumulation_steps: + value: 1 #TBD. + grad checkpointing? + preprocessing_num_workers: + value: 1 + train_split_name: + value: train.100 + eval_split_name: + value: validation + tokenizer_path: + value: tokenizer + vocab_size: + value: 1024 + learning_rate: + values: + - 3e-4 + - 1e-4 + - 3e-5 + - 1e-5 + num_train_epochs: + value: 3 +program: run_speech_recognition_rnnt.py +project: rnnt-debug \ No newline at end of file diff --git a/scripts/run_librispeech.sh b/scripts/run_librispeech.sh new file mode 100644 index 0000000000000000000000000000000000000000..0e06499a10be2bf497e1d02a455ede7970140ecc --- /dev/null +++ b/scripts/run_librispeech.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +CUDA_VISIBLE_DEVICES=0 python run_speech_recognition_rnnt.py \ + --config_path="conf/conformer_transducer_bpe_xlarge.yaml" \ + --model_name_or_path="stt_en_conformer_transducer_xlarge" \ + --dataset_name="librispeech_asr" \ + --tokenizer_path="tokenizer" \ + --vocab_size="1024" \ + --num_train_epochs="2.84" \ + --dataset_config_name="all" \ + --train_split_name="train.clean.100+train.clean.360+train.other.500" \ + --eval_split_name="validation.clean" \ + --test_split_name="test.clean+test.other" \ + --text_column_name="text" \ + 
--output_dir="./conformer-transducer-xl-ls-960h" \ + --run_name="rnnt-ls-960h-baseline" \ + --wandb_project="rnnt" \ + --per_device_train_batch_size="8" \ + --per_device_eval_batch_size="4" \ + --logging_steps="50" \ + --learning_rate="1e-4" \ + --warmup_steps="500" \ + --save_strategy="steps" \ + --save_steps="20000" \ + --evaluation_strategy="steps" \ + --eval_steps="20000" \ + --report_to="wandb" \ + --preprocessing_num_workers="4" \ + --fused_batch_size="8" \ + --length_column_name="input_lengths" \ + --fuse_loss_wer \ + --group_by_length \ + --overwrite_output_dir \ + --do_lower_case \ + --do_train \ + --do_eval \ + --do_predict \ + --push_to_hub \ + --use_auth_token diff --git a/scripts/run_spgispeech.sh b/scripts/run_spgispeech.sh new file mode 100644 index 0000000000000000000000000000000000000000..5cbdf2a5c8fe010453df8f2a8e3dd3d9864f23be --- /dev/null +++ b/scripts/run_spgispeech.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +CUDA_VISIBLE_DEVICES=1 python run_speech_recognition_rnnt.py \ + --config_path="conf/conformer_transducer_bpe_xlarge.yaml" \ + --model_name_or_path="stt_en_conformer_transducer_xlarge" \ + --dataset_name="kensho/spgispeech" \ + --tokenizer_path="tokenizer" \ + --vocab_size="1024" \ + --num_train_epochs="3" \ + --dataset_config_name="S" \ + --train_split_name="train" \ + --eval_split_name="validation" \ + --test_split_name="test" \ + --text_column_name="transcript" \ + --output_dir="./" \ + --run_name="rnnt-spgispeech-1e-3-freeze-encoder-unfreeze-encoder" \ + --wandb_project="rnnt-debug-spgispeech" \ + --per_device_train_batch_size="8" \ + --per_device_eval_batch_size="4" \ + --logging_steps="25" \ + --learning_rate="1e-3" \ + --warmup_steps="500" \ + --save_strategy="no" \ + --evaluation_strategy="epoch" \ + --report_to="wandb" \ + --preprocessing_num_workers="4" \ + --fused_batch_size="8" \ + --length_column_name="input_lengths" \ + --final_decoding_strategy="greedy_batch" \ + --final_num_beams="1" \ + --do_lower_case="False" \ + 
--freeze_encoder \ + --fuse_loss_wer \ + --group_by_length \ + --overwrite_output_dir \ + --do_train \ + --do_eval \ + --use_auth_token diff --git a/scripts/run_switchboard.sh b/scripts/run_switchboard.sh new file mode 100644 index 0000000000000000000000000000000000000000..d6395123c5ddc5e11fa0ec252fd5714635d3dee1 --- /dev/null +++ b/scripts/run_switchboard.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +CUDA_VISIBLE_DEVICES=1 python run_speech_recognition_rnnt.py \ + --config_path="conf/conformer_transducer_bpe_xlarge.yaml" \ + --model_name_or_path="stt_en_conformer_transducer_xlarge" \ + --dataset_name="ldc/switchboard" \ + --tokenizer_path="tokenizer" \ + --vocab_size="1024" \ + --num_train_epochs="14" \ + --evaluation_strategy="epoch" \ + --dataset_config_name="switchboard" \ + --train_split_name="train" \ + --eval_split_name="validation" \ + --test_split_name="test" \ + --text_column_name="text" \ + --output_dir="./" \ + --run_name="rnnt-switchboard-baseline" \ + --wandb_project="rnnt" \ + --per_device_train_batch_size="8" \ + --per_device_eval_batch_size="4" \ + --logging_steps="25" \ + --learning_rate="1e-4" \ + --warmup_steps="2000" \ + --save_steps="200000" \ + --evaluation_strategy="steps" \ + --eval_steps="80000" \ + --report_to="wandb" \ + --preprocessing_num_workers="4" \ + --fused_batch_size="8" \ + --length_column_name="input_lengths" \ + --fuse_loss_wer \ + --group_by_length \ + --overwrite_output_dir \ + --fp16 \ + --do_lower_case \ + --do_train \ + --do_eval \ + --do_predict \ + --push_to_hub \ + --freeze_encoder \ + --use_auth_token diff --git a/scripts/run_tedlium.sh b/scripts/run_tedlium.sh new file mode 100644 index 0000000000000000000000000000000000000000..a83c4b6ee9f250116dda50cc423f7fee3d34a5dc --- /dev/null +++ b/scripts/run_tedlium.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +CUDA_VISIBLE_DEVICES=1 python run_speech_recognition_rnnt.py \ + --config_path="conf/conformer_transducer_bpe_xlarge.yaml" \ + 
--model_name_or_path="stt_en_conformer_transducer_xlarge" \ + --dataset_name="LIUM/tedlium" \ + --tokenizer_path="tokenizer" \ + --vocab_size="1024" \ + --num_train_epochs="2.98" \ + --dataset_config_name="release3" \ + --train_split_name="train" \ + --eval_split_name="validation" \ + --test_split_name="test" \ + --text_column_name="text" \ + --output_dir="./conformer-transducer-xl-tedlium" \ + --run_name="rnnt-tedlium-baseline" \ + --wandb_project="rnnt" \ + --per_device_train_batch_size="8" \ + --per_device_eval_batch_size="4" \ + --logging_steps="50" \ + --learning_rate="1e-4" \ + --warmup_steps="500" \ + --save_strategy="steps" \ + --save_steps="20000" \ + --evaluation_strategy="steps" \ + --eval_steps="20000" \ + --report_to="wandb" \ + --preprocessing_num_workers="4" \ + --fused_batch_size="8" \ + --length_column_name="input_lengths" \ + --fuse_loss_wer \ + --group_by_length \ + --overwrite_output_dir \ + --do_lower_case \ + --do_train \ + --do_eval \ + --do_predict \ + --push_to_hub \ + --use_auth_token diff --git a/scripts/run_tedlium_sweep.yaml b/scripts/run_tedlium_sweep.yaml new file mode 100644 index 0000000000000000000000000000000000000000..87bf1dea71e1032bbe51b6f3b9cb817b2191a9d5 --- /dev/null +++ b/scripts/run_tedlium_sweep.yaml @@ -0,0 +1,82 @@ +command: + - python3 + - ${program} + - --use_auth_token + - --group_by_length + - --overwrite_output_dir + - --fp16 + - --do_lower_case + - --do_eval + - --do_train + - --fuse_loss_wer + - ${args} +method: random +metric: + goal: minimize + name: eval/wer +parameters: + config_path: + value: conf/conformer_transducer_bpe_xlarge.yaml + dataset_config_name: + value: release3 + dataset_name: + value: LIUM/tedlium + eval_split_name: + value: validation + evaluation_strategy: + value: steps + eval_steps: + value: 2000 + fused_batch_size: + value: 8 + learning_rate: + values: + - 1e-1 + - 3e-2 + - 1e-2 + - 3e-3 + - 1e-3 + - 3e-4 + - 1e-4 + logging_steps: + value: 25 + model_name_or_path: + value: 
stt_en_conformer_transducer_xlarge + max_steps: + value: 8000 + output_dir: + value: ./sweep_output_dir + per_device_eval_batch_size: + value: 4 + per_device_train_batch_size: + value: 8 + preprocessing_num_workers: + value: 4 + save_strategy: + value: "no" + tokenizer_path: + value: tokenizer + train_split_name: + value: train + vocab_size: + value: 1024 + warmup_steps: + value: 500 + wandb_project: + value: rnnt-debug-tedlium + freeze_encoder: + values: + - true + - false + add_adapter: + values: + - true + - false + unfreeze_encoder: + values: + - true + - false + length_column_name: + value: input_lengths +program: run_speech_recognition_rnnt.py +project: rnnt-debug-tedlium \ No newline at end of file diff --git a/scripts/run_voxpopuli.sh b/scripts/run_voxpopuli.sh new file mode 100644 index 0000000000000000000000000000000000000000..01ee61ac5eefed859c2dbdfa518163089a96f376 --- /dev/null +++ b/scripts/run_voxpopuli.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +CUDA_VISIBLE_DEVICES=0 python run_speech_recognition_rnnt.py \ + --config_path="conf/conformer_transducer_bpe_xlarge.yaml" \ + --model_name_or_path="stt_en_conformer_transducer_xlarge" \ + --dataset_name="polinaeterna/voxpopuli" \ + --tokenizer_path="tokenizer" \ + --vocab_size="1024" \ + --num_train_epochs="4.79" \ + --dataset_config_name="en" \ + --train_split_name="train" \ + --eval_split_name="validation" \ + --test_split_name="test" \ + --text_column_name="normalized_text" \ + --output_dir="./conformer-transducer-xl-voxpopuli" \ + --run_name="rnnt-voxpopuli-baseline" \ + --wandb_project="rnnt" \ + --per_device_train_batch_size="8" \ + --per_device_eval_batch_size="4" \ + --logging_steps="50" \ + --learning_rate="1e-4" \ + --warmup_steps="500" \ + --save_strategy="steps" \ + --save_steps="20000" \ + --evaluation_strategy="steps" \ + --eval_steps="20000" \ + --report_to="wandb" \ + --preprocessing_num_workers="4" \ + --fused_batch_size="8" \ + --length_column_name="input_lengths" \ + --fuse_loss_wer \ + 
--group_by_length \ + --overwrite_output_dir \ + --do_lower_case \ + --do_train \ + --do_eval \ + --do_predict \ + --push_to_hub \ + --use_auth_token diff --git a/stt_en_conformer_transducer_xlarge.nemo b/stt_en_conformer_transducer_xlarge.nemo new file mode 100644 index 0000000000000000000000000000000000000000..d7c73159c119233fe6507debbb73f1849697cb70 --- /dev/null +++ b/stt_en_conformer_transducer_xlarge.nemo @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ddae862e437a58c6de0e823116aae2ce45992db914111c3c33df05104572feb +size 2577971200 diff --git a/test_results.json b/test_results.json new file mode 100644 index 0000000000000000000000000000000000000000..127245841c764f094cb3e3c93167939cdf23ba10 --- /dev/null +++ b/test_results.json @@ -0,0 +1,3 @@ +{ + "test_cer": 0.08093431359873023 +} \ No newline at end of file diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..e27808ed8c8c03ab80b34df7d5ebbac0623d047e --- /dev/null +++ b/train_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 7.38, + "train_loss": 10.025987887954182, + "train_runtime": 56856.134, + "train_samples": 108449, + "train_samples_per_second": 14.077, + "train_steps_per_second": 1.76 +} \ No newline at end of file diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9bd479f48fc13d979c2582d99065a5e684419e36 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,12076 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 7.380025079294829, + "global_step": 100051, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1e-05, + "loss": 178.9465, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 2e-05, + "loss": 164.9707, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 3e-05, + "loss": 142.2782, + "step": 150 + }, + { 
+ "epoch": 0.01, + "learning_rate": 4e-05, + "loss": 121.5122, + "step": 200 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 91.8622, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 6e-05, + "loss": 82.2062, + "step": 300 + }, + { + "epoch": 0.03, + "learning_rate": 7e-05, + "loss": 72.6893, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 8e-05, + "loss": 71.8709, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 9e-05, + "loss": 69.9995, + "step": 450 + }, + { + "epoch": 0.04, + "learning_rate": 0.0001, + "loss": 70.6458, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 9.994977448744865e-05, + "loss": 73.9929, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 9.989954897489729e-05, + "loss": 66.52, + "step": 600 + }, + { + "epoch": 0.05, + "learning_rate": 9.984932346234594e-05, + "loss": 65.8947, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 9.979909794979458e-05, + "loss": 62.5809, + "step": 700 + }, + { + "epoch": 0.06, + "learning_rate": 9.974887243724323e-05, + "loss": 61.212, + "step": 750 + }, + { + "epoch": 0.06, + "learning_rate": 9.969864692469187e-05, + "loss": 68.2408, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 9.964842141214051e-05, + "loss": 61.5308, + "step": 850 + }, + { + "epoch": 0.07, + "learning_rate": 9.959819589958916e-05, + "loss": 58.9116, + "step": 900 + }, + { + "epoch": 0.07, + "learning_rate": 9.95479703870378e-05, + "loss": 60.0702, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 9.949774487448646e-05, + "loss": 57.6135, + "step": 1000 + }, + { + "epoch": 0.08, + "learning_rate": 9.944751936193509e-05, + "loss": 50.9231, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 9.939729384938373e-05, + "loss": 51.187, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 9.934706833683238e-05, + "loss": 52.1127, + "step": 1150 + }, + { + "epoch": 0.09, + "learning_rate": 9.929684282428102e-05, + "loss": 47.4608, + "step": 
1200 + }, + { + "epoch": 0.09, + "learning_rate": 9.924661731172968e-05, + "loss": 51.6108, + "step": 1250 + }, + { + "epoch": 0.1, + "learning_rate": 9.919639179917831e-05, + "loss": 46.5874, + "step": 1300 + }, + { + "epoch": 0.1, + "learning_rate": 9.914616628662697e-05, + "loss": 41.4706, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 9.90959407740756e-05, + "loss": 43.7544, + "step": 1400 + }, + { + "epoch": 0.11, + "learning_rate": 9.904571526152426e-05, + "loss": 44.6039, + "step": 1450 + }, + { + "epoch": 0.11, + "learning_rate": 9.899548974897289e-05, + "loss": 41.4384, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 9.894526423642154e-05, + "loss": 42.8289, + "step": 1550 + }, + { + "epoch": 0.12, + "learning_rate": 9.889503872387019e-05, + "loss": 39.9726, + "step": 1600 + }, + { + "epoch": 0.12, + "learning_rate": 9.884481321131882e-05, + "loss": 43.9533, + "step": 1650 + }, + { + "epoch": 0.13, + "learning_rate": 9.879458769876748e-05, + "loss": 38.7605, + "step": 1700 + }, + { + "epoch": 0.13, + "learning_rate": 9.87443621862161e-05, + "loss": 39.5425, + "step": 1750 + }, + { + "epoch": 0.13, + "learning_rate": 9.869413667366476e-05, + "loss": 37.588, + "step": 1800 + }, + { + "epoch": 0.14, + "learning_rate": 9.86439111611134e-05, + "loss": 39.7744, + "step": 1850 + }, + { + "epoch": 0.14, + "learning_rate": 9.859368564856205e-05, + "loss": 38.2154, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 9.85434601360107e-05, + "loss": 35.0806, + "step": 1950 + }, + { + "epoch": 0.15, + "learning_rate": 9.849323462345934e-05, + "loss": 39.061, + "step": 2000 + }, + { + "epoch": 0.15, + "learning_rate": 9.844300911090798e-05, + "loss": 35.1544, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 9.839278359835663e-05, + "loss": 38.123, + "step": 2100 + }, + { + "epoch": 0.16, + "learning_rate": 9.834255808580527e-05, + "loss": 33.1144, + "step": 2150 + }, + { + "epoch": 0.16, + "learning_rate": 
9.829233257325392e-05, + "loss": 34.3476, + "step": 2200 + }, + { + "epoch": 0.17, + "learning_rate": 9.824210706070256e-05, + "loss": 29.5665, + "step": 2250 + }, + { + "epoch": 0.17, + "learning_rate": 9.81918815481512e-05, + "loss": 35.8756, + "step": 2300 + }, + { + "epoch": 0.17, + "learning_rate": 9.814165603559985e-05, + "loss": 37.2579, + "step": 2350 + }, + { + "epoch": 0.18, + "learning_rate": 9.809143052304849e-05, + "loss": 33.6245, + "step": 2400 + }, + { + "epoch": 0.18, + "learning_rate": 9.804120501049714e-05, + "loss": 35.6543, + "step": 2450 + }, + { + "epoch": 0.18, + "learning_rate": 9.799097949794578e-05, + "loss": 36.7847, + "step": 2500 + }, + { + "epoch": 0.19, + "learning_rate": 9.794075398539442e-05, + "loss": 33.463, + "step": 2550 + }, + { + "epoch": 0.19, + "learning_rate": 9.789052847284307e-05, + "loss": 32.2215, + "step": 2600 + }, + { + "epoch": 0.2, + "learning_rate": 9.784030296029171e-05, + "loss": 33.4301, + "step": 2650 + }, + { + "epoch": 0.2, + "learning_rate": 9.779007744774036e-05, + "loss": 29.9579, + "step": 2700 + }, + { + "epoch": 0.2, + "learning_rate": 9.773985193518901e-05, + "loss": 31.9141, + "step": 2750 + }, + { + "epoch": 0.21, + "learning_rate": 9.768962642263764e-05, + "loss": 33.2049, + "step": 2800 + }, + { + "epoch": 0.21, + "learning_rate": 9.763940091008629e-05, + "loss": 32.8774, + "step": 2850 + }, + { + "epoch": 0.21, + "learning_rate": 9.758917539753493e-05, + "loss": 29.0858, + "step": 2900 + }, + { + "epoch": 0.22, + "learning_rate": 9.753894988498358e-05, + "loss": 30.1145, + "step": 2950 + }, + { + "epoch": 0.22, + "learning_rate": 9.748872437243222e-05, + "loss": 27.6986, + "step": 3000 + }, + { + "epoch": 0.22, + "learning_rate": 9.743849885988087e-05, + "loss": 31.7807, + "step": 3050 + }, + { + "epoch": 0.23, + "learning_rate": 9.738827334732952e-05, + "loss": 30.5108, + "step": 3100 + }, + { + "epoch": 0.23, + "learning_rate": 9.733804783477815e-05, + "loss": 31.0909, + "step": 3150 + }, + { 
+ "epoch": 0.24, + "learning_rate": 9.728782232222681e-05, + "loss": 27.9057, + "step": 3200 + }, + { + "epoch": 0.24, + "learning_rate": 9.723759680967544e-05, + "loss": 29.7323, + "step": 3250 + }, + { + "epoch": 0.24, + "learning_rate": 9.71873712971241e-05, + "loss": 29.7527, + "step": 3300 + }, + { + "epoch": 0.25, + "learning_rate": 9.713714578457273e-05, + "loss": 29.1442, + "step": 3350 + }, + { + "epoch": 0.25, + "learning_rate": 9.708692027202137e-05, + "loss": 30.8906, + "step": 3400 + }, + { + "epoch": 0.25, + "learning_rate": 9.703669475947003e-05, + "loss": 26.8419, + "step": 3450 + }, + { + "epoch": 0.26, + "learning_rate": 9.698646924691866e-05, + "loss": 29.2181, + "step": 3500 + }, + { + "epoch": 0.26, + "learning_rate": 9.693624373436732e-05, + "loss": 27.6549, + "step": 3550 + }, + { + "epoch": 0.27, + "learning_rate": 9.688601822181595e-05, + "loss": 34.0701, + "step": 3600 + }, + { + "epoch": 0.27, + "learning_rate": 9.683579270926461e-05, + "loss": 24.7487, + "step": 3650 + }, + { + "epoch": 0.27, + "learning_rate": 9.678556719671325e-05, + "loss": 30.0266, + "step": 3700 + }, + { + "epoch": 0.28, + "learning_rate": 9.67353416841619e-05, + "loss": 25.5011, + "step": 3750 + }, + { + "epoch": 0.28, + "learning_rate": 9.668511617161054e-05, + "loss": 26.1437, + "step": 3800 + }, + { + "epoch": 0.28, + "learning_rate": 9.663489065905918e-05, + "loss": 23.2303, + "step": 3850 + }, + { + "epoch": 0.29, + "learning_rate": 9.658466514650783e-05, + "loss": 26.357, + "step": 3900 + }, + { + "epoch": 0.29, + "learning_rate": 9.653443963395646e-05, + "loss": 27.2201, + "step": 3950 + }, + { + "epoch": 0.3, + "learning_rate": 9.648421412140512e-05, + "loss": 25.5695, + "step": 4000 + }, + { + "epoch": 0.3, + "learning_rate": 9.643398860885376e-05, + "loss": 24.8346, + "step": 4050 + }, + { + "epoch": 0.3, + "learning_rate": 9.63837630963024e-05, + "loss": 22.3957, + "step": 4100 + }, + { + "epoch": 0.31, + "learning_rate": 9.633353758375105e-05, + "loss": 
24.9532, + "step": 4150 + }, + { + "epoch": 0.31, + "learning_rate": 9.628331207119969e-05, + "loss": 23.1574, + "step": 4200 + }, + { + "epoch": 0.31, + "learning_rate": 9.623308655864834e-05, + "loss": 23.7018, + "step": 4250 + }, + { + "epoch": 0.32, + "learning_rate": 9.618286104609698e-05, + "loss": 25.1433, + "step": 4300 + }, + { + "epoch": 0.32, + "learning_rate": 9.613263553354562e-05, + "loss": 25.0571, + "step": 4350 + }, + { + "epoch": 0.32, + "learning_rate": 9.608241002099427e-05, + "loss": 24.2231, + "step": 4400 + }, + { + "epoch": 0.33, + "learning_rate": 9.603218450844291e-05, + "loss": 23.0983, + "step": 4450 + }, + { + "epoch": 0.33, + "learning_rate": 9.598195899589156e-05, + "loss": 25.0078, + "step": 4500 + }, + { + "epoch": 0.34, + "learning_rate": 9.59317334833402e-05, + "loss": 20.6933, + "step": 4550 + }, + { + "epoch": 0.34, + "learning_rate": 9.588150797078884e-05, + "loss": 23.6196, + "step": 4600 + }, + { + "epoch": 0.34, + "learning_rate": 9.583128245823749e-05, + "loss": 25.2331, + "step": 4650 + }, + { + "epoch": 0.35, + "learning_rate": 9.578105694568613e-05, + "loss": 24.7932, + "step": 4700 + }, + { + "epoch": 0.35, + "learning_rate": 9.573083143313478e-05, + "loss": 24.3586, + "step": 4750 + }, + { + "epoch": 0.35, + "learning_rate": 9.568060592058342e-05, + "loss": 22.7161, + "step": 4800 + }, + { + "epoch": 0.36, + "learning_rate": 9.563038040803208e-05, + "loss": 22.4188, + "step": 4850 + }, + { + "epoch": 0.36, + "learning_rate": 9.558015489548071e-05, + "loss": 21.6516, + "step": 4900 + }, + { + "epoch": 0.37, + "learning_rate": 9.552992938292937e-05, + "loss": 21.78, + "step": 4950 + }, + { + "epoch": 0.37, + "learning_rate": 9.5479703870378e-05, + "loss": 21.0172, + "step": 5000 + }, + { + "epoch": 0.37, + "learning_rate": 9.542947835782665e-05, + "loss": 22.4624, + "step": 5050 + }, + { + "epoch": 0.38, + "learning_rate": 9.537925284527528e-05, + "loss": 23.6615, + "step": 5100 + }, + { + "epoch": 0.38, + 
"learning_rate": 9.532902733272393e-05, + "loss": 21.8091, + "step": 5150 + }, + { + "epoch": 0.38, + "learning_rate": 9.527880182017259e-05, + "loss": 21.4173, + "step": 5200 + }, + { + "epoch": 0.39, + "learning_rate": 9.522857630762122e-05, + "loss": 20.5415, + "step": 5250 + }, + { + "epoch": 0.39, + "learning_rate": 9.517835079506987e-05, + "loss": 21.0639, + "step": 5300 + }, + { + "epoch": 0.39, + "learning_rate": 9.51281252825185e-05, + "loss": 21.6078, + "step": 5350 + }, + { + "epoch": 0.4, + "learning_rate": 9.507789976996716e-05, + "loss": 19.4142, + "step": 5400 + }, + { + "epoch": 0.4, + "learning_rate": 9.50276742574158e-05, + "loss": 20.2504, + "step": 5450 + }, + { + "epoch": 0.41, + "learning_rate": 9.497744874486445e-05, + "loss": 23.8683, + "step": 5500 + }, + { + "epoch": 0.41, + "learning_rate": 9.49272232323131e-05, + "loss": 19.7559, + "step": 5550 + }, + { + "epoch": 0.41, + "learning_rate": 9.487699771976174e-05, + "loss": 21.1743, + "step": 5600 + }, + { + "epoch": 0.42, + "learning_rate": 9.482677220721038e-05, + "loss": 21.1908, + "step": 5650 + }, + { + "epoch": 0.42, + "learning_rate": 9.477654669465901e-05, + "loss": 20.9591, + "step": 5700 + }, + { + "epoch": 0.42, + "learning_rate": 9.472632118210767e-05, + "loss": 20.9036, + "step": 5750 + }, + { + "epoch": 0.43, + "learning_rate": 9.46760956695563e-05, + "loss": 22.249, + "step": 5800 + }, + { + "epoch": 0.43, + "learning_rate": 9.462587015700496e-05, + "loss": 19.1093, + "step": 5850 + }, + { + "epoch": 0.44, + "learning_rate": 9.45756446444536e-05, + "loss": 21.2714, + "step": 5900 + }, + { + "epoch": 0.44, + "learning_rate": 9.452541913190225e-05, + "loss": 21.3794, + "step": 5950 + }, + { + "epoch": 0.44, + "learning_rate": 9.447519361935089e-05, + "loss": 20.0326, + "step": 6000 + }, + { + "epoch": 0.45, + "learning_rate": 9.442496810679954e-05, + "loss": 19.8004, + "step": 6050 + }, + { + "epoch": 0.45, + "learning_rate": 9.437474259424818e-05, + "loss": 19.0229, + "step": 
6100 + }, + { + "epoch": 0.45, + "learning_rate": 9.432451708169682e-05, + "loss": 17.6587, + "step": 6150 + }, + { + "epoch": 0.46, + "learning_rate": 9.427429156914547e-05, + "loss": 21.9247, + "step": 6200 + }, + { + "epoch": 0.46, + "learning_rate": 9.422406605659411e-05, + "loss": 19.743, + "step": 6250 + }, + { + "epoch": 0.46, + "learning_rate": 9.417384054404276e-05, + "loss": 22.9746, + "step": 6300 + }, + { + "epoch": 0.47, + "learning_rate": 9.41236150314914e-05, + "loss": 19.6693, + "step": 6350 + }, + { + "epoch": 0.47, + "learning_rate": 9.407338951894004e-05, + "loss": 19.1141, + "step": 6400 + }, + { + "epoch": 0.48, + "learning_rate": 9.402316400638869e-05, + "loss": 18.3847, + "step": 6450 + }, + { + "epoch": 0.48, + "learning_rate": 9.397293849383733e-05, + "loss": 18.9357, + "step": 6500 + }, + { + "epoch": 0.48, + "learning_rate": 9.392271298128598e-05, + "loss": 18.9316, + "step": 6550 + }, + { + "epoch": 0.49, + "learning_rate": 9.387248746873462e-05, + "loss": 20.9141, + "step": 6600 + }, + { + "epoch": 0.49, + "learning_rate": 9.382226195618326e-05, + "loss": 18.7472, + "step": 6650 + }, + { + "epoch": 0.49, + "learning_rate": 9.377203644363192e-05, + "loss": 18.8577, + "step": 6700 + }, + { + "epoch": 0.5, + "learning_rate": 9.372181093108055e-05, + "loss": 17.8061, + "step": 6750 + }, + { + "epoch": 0.5, + "learning_rate": 9.36715854185292e-05, + "loss": 19.4687, + "step": 6800 + }, + { + "epoch": 0.51, + "learning_rate": 9.362135990597784e-05, + "loss": 19.5103, + "step": 6850 + }, + { + "epoch": 0.51, + "learning_rate": 9.357113439342648e-05, + "loss": 18.5319, + "step": 6900 + }, + { + "epoch": 0.51, + "learning_rate": 9.352090888087514e-05, + "loss": 20.16, + "step": 6950 + }, + { + "epoch": 0.52, + "learning_rate": 9.347068336832377e-05, + "loss": 18.1913, + "step": 7000 + }, + { + "epoch": 0.52, + "learning_rate": 9.342045785577243e-05, + "loss": 21.341, + "step": 7050 + }, + { + "epoch": 0.52, + "learning_rate": 
9.337023234322106e-05, + "loss": 16.7701, + "step": 7100 + }, + { + "epoch": 0.53, + "learning_rate": 9.332000683066972e-05, + "loss": 18.045, + "step": 7150 + }, + { + "epoch": 0.53, + "learning_rate": 9.326978131811835e-05, + "loss": 16.0393, + "step": 7200 + }, + { + "epoch": 0.53, + "learning_rate": 9.3219555805567e-05, + "loss": 17.4833, + "step": 7250 + }, + { + "epoch": 0.54, + "learning_rate": 9.316933029301565e-05, + "loss": 17.3978, + "step": 7300 + }, + { + "epoch": 0.54, + "learning_rate": 9.31191047804643e-05, + "loss": 18.2649, + "step": 7350 + }, + { + "epoch": 0.55, + "learning_rate": 9.306887926791294e-05, + "loss": 16.3891, + "step": 7400 + }, + { + "epoch": 0.55, + "learning_rate": 9.301865375536157e-05, + "loss": 21.4399, + "step": 7450 + }, + { + "epoch": 0.55, + "learning_rate": 9.296842824281023e-05, + "loss": 16.3082, + "step": 7500 + }, + { + "epoch": 0.56, + "learning_rate": 9.291820273025886e-05, + "loss": 14.8713, + "step": 7550 + }, + { + "epoch": 0.56, + "learning_rate": 9.286797721770751e-05, + "loss": 16.3099, + "step": 7600 + }, + { + "epoch": 0.56, + "learning_rate": 9.281775170515616e-05, + "loss": 17.8771, + "step": 7650 + }, + { + "epoch": 0.57, + "learning_rate": 9.27675261926048e-05, + "loss": 17.1421, + "step": 7700 + }, + { + "epoch": 0.57, + "learning_rate": 9.271730068005345e-05, + "loss": 16.6478, + "step": 7750 + }, + { + "epoch": 0.58, + "learning_rate": 9.266707516750209e-05, + "loss": 15.3247, + "step": 7800 + }, + { + "epoch": 0.58, + "learning_rate": 9.261684965495073e-05, + "loss": 17.6577, + "step": 7850 + }, + { + "epoch": 0.58, + "learning_rate": 9.256662414239938e-05, + "loss": 18.8549, + "step": 7900 + }, + { + "epoch": 0.59, + "learning_rate": 9.251639862984802e-05, + "loss": 17.4187, + "step": 7950 + }, + { + "epoch": 0.59, + "learning_rate": 9.246617311729667e-05, + "loss": 15.6643, + "step": 8000 + }, + { + "epoch": 0.59, + "learning_rate": 9.241594760474531e-05, + "loss": 17.1987, + "step": 8050 + }, + { 
+ "epoch": 0.6, + "learning_rate": 9.236572209219396e-05, + "loss": 18.1712, + "step": 8100 + }, + { + "epoch": 0.6, + "learning_rate": 9.23154965796426e-05, + "loss": 15.8015, + "step": 8150 + }, + { + "epoch": 0.6, + "learning_rate": 9.226527106709124e-05, + "loss": 19.064, + "step": 8200 + }, + { + "epoch": 0.61, + "learning_rate": 9.221504555453989e-05, + "loss": 18.2748, + "step": 8250 + }, + { + "epoch": 0.61, + "learning_rate": 9.216482004198853e-05, + "loss": 15.0679, + "step": 8300 + }, + { + "epoch": 0.62, + "learning_rate": 9.211459452943718e-05, + "loss": 17.995, + "step": 8350 + }, + { + "epoch": 0.62, + "learning_rate": 9.206436901688582e-05, + "loss": 17.467, + "step": 8400 + }, + { + "epoch": 0.62, + "learning_rate": 9.201414350433448e-05, + "loss": 18.6665, + "step": 8450 + }, + { + "epoch": 0.63, + "learning_rate": 9.196391799178311e-05, + "loss": 17.2848, + "step": 8500 + }, + { + "epoch": 0.63, + "learning_rate": 9.191369247923175e-05, + "loss": 14.4767, + "step": 8550 + }, + { + "epoch": 0.63, + "learning_rate": 9.18634669666804e-05, + "loss": 17.5444, + "step": 8600 + }, + { + "epoch": 0.64, + "learning_rate": 9.181324145412904e-05, + "loss": 14.4661, + "step": 8650 + }, + { + "epoch": 0.64, + "learning_rate": 9.176301594157768e-05, + "loss": 16.3339, + "step": 8700 + }, + { + "epoch": 0.65, + "learning_rate": 9.171279042902633e-05, + "loss": 17.5122, + "step": 8750 + }, + { + "epoch": 0.65, + "learning_rate": 9.166256491647499e-05, + "loss": 16.7631, + "step": 8800 + }, + { + "epoch": 0.65, + "learning_rate": 9.161233940392362e-05, + "loss": 16.5193, + "step": 8850 + }, + { + "epoch": 0.66, + "learning_rate": 9.156211389137227e-05, + "loss": 17.8364, + "step": 8900 + }, + { + "epoch": 0.66, + "learning_rate": 9.15118883788209e-05, + "loss": 16.2916, + "step": 8950 + }, + { + "epoch": 0.66, + "learning_rate": 9.146166286626956e-05, + "loss": 14.1719, + "step": 9000 + }, + { + "epoch": 0.67, + "learning_rate": 9.141143735371819e-05, + "loss": 
18.2987, + "step": 9050 + }, + { + "epoch": 0.67, + "learning_rate": 9.136121184116684e-05, + "loss": 17.4248, + "step": 9100 + }, + { + "epoch": 0.67, + "learning_rate": 9.13109863286155e-05, + "loss": 16.1862, + "step": 9150 + }, + { + "epoch": 0.68, + "learning_rate": 9.126076081606412e-05, + "loss": 16.3134, + "step": 9200 + }, + { + "epoch": 0.68, + "learning_rate": 9.121053530351278e-05, + "loss": 14.9158, + "step": 9250 + }, + { + "epoch": 0.69, + "learning_rate": 9.116030979096141e-05, + "loss": 15.2504, + "step": 9300 + }, + { + "epoch": 0.69, + "learning_rate": 9.111008427841007e-05, + "loss": 14.1967, + "step": 9350 + }, + { + "epoch": 0.69, + "learning_rate": 9.105985876585871e-05, + "loss": 17.3165, + "step": 9400 + }, + { + "epoch": 0.7, + "learning_rate": 9.100963325330736e-05, + "loss": 14.5912, + "step": 9450 + }, + { + "epoch": 0.7, + "learning_rate": 9.0959407740756e-05, + "loss": 17.5593, + "step": 9500 + }, + { + "epoch": 0.7, + "learning_rate": 9.090918222820465e-05, + "loss": 16.3421, + "step": 9550 + }, + { + "epoch": 0.71, + "learning_rate": 9.085895671565329e-05, + "loss": 16.2821, + "step": 9600 + }, + { + "epoch": 0.71, + "learning_rate": 9.080873120310192e-05, + "loss": 16.4985, + "step": 9650 + }, + { + "epoch": 0.72, + "learning_rate": 9.075850569055058e-05, + "loss": 16.1138, + "step": 9700 + }, + { + "epoch": 0.72, + "learning_rate": 9.070828017799922e-05, + "loss": 16.3997, + "step": 9750 + }, + { + "epoch": 0.72, + "learning_rate": 9.065805466544787e-05, + "loss": 15.518, + "step": 9800 + }, + { + "epoch": 0.73, + "learning_rate": 9.060782915289651e-05, + "loss": 13.8424, + "step": 9850 + }, + { + "epoch": 0.73, + "learning_rate": 9.055760364034515e-05, + "loss": 15.0784, + "step": 9900 + }, + { + "epoch": 0.73, + "learning_rate": 9.05073781277938e-05, + "loss": 14.0163, + "step": 9950 + }, + { + "epoch": 0.74, + "learning_rate": 9.045715261524244e-05, + "loss": 16.7863, + "step": 10000 + }, + { + "epoch": 0.74, + "learning_rate": 
9.040692710269109e-05, + "loss": 13.6715, + "step": 10050 + }, + { + "epoch": 0.75, + "learning_rate": 9.035670159013973e-05, + "loss": 15.1071, + "step": 10100 + }, + { + "epoch": 0.75, + "learning_rate": 9.030647607758837e-05, + "loss": 14.2658, + "step": 10150 + }, + { + "epoch": 0.75, + "learning_rate": 9.025625056503703e-05, + "loss": 15.1115, + "step": 10200 + }, + { + "epoch": 0.76, + "learning_rate": 9.020602505248566e-05, + "loss": 14.028, + "step": 10250 + }, + { + "epoch": 0.76, + "learning_rate": 9.015579953993431e-05, + "loss": 13.3066, + "step": 10300 + }, + { + "epoch": 0.76, + "learning_rate": 9.010557402738295e-05, + "loss": 14.1185, + "step": 10350 + }, + { + "epoch": 0.77, + "learning_rate": 9.00553485148316e-05, + "loss": 14.061, + "step": 10400 + }, + { + "epoch": 0.77, + "learning_rate": 9.000512300228024e-05, + "loss": 15.2439, + "step": 10450 + }, + { + "epoch": 0.77, + "learning_rate": 8.995489748972888e-05, + "loss": 13.3617, + "step": 10500 + }, + { + "epoch": 0.78, + "learning_rate": 8.990467197717754e-05, + "loss": 14.5514, + "step": 10550 + }, + { + "epoch": 0.78, + "learning_rate": 8.985444646462617e-05, + "loss": 15.2426, + "step": 10600 + }, + { + "epoch": 0.79, + "learning_rate": 8.980422095207483e-05, + "loss": 16.6418, + "step": 10650 + }, + { + "epoch": 0.79, + "learning_rate": 8.975399543952346e-05, + "loss": 13.3146, + "step": 10700 + }, + { + "epoch": 0.79, + "learning_rate": 8.970376992697212e-05, + "loss": 14.9333, + "step": 10750 + }, + { + "epoch": 0.8, + "learning_rate": 8.965354441442075e-05, + "loss": 14.4502, + "step": 10800 + }, + { + "epoch": 0.8, + "learning_rate": 8.960331890186939e-05, + "loss": 14.7886, + "step": 10850 + }, + { + "epoch": 0.8, + "learning_rate": 8.955309338931805e-05, + "loss": 15.0266, + "step": 10900 + }, + { + "epoch": 0.81, + "learning_rate": 8.950286787676668e-05, + "loss": 14.543, + "step": 10950 + }, + { + "epoch": 0.81, + "learning_rate": 8.945264236421534e-05, + "loss": 15.8078, + 
"step": 11000 + }, + { + "epoch": 0.82, + "learning_rate": 8.940241685166397e-05, + "loss": 13.6052, + "step": 11050 + }, + { + "epoch": 0.82, + "learning_rate": 8.935219133911263e-05, + "loss": 14.2995, + "step": 11100 + }, + { + "epoch": 0.82, + "learning_rate": 8.930196582656126e-05, + "loss": 15.732, + "step": 11150 + }, + { + "epoch": 0.83, + "learning_rate": 8.925174031400991e-05, + "loss": 14.0573, + "step": 11200 + }, + { + "epoch": 0.83, + "learning_rate": 8.920151480145856e-05, + "loss": 17.5941, + "step": 11250 + }, + { + "epoch": 0.83, + "learning_rate": 8.91512892889072e-05, + "loss": 14.7829, + "step": 11300 + }, + { + "epoch": 0.84, + "learning_rate": 8.910106377635585e-05, + "loss": 14.6669, + "step": 11350 + }, + { + "epoch": 0.84, + "learning_rate": 8.905083826380448e-05, + "loss": 14.3315, + "step": 11400 + }, + { + "epoch": 0.84, + "learning_rate": 8.900061275125313e-05, + "loss": 14.2639, + "step": 11450 + }, + { + "epoch": 0.85, + "learning_rate": 8.895038723870176e-05, + "loss": 14.3226, + "step": 11500 + }, + { + "epoch": 0.85, + "learning_rate": 8.890016172615042e-05, + "loss": 14.4975, + "step": 11550 + }, + { + "epoch": 0.86, + "learning_rate": 8.884993621359907e-05, + "loss": 14.8436, + "step": 11600 + }, + { + "epoch": 0.86, + "learning_rate": 8.879971070104771e-05, + "loss": 13.8481, + "step": 11650 + }, + { + "epoch": 0.86, + "learning_rate": 8.874948518849635e-05, + "loss": 12.8151, + "step": 11700 + }, + { + "epoch": 0.87, + "learning_rate": 8.8699259675945e-05, + "loss": 13.1659, + "step": 11750 + }, + { + "epoch": 0.87, + "learning_rate": 8.864903416339364e-05, + "loss": 15.0919, + "step": 11800 + }, + { + "epoch": 0.87, + "learning_rate": 8.859880865084229e-05, + "loss": 14.4382, + "step": 11850 + }, + { + "epoch": 0.88, + "learning_rate": 8.854858313829093e-05, + "loss": 14.0989, + "step": 11900 + }, + { + "epoch": 0.88, + "learning_rate": 8.849835762573957e-05, + "loss": 14.5763, + "step": 11950 + }, + { + "epoch": 0.89, + 
"learning_rate": 8.844813211318822e-05, + "loss": 13.4144, + "step": 12000 + }, + { + "epoch": 0.89, + "learning_rate": 8.839790660063686e-05, + "loss": 15.6018, + "step": 12050 + }, + { + "epoch": 0.89, + "learning_rate": 8.83476810880855e-05, + "loss": 14.7849, + "step": 12100 + }, + { + "epoch": 0.9, + "learning_rate": 8.829745557553415e-05, + "loss": 14.441, + "step": 12150 + }, + { + "epoch": 0.9, + "learning_rate": 8.82472300629828e-05, + "loss": 14.2135, + "step": 12200 + }, + { + "epoch": 0.9, + "learning_rate": 8.819700455043144e-05, + "loss": 17.1245, + "step": 12250 + }, + { + "epoch": 0.91, + "learning_rate": 8.814677903788008e-05, + "loss": 14.6629, + "step": 12300 + }, + { + "epoch": 0.91, + "learning_rate": 8.809655352532873e-05, + "loss": 16.6715, + "step": 12350 + }, + { + "epoch": 0.91, + "learning_rate": 8.804632801277738e-05, + "loss": 13.0133, + "step": 12400 + }, + { + "epoch": 0.92, + "learning_rate": 8.799610250022601e-05, + "loss": 14.1551, + "step": 12450 + }, + { + "epoch": 0.92, + "learning_rate": 8.794587698767466e-05, + "loss": 14.019, + "step": 12500 + }, + { + "epoch": 0.93, + "learning_rate": 8.78956514751233e-05, + "loss": 14.4279, + "step": 12550 + }, + { + "epoch": 0.93, + "learning_rate": 8.784542596257195e-05, + "loss": 12.5293, + "step": 12600 + }, + { + "epoch": 0.93, + "learning_rate": 8.77952004500206e-05, + "loss": 15.0403, + "step": 12650 + }, + { + "epoch": 0.94, + "learning_rate": 8.774497493746924e-05, + "loss": 13.8193, + "step": 12700 + }, + { + "epoch": 0.94, + "learning_rate": 8.769474942491789e-05, + "loss": 13.1564, + "step": 12750 + }, + { + "epoch": 0.94, + "learning_rate": 8.764452391236652e-05, + "loss": 14.6415, + "step": 12800 + }, + { + "epoch": 0.95, + "learning_rate": 8.759429839981518e-05, + "loss": 12.2339, + "step": 12850 + }, + { + "epoch": 0.95, + "learning_rate": 8.754407288726381e-05, + "loss": 12.1604, + "step": 12900 + }, + { + "epoch": 0.96, + "learning_rate": 8.749384737471247e-05, + "loss": 
15.4939, + "step": 12950 + }, + { + "epoch": 0.96, + "learning_rate": 8.744362186216111e-05, + "loss": 13.9713, + "step": 13000 + }, + { + "epoch": 0.96, + "learning_rate": 8.739339634960976e-05, + "loss": 14.0986, + "step": 13050 + }, + { + "epoch": 0.97, + "learning_rate": 8.73431708370584e-05, + "loss": 13.6334, + "step": 13100 + }, + { + "epoch": 0.97, + "learning_rate": 8.729294532450703e-05, + "loss": 13.5201, + "step": 13150 + }, + { + "epoch": 0.97, + "learning_rate": 8.724271981195569e-05, + "loss": 14.3793, + "step": 13200 + }, + { + "epoch": 0.98, + "learning_rate": 8.719249429940432e-05, + "loss": 13.1741, + "step": 13250 + }, + { + "epoch": 0.98, + "learning_rate": 8.714226878685298e-05, + "loss": 11.7782, + "step": 13300 + }, + { + "epoch": 0.98, + "learning_rate": 8.709204327430162e-05, + "loss": 12.2758, + "step": 13350 + }, + { + "epoch": 0.99, + "learning_rate": 8.704181776175027e-05, + "loss": 13.1723, + "step": 13400 + }, + { + "epoch": 0.99, + "learning_rate": 8.699159224919891e-05, + "loss": 14.0858, + "step": 13450 + }, + { + "epoch": 1.0, + "learning_rate": 8.694136673664755e-05, + "loss": 11.2836, + "step": 13500 + }, + { + "epoch": 1.0, + "learning_rate": 8.68911412240962e-05, + "loss": 15.7226, + "step": 13550 + }, + { + "epoch": 1.0, + "learning_rate": 8.684091571154484e-05, + "loss": 15.8889, + "step": 13600 + }, + { + "epoch": 1.01, + "learning_rate": 8.679069019899349e-05, + "loss": 12.2185, + "step": 13650 + }, + { + "epoch": 1.01, + "learning_rate": 8.674046468644213e-05, + "loss": 11.4647, + "step": 13700 + }, + { + "epoch": 1.01, + "learning_rate": 8.669023917389077e-05, + "loss": 13.1238, + "step": 13750 + }, + { + "epoch": 1.02, + "learning_rate": 8.664001366133942e-05, + "loss": 11.909, + "step": 13800 + }, + { + "epoch": 1.02, + "learning_rate": 8.658978814878806e-05, + "loss": 12.5478, + "step": 13850 + }, + { + "epoch": 1.03, + "learning_rate": 8.65395626362367e-05, + "loss": 13.017, + "step": 13900 + }, + { + "epoch": 1.03, 
+ "learning_rate": 8.648933712368535e-05, + "loss": 12.9134, + "step": 13950 + }, + { + "epoch": 1.03, + "learning_rate": 8.6439111611134e-05, + "loss": 13.3485, + "step": 14000 + }, + { + "epoch": 1.04, + "learning_rate": 8.638888609858264e-05, + "loss": 11.4706, + "step": 14050 + }, + { + "epoch": 1.04, + "learning_rate": 8.633866058603128e-05, + "loss": 11.1063, + "step": 14100 + }, + { + "epoch": 1.04, + "learning_rate": 8.628843507347994e-05, + "loss": 12.7408, + "step": 14150 + }, + { + "epoch": 1.05, + "learning_rate": 8.623820956092857e-05, + "loss": 12.0689, + "step": 14200 + }, + { + "epoch": 1.05, + "learning_rate": 8.618798404837721e-05, + "loss": 11.0724, + "step": 14250 + }, + { + "epoch": 1.05, + "learning_rate": 8.613775853582586e-05, + "loss": 12.5685, + "step": 14300 + }, + { + "epoch": 1.06, + "learning_rate": 8.60875330232745e-05, + "loss": 12.7776, + "step": 14350 + }, + { + "epoch": 1.06, + "learning_rate": 8.603730751072315e-05, + "loss": 11.3066, + "step": 14400 + }, + { + "epoch": 1.07, + "learning_rate": 8.598708199817179e-05, + "loss": 13.06, + "step": 14450 + }, + { + "epoch": 1.07, + "learning_rate": 8.593685648562045e-05, + "loss": 15.6523, + "step": 14500 + }, + { + "epoch": 1.07, + "learning_rate": 8.588663097306908e-05, + "loss": 12.019, + "step": 14550 + }, + { + "epoch": 1.08, + "learning_rate": 8.583640546051774e-05, + "loss": 11.0941, + "step": 14600 + }, + { + "epoch": 1.08, + "learning_rate": 8.578617994796637e-05, + "loss": 12.4755, + "step": 14650 + }, + { + "epoch": 1.08, + "learning_rate": 8.573595443541502e-05, + "loss": 13.7012, + "step": 14700 + }, + { + "epoch": 1.09, + "learning_rate": 8.568572892286366e-05, + "loss": 12.2024, + "step": 14750 + }, + { + "epoch": 1.09, + "learning_rate": 8.56355034103123e-05, + "loss": 12.4744, + "step": 14800 + }, + { + "epoch": 1.1, + "learning_rate": 8.558527789776096e-05, + "loss": 12.3234, + "step": 14850 + }, + { + "epoch": 1.1, + "learning_rate": 8.553505238520959e-05, + "loss": 
12.5616, + "step": 14900 + }, + { + "epoch": 1.1, + "learning_rate": 8.548482687265824e-05, + "loss": 11.9559, + "step": 14950 + }, + { + "epoch": 1.11, + "learning_rate": 8.543460136010688e-05, + "loss": 12.0734, + "step": 15000 + }, + { + "epoch": 1.11, + "learning_rate": 8.538437584755553e-05, + "loss": 13.0341, + "step": 15050 + }, + { + "epoch": 1.11, + "learning_rate": 8.533415033500418e-05, + "loss": 12.7406, + "step": 15100 + }, + { + "epoch": 1.12, + "learning_rate": 8.528392482245282e-05, + "loss": 11.7258, + "step": 15150 + }, + { + "epoch": 1.12, + "learning_rate": 8.523369930990147e-05, + "loss": 11.8709, + "step": 15200 + }, + { + "epoch": 1.12, + "learning_rate": 8.518347379735011e-05, + "loss": 11.7021, + "step": 15250 + }, + { + "epoch": 1.13, + "learning_rate": 8.513324828479875e-05, + "loss": 13.2674, + "step": 15300 + }, + { + "epoch": 1.13, + "learning_rate": 8.508302277224738e-05, + "loss": 11.9099, + "step": 15350 + }, + { + "epoch": 1.14, + "learning_rate": 8.503279725969604e-05, + "loss": 11.7841, + "step": 15400 + }, + { + "epoch": 1.14, + "learning_rate": 8.498257174714469e-05, + "loss": 11.9573, + "step": 15450 + }, + { + "epoch": 1.14, + "learning_rate": 8.493234623459333e-05, + "loss": 11.7211, + "step": 15500 + }, + { + "epoch": 1.15, + "learning_rate": 8.488212072204197e-05, + "loss": 12.3513, + "step": 15550 + }, + { + "epoch": 1.15, + "learning_rate": 8.483189520949062e-05, + "loss": 11.0709, + "step": 15600 + }, + { + "epoch": 1.15, + "learning_rate": 8.478166969693926e-05, + "loss": 11.6544, + "step": 15650 + }, + { + "epoch": 1.16, + "learning_rate": 8.47314441843879e-05, + "loss": 11.8285, + "step": 15700 + }, + { + "epoch": 1.16, + "learning_rate": 8.468121867183655e-05, + "loss": 10.4208, + "step": 15750 + }, + { + "epoch": 1.17, + "learning_rate": 8.46309931592852e-05, + "loss": 10.7821, + "step": 15800 + }, + { + "epoch": 1.17, + "learning_rate": 8.458076764673384e-05, + "loss": 13.2724, + "step": 15850 + }, + { + "epoch": 
1.17, + "learning_rate": 8.45305421341825e-05, + "loss": 10.9219, + "step": 15900 + }, + { + "epoch": 1.18, + "learning_rate": 8.448031662163113e-05, + "loss": 12.2532, + "step": 15950 + }, + { + "epoch": 1.18, + "learning_rate": 8.443009110907977e-05, + "loss": 11.0132, + "step": 16000 + }, + { + "epoch": 1.18, + "learning_rate": 8.437986559652841e-05, + "loss": 12.319, + "step": 16050 + }, + { + "epoch": 1.19, + "learning_rate": 8.432964008397706e-05, + "loss": 12.9871, + "step": 16100 + }, + { + "epoch": 1.19, + "learning_rate": 8.42794145714257e-05, + "loss": 12.0625, + "step": 16150 + }, + { + "epoch": 1.19, + "learning_rate": 8.422918905887435e-05, + "loss": 13.4629, + "step": 16200 + }, + { + "epoch": 1.2, + "learning_rate": 8.4178963546323e-05, + "loss": 10.9291, + "step": 16250 + }, + { + "epoch": 1.2, + "learning_rate": 8.412873803377163e-05, + "loss": 13.7719, + "step": 16300 + }, + { + "epoch": 1.21, + "learning_rate": 8.407851252122029e-05, + "loss": 11.3634, + "step": 16350 + }, + { + "epoch": 1.21, + "learning_rate": 8.402828700866892e-05, + "loss": 12.7941, + "step": 16400 + }, + { + "epoch": 1.21, + "learning_rate": 8.397806149611758e-05, + "loss": 11.8863, + "step": 16450 + }, + { + "epoch": 1.22, + "learning_rate": 8.392783598356621e-05, + "loss": 9.5225, + "step": 16500 + }, + { + "epoch": 1.22, + "learning_rate": 8.387761047101485e-05, + "loss": 12.983, + "step": 16550 + }, + { + "epoch": 1.22, + "learning_rate": 8.382738495846351e-05, + "loss": 11.8489, + "step": 16600 + }, + { + "epoch": 1.23, + "learning_rate": 8.377715944591214e-05, + "loss": 11.8122, + "step": 16650 + }, + { + "epoch": 1.23, + "learning_rate": 8.37269339333608e-05, + "loss": 12.3387, + "step": 16700 + }, + { + "epoch": 1.24, + "learning_rate": 8.367670842080943e-05, + "loss": 13.4648, + "step": 16750 + }, + { + "epoch": 1.24, + "learning_rate": 8.362648290825809e-05, + "loss": 10.2301, + "step": 16800 + }, + { + "epoch": 1.24, + "learning_rate": 8.357625739570672e-05, + 
"loss": 11.492, + "step": 16850 + }, + { + "epoch": 1.25, + "learning_rate": 8.352603188315538e-05, + "loss": 12.5997, + "step": 16900 + }, + { + "epoch": 1.25, + "learning_rate": 8.347580637060402e-05, + "loss": 11.5588, + "step": 16950 + }, + { + "epoch": 1.25, + "learning_rate": 8.342558085805266e-05, + "loss": 11.8627, + "step": 17000 + }, + { + "epoch": 1.26, + "learning_rate": 8.337535534550131e-05, + "loss": 13.2469, + "step": 17050 + }, + { + "epoch": 1.26, + "learning_rate": 8.332512983294994e-05, + "loss": 10.4327, + "step": 17100 + }, + { + "epoch": 1.27, + "learning_rate": 8.32749043203986e-05, + "loss": 12.7566, + "step": 17150 + }, + { + "epoch": 1.27, + "learning_rate": 8.322467880784723e-05, + "loss": 11.0729, + "step": 17200 + }, + { + "epoch": 1.27, + "learning_rate": 8.317445329529588e-05, + "loss": 12.3484, + "step": 17250 + }, + { + "epoch": 1.28, + "learning_rate": 8.312422778274453e-05, + "loss": 10.5193, + "step": 17300 + }, + { + "epoch": 1.28, + "learning_rate": 8.307400227019317e-05, + "loss": 12.2369, + "step": 17350 + }, + { + "epoch": 1.28, + "learning_rate": 8.302377675764182e-05, + "loss": 12.2976, + "step": 17400 + }, + { + "epoch": 1.29, + "learning_rate": 8.297355124509046e-05, + "loss": 12.3852, + "step": 17450 + }, + { + "epoch": 1.29, + "learning_rate": 8.29233257325391e-05, + "loss": 11.2137, + "step": 17500 + }, + { + "epoch": 1.29, + "learning_rate": 8.287310021998775e-05, + "loss": 11.609, + "step": 17550 + }, + { + "epoch": 1.3, + "learning_rate": 8.282287470743639e-05, + "loss": 13.3339, + "step": 17600 + }, + { + "epoch": 1.3, + "learning_rate": 8.277264919488504e-05, + "loss": 11.4263, + "step": 17650 + }, + { + "epoch": 1.31, + "learning_rate": 8.272242368233368e-05, + "loss": 12.6949, + "step": 17700 + }, + { + "epoch": 1.31, + "learning_rate": 8.267219816978233e-05, + "loss": 11.4767, + "step": 17750 + }, + { + "epoch": 1.31, + "learning_rate": 8.262197265723097e-05, + "loss": 12.2225, + "step": 17800 + }, + { + 
"epoch": 1.32, + "learning_rate": 8.257174714467961e-05, + "loss": 11.0755, + "step": 17850 + }, + { + "epoch": 1.32, + "learning_rate": 8.252152163212826e-05, + "loss": 11.9677, + "step": 17900 + }, + { + "epoch": 1.32, + "learning_rate": 8.24712961195769e-05, + "loss": 11.098, + "step": 17950 + }, + { + "epoch": 1.33, + "learning_rate": 8.242107060702555e-05, + "loss": 11.1102, + "step": 18000 + }, + { + "epoch": 1.33, + "learning_rate": 8.237084509447419e-05, + "loss": 11.4985, + "step": 18050 + }, + { + "epoch": 1.34, + "learning_rate": 8.232061958192285e-05, + "loss": 11.7356, + "step": 18100 + }, + { + "epoch": 1.34, + "learning_rate": 8.227039406937148e-05, + "loss": 11.3336, + "step": 18150 + }, + { + "epoch": 1.34, + "learning_rate": 8.222016855682012e-05, + "loss": 11.0448, + "step": 18200 + }, + { + "epoch": 1.35, + "learning_rate": 8.216994304426877e-05, + "loss": 10.9986, + "step": 18250 + }, + { + "epoch": 1.35, + "learning_rate": 8.211971753171741e-05, + "loss": 10.768, + "step": 18300 + }, + { + "epoch": 1.35, + "learning_rate": 8.206949201916607e-05, + "loss": 11.6844, + "step": 18350 + }, + { + "epoch": 1.36, + "learning_rate": 8.20192665066147e-05, + "loss": 11.5615, + "step": 18400 + }, + { + "epoch": 1.36, + "learning_rate": 8.196904099406336e-05, + "loss": 11.4019, + "step": 18450 + }, + { + "epoch": 1.36, + "learning_rate": 8.191881548151199e-05, + "loss": 12.1784, + "step": 18500 + }, + { + "epoch": 1.37, + "learning_rate": 8.186858996896064e-05, + "loss": 12.4565, + "step": 18550 + }, + { + "epoch": 1.37, + "learning_rate": 8.181836445640927e-05, + "loss": 11.0557, + "step": 18600 + }, + { + "epoch": 1.38, + "learning_rate": 8.176813894385793e-05, + "loss": 12.1892, + "step": 18650 + }, + { + "epoch": 1.38, + "learning_rate": 8.171791343130658e-05, + "loss": 12.0531, + "step": 18700 + }, + { + "epoch": 1.38, + "learning_rate": 8.166768791875522e-05, + "loss": 10.1791, + "step": 18750 + }, + { + "epoch": 1.39, + "learning_rate": 
8.161746240620386e-05, + "loss": 11.2501, + "step": 18800 + }, + { + "epoch": 1.39, + "learning_rate": 8.15672368936525e-05, + "loss": 9.92, + "step": 18850 + }, + { + "epoch": 1.39, + "learning_rate": 8.151701138110115e-05, + "loss": 10.0603, + "step": 18900 + }, + { + "epoch": 1.4, + "learning_rate": 8.146678586854978e-05, + "loss": 10.9477, + "step": 18950 + }, + { + "epoch": 1.4, + "learning_rate": 8.141656035599844e-05, + "loss": 9.7579, + "step": 19000 + }, + { + "epoch": 1.41, + "learning_rate": 8.136633484344708e-05, + "loss": 11.243, + "step": 19050 + }, + { + "epoch": 1.41, + "learning_rate": 8.131610933089573e-05, + "loss": 11.0069, + "step": 19100 + }, + { + "epoch": 1.41, + "learning_rate": 8.126588381834437e-05, + "loss": 9.7387, + "step": 19150 + }, + { + "epoch": 1.42, + "learning_rate": 8.121565830579302e-05, + "loss": 11.4624, + "step": 19200 + }, + { + "epoch": 1.42, + "learning_rate": 8.116543279324166e-05, + "loss": 12.1299, + "step": 19250 + }, + { + "epoch": 1.42, + "learning_rate": 8.11152072806903e-05, + "loss": 12.2796, + "step": 19300 + }, + { + "epoch": 1.43, + "learning_rate": 8.106498176813895e-05, + "loss": 10.3295, + "step": 19350 + }, + { + "epoch": 1.43, + "learning_rate": 8.101475625558759e-05, + "loss": 10.0709, + "step": 19400 + }, + { + "epoch": 1.43, + "learning_rate": 8.096453074303624e-05, + "loss": 11.0725, + "step": 19450 + }, + { + "epoch": 1.44, + "learning_rate": 8.091430523048488e-05, + "loss": 10.7882, + "step": 19500 + }, + { + "epoch": 1.44, + "learning_rate": 8.086407971793352e-05, + "loss": 11.4124, + "step": 19550 + }, + { + "epoch": 1.45, + "learning_rate": 8.081385420538217e-05, + "loss": 10.4941, + "step": 19600 + }, + { + "epoch": 1.45, + "learning_rate": 8.076362869283081e-05, + "loss": 11.8687, + "step": 19650 + }, + { + "epoch": 1.45, + "learning_rate": 8.071340318027946e-05, + "loss": 11.3221, + "step": 19700 + }, + { + "epoch": 1.46, + "learning_rate": 8.06631776677281e-05, + "loss": 10.2167, + "step": 
19750 + }, + { + "epoch": 1.46, + "learning_rate": 8.061295215517675e-05, + "loss": 10.5425, + "step": 19800 + }, + { + "epoch": 1.46, + "learning_rate": 8.05627266426254e-05, + "loss": 11.2982, + "step": 19850 + }, + { + "epoch": 1.47, + "learning_rate": 8.051250113007403e-05, + "loss": 12.0685, + "step": 19900 + }, + { + "epoch": 1.47, + "learning_rate": 8.046227561752268e-05, + "loss": 10.6613, + "step": 19950 + }, + { + "epoch": 1.48, + "learning_rate": 8.041205010497132e-05, + "loss": 10.8245, + "step": 20000 + }, + { + "epoch": 1.48, + "eval_loss": 10.409339904785156, + "eval_runtime": 890.9956, + "eval_samples_per_second": 14.7, + "eval_steps_per_second": 3.676, + "eval_wer": 0.2624627273109067, + "step": 20000 + }, + { + "epoch": 1.48, + "learning_rate": 8.036182459241997e-05, + "loss": 10.671, + "step": 20050 + }, + { + "epoch": 1.48, + "learning_rate": 8.031159907986861e-05, + "loss": 11.0263, + "step": 20100 + }, + { + "epoch": 1.49, + "learning_rate": 8.026137356731725e-05, + "loss": 11.0571, + "step": 20150 + }, + { + "epoch": 1.49, + "learning_rate": 8.021114805476591e-05, + "loss": 13.0778, + "step": 20200 + }, + { + "epoch": 1.49, + "learning_rate": 8.016092254221454e-05, + "loss": 11.0495, + "step": 20250 + }, + { + "epoch": 1.5, + "learning_rate": 8.01106970296632e-05, + "loss": 10.6039, + "step": 20300 + }, + { + "epoch": 1.5, + "learning_rate": 8.006047151711183e-05, + "loss": 11.4221, + "step": 20350 + }, + { + "epoch": 1.5, + "learning_rate": 8.001024600456049e-05, + "loss": 10.7975, + "step": 20400 + }, + { + "epoch": 1.51, + "learning_rate": 7.996002049200912e-05, + "loss": 10.1123, + "step": 20450 + }, + { + "epoch": 1.51, + "learning_rate": 7.990979497945776e-05, + "loss": 10.2241, + "step": 20500 + }, + { + "epoch": 1.52, + "learning_rate": 7.985956946690642e-05, + "loss": 10.0191, + "step": 20550 + }, + { + "epoch": 1.52, + "learning_rate": 7.980934395435505e-05, + "loss": 10.649, + "step": 20600 + }, + { + "epoch": 1.52, + 
"learning_rate": 7.975911844180371e-05, + "loss": 9.6091, + "step": 20650 + }, + { + "epoch": 1.53, + "learning_rate": 7.970889292925234e-05, + "loss": 9.9386, + "step": 20700 + }, + { + "epoch": 1.53, + "learning_rate": 7.9658667416701e-05, + "loss": 11.2646, + "step": 20750 + }, + { + "epoch": 1.53, + "learning_rate": 7.960844190414964e-05, + "loss": 10.0181, + "step": 20800 + }, + { + "epoch": 1.54, + "learning_rate": 7.955821639159828e-05, + "loss": 11.9437, + "step": 20850 + }, + { + "epoch": 1.54, + "learning_rate": 7.950799087904693e-05, + "loss": 10.9254, + "step": 20900 + }, + { + "epoch": 1.55, + "learning_rate": 7.945776536649557e-05, + "loss": 11.7954, + "step": 20950 + }, + { + "epoch": 1.55, + "learning_rate": 7.940753985394422e-05, + "loss": 9.6569, + "step": 21000 + }, + { + "epoch": 1.55, + "learning_rate": 7.935731434139286e-05, + "loss": 10.6546, + "step": 21050 + }, + { + "epoch": 1.56, + "learning_rate": 7.93070888288415e-05, + "loss": 10.2795, + "step": 21100 + }, + { + "epoch": 1.56, + "learning_rate": 7.925686331629015e-05, + "loss": 10.4595, + "step": 21150 + }, + { + "epoch": 1.56, + "learning_rate": 7.920663780373879e-05, + "loss": 9.2921, + "step": 21200 + }, + { + "epoch": 1.57, + "learning_rate": 7.915641229118744e-05, + "loss": 10.1245, + "step": 21250 + }, + { + "epoch": 1.57, + "learning_rate": 7.910618677863608e-05, + "loss": 11.2896, + "step": 21300 + }, + { + "epoch": 1.57, + "learning_rate": 7.905596126608472e-05, + "loss": 11.3328, + "step": 21350 + }, + { + "epoch": 1.58, + "learning_rate": 7.900573575353337e-05, + "loss": 10.0718, + "step": 21400 + }, + { + "epoch": 1.58, + "learning_rate": 7.895551024098201e-05, + "loss": 10.8954, + "step": 21450 + }, + { + "epoch": 1.59, + "learning_rate": 7.890528472843066e-05, + "loss": 10.2921, + "step": 21500 + }, + { + "epoch": 1.59, + "learning_rate": 7.88550592158793e-05, + "loss": 9.4609, + "step": 21550 + }, + { + "epoch": 1.59, + "learning_rate": 7.880483370332796e-05, + "loss": 
11.4751, + "step": 21600 + }, + { + "epoch": 1.6, + "learning_rate": 7.875460819077659e-05, + "loss": 10.1189, + "step": 21650 + }, + { + "epoch": 1.6, + "learning_rate": 7.870438267822523e-05, + "loss": 11.6478, + "step": 21700 + }, + { + "epoch": 1.6, + "learning_rate": 7.865415716567388e-05, + "loss": 11.2943, + "step": 21750 + }, + { + "epoch": 1.61, + "learning_rate": 7.860393165312252e-05, + "loss": 11.5788, + "step": 21800 + }, + { + "epoch": 1.61, + "learning_rate": 7.855370614057116e-05, + "loss": 10.638, + "step": 21850 + }, + { + "epoch": 1.62, + "learning_rate": 7.850348062801981e-05, + "loss": 9.2895, + "step": 21900 + }, + { + "epoch": 1.62, + "learning_rate": 7.845325511546847e-05, + "loss": 11.4984, + "step": 21950 + }, + { + "epoch": 1.62, + "learning_rate": 7.84030296029171e-05, + "loss": 10.3685, + "step": 22000 + }, + { + "epoch": 1.63, + "learning_rate": 7.835280409036575e-05, + "loss": 10.0115, + "step": 22050 + }, + { + "epoch": 1.63, + "learning_rate": 7.830257857781439e-05, + "loss": 10.2941, + "step": 22100 + }, + { + "epoch": 1.63, + "learning_rate": 7.825235306526304e-05, + "loss": 10.8751, + "step": 22150 + }, + { + "epoch": 1.64, + "learning_rate": 7.820212755271167e-05, + "loss": 10.7477, + "step": 22200 + }, + { + "epoch": 1.64, + "learning_rate": 7.815190204016032e-05, + "loss": 12.2573, + "step": 22250 + }, + { + "epoch": 1.64, + "learning_rate": 7.810167652760897e-05, + "loss": 10.1055, + "step": 22300 + }, + { + "epoch": 1.65, + "learning_rate": 7.80514510150576e-05, + "loss": 10.7913, + "step": 22350 + }, + { + "epoch": 1.65, + "learning_rate": 7.800122550250626e-05, + "loss": 9.4701, + "step": 22400 + }, + { + "epoch": 1.66, + "learning_rate": 7.79509999899549e-05, + "loss": 9.9434, + "step": 22450 + }, + { + "epoch": 1.66, + "learning_rate": 7.790077447740355e-05, + "loss": 10.9016, + "step": 22500 + }, + { + "epoch": 1.66, + "learning_rate": 7.785054896485218e-05, + "loss": 10.1733, + "step": 22550 + }, + { + "epoch": 1.67, + 
"learning_rate": 7.780032345230084e-05, + "loss": 11.0693, + "step": 22600 + }, + { + "epoch": 1.67, + "learning_rate": 7.775009793974948e-05, + "loss": 10.4538, + "step": 22650 + }, + { + "epoch": 1.67, + "learning_rate": 7.769987242719813e-05, + "loss": 10.5127, + "step": 22700 + }, + { + "epoch": 1.68, + "learning_rate": 7.764964691464677e-05, + "loss": 10.1074, + "step": 22750 + }, + { + "epoch": 1.68, + "learning_rate": 7.75994214020954e-05, + "loss": 11.2803, + "step": 22800 + }, + { + "epoch": 1.69, + "learning_rate": 7.754919588954406e-05, + "loss": 10.9954, + "step": 22850 + }, + { + "epoch": 1.69, + "learning_rate": 7.749897037699269e-05, + "loss": 10.1006, + "step": 22900 + }, + { + "epoch": 1.69, + "learning_rate": 7.744874486444135e-05, + "loss": 10.9978, + "step": 22950 + }, + { + "epoch": 1.7, + "learning_rate": 7.739851935188999e-05, + "loss": 10.5885, + "step": 23000 + }, + { + "epoch": 1.7, + "learning_rate": 7.734829383933864e-05, + "loss": 10.5676, + "step": 23050 + }, + { + "epoch": 1.7, + "learning_rate": 7.729806832678728e-05, + "loss": 11.3204, + "step": 23100 + }, + { + "epoch": 1.71, + "learning_rate": 7.724784281423592e-05, + "loss": 10.5388, + "step": 23150 + }, + { + "epoch": 1.71, + "learning_rate": 7.719761730168457e-05, + "loss": 10.7915, + "step": 23200 + }, + { + "epoch": 1.71, + "learning_rate": 7.714739178913321e-05, + "loss": 11.9486, + "step": 23250 + }, + { + "epoch": 1.72, + "learning_rate": 7.709716627658186e-05, + "loss": 11.6693, + "step": 23300 + }, + { + "epoch": 1.72, + "learning_rate": 7.70469407640305e-05, + "loss": 9.2664, + "step": 23350 + }, + { + "epoch": 1.73, + "learning_rate": 7.699671525147914e-05, + "loss": 12.1429, + "step": 23400 + }, + { + "epoch": 1.73, + "learning_rate": 7.694648973892779e-05, + "loss": 10.1155, + "step": 23450 + }, + { + "epoch": 1.73, + "learning_rate": 7.689626422637643e-05, + "loss": 10.1562, + "step": 23500 + }, + { + "epoch": 1.74, + "learning_rate": 7.684603871382508e-05, + 
"loss": 11.3484, + "step": 23550 + }, + { + "epoch": 1.74, + "learning_rate": 7.679581320127372e-05, + "loss": 9.5912, + "step": 23600 + }, + { + "epoch": 1.74, + "learning_rate": 7.674558768872236e-05, + "loss": 11.1067, + "step": 23650 + }, + { + "epoch": 1.75, + "learning_rate": 7.669536217617101e-05, + "loss": 11.7182, + "step": 23700 + }, + { + "epoch": 1.75, + "learning_rate": 7.664513666361965e-05, + "loss": 10.1444, + "step": 23750 + }, + { + "epoch": 1.76, + "learning_rate": 7.659491115106831e-05, + "loss": 11.2671, + "step": 23800 + }, + { + "epoch": 1.76, + "learning_rate": 7.654468563851694e-05, + "loss": 10.9027, + "step": 23850 + }, + { + "epoch": 1.76, + "learning_rate": 7.64944601259656e-05, + "loss": 10.9078, + "step": 23900 + }, + { + "epoch": 1.77, + "learning_rate": 7.644423461341423e-05, + "loss": 10.5441, + "step": 23950 + }, + { + "epoch": 1.77, + "learning_rate": 7.639400910086287e-05, + "loss": 9.8617, + "step": 24000 + }, + { + "epoch": 1.77, + "learning_rate": 7.634378358831153e-05, + "loss": 10.8022, + "step": 24050 + }, + { + "epoch": 1.78, + "learning_rate": 7.629355807576016e-05, + "loss": 10.3082, + "step": 24100 + }, + { + "epoch": 1.78, + "learning_rate": 7.624333256320882e-05, + "loss": 9.8398, + "step": 24150 + }, + { + "epoch": 1.79, + "learning_rate": 7.619310705065745e-05, + "loss": 10.3631, + "step": 24200 + }, + { + "epoch": 1.79, + "learning_rate": 7.61428815381061e-05, + "loss": 10.6078, + "step": 24250 + }, + { + "epoch": 1.79, + "learning_rate": 7.609265602555474e-05, + "loss": 11.366, + "step": 24300 + }, + { + "epoch": 1.8, + "learning_rate": 7.60424305130034e-05, + "loss": 12.1154, + "step": 24350 + }, + { + "epoch": 1.8, + "learning_rate": 7.599220500045204e-05, + "loss": 11.3429, + "step": 24400 + }, + { + "epoch": 1.8, + "learning_rate": 7.594197948790068e-05, + "loss": 9.135, + "step": 24450 + }, + { + "epoch": 1.81, + "learning_rate": 7.589175397534933e-05, + "loss": 10.3796, + "step": 24500 + }, + { + "epoch": 
1.81, + "learning_rate": 7.584152846279796e-05, + "loss": 10.6452, + "step": 24550 + }, + { + "epoch": 1.81, + "learning_rate": 7.579130295024661e-05, + "loss": 9.6237, + "step": 24600 + }, + { + "epoch": 1.82, + "learning_rate": 7.574107743769525e-05, + "loss": 10.7158, + "step": 24650 + }, + { + "epoch": 1.82, + "learning_rate": 7.56908519251439e-05, + "loss": 9.8296, + "step": 24700 + }, + { + "epoch": 1.83, + "learning_rate": 7.564062641259255e-05, + "loss": 10.1654, + "step": 24750 + }, + { + "epoch": 1.83, + "learning_rate": 7.559040090004119e-05, + "loss": 10.395, + "step": 24800 + }, + { + "epoch": 1.83, + "learning_rate": 7.554017538748984e-05, + "loss": 10.3067, + "step": 24850 + }, + { + "epoch": 1.84, + "learning_rate": 7.548994987493848e-05, + "loss": 10.7243, + "step": 24900 + }, + { + "epoch": 1.84, + "learning_rate": 7.543972436238712e-05, + "loss": 10.4022, + "step": 24950 + }, + { + "epoch": 1.84, + "learning_rate": 7.538949884983577e-05, + "loss": 10.5045, + "step": 25000 + }, + { + "epoch": 1.85, + "learning_rate": 7.533927333728441e-05, + "loss": 11.2205, + "step": 25050 + }, + { + "epoch": 1.85, + "learning_rate": 7.528904782473306e-05, + "loss": 10.5375, + "step": 25100 + }, + { + "epoch": 1.86, + "learning_rate": 7.52388223121817e-05, + "loss": 10.4876, + "step": 25150 + }, + { + "epoch": 1.86, + "learning_rate": 7.518859679963034e-05, + "loss": 9.2096, + "step": 25200 + }, + { + "epoch": 1.86, + "learning_rate": 7.513837128707899e-05, + "loss": 10.0442, + "step": 25250 + }, + { + "epoch": 1.87, + "learning_rate": 7.508814577452763e-05, + "loss": 9.8174, + "step": 25300 + }, + { + "epoch": 1.87, + "learning_rate": 7.503792026197628e-05, + "loss": 10.8789, + "step": 25350 + }, + { + "epoch": 1.87, + "learning_rate": 7.498769474942492e-05, + "loss": 9.8789, + "step": 25400 + }, + { + "epoch": 1.88, + "learning_rate": 7.493746923687356e-05, + "loss": 11.1431, + "step": 25450 + }, + { + "epoch": 1.88, + "learning_rate": 7.488724372432221e-05, + 
"loss": 10.4659, + "step": 25500 + }, + { + "epoch": 1.88, + "learning_rate": 7.483701821177087e-05, + "loss": 10.7342, + "step": 25550 + }, + { + "epoch": 1.89, + "learning_rate": 7.47867926992195e-05, + "loss": 10.7841, + "step": 25600 + }, + { + "epoch": 1.89, + "learning_rate": 7.473656718666814e-05, + "loss": 9.6162, + "step": 25650 + }, + { + "epoch": 1.9, + "learning_rate": 7.468634167411678e-05, + "loss": 10.3568, + "step": 25700 + }, + { + "epoch": 1.9, + "learning_rate": 7.463611616156543e-05, + "loss": 9.6701, + "step": 25750 + }, + { + "epoch": 1.9, + "learning_rate": 7.458589064901407e-05, + "loss": 9.4003, + "step": 25800 + }, + { + "epoch": 1.91, + "learning_rate": 7.453566513646272e-05, + "loss": 9.6621, + "step": 25850 + }, + { + "epoch": 1.91, + "learning_rate": 7.448543962391137e-05, + "loss": 10.1086, + "step": 25900 + }, + { + "epoch": 1.91, + "learning_rate": 7.443521411136e-05, + "loss": 11.5655, + "step": 25950 + }, + { + "epoch": 1.92, + "learning_rate": 7.438498859880866e-05, + "loss": 8.9418, + "step": 26000 + }, + { + "epoch": 1.92, + "learning_rate": 7.433476308625729e-05, + "loss": 9.2415, + "step": 26050 + }, + { + "epoch": 1.93, + "learning_rate": 7.428453757370595e-05, + "loss": 9.4192, + "step": 26100 + }, + { + "epoch": 1.93, + "learning_rate": 7.423431206115458e-05, + "loss": 9.1755, + "step": 26150 + }, + { + "epoch": 1.93, + "learning_rate": 7.418408654860322e-05, + "loss": 9.6327, + "step": 26200 + }, + { + "epoch": 1.94, + "learning_rate": 7.413386103605188e-05, + "loss": 10.3333, + "step": 26250 + }, + { + "epoch": 1.94, + "learning_rate": 7.408363552350051e-05, + "loss": 10.298, + "step": 26300 + }, + { + "epoch": 1.94, + "learning_rate": 7.403341001094917e-05, + "loss": 10.7038, + "step": 26350 + }, + { + "epoch": 1.95, + "learning_rate": 7.39831844983978e-05, + "loss": 10.5099, + "step": 26400 + }, + { + "epoch": 1.95, + "learning_rate": 7.393295898584646e-05, + "loss": 9.8063, + "step": 26450 + }, + { + "epoch": 1.95, + 
"learning_rate": 7.38827334732951e-05, + "loss": 9.5784, + "step": 26500 + }, + { + "epoch": 1.96, + "learning_rate": 7.383250796074375e-05, + "loss": 10.1958, + "step": 26550 + }, + { + "epoch": 1.96, + "learning_rate": 7.378228244819239e-05, + "loss": 9.6869, + "step": 26600 + }, + { + "epoch": 1.97, + "learning_rate": 7.373205693564103e-05, + "loss": 10.3761, + "step": 26650 + }, + { + "epoch": 1.97, + "learning_rate": 7.368183142308968e-05, + "loss": 11.6806, + "step": 26700 + }, + { + "epoch": 1.97, + "learning_rate": 7.363160591053832e-05, + "loss": 10.3183, + "step": 26750 + }, + { + "epoch": 1.98, + "learning_rate": 7.358138039798697e-05, + "loss": 11.041, + "step": 26800 + }, + { + "epoch": 1.98, + "learning_rate": 7.353115488543561e-05, + "loss": 9.6997, + "step": 26850 + }, + { + "epoch": 1.98, + "learning_rate": 7.348092937288425e-05, + "loss": 9.6029, + "step": 26900 + }, + { + "epoch": 1.99, + "learning_rate": 7.34307038603329e-05, + "loss": 10.3322, + "step": 26950 + }, + { + "epoch": 1.99, + "learning_rate": 7.338047834778154e-05, + "loss": 9.9009, + "step": 27000 + }, + { + "epoch": 2.0, + "learning_rate": 7.333025283523019e-05, + "loss": 10.4815, + "step": 27050 + }, + { + "epoch": 2.0, + "learning_rate": 7.328002732267883e-05, + "loss": 11.7049, + "step": 27100 + }, + { + "epoch": 2.0, + "learning_rate": 7.322980181012748e-05, + "loss": 10.7831, + "step": 27150 + }, + { + "epoch": 2.01, + "learning_rate": 7.317957629757612e-05, + "loss": 8.735, + "step": 27200 + }, + { + "epoch": 2.01, + "learning_rate": 7.312935078502476e-05, + "loss": 9.4056, + "step": 27250 + }, + { + "epoch": 2.01, + "learning_rate": 7.307912527247342e-05, + "loss": 10.7689, + "step": 27300 + }, + { + "epoch": 2.02, + "learning_rate": 7.302889975992205e-05, + "loss": 9.5266, + "step": 27350 + }, + { + "epoch": 2.02, + "learning_rate": 7.29786742473707e-05, + "loss": 8.2467, + "step": 27400 + }, + { + "epoch": 2.02, + "learning_rate": 7.292844873481934e-05, + "loss": 8.6572, + 
"step": 27450 + }, + { + "epoch": 2.03, + "learning_rate": 7.287822322226798e-05, + "loss": 8.4693, + "step": 27500 + }, + { + "epoch": 2.03, + "learning_rate": 7.282799770971663e-05, + "loss": 10.4867, + "step": 27550 + }, + { + "epoch": 2.04, + "learning_rate": 7.277777219716527e-05, + "loss": 8.9364, + "step": 27600 + }, + { + "epoch": 2.04, + "learning_rate": 7.272754668461393e-05, + "loss": 10.0109, + "step": 27650 + }, + { + "epoch": 2.04, + "learning_rate": 7.267732117206256e-05, + "loss": 9.5535, + "step": 27700 + }, + { + "epoch": 2.05, + "learning_rate": 7.262709565951122e-05, + "loss": 9.3029, + "step": 27750 + }, + { + "epoch": 2.05, + "learning_rate": 7.257687014695985e-05, + "loss": 9.854, + "step": 27800 + }, + { + "epoch": 2.05, + "learning_rate": 7.25266446344085e-05, + "loss": 9.5327, + "step": 27850 + }, + { + "epoch": 2.06, + "learning_rate": 7.247641912185714e-05, + "loss": 9.8255, + "step": 27900 + }, + { + "epoch": 2.06, + "learning_rate": 7.242619360930578e-05, + "loss": 9.9737, + "step": 27950 + }, + { + "epoch": 2.07, + "learning_rate": 7.237596809675444e-05, + "loss": 9.0471, + "step": 28000 + }, + { + "epoch": 2.07, + "learning_rate": 7.232574258420307e-05, + "loss": 10.0566, + "step": 28050 + }, + { + "epoch": 2.07, + "learning_rate": 7.227551707165173e-05, + "loss": 9.4781, + "step": 28100 + }, + { + "epoch": 2.08, + "learning_rate": 7.222529155910036e-05, + "loss": 8.7599, + "step": 28150 + }, + { + "epoch": 2.08, + "learning_rate": 7.217506604654901e-05, + "loss": 8.7605, + "step": 28200 + }, + { + "epoch": 2.08, + "learning_rate": 7.212484053399764e-05, + "loss": 10.061, + "step": 28250 + }, + { + "epoch": 2.09, + "learning_rate": 7.20746150214463e-05, + "loss": 9.6124, + "step": 28300 + }, + { + "epoch": 2.09, + "learning_rate": 7.202438950889495e-05, + "loss": 10.4776, + "step": 28350 + }, + { + "epoch": 2.09, + "learning_rate": 7.197416399634359e-05, + "loss": 9.2169, + "step": 28400 + }, + { + "epoch": 2.1, + "learning_rate": 
7.192393848379223e-05, + "loss": 9.3654, + "step": 28450 + }, + { + "epoch": 2.1, + "learning_rate": 7.187371297124086e-05, + "loss": 9.4445, + "step": 28500 + }, + { + "epoch": 2.11, + "learning_rate": 7.182348745868952e-05, + "loss": 8.3614, + "step": 28550 + }, + { + "epoch": 2.11, + "learning_rate": 7.177326194613815e-05, + "loss": 9.1661, + "step": 28600 + }, + { + "epoch": 2.11, + "learning_rate": 7.172303643358681e-05, + "loss": 9.4976, + "step": 28650 + }, + { + "epoch": 2.12, + "learning_rate": 7.167281092103545e-05, + "loss": 9.125, + "step": 28700 + }, + { + "epoch": 2.12, + "learning_rate": 7.16225854084841e-05, + "loss": 8.9051, + "step": 28750 + }, + { + "epoch": 2.12, + "learning_rate": 7.157235989593274e-05, + "loss": 8.9753, + "step": 28800 + }, + { + "epoch": 2.13, + "learning_rate": 7.152213438338139e-05, + "loss": 9.133, + "step": 28850 + }, + { + "epoch": 2.13, + "learning_rate": 7.147190887083003e-05, + "loss": 9.9677, + "step": 28900 + }, + { + "epoch": 2.14, + "learning_rate": 7.142168335827867e-05, + "loss": 8.725, + "step": 28950 + }, + { + "epoch": 2.14, + "learning_rate": 7.137145784572732e-05, + "loss": 8.831, + "step": 29000 + }, + { + "epoch": 2.14, + "learning_rate": 7.132123233317596e-05, + "loss": 7.8207, + "step": 29050 + }, + { + "epoch": 2.15, + "learning_rate": 7.127100682062461e-05, + "loss": 9.3707, + "step": 29100 + }, + { + "epoch": 2.15, + "learning_rate": 7.122078130807325e-05, + "loss": 10.4259, + "step": 29150 + }, + { + "epoch": 2.15, + "learning_rate": 7.11705557955219e-05, + "loss": 8.1836, + "step": 29200 + }, + { + "epoch": 2.16, + "learning_rate": 7.112033028297054e-05, + "loss": 9.0874, + "step": 29250 + }, + { + "epoch": 2.16, + "learning_rate": 7.107010477041918e-05, + "loss": 9.5957, + "step": 29300 + }, + { + "epoch": 2.16, + "learning_rate": 7.101987925786783e-05, + "loss": 8.7545, + "step": 29350 + }, + { + "epoch": 2.17, + "learning_rate": 7.096965374531647e-05, + "loss": 8.4478, + "step": 29400 + }, + { + 
"epoch": 2.17, + "learning_rate": 7.091942823276512e-05, + "loss": 8.601, + "step": 29450 + }, + { + "epoch": 2.18, + "learning_rate": 7.086920272021377e-05, + "loss": 9.6172, + "step": 29500 + }, + { + "epoch": 2.18, + "learning_rate": 7.08189772076624e-05, + "loss": 9.0805, + "step": 29550 + }, + { + "epoch": 2.18, + "learning_rate": 7.076875169511106e-05, + "loss": 9.6039, + "step": 29600 + }, + { + "epoch": 2.19, + "learning_rate": 7.071852618255969e-05, + "loss": 9.3622, + "step": 29650 + }, + { + "epoch": 2.19, + "learning_rate": 7.066830067000834e-05, + "loss": 8.8765, + "step": 29700 + }, + { + "epoch": 2.19, + "learning_rate": 7.061807515745699e-05, + "loss": 8.992, + "step": 29750 + }, + { + "epoch": 2.2, + "learning_rate": 7.056784964490562e-05, + "loss": 10.3564, + "step": 29800 + }, + { + "epoch": 2.2, + "learning_rate": 7.051762413235428e-05, + "loss": 8.8092, + "step": 29850 + }, + { + "epoch": 2.21, + "learning_rate": 7.046739861980291e-05, + "loss": 9.8373, + "step": 29900 + }, + { + "epoch": 2.21, + "learning_rate": 7.041717310725157e-05, + "loss": 8.004, + "step": 29950 + }, + { + "epoch": 2.21, + "learning_rate": 7.03669475947002e-05, + "loss": 9.4461, + "step": 30000 + }, + { + "epoch": 2.22, + "learning_rate": 7.031672208214886e-05, + "loss": 8.4964, + "step": 30050 + }, + { + "epoch": 2.22, + "learning_rate": 7.02664965695975e-05, + "loss": 10.3181, + "step": 30100 + }, + { + "epoch": 2.22, + "learning_rate": 7.021627105704615e-05, + "loss": 8.6637, + "step": 30150 + }, + { + "epoch": 2.23, + "learning_rate": 7.016604554449479e-05, + "loss": 10.1703, + "step": 30200 + }, + { + "epoch": 2.23, + "learning_rate": 7.011582003194342e-05, + "loss": 9.2846, + "step": 30250 + }, + { + "epoch": 2.24, + "learning_rate": 7.006559451939208e-05, + "loss": 8.5913, + "step": 30300 + }, + { + "epoch": 2.24, + "learning_rate": 7.001536900684071e-05, + "loss": 9.1308, + "step": 30350 + }, + { + "epoch": 2.24, + "learning_rate": 6.996514349428937e-05, + "loss": 
11.2229, + "step": 30400 + }, + { + "epoch": 2.25, + "learning_rate": 6.991491798173801e-05, + "loss": 8.5923, + "step": 30450 + }, + { + "epoch": 2.25, + "learning_rate": 6.986469246918665e-05, + "loss": 9.9826, + "step": 30500 + }, + { + "epoch": 2.25, + "learning_rate": 6.98144669566353e-05, + "loss": 8.4765, + "step": 30550 + }, + { + "epoch": 2.26, + "learning_rate": 6.976424144408394e-05, + "loss": 8.7624, + "step": 30600 + }, + { + "epoch": 2.26, + "learning_rate": 6.971401593153259e-05, + "loss": 9.238, + "step": 30650 + }, + { + "epoch": 2.26, + "learning_rate": 6.966379041898123e-05, + "loss": 8.4976, + "step": 30700 + }, + { + "epoch": 2.27, + "learning_rate": 6.961356490642987e-05, + "loss": 9.1886, + "step": 30750 + }, + { + "epoch": 2.27, + "learning_rate": 6.956333939387852e-05, + "loss": 8.4443, + "step": 30800 + }, + { + "epoch": 2.28, + "learning_rate": 6.951311388132716e-05, + "loss": 8.3648, + "step": 30850 + }, + { + "epoch": 2.28, + "learning_rate": 6.94628883687758e-05, + "loss": 9.2509, + "step": 30900 + }, + { + "epoch": 2.28, + "learning_rate": 6.941266285622445e-05, + "loss": 8.3765, + "step": 30950 + }, + { + "epoch": 2.29, + "learning_rate": 6.93624373436731e-05, + "loss": 9.6616, + "step": 31000 + }, + { + "epoch": 2.29, + "learning_rate": 6.931221183112174e-05, + "loss": 9.658, + "step": 31050 + }, + { + "epoch": 2.29, + "learning_rate": 6.926198631857038e-05, + "loss": 8.7527, + "step": 31100 + }, + { + "epoch": 2.3, + "learning_rate": 6.921176080601903e-05, + "loss": 8.7148, + "step": 31150 + }, + { + "epoch": 2.3, + "learning_rate": 6.916153529346767e-05, + "loss": 8.5962, + "step": 31200 + }, + { + "epoch": 2.31, + "learning_rate": 6.911130978091633e-05, + "loss": 9.2625, + "step": 31250 + }, + { + "epoch": 2.31, + "learning_rate": 6.906108426836496e-05, + "loss": 8.8352, + "step": 31300 + }, + { + "epoch": 2.31, + "learning_rate": 6.90108587558136e-05, + "loss": 7.3991, + "step": 31350 + }, + { + "epoch": 2.32, + "learning_rate": 
6.896063324326225e-05, + "loss": 9.9391, + "step": 31400 + }, + { + "epoch": 2.32, + "learning_rate": 6.891040773071089e-05, + "loss": 8.9575, + "step": 31450 + }, + { + "epoch": 2.32, + "learning_rate": 6.886018221815954e-05, + "loss": 7.9103, + "step": 31500 + }, + { + "epoch": 2.33, + "learning_rate": 6.880995670560818e-05, + "loss": 8.5276, + "step": 31550 + }, + { + "epoch": 2.33, + "learning_rate": 6.875973119305684e-05, + "loss": 8.5427, + "step": 31600 + }, + { + "epoch": 2.33, + "learning_rate": 6.870950568050547e-05, + "loss": 8.4672, + "step": 31650 + }, + { + "epoch": 2.34, + "learning_rate": 6.865928016795412e-05, + "loss": 8.9638, + "step": 31700 + }, + { + "epoch": 2.34, + "learning_rate": 6.860905465540276e-05, + "loss": 8.3136, + "step": 31750 + }, + { + "epoch": 2.35, + "learning_rate": 6.855882914285141e-05, + "loss": 8.8076, + "step": 31800 + }, + { + "epoch": 2.35, + "learning_rate": 6.850860363030004e-05, + "loss": 8.6041, + "step": 31850 + }, + { + "epoch": 2.35, + "learning_rate": 6.845837811774869e-05, + "loss": 9.1751, + "step": 31900 + }, + { + "epoch": 2.36, + "learning_rate": 6.840815260519735e-05, + "loss": 8.5955, + "step": 31950 + }, + { + "epoch": 2.36, + "learning_rate": 6.835792709264598e-05, + "loss": 9.0927, + "step": 32000 + }, + { + "epoch": 2.36, + "learning_rate": 6.830770158009463e-05, + "loss": 7.9647, + "step": 32050 + }, + { + "epoch": 2.37, + "learning_rate": 6.825747606754326e-05, + "loss": 10.2647, + "step": 32100 + }, + { + "epoch": 2.37, + "learning_rate": 6.820725055499192e-05, + "loss": 8.3442, + "step": 32150 + }, + { + "epoch": 2.38, + "learning_rate": 6.815702504244057e-05, + "loss": 9.2019, + "step": 32200 + }, + { + "epoch": 2.38, + "learning_rate": 6.810679952988921e-05, + "loss": 8.345, + "step": 32250 + }, + { + "epoch": 2.38, + "learning_rate": 6.805657401733785e-05, + "loss": 9.1835, + "step": 32300 + }, + { + "epoch": 2.39, + "learning_rate": 6.80063485047865e-05, + "loss": 9.1846, + "step": 32350 + }, 
+ { + "epoch": 2.39, + "learning_rate": 6.795612299223514e-05, + "loss": 9.0015, + "step": 32400 + }, + { + "epoch": 2.39, + "learning_rate": 6.790589747968379e-05, + "loss": 8.2404, + "step": 32450 + }, + { + "epoch": 2.4, + "learning_rate": 6.785567196713243e-05, + "loss": 8.8715, + "step": 32500 + }, + { + "epoch": 2.4, + "learning_rate": 6.780544645458107e-05, + "loss": 8.817, + "step": 32550 + }, + { + "epoch": 2.4, + "learning_rate": 6.775522094202972e-05, + "loss": 9.2154, + "step": 32600 + }, + { + "epoch": 2.41, + "learning_rate": 6.770499542947836e-05, + "loss": 9.1914, + "step": 32650 + }, + { + "epoch": 2.41, + "learning_rate": 6.7654769916927e-05, + "loss": 9.2804, + "step": 32700 + }, + { + "epoch": 2.42, + "learning_rate": 6.760454440437565e-05, + "loss": 9.177, + "step": 32750 + }, + { + "epoch": 2.42, + "learning_rate": 6.75543188918243e-05, + "loss": 8.8259, + "step": 32800 + }, + { + "epoch": 2.42, + "learning_rate": 6.750409337927294e-05, + "loss": 8.6121, + "step": 32850 + }, + { + "epoch": 2.43, + "learning_rate": 6.745386786672158e-05, + "loss": 8.644, + "step": 32900 + }, + { + "epoch": 2.43, + "learning_rate": 6.740364235417023e-05, + "loss": 8.5743, + "step": 32950 + }, + { + "epoch": 2.43, + "learning_rate": 6.735341684161888e-05, + "loss": 8.7636, + "step": 33000 + }, + { + "epoch": 2.44, + "learning_rate": 6.730319132906751e-05, + "loss": 8.3064, + "step": 33050 + }, + { + "epoch": 2.44, + "learning_rate": 6.725296581651616e-05, + "loss": 8.8806, + "step": 33100 + }, + { + "epoch": 2.45, + "learning_rate": 6.72027403039648e-05, + "loss": 8.8212, + "step": 33150 + }, + { + "epoch": 2.45, + "learning_rate": 6.715251479141345e-05, + "loss": 9.5261, + "step": 33200 + }, + { + "epoch": 2.45, + "learning_rate": 6.710228927886209e-05, + "loss": 9.0764, + "step": 33250 + }, + { + "epoch": 2.46, + "learning_rate": 6.705206376631073e-05, + "loss": 7.399, + "step": 33300 + }, + { + "epoch": 2.46, + "learning_rate": 6.700183825375939e-05, + "loss": 
9.4119, + "step": 33350 + }, + { + "epoch": 2.46, + "learning_rate": 6.695161274120802e-05, + "loss": 8.4576, + "step": 33400 + }, + { + "epoch": 2.47, + "learning_rate": 6.690138722865668e-05, + "loss": 8.024, + "step": 33450 + }, + { + "epoch": 2.47, + "learning_rate": 6.685116171610531e-05, + "loss": 9.1605, + "step": 33500 + }, + { + "epoch": 2.47, + "learning_rate": 6.680093620355397e-05, + "loss": 8.3661, + "step": 33550 + }, + { + "epoch": 2.48, + "learning_rate": 6.67507106910026e-05, + "loss": 8.4145, + "step": 33600 + }, + { + "epoch": 2.48, + "learning_rate": 6.670048517845124e-05, + "loss": 7.824, + "step": 33650 + }, + { + "epoch": 2.49, + "learning_rate": 6.66502596658999e-05, + "loss": 9.129, + "step": 33700 + }, + { + "epoch": 2.49, + "learning_rate": 6.660003415334853e-05, + "loss": 9.0876, + "step": 33750 + }, + { + "epoch": 2.49, + "learning_rate": 6.654980864079719e-05, + "loss": 8.6961, + "step": 33800 + }, + { + "epoch": 2.5, + "learning_rate": 6.649958312824582e-05, + "loss": 8.1584, + "step": 33850 + }, + { + "epoch": 2.5, + "learning_rate": 6.644935761569448e-05, + "loss": 8.6587, + "step": 33900 + }, + { + "epoch": 2.5, + "learning_rate": 6.639913210314311e-05, + "loss": 8.1059, + "step": 33950 + }, + { + "epoch": 2.51, + "learning_rate": 6.634890659059176e-05, + "loss": 9.2588, + "step": 34000 + }, + { + "epoch": 2.51, + "learning_rate": 6.629868107804041e-05, + "loss": 8.6443, + "step": 34050 + }, + { + "epoch": 2.52, + "learning_rate": 6.624845556548905e-05, + "loss": 8.8006, + "step": 34100 + }, + { + "epoch": 2.52, + "learning_rate": 6.61982300529377e-05, + "loss": 9.2288, + "step": 34150 + }, + { + "epoch": 2.52, + "learning_rate": 6.614800454038633e-05, + "loss": 9.0328, + "step": 34200 + }, + { + "epoch": 2.53, + "learning_rate": 6.609777902783499e-05, + "loss": 7.8269, + "step": 34250 + }, + { + "epoch": 2.53, + "learning_rate": 6.604755351528362e-05, + "loss": 8.5883, + "step": 34300 + }, + { + "epoch": 2.53, + "learning_rate": 
6.599732800273227e-05, + "loss": 9.9388, + "step": 34350 + }, + { + "epoch": 2.54, + "learning_rate": 6.594710249018092e-05, + "loss": 8.6776, + "step": 34400 + }, + { + "epoch": 2.54, + "learning_rate": 6.589687697762956e-05, + "loss": 7.2287, + "step": 34450 + }, + { + "epoch": 2.54, + "learning_rate": 6.58466514650782e-05, + "loss": 7.7042, + "step": 34500 + }, + { + "epoch": 2.55, + "learning_rate": 6.579642595252685e-05, + "loss": 9.0004, + "step": 34550 + }, + { + "epoch": 2.55, + "learning_rate": 6.57462004399755e-05, + "loss": 9.3279, + "step": 34600 + }, + { + "epoch": 2.56, + "learning_rate": 6.569597492742414e-05, + "loss": 8.9144, + "step": 34650 + }, + { + "epoch": 2.56, + "learning_rate": 6.564574941487278e-05, + "loss": 9.3319, + "step": 34700 + }, + { + "epoch": 2.56, + "learning_rate": 6.559552390232143e-05, + "loss": 9.4986, + "step": 34750 + }, + { + "epoch": 2.57, + "learning_rate": 6.554529838977007e-05, + "loss": 9.002, + "step": 34800 + }, + { + "epoch": 2.57, + "learning_rate": 6.549507287721871e-05, + "loss": 8.6061, + "step": 34850 + }, + { + "epoch": 2.57, + "learning_rate": 6.544484736466736e-05, + "loss": 7.4598, + "step": 34900 + }, + { + "epoch": 2.58, + "learning_rate": 6.5394621852116e-05, + "loss": 8.6618, + "step": 34950 + }, + { + "epoch": 2.58, + "learning_rate": 6.534439633956465e-05, + "loss": 9.0226, + "step": 35000 + }, + { + "epoch": 2.59, + "learning_rate": 6.529417082701329e-05, + "loss": 7.9738, + "step": 35050 + }, + { + "epoch": 2.59, + "learning_rate": 6.524394531446193e-05, + "loss": 8.7871, + "step": 35100 + }, + { + "epoch": 2.59, + "learning_rate": 6.519371980191058e-05, + "loss": 8.8744, + "step": 35150 + }, + { + "epoch": 2.6, + "learning_rate": 6.514349428935924e-05, + "loss": 8.3771, + "step": 35200 + }, + { + "epoch": 2.6, + "learning_rate": 6.509326877680787e-05, + "loss": 8.058, + "step": 35250 + }, + { + "epoch": 2.6, + "learning_rate": 6.504304326425652e-05, + "loss": 8.2627, + "step": 35300 + }, + { + 
"epoch": 2.61, + "learning_rate": 6.499281775170515e-05, + "loss": 8.1643, + "step": 35350 + }, + { + "epoch": 2.61, + "learning_rate": 6.49425922391538e-05, + "loss": 7.888, + "step": 35400 + }, + { + "epoch": 2.61, + "learning_rate": 6.489236672660246e-05, + "loss": 7.9235, + "step": 35450 + }, + { + "epoch": 2.62, + "learning_rate": 6.484214121405109e-05, + "loss": 8.1139, + "step": 35500 + }, + { + "epoch": 2.62, + "learning_rate": 6.479191570149974e-05, + "loss": 8.7467, + "step": 35550 + }, + { + "epoch": 2.63, + "learning_rate": 6.474169018894837e-05, + "loss": 7.4693, + "step": 35600 + }, + { + "epoch": 2.63, + "learning_rate": 6.469146467639703e-05, + "loss": 8.5167, + "step": 35650 + }, + { + "epoch": 2.63, + "learning_rate": 6.464123916384566e-05, + "loss": 9.5274, + "step": 35700 + }, + { + "epoch": 2.64, + "learning_rate": 6.459101365129432e-05, + "loss": 8.9735, + "step": 35750 + }, + { + "epoch": 2.64, + "learning_rate": 6.454078813874296e-05, + "loss": 8.1756, + "step": 35800 + }, + { + "epoch": 2.64, + "learning_rate": 6.449056262619161e-05, + "loss": 7.8084, + "step": 35850 + }, + { + "epoch": 2.65, + "learning_rate": 6.444033711364025e-05, + "loss": 8.2671, + "step": 35900 + }, + { + "epoch": 2.65, + "learning_rate": 6.439011160108888e-05, + "loss": 8.6628, + "step": 35950 + }, + { + "epoch": 2.66, + "learning_rate": 6.433988608853754e-05, + "loss": 9.8654, + "step": 36000 + }, + { + "epoch": 2.66, + "learning_rate": 6.428966057598617e-05, + "loss": 9.104, + "step": 36050 + }, + { + "epoch": 2.66, + "learning_rate": 6.423943506343483e-05, + "loss": 9.4156, + "step": 36100 + }, + { + "epoch": 2.67, + "learning_rate": 6.418920955088347e-05, + "loss": 8.9803, + "step": 36150 + }, + { + "epoch": 2.67, + "learning_rate": 6.413898403833212e-05, + "loss": 8.9584, + "step": 36200 + }, + { + "epoch": 2.67, + "learning_rate": 6.408875852578076e-05, + "loss": 7.3683, + "step": 36250 + }, + { + "epoch": 2.68, + "learning_rate": 6.40385330132294e-05, + 
"loss": 8.3277, + "step": 36300 + }, + { + "epoch": 2.68, + "learning_rate": 6.398830750067805e-05, + "loss": 9.3236, + "step": 36350 + }, + { + "epoch": 2.68, + "learning_rate": 6.393808198812669e-05, + "loss": 8.6918, + "step": 36400 + }, + { + "epoch": 2.69, + "learning_rate": 6.388785647557534e-05, + "loss": 8.9422, + "step": 36450 + }, + { + "epoch": 2.69, + "learning_rate": 6.383763096302398e-05, + "loss": 8.8438, + "step": 36500 + }, + { + "epoch": 2.7, + "learning_rate": 6.378740545047263e-05, + "loss": 8.7752, + "step": 36550 + }, + { + "epoch": 2.7, + "learning_rate": 6.373717993792127e-05, + "loss": 8.6483, + "step": 36600 + }, + { + "epoch": 2.7, + "learning_rate": 6.368695442536991e-05, + "loss": 8.5753, + "step": 36650 + }, + { + "epoch": 2.71, + "learning_rate": 6.363672891281856e-05, + "loss": 8.1893, + "step": 36700 + }, + { + "epoch": 2.71, + "learning_rate": 6.35865034002672e-05, + "loss": 8.189, + "step": 36750 + }, + { + "epoch": 2.71, + "learning_rate": 6.353627788771585e-05, + "loss": 8.2979, + "step": 36800 + }, + { + "epoch": 2.72, + "learning_rate": 6.348605237516449e-05, + "loss": 8.3904, + "step": 36850 + }, + { + "epoch": 2.72, + "learning_rate": 6.343582686261313e-05, + "loss": 9.3274, + "step": 36900 + }, + { + "epoch": 2.73, + "learning_rate": 6.338560135006179e-05, + "loss": 7.7663, + "step": 36950 + }, + { + "epoch": 2.73, + "learning_rate": 6.333537583751042e-05, + "loss": 8.2105, + "step": 37000 + }, + { + "epoch": 2.73, + "learning_rate": 6.328515032495907e-05, + "loss": 8.035, + "step": 37050 + }, + { + "epoch": 2.74, + "learning_rate": 6.323492481240771e-05, + "loss": 9.5032, + "step": 37100 + }, + { + "epoch": 2.74, + "learning_rate": 6.318469929985635e-05, + "loss": 8.3856, + "step": 37150 + }, + { + "epoch": 2.74, + "learning_rate": 6.3134473787305e-05, + "loss": 8.9941, + "step": 37200 + }, + { + "epoch": 2.75, + "learning_rate": 6.308424827475364e-05, + "loss": 8.3987, + "step": 37250 + }, + { + "epoch": 2.75, + 
"learning_rate": 6.30340227622023e-05, + "loss": 9.1753, + "step": 37300 + }, + { + "epoch": 2.76, + "learning_rate": 6.298379724965093e-05, + "loss": 7.9557, + "step": 37350 + }, + { + "epoch": 2.76, + "learning_rate": 6.293357173709959e-05, + "loss": 7.725, + "step": 37400 + }, + { + "epoch": 2.76, + "learning_rate": 6.288334622454822e-05, + "loss": 8.0807, + "step": 37450 + }, + { + "epoch": 2.77, + "learning_rate": 6.283312071199688e-05, + "loss": 8.6492, + "step": 37500 + }, + { + "epoch": 2.77, + "learning_rate": 6.27828951994455e-05, + "loss": 8.4716, + "step": 37550 + }, + { + "epoch": 2.77, + "learning_rate": 6.273266968689416e-05, + "loss": 8.7209, + "step": 37600 + }, + { + "epoch": 2.78, + "learning_rate": 6.268244417434281e-05, + "loss": 8.4902, + "step": 37650 + }, + { + "epoch": 2.78, + "learning_rate": 6.263221866179144e-05, + "loss": 7.9589, + "step": 37700 + }, + { + "epoch": 2.78, + "learning_rate": 6.25819931492401e-05, + "loss": 9.3285, + "step": 37750 + }, + { + "epoch": 2.79, + "learning_rate": 6.253176763668873e-05, + "loss": 9.0506, + "step": 37800 + }, + { + "epoch": 2.79, + "learning_rate": 6.248154212413738e-05, + "loss": 7.9992, + "step": 37850 + }, + { + "epoch": 2.8, + "learning_rate": 6.243131661158603e-05, + "loss": 8.029, + "step": 37900 + }, + { + "epoch": 2.8, + "learning_rate": 6.238109109903467e-05, + "loss": 8.6681, + "step": 37950 + }, + { + "epoch": 2.8, + "learning_rate": 6.233086558648332e-05, + "loss": 8.5906, + "step": 38000 + }, + { + "epoch": 2.81, + "learning_rate": 6.228064007393196e-05, + "loss": 10.4719, + "step": 38050 + }, + { + "epoch": 2.81, + "learning_rate": 6.22304145613806e-05, + "loss": 8.2759, + "step": 38100 + }, + { + "epoch": 2.81, + "learning_rate": 6.218018904882925e-05, + "loss": 8.2633, + "step": 38150 + }, + { + "epoch": 2.82, + "learning_rate": 6.212996353627789e-05, + "loss": 8.5218, + "step": 38200 + }, + { + "epoch": 2.82, + "learning_rate": 6.207973802372654e-05, + "loss": 8.0609, + "step": 
38250 + }, + { + "epoch": 2.83, + "learning_rate": 6.202951251117518e-05, + "loss": 9.3672, + "step": 38300 + }, + { + "epoch": 2.83, + "learning_rate": 6.197928699862382e-05, + "loss": 10.1768, + "step": 38350 + }, + { + "epoch": 2.83, + "learning_rate": 6.192906148607247e-05, + "loss": 9.4389, + "step": 38400 + }, + { + "epoch": 2.84, + "learning_rate": 6.187883597352111e-05, + "loss": 7.6737, + "step": 38450 + }, + { + "epoch": 2.84, + "learning_rate": 6.182861046096976e-05, + "loss": 9.2337, + "step": 38500 + }, + { + "epoch": 2.84, + "learning_rate": 6.17783849484184e-05, + "loss": 8.7846, + "step": 38550 + }, + { + "epoch": 2.85, + "learning_rate": 6.172815943586704e-05, + "loss": 7.8709, + "step": 38600 + }, + { + "epoch": 2.85, + "learning_rate": 6.167793392331569e-05, + "loss": 8.8688, + "step": 38650 + }, + { + "epoch": 2.85, + "learning_rate": 6.162770841076435e-05, + "loss": 8.4087, + "step": 38700 + }, + { + "epoch": 2.86, + "learning_rate": 6.157748289821298e-05, + "loss": 7.7129, + "step": 38750 + }, + { + "epoch": 2.86, + "learning_rate": 6.152725738566162e-05, + "loss": 9.3196, + "step": 38800 + }, + { + "epoch": 2.87, + "learning_rate": 6.147703187311027e-05, + "loss": 8.8242, + "step": 38850 + }, + { + "epoch": 2.87, + "learning_rate": 6.142680636055891e-05, + "loss": 8.4237, + "step": 38900 + }, + { + "epoch": 2.87, + "learning_rate": 6.137658084800755e-05, + "loss": 8.9383, + "step": 38950 + }, + { + "epoch": 2.88, + "learning_rate": 6.13263553354562e-05, + "loss": 8.3749, + "step": 39000 + }, + { + "epoch": 2.88, + "learning_rate": 6.127612982290485e-05, + "loss": 8.8894, + "step": 39050 + }, + { + "epoch": 2.88, + "learning_rate": 6.122590431035349e-05, + "loss": 8.2975, + "step": 39100 + }, + { + "epoch": 2.89, + "learning_rate": 6.117567879780214e-05, + "loss": 8.0517, + "step": 39150 + }, + { + "epoch": 2.89, + "learning_rate": 6.112545328525077e-05, + "loss": 8.0154, + "step": 39200 + }, + { + "epoch": 2.9, + "learning_rate": 
6.107522777269943e-05, + "loss": 8.4887, + "step": 39250 + }, + { + "epoch": 2.9, + "learning_rate": 6.102500226014807e-05, + "loss": 8.7064, + "step": 39300 + }, + { + "epoch": 2.9, + "learning_rate": 6.0974776747596706e-05, + "loss": 9.7375, + "step": 39350 + }, + { + "epoch": 2.91, + "learning_rate": 6.0924551235045357e-05, + "loss": 8.8614, + "step": 39400 + }, + { + "epoch": 2.91, + "learning_rate": 6.0874325722493994e-05, + "loss": 8.302, + "step": 39450 + }, + { + "epoch": 2.91, + "learning_rate": 6.0824100209942645e-05, + "loss": 7.8469, + "step": 39500 + }, + { + "epoch": 2.92, + "learning_rate": 6.077387469739129e-05, + "loss": 9.0706, + "step": 39550 + }, + { + "epoch": 2.92, + "learning_rate": 6.072364918483994e-05, + "loss": 9.1398, + "step": 39600 + }, + { + "epoch": 2.92, + "learning_rate": 6.067342367228858e-05, + "loss": 8.1838, + "step": 39650 + }, + { + "epoch": 2.93, + "learning_rate": 6.062319815973723e-05, + "loss": 9.2303, + "step": 39700 + }, + { + "epoch": 2.93, + "learning_rate": 6.0572972647185865e-05, + "loss": 8.3715, + "step": 39750 + }, + { + "epoch": 2.94, + "learning_rate": 6.0522747134634516e-05, + "loss": 8.409, + "step": 39800 + }, + { + "epoch": 2.94, + "learning_rate": 6.047252162208315e-05, + "loss": 8.6441, + "step": 39850 + }, + { + "epoch": 2.94, + "learning_rate": 6.04222961095318e-05, + "loss": 9.0975, + "step": 39900 + }, + { + "epoch": 2.95, + "learning_rate": 6.037207059698045e-05, + "loss": 8.0691, + "step": 39950 + }, + { + "epoch": 2.95, + "learning_rate": 6.0321845084429085e-05, + "loss": 8.6646, + "step": 40000 + }, + { + "epoch": 2.95, + "eval_loss": 8.163222312927246, + "eval_runtime": 957.6189, + "eval_samples_per_second": 13.678, + "eval_steps_per_second": 3.42, + "eval_wer": 0.22493805384066187, + "step": 40000 + }, + { + "epoch": 2.95, + "learning_rate": 6.0271619571877736e-05, + "loss": 8.4278, + "step": 40050 + }, + { + "epoch": 2.96, + "learning_rate": 6.022139405932637e-05, + "loss": 8.1656, + "step": 
40100 + }, + { + "epoch": 2.96, + "learning_rate": 6.0171168546775024e-05, + "loss": 7.7975, + "step": 40150 + }, + { + "epoch": 2.97, + "learning_rate": 6.012094303422366e-05, + "loss": 7.5465, + "step": 40200 + }, + { + "epoch": 2.97, + "learning_rate": 6.007071752167231e-05, + "loss": 8.3986, + "step": 40250 + }, + { + "epoch": 2.97, + "learning_rate": 6.0020492009120956e-05, + "loss": 8.3762, + "step": 40300 + }, + { + "epoch": 2.98, + "learning_rate": 5.997026649656961e-05, + "loss": 8.6175, + "step": 40350 + }, + { + "epoch": 2.98, + "learning_rate": 5.9920040984018244e-05, + "loss": 8.5622, + "step": 40400 + }, + { + "epoch": 2.98, + "learning_rate": 5.9869815471466895e-05, + "loss": 8.1824, + "step": 40450 + }, + { + "epoch": 2.99, + "learning_rate": 5.981958995891553e-05, + "loss": 7.2886, + "step": 40500 + }, + { + "epoch": 2.99, + "learning_rate": 5.9769364446364177e-05, + "loss": 8.3469, + "step": 40550 + }, + { + "epoch": 2.99, + "learning_rate": 5.971913893381282e-05, + "loss": 8.6257, + "step": 40600 + }, + { + "epoch": 3.0, + "learning_rate": 5.9668913421261465e-05, + "loss": 7.7071, + "step": 40650 + }, + { + "epoch": 3.0, + "learning_rate": 5.9618687908710116e-05, + "loss": 7.8413, + "step": 40700 + }, + { + "epoch": 3.01, + "learning_rate": 5.956846239615875e-05, + "loss": 7.6704, + "step": 40750 + }, + { + "epoch": 3.01, + "learning_rate": 5.9518236883607404e-05, + "loss": 7.3902, + "step": 40800 + }, + { + "epoch": 3.01, + "learning_rate": 5.946801137105604e-05, + "loss": 8.3296, + "step": 40850 + }, + { + "epoch": 3.02, + "learning_rate": 5.941778585850469e-05, + "loss": 7.0884, + "step": 40900 + }, + { + "epoch": 3.02, + "learning_rate": 5.9367560345953336e-05, + "loss": 7.043, + "step": 40950 + }, + { + "epoch": 3.02, + "learning_rate": 5.931733483340198e-05, + "loss": 7.5367, + "step": 41000 + }, + { + "epoch": 3.03, + "learning_rate": 5.9267109320850624e-05, + "loss": 8.3064, + "step": 41050 + }, + { + "epoch": 3.03, + "learning_rate": 
5.921688380829926e-05, + "loss": 7.6769, + "step": 41100 + }, + { + "epoch": 3.04, + "learning_rate": 5.916665829574791e-05, + "loss": 9.067, + "step": 41150 + }, + { + "epoch": 3.04, + "learning_rate": 5.911643278319655e-05, + "loss": 8.3565, + "step": 41200 + }, + { + "epoch": 3.04, + "learning_rate": 5.90662072706452e-05, + "loss": 7.8335, + "step": 41250 + }, + { + "epoch": 3.05, + "learning_rate": 5.9015981758093844e-05, + "loss": 7.9617, + "step": 41300 + }, + { + "epoch": 3.05, + "learning_rate": 5.8965756245542495e-05, + "loss": 8.6728, + "step": 41350 + }, + { + "epoch": 3.05, + "learning_rate": 5.891553073299113e-05, + "loss": 7.9142, + "step": 41400 + }, + { + "epoch": 3.06, + "learning_rate": 5.886530522043978e-05, + "loss": 7.7702, + "step": 41450 + }, + { + "epoch": 3.06, + "learning_rate": 5.881507970788842e-05, + "loss": 8.2997, + "step": 41500 + }, + { + "epoch": 3.06, + "learning_rate": 5.876485419533707e-05, + "loss": 8.1519, + "step": 41550 + }, + { + "epoch": 3.07, + "learning_rate": 5.871462868278571e-05, + "loss": 7.3762, + "step": 41600 + }, + { + "epoch": 3.07, + "learning_rate": 5.866440317023435e-05, + "loss": 7.5129, + "step": 41650 + }, + { + "epoch": 3.08, + "learning_rate": 5.8614177657683e-05, + "loss": 8.2537, + "step": 41700 + }, + { + "epoch": 3.08, + "learning_rate": 5.856395214513164e-05, + "loss": 8.4148, + "step": 41750 + }, + { + "epoch": 3.08, + "learning_rate": 5.851372663258029e-05, + "loss": 7.1737, + "step": 41800 + }, + { + "epoch": 3.09, + "learning_rate": 5.846350112002893e-05, + "loss": 7.2628, + "step": 41850 + }, + { + "epoch": 3.09, + "learning_rate": 5.841327560747758e-05, + "loss": 7.2933, + "step": 41900 + }, + { + "epoch": 3.09, + "learning_rate": 5.836305009492622e-05, + "loss": 7.7675, + "step": 41950 + }, + { + "epoch": 3.1, + "learning_rate": 5.831282458237487e-05, + "loss": 8.2344, + "step": 42000 + }, + { + "epoch": 3.1, + "learning_rate": 5.826259906982351e-05, + "loss": 7.1329, + "step": 42050 + }, + { 
+ "epoch": 3.11, + "learning_rate": 5.821237355727216e-05, + "loss": 7.3924, + "step": 42100 + }, + { + "epoch": 3.11, + "learning_rate": 5.81621480447208e-05, + "loss": 6.6189, + "step": 42150 + }, + { + "epoch": 3.11, + "learning_rate": 5.811192253216944e-05, + "loss": 7.3457, + "step": 42200 + }, + { + "epoch": 3.12, + "learning_rate": 5.806169701961809e-05, + "loss": 8.9924, + "step": 42250 + }, + { + "epoch": 3.12, + "learning_rate": 5.8011471507066725e-05, + "loss": 7.6315, + "step": 42300 + }, + { + "epoch": 3.12, + "learning_rate": 5.7961245994515376e-05, + "loss": 8.4726, + "step": 42350 + }, + { + "epoch": 3.13, + "learning_rate": 5.791102048196402e-05, + "loss": 7.1755, + "step": 42400 + }, + { + "epoch": 3.13, + "learning_rate": 5.786079496941267e-05, + "loss": 7.5716, + "step": 42450 + }, + { + "epoch": 3.13, + "learning_rate": 5.781056945686131e-05, + "loss": 7.938, + "step": 42500 + }, + { + "epoch": 3.14, + "learning_rate": 5.776034394430996e-05, + "loss": 7.3833, + "step": 42550 + }, + { + "epoch": 3.14, + "learning_rate": 5.7710118431758596e-05, + "loss": 6.4276, + "step": 42600 + }, + { + "epoch": 3.15, + "learning_rate": 5.765989291920725e-05, + "loss": 6.8907, + "step": 42650 + }, + { + "epoch": 3.15, + "learning_rate": 5.7609667406655884e-05, + "loss": 7.7592, + "step": 42700 + }, + { + "epoch": 3.15, + "learning_rate": 5.755944189410453e-05, + "loss": 7.4997, + "step": 42750 + }, + { + "epoch": 3.16, + "learning_rate": 5.750921638155318e-05, + "loss": 7.2821, + "step": 42800 + }, + { + "epoch": 3.16, + "learning_rate": 5.745899086900182e-05, + "loss": 7.4861, + "step": 42850 + }, + { + "epoch": 3.16, + "learning_rate": 5.740876535645047e-05, + "loss": 7.9266, + "step": 42900 + }, + { + "epoch": 3.17, + "learning_rate": 5.7358539843899105e-05, + "loss": 7.6244, + "step": 42950 + }, + { + "epoch": 3.17, + "learning_rate": 5.7308314331347756e-05, + "loss": 7.382, + "step": 43000 + }, + { + "epoch": 3.18, + "learning_rate": 5.725808881879639e-05, 
+ "loss": 8.1925, + "step": 43050 + }, + { + "epoch": 3.18, + "learning_rate": 5.7207863306245044e-05, + "loss": 8.3185, + "step": 43100 + }, + { + "epoch": 3.18, + "learning_rate": 5.715763779369369e-05, + "loss": 7.091, + "step": 43150 + }, + { + "epoch": 3.19, + "learning_rate": 5.710741228114234e-05, + "loss": 7.8352, + "step": 43200 + }, + { + "epoch": 3.19, + "learning_rate": 5.7057186768590976e-05, + "loss": 6.6085, + "step": 43250 + }, + { + "epoch": 3.19, + "learning_rate": 5.700696125603963e-05, + "loss": 7.8052, + "step": 43300 + }, + { + "epoch": 3.2, + "learning_rate": 5.6956735743488264e-05, + "loss": 8.1999, + "step": 43350 + }, + { + "epoch": 3.2, + "learning_rate": 5.690651023093691e-05, + "loss": 7.2801, + "step": 43400 + }, + { + "epoch": 3.2, + "learning_rate": 5.685628471838555e-05, + "loss": 7.6289, + "step": 43450 + }, + { + "epoch": 3.21, + "learning_rate": 5.6806059205834196e-05, + "loss": 6.8215, + "step": 43500 + }, + { + "epoch": 3.21, + "learning_rate": 5.675583369328285e-05, + "loss": 7.1678, + "step": 43550 + }, + { + "epoch": 3.22, + "learning_rate": 5.6705608180731484e-05, + "loss": 7.6612, + "step": 43600 + }, + { + "epoch": 3.22, + "learning_rate": 5.6655382668180135e-05, + "loss": 7.8899, + "step": 43650 + }, + { + "epoch": 3.22, + "learning_rate": 5.660515715562877e-05, + "loss": 7.8546, + "step": 43700 + }, + { + "epoch": 3.23, + "learning_rate": 5.655493164307742e-05, + "loss": 7.319, + "step": 43750 + }, + { + "epoch": 3.23, + "learning_rate": 5.650470613052607e-05, + "loss": 7.3317, + "step": 43800 + }, + { + "epoch": 3.23, + "learning_rate": 5.645448061797471e-05, + "loss": 7.8875, + "step": 43850 + }, + { + "epoch": 3.24, + "learning_rate": 5.6404255105423355e-05, + "loss": 7.8145, + "step": 43900 + }, + { + "epoch": 3.24, + "learning_rate": 5.635402959287199e-05, + "loss": 7.0667, + "step": 43950 + }, + { + "epoch": 3.25, + "learning_rate": 5.6303804080320643e-05, + "loss": 7.7603, + "step": 44000 + }, + { + "epoch": 
3.25, + "learning_rate": 5.625357856776928e-05, + "loss": 7.6111, + "step": 44050 + }, + { + "epoch": 3.25, + "learning_rate": 5.620335305521793e-05, + "loss": 7.9858, + "step": 44100 + }, + { + "epoch": 3.26, + "learning_rate": 5.6153127542666576e-05, + "loss": 8.9896, + "step": 44150 + }, + { + "epoch": 3.26, + "learning_rate": 5.6102902030115226e-05, + "loss": 8.4081, + "step": 44200 + }, + { + "epoch": 3.26, + "learning_rate": 5.6052676517563864e-05, + "loss": 7.4748, + "step": 44250 + }, + { + "epoch": 3.27, + "learning_rate": 5.6002451005012515e-05, + "loss": 8.2133, + "step": 44300 + }, + { + "epoch": 3.27, + "learning_rate": 5.595222549246115e-05, + "loss": 7.3073, + "step": 44350 + }, + { + "epoch": 3.28, + "learning_rate": 5.59019999799098e-05, + "loss": 7.9638, + "step": 44400 + }, + { + "epoch": 3.28, + "learning_rate": 5.585177446735844e-05, + "loss": 7.9653, + "step": 44450 + }, + { + "epoch": 3.28, + "learning_rate": 5.5801548954807084e-05, + "loss": 7.8583, + "step": 44500 + }, + { + "epoch": 3.29, + "learning_rate": 5.5751323442255735e-05, + "loss": 8.0561, + "step": 44550 + }, + { + "epoch": 3.29, + "learning_rate": 5.570109792970437e-05, + "loss": 8.1276, + "step": 44600 + }, + { + "epoch": 3.29, + "learning_rate": 5.565087241715302e-05, + "loss": 7.7357, + "step": 44650 + }, + { + "epoch": 3.3, + "learning_rate": 5.560064690460166e-05, + "loss": 7.7529, + "step": 44700 + }, + { + "epoch": 3.3, + "learning_rate": 5.555042139205031e-05, + "loss": 7.2583, + "step": 44750 + }, + { + "epoch": 3.3, + "learning_rate": 5.550019587949895e-05, + "loss": 6.4675, + "step": 44800 + }, + { + "epoch": 3.31, + "learning_rate": 5.54499703669476e-05, + "loss": 7.3658, + "step": 44850 + }, + { + "epoch": 3.31, + "learning_rate": 5.539974485439624e-05, + "loss": 8.278, + "step": 44900 + }, + { + "epoch": 3.32, + "learning_rate": 5.5349519341844894e-05, + "loss": 7.3867, + "step": 44950 + }, + { + "epoch": 3.32, + "learning_rate": 5.529929382929353e-05, + "loss": 
7.4187, + "step": 45000 + }, + { + "epoch": 3.32, + "learning_rate": 5.524906831674217e-05, + "loss": 7.5281, + "step": 45050 + }, + { + "epoch": 3.33, + "learning_rate": 5.519884280419082e-05, + "loss": 7.8815, + "step": 45100 + }, + { + "epoch": 3.33, + "learning_rate": 5.514861729163946e-05, + "loss": 7.2487, + "step": 45150 + }, + { + "epoch": 3.33, + "learning_rate": 5.509839177908811e-05, + "loss": 8.3441, + "step": 45200 + }, + { + "epoch": 3.34, + "learning_rate": 5.504816626653675e-05, + "loss": 7.4892, + "step": 45250 + }, + { + "epoch": 3.34, + "learning_rate": 5.49979407539854e-05, + "loss": 7.7789, + "step": 45300 + }, + { + "epoch": 3.35, + "learning_rate": 5.494771524143404e-05, + "loss": 7.3951, + "step": 45350 + }, + { + "epoch": 3.35, + "learning_rate": 5.489748972888269e-05, + "loss": 7.8756, + "step": 45400 + }, + { + "epoch": 3.35, + "learning_rate": 5.484726421633133e-05, + "loss": 7.9274, + "step": 45450 + }, + { + "epoch": 3.36, + "learning_rate": 5.479703870377998e-05, + "loss": 8.1525, + "step": 45500 + }, + { + "epoch": 3.36, + "learning_rate": 5.4746813191228616e-05, + "loss": 7.5597, + "step": 45550 + }, + { + "epoch": 3.36, + "learning_rate": 5.469658767867726e-05, + "loss": 7.8939, + "step": 45600 + }, + { + "epoch": 3.37, + "learning_rate": 5.464636216612591e-05, + "loss": 6.1451, + "step": 45650 + }, + { + "epoch": 3.37, + "learning_rate": 5.459613665357455e-05, + "loss": 7.224, + "step": 45700 + }, + { + "epoch": 3.37, + "learning_rate": 5.45459111410232e-05, + "loss": 7.2489, + "step": 45750 + }, + { + "epoch": 3.38, + "learning_rate": 5.4495685628471836e-05, + "loss": 7.4162, + "step": 45800 + }, + { + "epoch": 3.38, + "learning_rate": 5.444546011592049e-05, + "loss": 6.8503, + "step": 45850 + }, + { + "epoch": 3.39, + "learning_rate": 5.4395234603369124e-05, + "loss": 6.7087, + "step": 45900 + }, + { + "epoch": 3.39, + "learning_rate": 5.4345009090817775e-05, + "loss": 6.9697, + "step": 45950 + }, + { + "epoch": 3.39, + 
"learning_rate": 5.429478357826642e-05, + "loss": 7.8369, + "step": 46000 + }, + { + "epoch": 3.4, + "learning_rate": 5.424455806571507e-05, + "loss": 7.7567, + "step": 46050 + }, + { + "epoch": 3.4, + "learning_rate": 5.419433255316371e-05, + "loss": 6.6241, + "step": 46100 + }, + { + "epoch": 3.4, + "learning_rate": 5.414410704061236e-05, + "loss": 7.5218, + "step": 46150 + }, + { + "epoch": 3.41, + "learning_rate": 5.4093881528060995e-05, + "loss": 7.2338, + "step": 46200 + }, + { + "epoch": 3.41, + "learning_rate": 5.404365601550964e-05, + "loss": 7.0707, + "step": 46250 + }, + { + "epoch": 3.42, + "learning_rate": 5.3993430502958283e-05, + "loss": 7.6922, + "step": 46300 + }, + { + "epoch": 3.42, + "learning_rate": 5.394320499040693e-05, + "loss": 8.6056, + "step": 46350 + }, + { + "epoch": 3.42, + "learning_rate": 5.389297947785558e-05, + "loss": 7.4641, + "step": 46400 + }, + { + "epoch": 3.43, + "learning_rate": 5.3842753965304216e-05, + "loss": 7.1716, + "step": 46450 + }, + { + "epoch": 3.43, + "learning_rate": 5.3792528452752866e-05, + "loss": 7.6382, + "step": 46500 + }, + { + "epoch": 3.43, + "learning_rate": 5.3742302940201504e-05, + "loss": 7.0739, + "step": 46550 + }, + { + "epoch": 3.44, + "learning_rate": 5.3692077427650155e-05, + "loss": 7.8667, + "step": 46600 + }, + { + "epoch": 3.44, + "learning_rate": 5.36418519150988e-05, + "loss": 7.559, + "step": 46650 + }, + { + "epoch": 3.44, + "learning_rate": 5.359162640254744e-05, + "loss": 7.6078, + "step": 46700 + }, + { + "epoch": 3.45, + "learning_rate": 5.354140088999609e-05, + "loss": 7.7994, + "step": 46750 + }, + { + "epoch": 3.45, + "learning_rate": 5.3491175377444724e-05, + "loss": 7.0418, + "step": 46800 + }, + { + "epoch": 3.46, + "learning_rate": 5.3440949864893375e-05, + "loss": 7.3261, + "step": 46850 + }, + { + "epoch": 3.46, + "learning_rate": 5.339072435234201e-05, + "loss": 7.9914, + "step": 46900 + }, + { + "epoch": 3.46, + "learning_rate": 5.334049883979066e-05, + "loss": 7.1998, 
+ "step": 46950 + }, + { + "epoch": 3.47, + "learning_rate": 5.329027332723931e-05, + "loss": 7.3343, + "step": 47000 + }, + { + "epoch": 3.47, + "learning_rate": 5.324004781468796e-05, + "loss": 8.1604, + "step": 47050 + }, + { + "epoch": 3.47, + "learning_rate": 5.3189822302136595e-05, + "loss": 7.5405, + "step": 47100 + }, + { + "epoch": 3.48, + "learning_rate": 5.3139596789585246e-05, + "loss": 7.9409, + "step": 47150 + }, + { + "epoch": 3.48, + "learning_rate": 5.308937127703388e-05, + "loss": 8.0573, + "step": 47200 + }, + { + "epoch": 3.49, + "learning_rate": 5.3039145764482534e-05, + "loss": 7.2927, + "step": 47250 + }, + { + "epoch": 3.49, + "learning_rate": 5.298892025193117e-05, + "loss": 6.9476, + "step": 47300 + }, + { + "epoch": 3.49, + "learning_rate": 5.2938694739379815e-05, + "loss": 7.1999, + "step": 47350 + }, + { + "epoch": 3.5, + "learning_rate": 5.2888469226828466e-05, + "loss": 7.8224, + "step": 47400 + }, + { + "epoch": 3.5, + "learning_rate": 5.2838243714277103e-05, + "loss": 8.1369, + "step": 47450 + }, + { + "epoch": 3.5, + "learning_rate": 5.2788018201725754e-05, + "loss": 6.7302, + "step": 47500 + }, + { + "epoch": 3.51, + "learning_rate": 5.273779268917439e-05, + "loss": 8.0819, + "step": 47550 + }, + { + "epoch": 3.51, + "learning_rate": 5.268756717662304e-05, + "loss": 7.8832, + "step": 47600 + }, + { + "epoch": 3.51, + "learning_rate": 5.263734166407168e-05, + "loss": 8.4479, + "step": 47650 + }, + { + "epoch": 3.52, + "learning_rate": 5.258711615152033e-05, + "loss": 7.7838, + "step": 47700 + }, + { + "epoch": 3.52, + "learning_rate": 5.2536890638968975e-05, + "loss": 8.3843, + "step": 47750 + }, + { + "epoch": 3.53, + "learning_rate": 5.2486665126417625e-05, + "loss": 6.9055, + "step": 47800 + }, + { + "epoch": 3.53, + "learning_rate": 5.243643961386626e-05, + "loss": 6.6339, + "step": 47850 + }, + { + "epoch": 3.53, + "learning_rate": 5.23862141013149e-05, + "loss": 7.0316, + "step": 47900 + }, + { + "epoch": 3.54, + 
"learning_rate": 5.233598858876355e-05, + "loss": 7.4569, + "step": 47950 + }, + { + "epoch": 3.54, + "learning_rate": 5.228576307621219e-05, + "loss": 7.6204, + "step": 48000 + }, + { + "epoch": 3.54, + "learning_rate": 5.223553756366084e-05, + "loss": 7.1085, + "step": 48050 + }, + { + "epoch": 3.55, + "learning_rate": 5.218531205110948e-05, + "loss": 7.7254, + "step": 48100 + }, + { + "epoch": 3.55, + "learning_rate": 5.2135086538558134e-05, + "loss": 7.1486, + "step": 48150 + }, + { + "epoch": 3.56, + "learning_rate": 5.208486102600677e-05, + "loss": 6.9297, + "step": 48200 + }, + { + "epoch": 3.56, + "learning_rate": 5.203463551345542e-05, + "loss": 7.5314, + "step": 48250 + }, + { + "epoch": 3.56, + "learning_rate": 5.198441000090406e-05, + "loss": 7.68, + "step": 48300 + }, + { + "epoch": 3.57, + "learning_rate": 5.193418448835271e-05, + "loss": 7.9467, + "step": 48350 + }, + { + "epoch": 3.57, + "learning_rate": 5.188395897580135e-05, + "loss": 6.7188, + "step": 48400 + }, + { + "epoch": 3.57, + "learning_rate": 5.183373346325e-05, + "loss": 7.7619, + "step": 48450 + }, + { + "epoch": 3.58, + "learning_rate": 5.178350795069864e-05, + "loss": 7.6537, + "step": 48500 + }, + { + "epoch": 3.58, + "learning_rate": 5.173328243814728e-05, + "loss": 6.9593, + "step": 48550 + }, + { + "epoch": 3.58, + "learning_rate": 5.168305692559593e-05, + "loss": 7.4834, + "step": 48600 + }, + { + "epoch": 3.59, + "learning_rate": 5.163283141304457e-05, + "loss": 8.2864, + "step": 48650 + }, + { + "epoch": 3.59, + "learning_rate": 5.158260590049322e-05, + "loss": 7.234, + "step": 48700 + }, + { + "epoch": 3.6, + "learning_rate": 5.1532380387941856e-05, + "loss": 7.2513, + "step": 48750 + }, + { + "epoch": 3.6, + "learning_rate": 5.1482154875390506e-05, + "loss": 7.508, + "step": 48800 + }, + { + "epoch": 3.6, + "learning_rate": 5.143192936283915e-05, + "loss": 7.1513, + "step": 48850 + }, + { + "epoch": 3.61, + "learning_rate": 5.13817038502878e-05, + "loss": 7.8882, + "step": 
48900 + }, + { + "epoch": 3.61, + "learning_rate": 5.133147833773644e-05, + "loss": 8.0859, + "step": 48950 + }, + { + "epoch": 3.61, + "learning_rate": 5.128125282518509e-05, + "loss": 7.5506, + "step": 49000 + }, + { + "epoch": 3.62, + "learning_rate": 5.123102731263373e-05, + "loss": 7.9777, + "step": 49050 + }, + { + "epoch": 3.62, + "learning_rate": 5.118080180008237e-05, + "loss": 8.3599, + "step": 49100 + }, + { + "epoch": 3.63, + "learning_rate": 5.1130576287531015e-05, + "loss": 7.105, + "step": 49150 + }, + { + "epoch": 3.63, + "learning_rate": 5.108035077497966e-05, + "loss": 8.2692, + "step": 49200 + }, + { + "epoch": 3.63, + "learning_rate": 5.103012526242831e-05, + "loss": 7.9098, + "step": 49250 + }, + { + "epoch": 3.64, + "learning_rate": 5.097989974987695e-05, + "loss": 7.1698, + "step": 49300 + }, + { + "epoch": 3.64, + "learning_rate": 5.09296742373256e-05, + "loss": 7.406, + "step": 49350 + }, + { + "epoch": 3.64, + "learning_rate": 5.0879448724774235e-05, + "loss": 8.3276, + "step": 49400 + }, + { + "epoch": 3.65, + "learning_rate": 5.0829223212222886e-05, + "loss": 7.5714, + "step": 49450 + }, + { + "epoch": 3.65, + "learning_rate": 5.077899769967153e-05, + "loss": 7.0839, + "step": 49500 + }, + { + "epoch": 3.65, + "learning_rate": 5.0728772187120174e-05, + "loss": 7.0589, + "step": 49550 + }, + { + "epoch": 3.66, + "learning_rate": 5.067854667456882e-05, + "loss": 7.4998, + "step": 49600 + }, + { + "epoch": 3.66, + "learning_rate": 5.0628321162017455e-05, + "loss": 7.3495, + "step": 49650 + }, + { + "epoch": 3.67, + "learning_rate": 5.0578095649466106e-05, + "loss": 7.5101, + "step": 49700 + }, + { + "epoch": 3.67, + "learning_rate": 5.0527870136914743e-05, + "loss": 6.7707, + "step": 49750 + }, + { + "epoch": 3.67, + "learning_rate": 5.0477644624363394e-05, + "loss": 7.5822, + "step": 49800 + }, + { + "epoch": 3.68, + "learning_rate": 5.042741911181204e-05, + "loss": 6.5937, + "step": 49850 + }, + { + "epoch": 3.68, + "learning_rate": 
5.037719359926069e-05, + "loss": 7.2497, + "step": 49900 + }, + { + "epoch": 3.68, + "learning_rate": 5.0326968086709326e-05, + "loss": 7.35, + "step": 49950 + }, + { + "epoch": 3.69, + "learning_rate": 5.027674257415798e-05, + "loss": 7.767, + "step": 50000 + }, + { + "epoch": 3.69, + "learning_rate": 5.0226517061606615e-05, + "loss": 8.3228, + "step": 50050 + }, + { + "epoch": 3.7, + "learning_rate": 5.0176291549055265e-05, + "loss": 7.6905, + "step": 50100 + }, + { + "epoch": 3.7, + "learning_rate": 5.01260660365039e-05, + "loss": 7.8275, + "step": 50150 + }, + { + "epoch": 3.7, + "learning_rate": 5.007584052395255e-05, + "loss": 8.0724, + "step": 50200 + }, + { + "epoch": 3.71, + "learning_rate": 5.00256150114012e-05, + "loss": 7.0501, + "step": 50250 + }, + { + "epoch": 3.71, + "learning_rate": 4.997538949884984e-05, + "loss": 7.4269, + "step": 50300 + }, + { + "epoch": 3.71, + "learning_rate": 4.9925163986298486e-05, + "loss": 7.5186, + "step": 50350 + }, + { + "epoch": 3.72, + "learning_rate": 4.987493847374713e-05, + "loss": 8.2606, + "step": 50400 + }, + { + "epoch": 3.72, + "learning_rate": 4.9824712961195774e-05, + "loss": 8.2097, + "step": 50450 + }, + { + "epoch": 3.73, + "learning_rate": 4.977448744864441e-05, + "loss": 7.468, + "step": 50500 + }, + { + "epoch": 3.73, + "learning_rate": 4.9724261936093055e-05, + "loss": 8.2075, + "step": 50550 + }, + { + "epoch": 3.73, + "learning_rate": 4.9674036423541706e-05, + "loss": 7.3928, + "step": 50600 + }, + { + "epoch": 3.74, + "learning_rate": 4.962381091099035e-05, + "loss": 7.2907, + "step": 50650 + }, + { + "epoch": 3.74, + "learning_rate": 4.9573585398438994e-05, + "loss": 7.706, + "step": 50700 + }, + { + "epoch": 3.74, + "learning_rate": 4.952335988588764e-05, + "loss": 7.301, + "step": 50750 + }, + { + "epoch": 3.75, + "learning_rate": 4.947313437333628e-05, + "loss": 6.9109, + "step": 50800 + }, + { + "epoch": 3.75, + "learning_rate": 4.9422908860784926e-05, + "loss": 6.6967, + "step": 50850 + }, + 
{ + "epoch": 3.75, + "learning_rate": 4.937268334823357e-05, + "loss": 5.9484, + "step": 50900 + }, + { + "epoch": 3.76, + "learning_rate": 4.9322457835682214e-05, + "loss": 7.8288, + "step": 50950 + }, + { + "epoch": 3.76, + "learning_rate": 4.9272232323130865e-05, + "loss": 7.3987, + "step": 51000 + }, + { + "epoch": 3.77, + "learning_rate": 4.92220068105795e-05, + "loss": 7.3714, + "step": 51050 + }, + { + "epoch": 3.77, + "learning_rate": 4.9171781298028147e-05, + "loss": 7.258, + "step": 51100 + }, + { + "epoch": 3.77, + "learning_rate": 4.912155578547679e-05, + "loss": 6.8541, + "step": 51150 + }, + { + "epoch": 3.78, + "learning_rate": 4.9071330272925435e-05, + "loss": 7.085, + "step": 51200 + }, + { + "epoch": 3.78, + "learning_rate": 4.902110476037408e-05, + "loss": 6.7827, + "step": 51250 + }, + { + "epoch": 3.78, + "learning_rate": 4.897087924782273e-05, + "loss": 6.6806, + "step": 51300 + }, + { + "epoch": 3.79, + "learning_rate": 4.8920653735271374e-05, + "loss": 7.2918, + "step": 51350 + }, + { + "epoch": 3.79, + "learning_rate": 4.887042822272002e-05, + "loss": 7.9022, + "step": 51400 + }, + { + "epoch": 3.8, + "learning_rate": 4.882020271016866e-05, + "loss": 7.6094, + "step": 51450 + }, + { + "epoch": 3.8, + "learning_rate": 4.8769977197617306e-05, + "loss": 8.1048, + "step": 51500 + }, + { + "epoch": 3.8, + "learning_rate": 4.871975168506595e-05, + "loss": 6.9056, + "step": 51550 + }, + { + "epoch": 3.81, + "learning_rate": 4.866952617251459e-05, + "loss": 6.4347, + "step": 51600 + }, + { + "epoch": 3.81, + "learning_rate": 4.861930065996324e-05, + "loss": 7.307, + "step": 51650 + }, + { + "epoch": 3.81, + "learning_rate": 4.856907514741188e-05, + "loss": 7.649, + "step": 51700 + }, + { + "epoch": 3.82, + "learning_rate": 4.8518849634860526e-05, + "loss": 6.7706, + "step": 51750 + }, + { + "epoch": 3.82, + "learning_rate": 4.846862412230917e-05, + "loss": 6.7943, + "step": 51800 + }, + { + "epoch": 3.82, + "learning_rate": 4.8418398609757814e-05, 
+ "loss": 7.654, + "step": 51850 + }, + { + "epoch": 3.83, + "learning_rate": 4.836817309720646e-05, + "loss": 7.6245, + "step": 51900 + }, + { + "epoch": 3.83, + "learning_rate": 4.83179475846551e-05, + "loss": 7.8284, + "step": 51950 + }, + { + "epoch": 3.84, + "learning_rate": 4.8267722072103746e-05, + "loss": 6.9516, + "step": 52000 + }, + { + "epoch": 3.84, + "learning_rate": 4.82174965595524e-05, + "loss": 7.1367, + "step": 52050 + }, + { + "epoch": 3.84, + "learning_rate": 4.816727104700104e-05, + "loss": 7.4153, + "step": 52100 + }, + { + "epoch": 3.85, + "learning_rate": 4.8117045534449685e-05, + "loss": 6.5358, + "step": 52150 + }, + { + "epoch": 3.85, + "learning_rate": 4.806682002189832e-05, + "loss": 7.5443, + "step": 52200 + }, + { + "epoch": 3.85, + "learning_rate": 4.8016594509346967e-05, + "loss": 7.8184, + "step": 52250 + }, + { + "epoch": 3.86, + "learning_rate": 4.796636899679561e-05, + "loss": 6.9702, + "step": 52300 + }, + { + "epoch": 3.86, + "learning_rate": 4.791614348424426e-05, + "loss": 8.3988, + "step": 52350 + }, + { + "epoch": 3.87, + "learning_rate": 4.7865917971692905e-05, + "loss": 8.1025, + "step": 52400 + }, + { + "epoch": 3.87, + "learning_rate": 4.781569245914155e-05, + "loss": 7.81, + "step": 52450 + }, + { + "epoch": 3.87, + "learning_rate": 4.7765466946590194e-05, + "loss": 6.6809, + "step": 52500 + }, + { + "epoch": 3.88, + "learning_rate": 4.771524143403884e-05, + "loss": 6.81, + "step": 52550 + }, + { + "epoch": 3.88, + "learning_rate": 4.766501592148748e-05, + "loss": 7.1717, + "step": 52600 + }, + { + "epoch": 3.88, + "learning_rate": 4.7614790408936126e-05, + "loss": 7.3114, + "step": 52650 + }, + { + "epoch": 3.89, + "learning_rate": 4.756456489638477e-05, + "loss": 7.2819, + "step": 52700 + }, + { + "epoch": 3.89, + "learning_rate": 4.7514339383833414e-05, + "loss": 6.6964, + "step": 52750 + }, + { + "epoch": 3.89, + "learning_rate": 4.746411387128206e-05, + "loss": 8.1118, + "step": 52800 + }, + { + "epoch": 3.9, + 
"learning_rate": 4.74138883587307e-05, + "loss": 8.1901, + "step": 52850 + }, + { + "epoch": 3.9, + "learning_rate": 4.7363662846179346e-05, + "loss": 6.8883, + "step": 52900 + }, + { + "epoch": 3.91, + "learning_rate": 4.731343733362799e-05, + "loss": 7.2554, + "step": 52950 + }, + { + "epoch": 3.91, + "learning_rate": 4.7263211821076634e-05, + "loss": 7.402, + "step": 53000 + }, + { + "epoch": 3.91, + "learning_rate": 4.721298630852528e-05, + "loss": 8.8808, + "step": 53050 + }, + { + "epoch": 3.92, + "learning_rate": 4.716276079597393e-05, + "loss": 7.1652, + "step": 53100 + }, + { + "epoch": 3.92, + "learning_rate": 4.711253528342257e-05, + "loss": 6.884, + "step": 53150 + }, + { + "epoch": 3.92, + "learning_rate": 4.706230977087122e-05, + "loss": 7.4472, + "step": 53200 + }, + { + "epoch": 3.93, + "learning_rate": 4.701208425831986e-05, + "loss": 6.8787, + "step": 53250 + }, + { + "epoch": 3.93, + "learning_rate": 4.6961858745768505e-05, + "loss": 6.9316, + "step": 53300 + }, + { + "epoch": 3.94, + "learning_rate": 4.691163323321714e-05, + "loss": 7.1614, + "step": 53350 + }, + { + "epoch": 3.94, + "learning_rate": 4.6861407720665787e-05, + "loss": 7.193, + "step": 53400 + }, + { + "epoch": 3.94, + "learning_rate": 4.681118220811444e-05, + "loss": 7.5875, + "step": 53450 + }, + { + "epoch": 3.95, + "learning_rate": 4.676095669556308e-05, + "loss": 7.0836, + "step": 53500 + }, + { + "epoch": 3.95, + "learning_rate": 4.6710731183011725e-05, + "loss": 7.2054, + "step": 53550 + }, + { + "epoch": 3.95, + "learning_rate": 4.666050567046037e-05, + "loss": 6.95, + "step": 53600 + }, + { + "epoch": 3.96, + "learning_rate": 4.6610280157909014e-05, + "loss": 6.6366, + "step": 53650 + }, + { + "epoch": 3.96, + "learning_rate": 4.656005464535766e-05, + "loss": 6.7976, + "step": 53700 + }, + { + "epoch": 3.96, + "learning_rate": 4.65098291328063e-05, + "loss": 7.1371, + "step": 53750 + }, + { + "epoch": 3.97, + "learning_rate": 4.6459603620254946e-05, + "loss": 6.7457, + 
"step": 53800 + }, + { + "epoch": 3.97, + "learning_rate": 4.6409378107703597e-05, + "loss": 6.6139, + "step": 53850 + }, + { + "epoch": 3.98, + "learning_rate": 4.6359152595152234e-05, + "loss": 7.9291, + "step": 53900 + }, + { + "epoch": 3.98, + "learning_rate": 4.630892708260088e-05, + "loss": 7.4235, + "step": 53950 + }, + { + "epoch": 3.98, + "learning_rate": 4.625870157004952e-05, + "loss": 6.8609, + "step": 54000 + }, + { + "epoch": 3.99, + "learning_rate": 4.6208476057498166e-05, + "loss": 6.6128, + "step": 54050 + }, + { + "epoch": 3.99, + "learning_rate": 4.615825054494681e-05, + "loss": 7.0313, + "step": 54100 + }, + { + "epoch": 3.99, + "learning_rate": 4.610802503239546e-05, + "loss": 7.3654, + "step": 54150 + }, + { + "epoch": 4.0, + "learning_rate": 4.6057799519844105e-05, + "loss": 8.266, + "step": 54200 + }, + { + "epoch": 4.0, + "learning_rate": 4.600757400729275e-05, + "loss": 7.9471, + "step": 54250 + }, + { + "epoch": 4.01, + "learning_rate": 4.595734849474139e-05, + "loss": 6.0877, + "step": 54300 + }, + { + "epoch": 4.01, + "learning_rate": 4.590712298219004e-05, + "loss": 6.7453, + "step": 54350 + }, + { + "epoch": 4.01, + "learning_rate": 4.585689746963868e-05, + "loss": 5.8985, + "step": 54400 + }, + { + "epoch": 4.02, + "learning_rate": 4.580667195708732e-05, + "loss": 7.4527, + "step": 54450 + }, + { + "epoch": 4.02, + "learning_rate": 4.575644644453597e-05, + "loss": 7.0419, + "step": 54500 + }, + { + "epoch": 4.02, + "learning_rate": 4.570622093198461e-05, + "loss": 6.281, + "step": 54550 + }, + { + "epoch": 4.03, + "learning_rate": 4.565599541943326e-05, + "loss": 6.6096, + "step": 54600 + }, + { + "epoch": 4.03, + "learning_rate": 4.56057699068819e-05, + "loss": 7.0341, + "step": 54650 + }, + { + "epoch": 4.03, + "learning_rate": 4.5555544394330545e-05, + "loss": 6.621, + "step": 54700 + }, + { + "epoch": 4.04, + "learning_rate": 4.550531888177919e-05, + "loss": 7.4405, + "step": 54750 + }, + { + "epoch": 4.04, + "learning_rate": 
4.5455093369227834e-05, + "loss": 7.2506, + "step": 54800 + }, + { + "epoch": 4.05, + "learning_rate": 4.540486785667648e-05, + "loss": 7.1534, + "step": 54850 + }, + { + "epoch": 4.05, + "learning_rate": 4.535464234412513e-05, + "loss": 6.5829, + "step": 54900 + }, + { + "epoch": 4.05, + "learning_rate": 4.530441683157377e-05, + "loss": 7.0338, + "step": 54950 + }, + { + "epoch": 4.06, + "learning_rate": 4.5254191319022417e-05, + "loss": 6.6234, + "step": 55000 + }, + { + "epoch": 4.06, + "learning_rate": 4.5203965806471054e-05, + "loss": 6.2412, + "step": 55050 + }, + { + "epoch": 4.06, + "learning_rate": 4.51537402939197e-05, + "loss": 6.3439, + "step": 55100 + }, + { + "epoch": 4.07, + "learning_rate": 4.510351478136834e-05, + "loss": 6.8272, + "step": 55150 + }, + { + "epoch": 4.07, + "learning_rate": 4.5053289268816986e-05, + "loss": 6.4758, + "step": 55200 + }, + { + "epoch": 4.08, + "learning_rate": 4.500306375626564e-05, + "loss": 6.434, + "step": 55250 + }, + { + "epoch": 4.08, + "learning_rate": 4.495283824371428e-05, + "loss": 6.5471, + "step": 55300 + }, + { + "epoch": 4.08, + "learning_rate": 4.4902612731162925e-05, + "loss": 6.5088, + "step": 55350 + }, + { + "epoch": 4.09, + "learning_rate": 4.485238721861157e-05, + "loss": 6.6941, + "step": 55400 + }, + { + "epoch": 4.09, + "learning_rate": 4.480216170606021e-05, + "loss": 6.3248, + "step": 55450 + }, + { + "epoch": 4.09, + "learning_rate": 4.475193619350886e-05, + "loss": 7.2989, + "step": 55500 + }, + { + "epoch": 4.1, + "learning_rate": 4.47017106809575e-05, + "loss": 7.0947, + "step": 55550 + }, + { + "epoch": 4.1, + "learning_rate": 4.4651485168406145e-05, + "loss": 6.4896, + "step": 55600 + }, + { + "epoch": 4.1, + "learning_rate": 4.460125965585479e-05, + "loss": 5.9249, + "step": 55650 + }, + { + "epoch": 4.11, + "learning_rate": 4.455103414330343e-05, + "loss": 6.7801, + "step": 55700 + }, + { + "epoch": 4.11, + "learning_rate": 4.450080863075208e-05, + "loss": 6.2216, + "step": 55750 + }, 
+ { + "epoch": 4.12, + "learning_rate": 4.445058311820072e-05, + "loss": 6.8346, + "step": 55800 + }, + { + "epoch": 4.12, + "learning_rate": 4.4400357605649366e-05, + "loss": 7.2863, + "step": 55850 + }, + { + "epoch": 4.12, + "learning_rate": 4.435013209309801e-05, + "loss": 7.406, + "step": 55900 + }, + { + "epoch": 4.13, + "learning_rate": 4.429990658054666e-05, + "loss": 6.0165, + "step": 55950 + }, + { + "epoch": 4.13, + "learning_rate": 4.4249681067995304e-05, + "loss": 6.8052, + "step": 56000 + }, + { + "epoch": 4.13, + "learning_rate": 4.419945555544395e-05, + "loss": 6.679, + "step": 56050 + }, + { + "epoch": 4.14, + "learning_rate": 4.414923004289259e-05, + "loss": 6.2087, + "step": 56100 + }, + { + "epoch": 4.14, + "learning_rate": 4.4099004530341237e-05, + "loss": 6.5904, + "step": 56150 + }, + { + "epoch": 4.15, + "learning_rate": 4.4048779017789874e-05, + "loss": 6.4147, + "step": 56200 + }, + { + "epoch": 4.15, + "learning_rate": 4.399855350523852e-05, + "loss": 6.6465, + "step": 56250 + }, + { + "epoch": 4.15, + "learning_rate": 4.394832799268717e-05, + "loss": 7.11, + "step": 56300 + }, + { + "epoch": 4.16, + "learning_rate": 4.389810248013581e-05, + "loss": 7.0558, + "step": 56350 + }, + { + "epoch": 4.16, + "learning_rate": 4.384787696758446e-05, + "loss": 6.922, + "step": 56400 + }, + { + "epoch": 4.16, + "learning_rate": 4.37976514550331e-05, + "loss": 7.2125, + "step": 56450 + }, + { + "epoch": 4.17, + "learning_rate": 4.3747425942481745e-05, + "loss": 6.4719, + "step": 56500 + }, + { + "epoch": 4.17, + "learning_rate": 4.369720042993039e-05, + "loss": 7.204, + "step": 56550 + }, + { + "epoch": 4.17, + "learning_rate": 4.364697491737903e-05, + "loss": 7.2371, + "step": 56600 + }, + { + "epoch": 4.18, + "learning_rate": 4.359674940482768e-05, + "loss": 6.6131, + "step": 56650 + }, + { + "epoch": 4.18, + "learning_rate": 4.354652389227633e-05, + "loss": 6.6349, + "step": 56700 + }, + { + "epoch": 4.19, + "learning_rate": 4.3496298379724965e-05, 
+ "loss": 5.9137, + "step": 56750 + }, + { + "epoch": 4.19, + "learning_rate": 4.344607286717361e-05, + "loss": 6.5402, + "step": 56800 + }, + { + "epoch": 4.19, + "learning_rate": 4.339584735462225e-05, + "loss": 7.3351, + "step": 56850 + }, + { + "epoch": 4.2, + "learning_rate": 4.33456218420709e-05, + "loss": 8.1387, + "step": 56900 + }, + { + "epoch": 4.2, + "learning_rate": 4.329539632951954e-05, + "loss": 7.0783, + "step": 56950 + }, + { + "epoch": 4.2, + "learning_rate": 4.324517081696819e-05, + "loss": 6.947, + "step": 57000 + }, + { + "epoch": 4.21, + "learning_rate": 4.3194945304416836e-05, + "loss": 6.1526, + "step": 57050 + }, + { + "epoch": 4.21, + "learning_rate": 4.314471979186548e-05, + "loss": 7.273, + "step": 57100 + }, + { + "epoch": 4.22, + "learning_rate": 4.3094494279314124e-05, + "loss": 7.0958, + "step": 57150 + }, + { + "epoch": 4.22, + "learning_rate": 4.304426876676277e-05, + "loss": 6.4413, + "step": 57200 + }, + { + "epoch": 4.22, + "learning_rate": 4.299404325421141e-05, + "loss": 6.597, + "step": 57250 + }, + { + "epoch": 4.23, + "learning_rate": 4.294381774166006e-05, + "loss": 6.6893, + "step": 57300 + }, + { + "epoch": 4.23, + "learning_rate": 4.28935922291087e-05, + "loss": 6.4746, + "step": 57350 + }, + { + "epoch": 4.23, + "learning_rate": 4.2843366716557345e-05, + "loss": 7.376, + "step": 57400 + }, + { + "epoch": 4.24, + "learning_rate": 4.279314120400599e-05, + "loss": 7.2823, + "step": 57450 + }, + { + "epoch": 4.24, + "learning_rate": 4.274291569145463e-05, + "loss": 6.3184, + "step": 57500 + }, + { + "epoch": 4.25, + "learning_rate": 4.269269017890328e-05, + "loss": 6.4526, + "step": 57550 + }, + { + "epoch": 4.25, + "learning_rate": 4.264246466635192e-05, + "loss": 6.7892, + "step": 57600 + }, + { + "epoch": 4.25, + "learning_rate": 4.2592239153800565e-05, + "loss": 6.2082, + "step": 57650 + }, + { + "epoch": 4.26, + "learning_rate": 4.254201364124921e-05, + "loss": 7.1488, + "step": 57700 + }, + { + "epoch": 4.26, + 
"learning_rate": 4.249178812869786e-05, + "loss": 6.9399, + "step": 57750 + }, + { + "epoch": 4.26, + "learning_rate": 4.2441562616146504e-05, + "loss": 6.8596, + "step": 57800 + }, + { + "epoch": 4.27, + "learning_rate": 4.239133710359515e-05, + "loss": 6.8899, + "step": 57850 + }, + { + "epoch": 4.27, + "learning_rate": 4.2341111591043785e-05, + "loss": 6.8196, + "step": 57900 + }, + { + "epoch": 4.27, + "learning_rate": 4.229088607849243e-05, + "loss": 7.5114, + "step": 57950 + }, + { + "epoch": 4.28, + "learning_rate": 4.224066056594107e-05, + "loss": 6.4122, + "step": 58000 + }, + { + "epoch": 4.28, + "learning_rate": 4.219043505338972e-05, + "loss": 6.9228, + "step": 58050 + }, + { + "epoch": 4.29, + "learning_rate": 4.214020954083837e-05, + "loss": 6.3687, + "step": 58100 + }, + { + "epoch": 4.29, + "learning_rate": 4.208998402828701e-05, + "loss": 6.8616, + "step": 58150 + }, + { + "epoch": 4.29, + "learning_rate": 4.2039758515735656e-05, + "loss": 6.002, + "step": 58200 + }, + { + "epoch": 4.3, + "learning_rate": 4.19895330031843e-05, + "loss": 6.0985, + "step": 58250 + }, + { + "epoch": 4.3, + "learning_rate": 4.1939307490632944e-05, + "loss": 6.5857, + "step": 58300 + }, + { + "epoch": 4.3, + "learning_rate": 4.188908197808159e-05, + "loss": 6.257, + "step": 58350 + }, + { + "epoch": 4.31, + "learning_rate": 4.183885646553023e-05, + "loss": 6.9222, + "step": 58400 + }, + { + "epoch": 4.31, + "learning_rate": 4.178863095297888e-05, + "loss": 6.7801, + "step": 58450 + }, + { + "epoch": 4.32, + "learning_rate": 4.173840544042752e-05, + "loss": 6.3861, + "step": 58500 + }, + { + "epoch": 4.32, + "learning_rate": 4.1688179927876165e-05, + "loss": 6.8685, + "step": 58550 + }, + { + "epoch": 4.32, + "learning_rate": 4.163795441532481e-05, + "loss": 6.9948, + "step": 58600 + }, + { + "epoch": 4.33, + "learning_rate": 4.158772890277345e-05, + "loss": 6.0965, + "step": 58650 + }, + { + "epoch": 4.33, + "learning_rate": 4.15375033902221e-05, + "loss": 7.282, + 
"step": 58700 + }, + { + "epoch": 4.33, + "learning_rate": 4.148727787767074e-05, + "loss": 7.6165, + "step": 58750 + }, + { + "epoch": 4.34, + "learning_rate": 4.143705236511939e-05, + "loss": 6.734, + "step": 58800 + }, + { + "epoch": 4.34, + "learning_rate": 4.1386826852568036e-05, + "loss": 6.0334, + "step": 58850 + }, + { + "epoch": 4.34, + "learning_rate": 4.133660134001668e-05, + "loss": 6.5306, + "step": 58900 + }, + { + "epoch": 4.35, + "learning_rate": 4.1286375827465324e-05, + "loss": 7.4324, + "step": 58950 + }, + { + "epoch": 4.35, + "learning_rate": 4.123615031491397e-05, + "loss": 7.234, + "step": 59000 + }, + { + "epoch": 4.36, + "learning_rate": 4.1185924802362605e-05, + "loss": 6.7196, + "step": 59050 + }, + { + "epoch": 4.36, + "learning_rate": 4.113569928981125e-05, + "loss": 6.0641, + "step": 59100 + }, + { + "epoch": 4.36, + "learning_rate": 4.10854737772599e-05, + "loss": 5.9373, + "step": 59150 + }, + { + "epoch": 4.37, + "learning_rate": 4.1035248264708544e-05, + "loss": 6.4428, + "step": 59200 + }, + { + "epoch": 4.37, + "learning_rate": 4.098502275215719e-05, + "loss": 6.7303, + "step": 59250 + }, + { + "epoch": 4.37, + "learning_rate": 4.093479723960583e-05, + "loss": 6.5585, + "step": 59300 + }, + { + "epoch": 4.38, + "learning_rate": 4.0884571727054476e-05, + "loss": 5.837, + "step": 59350 + }, + { + "epoch": 4.38, + "learning_rate": 4.083434621450312e-05, + "loss": 6.574, + "step": 59400 + }, + { + "epoch": 4.39, + "learning_rate": 4.0784120701951764e-05, + "loss": 7.4319, + "step": 59450 + }, + { + "epoch": 4.39, + "learning_rate": 4.073389518940041e-05, + "loss": 6.1092, + "step": 59500 + }, + { + "epoch": 4.39, + "learning_rate": 4.068366967684906e-05, + "loss": 6.4683, + "step": 59550 + }, + { + "epoch": 4.4, + "learning_rate": 4.06334441642977e-05, + "loss": 7.1323, + "step": 59600 + }, + { + "epoch": 4.4, + "learning_rate": 4.058321865174634e-05, + "loss": 6.7011, + "step": 59650 + }, + { + "epoch": 4.4, + "learning_rate": 
4.0532993139194985e-05, + "loss": 6.7281, + "step": 59700 + }, + { + "epoch": 4.41, + "learning_rate": 4.048276762664363e-05, + "loss": 6.0361, + "step": 59750 + }, + { + "epoch": 4.41, + "learning_rate": 4.043254211409227e-05, + "loss": 6.5359, + "step": 59800 + }, + { + "epoch": 4.41, + "learning_rate": 4.0382316601540924e-05, + "loss": 7.3576, + "step": 59850 + }, + { + "epoch": 4.42, + "learning_rate": 4.033209108898957e-05, + "loss": 6.7324, + "step": 59900 + }, + { + "epoch": 4.42, + "learning_rate": 4.028186557643821e-05, + "loss": 7.1445, + "step": 59950 + }, + { + "epoch": 4.43, + "learning_rate": 4.0231640063886856e-05, + "loss": 5.426, + "step": 60000 + }, + { + "epoch": 4.43, + "eval_loss": 7.799332618713379, + "eval_runtime": 963.4222, + "eval_samples_per_second": 13.595, + "eval_steps_per_second": 3.399, + "eval_wer": 0.20775061946159337, + "step": 60000 + }, + { + "epoch": 4.43, + "learning_rate": 4.01814145513355e-05, + "loss": 6.577, + "step": 60050 + }, + { + "epoch": 4.43, + "learning_rate": 4.0131189038784144e-05, + "loss": 7.1169, + "step": 60100 + }, + { + "epoch": 4.44, + "learning_rate": 4.008096352623279e-05, + "loss": 7.2535, + "step": 60150 + }, + { + "epoch": 4.44, + "learning_rate": 4.003073801368143e-05, + "loss": 6.2427, + "step": 60200 + }, + { + "epoch": 4.44, + "learning_rate": 3.9980512501130076e-05, + "loss": 6.3184, + "step": 60250 + }, + { + "epoch": 4.45, + "learning_rate": 3.993028698857872e-05, + "loss": 6.9348, + "step": 60300 + }, + { + "epoch": 4.45, + "learning_rate": 3.9880061476027364e-05, + "loss": 6.5074, + "step": 60350 + }, + { + "epoch": 4.46, + "learning_rate": 3.982983596347601e-05, + "loss": 7.2028, + "step": 60400 + }, + { + "epoch": 4.46, + "learning_rate": 3.977961045092465e-05, + "loss": 7.1185, + "step": 60450 + }, + { + "epoch": 4.46, + "learning_rate": 3.9729384938373296e-05, + "loss": 6.5089, + "step": 60500 + }, + { + "epoch": 4.47, + "learning_rate": 3.967915942582194e-05, + "loss": 5.8853, + "step": 
60550 + }, + { + "epoch": 4.47, + "learning_rate": 3.962893391327059e-05, + "loss": 6.0402, + "step": 60600 + }, + { + "epoch": 4.47, + "learning_rate": 3.9578708400719235e-05, + "loss": 6.6078, + "step": 60650 + }, + { + "epoch": 4.48, + "learning_rate": 3.952848288816788e-05, + "loss": 6.4986, + "step": 60700 + }, + { + "epoch": 4.48, + "learning_rate": 3.947825737561652e-05, + "loss": 6.9922, + "step": 60750 + }, + { + "epoch": 4.48, + "learning_rate": 3.942803186306516e-05, + "loss": 6.8327, + "step": 60800 + }, + { + "epoch": 4.49, + "learning_rate": 3.9377806350513805e-05, + "loss": 6.7131, + "step": 60850 + }, + { + "epoch": 4.49, + "learning_rate": 3.932758083796245e-05, + "loss": 6.3372, + "step": 60900 + }, + { + "epoch": 4.5, + "learning_rate": 3.92773553254111e-05, + "loss": 6.5337, + "step": 60950 + }, + { + "epoch": 4.5, + "learning_rate": 3.9227129812859744e-05, + "loss": 7.5148, + "step": 61000 + }, + { + "epoch": 4.5, + "learning_rate": 3.917690430030839e-05, + "loss": 6.2416, + "step": 61050 + }, + { + "epoch": 4.51, + "learning_rate": 3.912667878775703e-05, + "loss": 6.3845, + "step": 61100 + }, + { + "epoch": 4.51, + "learning_rate": 3.9076453275205676e-05, + "loss": 7.0156, + "step": 61150 + }, + { + "epoch": 4.51, + "learning_rate": 3.902622776265432e-05, + "loss": 6.4239, + "step": 61200 + }, + { + "epoch": 4.52, + "learning_rate": 3.8976002250102964e-05, + "loss": 7.2111, + "step": 61250 + }, + { + "epoch": 4.52, + "learning_rate": 3.892577673755161e-05, + "loss": 6.5958, + "step": 61300 + }, + { + "epoch": 4.53, + "learning_rate": 3.887555122500025e-05, + "loss": 7.3125, + "step": 61350 + }, + { + "epoch": 4.53, + "learning_rate": 3.8825325712448896e-05, + "loss": 6.0597, + "step": 61400 + }, + { + "epoch": 4.53, + "learning_rate": 3.877510019989754e-05, + "loss": 6.3881, + "step": 61450 + }, + { + "epoch": 4.54, + "learning_rate": 3.8724874687346184e-05, + "loss": 6.2309, + "step": 61500 + }, + { + "epoch": 4.54, + "learning_rate": 
3.867464917479483e-05, + "loss": 6.7603, + "step": 61550 + }, + { + "epoch": 4.54, + "learning_rate": 3.862442366224347e-05, + "loss": 7.386, + "step": 61600 + }, + { + "epoch": 4.55, + "learning_rate": 3.857419814969212e-05, + "loss": 7.1897, + "step": 61650 + }, + { + "epoch": 4.55, + "learning_rate": 3.852397263714077e-05, + "loss": 6.8813, + "step": 61700 + }, + { + "epoch": 4.55, + "learning_rate": 3.847374712458941e-05, + "loss": 7.9826, + "step": 61750 + }, + { + "epoch": 4.56, + "learning_rate": 3.8423521612038055e-05, + "loss": 6.4649, + "step": 61800 + }, + { + "epoch": 4.56, + "learning_rate": 3.83732960994867e-05, + "loss": 6.0005, + "step": 61850 + }, + { + "epoch": 4.57, + "learning_rate": 3.832307058693534e-05, + "loss": 6.7249, + "step": 61900 + }, + { + "epoch": 4.57, + "learning_rate": 3.827284507438398e-05, + "loss": 7.3703, + "step": 61950 + }, + { + "epoch": 4.57, + "learning_rate": 3.822261956183263e-05, + "loss": 6.3157, + "step": 62000 + }, + { + "epoch": 4.58, + "learning_rate": 3.8172394049281276e-05, + "loss": 6.3982, + "step": 62050 + }, + { + "epoch": 4.58, + "learning_rate": 3.812216853672992e-05, + "loss": 6.3995, + "step": 62100 + }, + { + "epoch": 4.58, + "learning_rate": 3.8071943024178564e-05, + "loss": 6.9792, + "step": 62150 + }, + { + "epoch": 4.59, + "learning_rate": 3.802171751162721e-05, + "loss": 7.7098, + "step": 62200 + }, + { + "epoch": 4.59, + "learning_rate": 3.797149199907585e-05, + "loss": 7.3092, + "step": 62250 + }, + { + "epoch": 4.6, + "learning_rate": 3.7921266486524496e-05, + "loss": 6.7666, + "step": 62300 + }, + { + "epoch": 4.6, + "learning_rate": 3.787104097397314e-05, + "loss": 6.1829, + "step": 62350 + }, + { + "epoch": 4.6, + "learning_rate": 3.782081546142179e-05, + "loss": 8.2604, + "step": 62400 + }, + { + "epoch": 4.61, + "learning_rate": 3.777058994887043e-05, + "loss": 6.7275, + "step": 62450 + }, + { + "epoch": 4.61, + "learning_rate": 3.772036443631907e-05, + "loss": 6.8682, + "step": 62500 + }, 
+ { + "epoch": 4.61, + "learning_rate": 3.7670138923767716e-05, + "loss": 7.4531, + "step": 62550 + }, + { + "epoch": 4.62, + "learning_rate": 3.761991341121636e-05, + "loss": 7.4792, + "step": 62600 + }, + { + "epoch": 4.62, + "learning_rate": 3.7569687898665004e-05, + "loss": 6.3364, + "step": 62650 + }, + { + "epoch": 4.62, + "learning_rate": 3.7519462386113655e-05, + "loss": 6.395, + "step": 62700 + }, + { + "epoch": 4.63, + "learning_rate": 3.74692368735623e-05, + "loss": 6.4644, + "step": 62750 + }, + { + "epoch": 4.63, + "learning_rate": 3.741901136101094e-05, + "loss": 7.6636, + "step": 62800 + }, + { + "epoch": 4.64, + "learning_rate": 3.736878584845959e-05, + "loss": 6.5346, + "step": 62850 + }, + { + "epoch": 4.64, + "learning_rate": 3.731856033590823e-05, + "loss": 7.7544, + "step": 62900 + }, + { + "epoch": 4.64, + "learning_rate": 3.7268334823356875e-05, + "loss": 7.1518, + "step": 62950 + }, + { + "epoch": 4.65, + "learning_rate": 3.721810931080552e-05, + "loss": 5.2845, + "step": 63000 + }, + { + "epoch": 4.65, + "learning_rate": 3.7167883798254163e-05, + "loss": 6.4635, + "step": 63050 + }, + { + "epoch": 4.65, + "learning_rate": 3.711765828570281e-05, + "loss": 6.7313, + "step": 63100 + }, + { + "epoch": 4.66, + "learning_rate": 3.706743277315145e-05, + "loss": 6.2767, + "step": 63150 + }, + { + "epoch": 4.66, + "learning_rate": 3.7017207260600096e-05, + "loss": 6.2349, + "step": 63200 + }, + { + "epoch": 4.67, + "learning_rate": 3.696698174804874e-05, + "loss": 7.2152, + "step": 63250 + }, + { + "epoch": 4.67, + "learning_rate": 3.6916756235497384e-05, + "loss": 6.4904, + "step": 63300 + }, + { + "epoch": 4.67, + "learning_rate": 3.686653072294603e-05, + "loss": 6.4779, + "step": 63350 + }, + { + "epoch": 4.68, + "learning_rate": 3.681630521039467e-05, + "loss": 7.0359, + "step": 63400 + }, + { + "epoch": 4.68, + "learning_rate": 3.676607969784332e-05, + "loss": 6.3846, + "step": 63450 + }, + { + "epoch": 4.68, + "learning_rate": 
3.671585418529197e-05, + "loss": 6.3923, + "step": 63500 + }, + { + "epoch": 4.69, + "learning_rate": 3.666562867274061e-05, + "loss": 6.7271, + "step": 63550 + }, + { + "epoch": 4.69, + "learning_rate": 3.661540316018925e-05, + "loss": 6.6546, + "step": 63600 + }, + { + "epoch": 4.69, + "learning_rate": 3.656517764763789e-05, + "loss": 7.1482, + "step": 63650 + }, + { + "epoch": 4.7, + "learning_rate": 3.6514952135086536e-05, + "loss": 6.4931, + "step": 63700 + }, + { + "epoch": 4.7, + "learning_rate": 3.646472662253518e-05, + "loss": 5.1346, + "step": 63750 + }, + { + "epoch": 4.71, + "learning_rate": 3.641450110998383e-05, + "loss": 6.25, + "step": 63800 + }, + { + "epoch": 4.71, + "learning_rate": 3.6364275597432475e-05, + "loss": 6.5812, + "step": 63850 + }, + { + "epoch": 4.71, + "learning_rate": 3.631405008488112e-05, + "loss": 6.4797, + "step": 63900 + }, + { + "epoch": 4.72, + "learning_rate": 3.626382457232976e-05, + "loss": 6.2886, + "step": 63950 + }, + { + "epoch": 4.72, + "learning_rate": 3.621359905977841e-05, + "loss": 6.4452, + "step": 64000 + }, + { + "epoch": 4.72, + "learning_rate": 3.616337354722705e-05, + "loss": 6.5534, + "step": 64050 + }, + { + "epoch": 4.73, + "learning_rate": 3.6113148034675695e-05, + "loss": 6.9353, + "step": 64100 + }, + { + "epoch": 4.73, + "learning_rate": 3.606292252212434e-05, + "loss": 6.1219, + "step": 64150 + }, + { + "epoch": 4.74, + "learning_rate": 3.6012697009572984e-05, + "loss": 6.8233, + "step": 64200 + }, + { + "epoch": 4.74, + "learning_rate": 3.596247149702163e-05, + "loss": 7.1924, + "step": 64250 + }, + { + "epoch": 4.74, + "learning_rate": 3.591224598447027e-05, + "loss": 7.1116, + "step": 64300 + }, + { + "epoch": 4.75, + "learning_rate": 3.5862020471918916e-05, + "loss": 7.2818, + "step": 64350 + }, + { + "epoch": 4.75, + "learning_rate": 3.581179495936756e-05, + "loss": 6.3182, + "step": 64400 + }, + { + "epoch": 4.75, + "learning_rate": 3.5761569446816204e-05, + "loss": 6.7712, + "step": 64450 + 
}, + { + "epoch": 4.76, + "learning_rate": 3.5711343934264855e-05, + "loss": 6.7902, + "step": 64500 + }, + { + "epoch": 4.76, + "learning_rate": 3.56611184217135e-05, + "loss": 6.5504, + "step": 64550 + }, + { + "epoch": 4.77, + "learning_rate": 3.561089290916214e-05, + "loss": 6.3599, + "step": 64600 + }, + { + "epoch": 4.77, + "learning_rate": 3.556066739661079e-05, + "loss": 6.4758, + "step": 64650 + }, + { + "epoch": 4.77, + "learning_rate": 3.551044188405943e-05, + "loss": 7.2899, + "step": 64700 + }, + { + "epoch": 4.78, + "learning_rate": 3.546021637150807e-05, + "loss": 6.6164, + "step": 64750 + }, + { + "epoch": 4.78, + "learning_rate": 3.540999085895671e-05, + "loss": 6.0466, + "step": 64800 + }, + { + "epoch": 4.78, + "learning_rate": 3.535976534640536e-05, + "loss": 6.2209, + "step": 64850 + }, + { + "epoch": 4.79, + "learning_rate": 3.530953983385401e-05, + "loss": 6.6098, + "step": 64900 + }, + { + "epoch": 4.79, + "learning_rate": 3.525931432130265e-05, + "loss": 5.959, + "step": 64950 + }, + { + "epoch": 4.79, + "learning_rate": 3.5209088808751295e-05, + "loss": 6.6942, + "step": 65000 + }, + { + "epoch": 4.8, + "learning_rate": 3.515886329619994e-05, + "loss": 7.7404, + "step": 65050 + }, + { + "epoch": 4.8, + "learning_rate": 3.510863778364858e-05, + "loss": 6.5342, + "step": 65100 + }, + { + "epoch": 4.81, + "learning_rate": 3.505841227109723e-05, + "loss": 6.1434, + "step": 65150 + }, + { + "epoch": 4.81, + "learning_rate": 3.500818675854587e-05, + "loss": 5.8523, + "step": 65200 + }, + { + "epoch": 4.81, + "learning_rate": 3.495796124599452e-05, + "loss": 5.7111, + "step": 65250 + }, + { + "epoch": 4.82, + "learning_rate": 3.490773573344316e-05, + "loss": 6.4092, + "step": 65300 + }, + { + "epoch": 4.82, + "learning_rate": 3.4857510220891804e-05, + "loss": 6.5057, + "step": 65350 + }, + { + "epoch": 4.82, + "learning_rate": 3.480728470834045e-05, + "loss": 5.9863, + "step": 65400 + }, + { + "epoch": 4.83, + "learning_rate": 
3.475705919578909e-05, + "loss": 6.0978, + "step": 65450 + }, + { + "epoch": 4.83, + "learning_rate": 3.4706833683237736e-05, + "loss": 6.6325, + "step": 65500 + }, + { + "epoch": 4.84, + "learning_rate": 3.4656608170686387e-05, + "loss": 5.9716, + "step": 65550 + }, + { + "epoch": 4.84, + "learning_rate": 3.460638265813503e-05, + "loss": 5.9812, + "step": 65600 + }, + { + "epoch": 4.84, + "learning_rate": 3.4556157145583675e-05, + "loss": 5.6784, + "step": 65650 + }, + { + "epoch": 4.85, + "learning_rate": 3.450593163303232e-05, + "loss": 6.7303, + "step": 65700 + }, + { + "epoch": 4.85, + "learning_rate": 3.445570612048096e-05, + "loss": 7.527, + "step": 65750 + }, + { + "epoch": 4.85, + "learning_rate": 3.440548060792961e-05, + "loss": 6.3649, + "step": 65800 + }, + { + "epoch": 4.86, + "learning_rate": 3.435525509537825e-05, + "loss": 6.6456, + "step": 65850 + }, + { + "epoch": 4.86, + "learning_rate": 3.4305029582826895e-05, + "loss": 6.2518, + "step": 65900 + }, + { + "epoch": 4.86, + "learning_rate": 3.425480407027554e-05, + "loss": 6.0104, + "step": 65950 + }, + { + "epoch": 4.87, + "learning_rate": 3.420457855772418e-05, + "loss": 7.0261, + "step": 66000 + }, + { + "epoch": 4.87, + "learning_rate": 3.415435304517283e-05, + "loss": 6.4373, + "step": 66050 + }, + { + "epoch": 4.88, + "learning_rate": 3.410412753262147e-05, + "loss": 6.3931, + "step": 66100 + }, + { + "epoch": 4.88, + "learning_rate": 3.4053902020070115e-05, + "loss": 7.0678, + "step": 66150 + }, + { + "epoch": 4.88, + "learning_rate": 3.400367650751876e-05, + "loss": 6.9086, + "step": 66200 + }, + { + "epoch": 4.89, + "learning_rate": 3.39534509949674e-05, + "loss": 6.3431, + "step": 66250 + }, + { + "epoch": 4.89, + "learning_rate": 3.3903225482416054e-05, + "loss": 7.298, + "step": 66300 + }, + { + "epoch": 4.89, + "learning_rate": 3.38529999698647e-05, + "loss": 6.4188, + "step": 66350 + }, + { + "epoch": 4.9, + "learning_rate": 3.380277445731334e-05, + "loss": 6.1998, + "step": 66400 + 
}, + { + "epoch": 4.9, + "learning_rate": 3.375254894476198e-05, + "loss": 6.7306, + "step": 66450 + }, + { + "epoch": 4.91, + "learning_rate": 3.3702323432210624e-05, + "loss": 6.0333, + "step": 66500 + }, + { + "epoch": 4.91, + "learning_rate": 3.365209791965927e-05, + "loss": 6.9375, + "step": 66550 + }, + { + "epoch": 4.91, + "learning_rate": 3.360187240710791e-05, + "loss": 6.4168, + "step": 66600 + }, + { + "epoch": 4.92, + "learning_rate": 3.355164689455656e-05, + "loss": 6.6486, + "step": 66650 + }, + { + "epoch": 4.92, + "learning_rate": 3.3501421382005207e-05, + "loss": 5.6693, + "step": 66700 + }, + { + "epoch": 4.92, + "learning_rate": 3.345119586945385e-05, + "loss": 6.7548, + "step": 66750 + }, + { + "epoch": 4.93, + "learning_rate": 3.3400970356902495e-05, + "loss": 8.7782, + "step": 66800 + }, + { + "epoch": 4.93, + "learning_rate": 3.335074484435114e-05, + "loss": 6.9741, + "step": 66850 + }, + { + "epoch": 4.93, + "learning_rate": 3.330051933179978e-05, + "loss": 6.423, + "step": 66900 + }, + { + "epoch": 4.94, + "learning_rate": 3.325029381924843e-05, + "loss": 6.0077, + "step": 66950 + }, + { + "epoch": 4.94, + "learning_rate": 3.320006830669707e-05, + "loss": 6.4085, + "step": 67000 + }, + { + "epoch": 4.95, + "learning_rate": 3.3149842794145715e-05, + "loss": 6.383, + "step": 67050 + }, + { + "epoch": 4.95, + "learning_rate": 3.309961728159436e-05, + "loss": 5.8758, + "step": 67100 + }, + { + "epoch": 4.95, + "learning_rate": 3.3049391769043e-05, + "loss": 7.1169, + "step": 67150 + }, + { + "epoch": 4.96, + "learning_rate": 3.299916625649165e-05, + "loss": 7.5655, + "step": 67200 + }, + { + "epoch": 4.96, + "learning_rate": 3.294894074394029e-05, + "loss": 6.1727, + "step": 67250 + }, + { + "epoch": 4.96, + "learning_rate": 3.2898715231388935e-05, + "loss": 6.0568, + "step": 67300 + }, + { + "epoch": 4.97, + "learning_rate": 3.2848489718837586e-05, + "loss": 6.9697, + "step": 67350 + }, + { + "epoch": 4.97, + "learning_rate": 
3.279826420628623e-05, + "loss": 6.7938, + "step": 67400 + }, + { + "epoch": 4.98, + "learning_rate": 3.2748038693734874e-05, + "loss": 6.3006, + "step": 67450 + }, + { + "epoch": 4.98, + "learning_rate": 3.269781318118352e-05, + "loss": 6.3299, + "step": 67500 + }, + { + "epoch": 4.98, + "learning_rate": 3.264758766863216e-05, + "loss": 6.3615, + "step": 67550 + }, + { + "epoch": 4.99, + "learning_rate": 3.25973621560808e-05, + "loss": 6.1106, + "step": 67600 + }, + { + "epoch": 4.99, + "learning_rate": 3.2547136643529444e-05, + "loss": 5.571, + "step": 67650 + }, + { + "epoch": 4.99, + "learning_rate": 3.2496911130978094e-05, + "loss": 6.5922, + "step": 67700 + }, + { + "epoch": 5.0, + "learning_rate": 3.244668561842674e-05, + "loss": 5.663, + "step": 67750 + }, + { + "epoch": 5.0, + "learning_rate": 3.239646010587538e-05, + "loss": 7.3669, + "step": 67800 + }, + { + "epoch": 5.0, + "learning_rate": 3.2346234593324027e-05, + "loss": 6.015, + "step": 67850 + }, + { + "epoch": 5.01, + "learning_rate": 3.229600908077267e-05, + "loss": 5.8678, + "step": 67900 + }, + { + "epoch": 5.01, + "learning_rate": 3.2245783568221315e-05, + "loss": 5.5537, + "step": 67950 + }, + { + "epoch": 5.02, + "learning_rate": 3.219555805566996e-05, + "loss": 6.175, + "step": 68000 + }, + { + "epoch": 5.02, + "learning_rate": 3.21453325431186e-05, + "loss": 5.9018, + "step": 68050 + }, + { + "epoch": 5.02, + "learning_rate": 3.2095107030567254e-05, + "loss": 6.9064, + "step": 68100 + }, + { + "epoch": 5.03, + "learning_rate": 3.204488151801589e-05, + "loss": 6.8775, + "step": 68150 + }, + { + "epoch": 5.03, + "learning_rate": 3.1994656005464535e-05, + "loss": 5.6397, + "step": 68200 + }, + { + "epoch": 5.03, + "learning_rate": 3.194443049291318e-05, + "loss": 5.815, + "step": 68250 + }, + { + "epoch": 5.04, + "learning_rate": 3.189420498036182e-05, + "loss": 6.0795, + "step": 68300 + }, + { + "epoch": 5.04, + "learning_rate": 3.184397946781047e-05, + "loss": 6.8721, + "step": 68350 + }, + 
{ + "epoch": 5.05, + "learning_rate": 3.179375395525912e-05, + "loss": 6.4936, + "step": 68400 + }, + { + "epoch": 5.05, + "learning_rate": 3.174352844270776e-05, + "loss": 5.7195, + "step": 68450 + }, + { + "epoch": 5.05, + "learning_rate": 3.1693302930156406e-05, + "loss": 5.6897, + "step": 68500 + }, + { + "epoch": 5.06, + "learning_rate": 3.164307741760505e-05, + "loss": 6.2271, + "step": 68550 + }, + { + "epoch": 5.06, + "learning_rate": 3.1592851905053694e-05, + "loss": 6.3731, + "step": 68600 + }, + { + "epoch": 5.06, + "learning_rate": 3.154262639250234e-05, + "loss": 5.8502, + "step": 68650 + }, + { + "epoch": 5.07, + "learning_rate": 3.149240087995098e-05, + "loss": 5.8768, + "step": 68700 + }, + { + "epoch": 5.07, + "learning_rate": 3.1442175367399626e-05, + "loss": 6.4265, + "step": 68750 + }, + { + "epoch": 5.07, + "learning_rate": 3.139194985484827e-05, + "loss": 5.8952, + "step": 68800 + }, + { + "epoch": 5.08, + "learning_rate": 3.1341724342296914e-05, + "loss": 5.3739, + "step": 68850 + }, + { + "epoch": 5.08, + "learning_rate": 3.129149882974556e-05, + "loss": 6.4317, + "step": 68900 + }, + { + "epoch": 5.09, + "learning_rate": 3.12412733171942e-05, + "loss": 5.6296, + "step": 68950 + }, + { + "epoch": 5.09, + "learning_rate": 3.1191047804642847e-05, + "loss": 6.2448, + "step": 69000 + }, + { + "epoch": 5.09, + "learning_rate": 3.114082229209149e-05, + "loss": 6.116, + "step": 69050 + }, + { + "epoch": 5.1, + "learning_rate": 3.1090596779540135e-05, + "loss": 6.4299, + "step": 69100 + }, + { + "epoch": 5.1, + "learning_rate": 3.1040371266988785e-05, + "loss": 6.7337, + "step": 69150 + }, + { + "epoch": 5.1, + "learning_rate": 3.099014575443743e-05, + "loss": 6.6103, + "step": 69200 + }, + { + "epoch": 5.11, + "learning_rate": 3.0939920241886074e-05, + "loss": 5.6155, + "step": 69250 + }, + { + "epoch": 5.11, + "learning_rate": 3.088969472933471e-05, + "loss": 5.7131, + "step": 69300 + }, + { + "epoch": 5.12, + "learning_rate": 
3.0839469216783355e-05, + "loss": 5.6799, + "step": 69350 + }, + { + "epoch": 5.12, + "learning_rate": 3.0789243704232e-05, + "loss": 5.9907, + "step": 69400 + }, + { + "epoch": 5.12, + "learning_rate": 3.073901819168064e-05, + "loss": 5.7125, + "step": 69450 + }, + { + "epoch": 5.13, + "learning_rate": 3.0688792679129294e-05, + "loss": 6.3093, + "step": 69500 + }, + { + "epoch": 5.13, + "learning_rate": 3.063856716657794e-05, + "loss": 6.1113, + "step": 69550 + }, + { + "epoch": 5.13, + "learning_rate": 3.058834165402658e-05, + "loss": 5.5845, + "step": 69600 + }, + { + "epoch": 5.14, + "learning_rate": 3.0538116141475226e-05, + "loss": 5.9267, + "step": 69650 + }, + { + "epoch": 5.14, + "learning_rate": 3.048789062892387e-05, + "loss": 6.0062, + "step": 69700 + }, + { + "epoch": 5.14, + "learning_rate": 3.0437665116372514e-05, + "loss": 6.005, + "step": 69750 + }, + { + "epoch": 5.15, + "learning_rate": 3.038743960382116e-05, + "loss": 5.854, + "step": 69800 + }, + { + "epoch": 5.15, + "learning_rate": 3.0337214091269806e-05, + "loss": 6.3468, + "step": 69850 + }, + { + "epoch": 5.16, + "learning_rate": 3.0286988578718446e-05, + "loss": 5.9127, + "step": 69900 + }, + { + "epoch": 5.16, + "learning_rate": 3.023676306616709e-05, + "loss": 6.5192, + "step": 69950 + }, + { + "epoch": 5.16, + "learning_rate": 3.0186537553615734e-05, + "loss": 5.8418, + "step": 70000 + }, + { + "epoch": 5.17, + "learning_rate": 3.013631204106438e-05, + "loss": 6.0775, + "step": 70050 + }, + { + "epoch": 5.17, + "learning_rate": 3.0086086528513026e-05, + "loss": 5.926, + "step": 70100 + }, + { + "epoch": 5.17, + "learning_rate": 3.003586101596167e-05, + "loss": 5.8467, + "step": 70150 + }, + { + "epoch": 5.18, + "learning_rate": 2.9985635503410314e-05, + "loss": 6.441, + "step": 70200 + }, + { + "epoch": 5.18, + "learning_rate": 2.9935409990858958e-05, + "loss": 5.6337, + "step": 70250 + }, + { + "epoch": 5.19, + "learning_rate": 2.9885184478307606e-05, + "loss": 6.3408, + "step": 70300 
+ }, + { + "epoch": 5.19, + "learning_rate": 2.983495896575625e-05, + "loss": 6.0077, + "step": 70350 + }, + { + "epoch": 5.19, + "learning_rate": 2.9784733453204894e-05, + "loss": 5.7263, + "step": 70400 + }, + { + "epoch": 5.2, + "learning_rate": 2.9734507940653534e-05, + "loss": 6.8161, + "step": 70450 + }, + { + "epoch": 5.2, + "learning_rate": 2.968428242810218e-05, + "loss": 6.4292, + "step": 70500 + }, + { + "epoch": 5.2, + "learning_rate": 2.9634056915550822e-05, + "loss": 6.0751, + "step": 70550 + }, + { + "epoch": 5.21, + "learning_rate": 2.9583831402999466e-05, + "loss": 6.2439, + "step": 70600 + }, + { + "epoch": 5.21, + "learning_rate": 2.9533605890448114e-05, + "loss": 5.6406, + "step": 70650 + }, + { + "epoch": 5.22, + "learning_rate": 2.9483380377896758e-05, + "loss": 5.5623, + "step": 70700 + }, + { + "epoch": 5.22, + "learning_rate": 2.9433154865345402e-05, + "loss": 6.3392, + "step": 70750 + }, + { + "epoch": 5.22, + "learning_rate": 2.9382929352794046e-05, + "loss": 7.3752, + "step": 70800 + }, + { + "epoch": 5.23, + "learning_rate": 2.9332703840242693e-05, + "loss": 6.2126, + "step": 70850 + }, + { + "epoch": 5.23, + "learning_rate": 2.9282478327691338e-05, + "loss": 5.3583, + "step": 70900 + }, + { + "epoch": 5.23, + "learning_rate": 2.923225281513998e-05, + "loss": 5.4659, + "step": 70950 + }, + { + "epoch": 5.24, + "learning_rate": 2.9182027302588626e-05, + "loss": 6.1876, + "step": 71000 + }, + { + "epoch": 5.24, + "learning_rate": 2.9131801790037266e-05, + "loss": 5.8878, + "step": 71050 + }, + { + "epoch": 5.24, + "learning_rate": 2.908157627748591e-05, + "loss": 6.2974, + "step": 71100 + }, + { + "epoch": 5.25, + "learning_rate": 2.9031350764934558e-05, + "loss": 6.348, + "step": 71150 + }, + { + "epoch": 5.25, + "learning_rate": 2.8981125252383202e-05, + "loss": 5.9929, + "step": 71200 + }, + { + "epoch": 5.26, + "learning_rate": 2.8930899739831846e-05, + "loss": 5.9609, + "step": 71250 + }, + { + "epoch": 5.26, + "learning_rate": 
2.888067422728049e-05, + "loss": 5.4301, + "step": 71300 + }, + { + "epoch": 5.26, + "learning_rate": 2.8830448714729137e-05, + "loss": 5.4559, + "step": 71350 + }, + { + "epoch": 5.27, + "learning_rate": 2.878022320217778e-05, + "loss": 7.1806, + "step": 71400 + }, + { + "epoch": 5.27, + "learning_rate": 2.8729997689626426e-05, + "loss": 5.6962, + "step": 71450 + }, + { + "epoch": 5.27, + "learning_rate": 2.867977217707507e-05, + "loss": 5.2751, + "step": 71500 + }, + { + "epoch": 5.28, + "learning_rate": 2.8629546664523717e-05, + "loss": 5.8732, + "step": 71550 + }, + { + "epoch": 5.28, + "learning_rate": 2.8579321151972354e-05, + "loss": 5.3111, + "step": 71600 + }, + { + "epoch": 5.29, + "learning_rate": 2.8529095639421e-05, + "loss": 6.269, + "step": 71650 + }, + { + "epoch": 5.29, + "learning_rate": 2.8478870126869646e-05, + "loss": 4.7494, + "step": 71700 + }, + { + "epoch": 5.29, + "learning_rate": 2.842864461431829e-05, + "loss": 6.2853, + "step": 71750 + }, + { + "epoch": 5.3, + "learning_rate": 2.8378419101766934e-05, + "loss": 6.7802, + "step": 71800 + }, + { + "epoch": 5.3, + "learning_rate": 2.8328193589215578e-05, + "loss": 7.7665, + "step": 71850 + }, + { + "epoch": 5.3, + "learning_rate": 2.8277968076664225e-05, + "loss": 5.7143, + "step": 71900 + }, + { + "epoch": 5.31, + "learning_rate": 2.822774256411287e-05, + "loss": 6.1485, + "step": 71950 + }, + { + "epoch": 5.31, + "learning_rate": 2.8177517051561514e-05, + "loss": 5.9611, + "step": 72000 + }, + { + "epoch": 5.31, + "learning_rate": 2.8127291539010158e-05, + "loss": 6.9089, + "step": 72050 + }, + { + "epoch": 5.32, + "learning_rate": 2.8077066026458805e-05, + "loss": 5.3067, + "step": 72100 + }, + { + "epoch": 5.32, + "learning_rate": 2.8026840513907442e-05, + "loss": 5.7255, + "step": 72150 + }, + { + "epoch": 5.33, + "learning_rate": 2.7976615001356086e-05, + "loss": 6.6141, + "step": 72200 + }, + { + "epoch": 5.33, + "learning_rate": 2.7926389488804734e-05, + "loss": 5.1587, + "step": 
72250 + }, + { + "epoch": 5.33, + "learning_rate": 2.7876163976253378e-05, + "loss": 6.2063, + "step": 72300 + }, + { + "epoch": 5.34, + "learning_rate": 2.7825938463702022e-05, + "loss": 5.9984, + "step": 72350 + }, + { + "epoch": 5.34, + "learning_rate": 2.7775712951150666e-05, + "loss": 6.2301, + "step": 72400 + }, + { + "epoch": 5.34, + "learning_rate": 2.7725487438599313e-05, + "loss": 6.0619, + "step": 72450 + }, + { + "epoch": 5.35, + "learning_rate": 2.7675261926047957e-05, + "loss": 7.1879, + "step": 72500 + }, + { + "epoch": 5.35, + "learning_rate": 2.76250364134966e-05, + "loss": 6.8024, + "step": 72550 + }, + { + "epoch": 5.36, + "learning_rate": 2.7574810900945246e-05, + "loss": 7.412, + "step": 72600 + }, + { + "epoch": 5.36, + "learning_rate": 2.7524585388393893e-05, + "loss": 6.172, + "step": 72650 + }, + { + "epoch": 5.36, + "learning_rate": 2.7474359875842537e-05, + "loss": 5.9536, + "step": 72700 + }, + { + "epoch": 5.37, + "learning_rate": 2.7424134363291178e-05, + "loss": 6.4215, + "step": 72750 + }, + { + "epoch": 5.37, + "learning_rate": 2.7373908850739822e-05, + "loss": 5.6326, + "step": 72800 + }, + { + "epoch": 5.37, + "learning_rate": 2.7323683338188466e-05, + "loss": 5.8943, + "step": 72850 + }, + { + "epoch": 5.38, + "learning_rate": 2.727345782563711e-05, + "loss": 6.8689, + "step": 72900 + }, + { + "epoch": 5.38, + "learning_rate": 2.7223232313085757e-05, + "loss": 6.2079, + "step": 72950 + }, + { + "epoch": 5.38, + "learning_rate": 2.71730068005344e-05, + "loss": 6.4607, + "step": 73000 + }, + { + "epoch": 5.39, + "learning_rate": 2.7122781287983045e-05, + "loss": 6.0781, + "step": 73050 + }, + { + "epoch": 5.39, + "learning_rate": 2.707255577543169e-05, + "loss": 5.7624, + "step": 73100 + }, + { + "epoch": 5.4, + "learning_rate": 2.7022330262880337e-05, + "loss": 6.0385, + "step": 73150 + }, + { + "epoch": 5.4, + "learning_rate": 2.697210475032898e-05, + "loss": 5.9751, + "step": 73200 + }, + { + "epoch": 5.4, + "learning_rate": 
2.6921879237777625e-05, + "loss": 6.3938, + "step": 73250 + }, + { + "epoch": 5.41, + "learning_rate": 2.6871653725226266e-05, + "loss": 5.9229, + "step": 73300 + }, + { + "epoch": 5.41, + "learning_rate": 2.682142821267491e-05, + "loss": 6.0674, + "step": 73350 + }, + { + "epoch": 5.41, + "learning_rate": 2.6771202700123554e-05, + "loss": 6.7223, + "step": 73400 + }, + { + "epoch": 5.42, + "learning_rate": 2.6720977187572198e-05, + "loss": 5.7889, + "step": 73450 + }, + { + "epoch": 5.42, + "learning_rate": 2.6670751675020845e-05, + "loss": 7.1486, + "step": 73500 + }, + { + "epoch": 5.43, + "learning_rate": 2.662052616246949e-05, + "loss": 6.1844, + "step": 73550 + }, + { + "epoch": 5.43, + "learning_rate": 2.6570300649918133e-05, + "loss": 6.198, + "step": 73600 + }, + { + "epoch": 5.43, + "learning_rate": 2.6520075137366777e-05, + "loss": 6.6778, + "step": 73650 + }, + { + "epoch": 5.44, + "learning_rate": 2.6469849624815425e-05, + "loss": 5.9788, + "step": 73700 + }, + { + "epoch": 5.44, + "learning_rate": 2.641962411226407e-05, + "loss": 6.3568, + "step": 73750 + }, + { + "epoch": 5.44, + "learning_rate": 2.6369398599712713e-05, + "loss": 5.9383, + "step": 73800 + }, + { + "epoch": 5.45, + "learning_rate": 2.6319173087161357e-05, + "loss": 6.4832, + "step": 73850 + }, + { + "epoch": 5.45, + "learning_rate": 2.6268947574609998e-05, + "loss": 5.883, + "step": 73900 + }, + { + "epoch": 5.45, + "learning_rate": 2.6218722062058642e-05, + "loss": 6.213, + "step": 73950 + }, + { + "epoch": 5.46, + "learning_rate": 2.616849654950729e-05, + "loss": 6.5404, + "step": 74000 + }, + { + "epoch": 5.46, + "learning_rate": 2.6118271036955933e-05, + "loss": 6.1246, + "step": 74050 + }, + { + "epoch": 5.47, + "learning_rate": 2.6068045524404577e-05, + "loss": 6.0739, + "step": 74100 + }, + { + "epoch": 5.47, + "learning_rate": 2.601782001185322e-05, + "loss": 6.2085, + "step": 74150 + }, + { + "epoch": 5.47, + "learning_rate": 2.596759449930187e-05, + "loss": 6.7059, + "step": 
74200 + }, + { + "epoch": 5.48, + "learning_rate": 2.5917368986750513e-05, + "loss": 6.231, + "step": 74250 + }, + { + "epoch": 5.48, + "learning_rate": 2.5867143474199157e-05, + "loss": 6.1287, + "step": 74300 + }, + { + "epoch": 5.48, + "learning_rate": 2.58169179616478e-05, + "loss": 6.0583, + "step": 74350 + }, + { + "epoch": 5.49, + "learning_rate": 2.576669244909645e-05, + "loss": 6.1552, + "step": 74400 + }, + { + "epoch": 5.49, + "learning_rate": 2.5716466936545086e-05, + "loss": 6.4191, + "step": 74450 + }, + { + "epoch": 5.5, + "learning_rate": 2.566624142399373e-05, + "loss": 7.2899, + "step": 74500 + }, + { + "epoch": 5.5, + "learning_rate": 2.5616015911442377e-05, + "loss": 6.3234, + "step": 74550 + }, + { + "epoch": 5.5, + "learning_rate": 2.556579039889102e-05, + "loss": 5.6938, + "step": 74600 + }, + { + "epoch": 5.51, + "learning_rate": 2.5515564886339665e-05, + "loss": 5.1418, + "step": 74650 + }, + { + "epoch": 5.51, + "learning_rate": 2.546533937378831e-05, + "loss": 5.7747, + "step": 74700 + }, + { + "epoch": 5.51, + "learning_rate": 2.5415113861236957e-05, + "loss": 5.7191, + "step": 74750 + }, + { + "epoch": 5.52, + "learning_rate": 2.53648883486856e-05, + "loss": 6.0435, + "step": 74800 + }, + { + "epoch": 5.52, + "learning_rate": 2.5314662836134245e-05, + "loss": 6.1592, + "step": 74850 + }, + { + "epoch": 5.52, + "learning_rate": 2.526443732358289e-05, + "loss": 6.1625, + "step": 74900 + }, + { + "epoch": 5.53, + "learning_rate": 2.5214211811031536e-05, + "loss": 4.7958, + "step": 74950 + }, + { + "epoch": 5.53, + "learning_rate": 2.516398629848018e-05, + "loss": 6.0618, + "step": 75000 + }, + { + "epoch": 5.54, + "learning_rate": 2.5113760785928818e-05, + "loss": 5.754, + "step": 75050 + }, + { + "epoch": 5.54, + "learning_rate": 2.5063535273377465e-05, + "loss": 5.6346, + "step": 75100 + }, + { + "epoch": 5.54, + "learning_rate": 2.501330976082611e-05, + "loss": 5.6922, + "step": 75150 + }, + { + "epoch": 5.55, + "learning_rate": 
2.4963084248274753e-05, + "loss": 6.6754, + "step": 75200 + }, + { + "epoch": 5.55, + "learning_rate": 2.4912858735723397e-05, + "loss": 5.2828, + "step": 75250 + }, + { + "epoch": 5.55, + "learning_rate": 2.4862633223172045e-05, + "loss": 5.799, + "step": 75300 + }, + { + "epoch": 5.56, + "learning_rate": 2.481240771062069e-05, + "loss": 5.8229, + "step": 75350 + }, + { + "epoch": 5.56, + "learning_rate": 2.4762182198069333e-05, + "loss": 5.1759, + "step": 75400 + }, + { + "epoch": 5.57, + "learning_rate": 2.4711956685517977e-05, + "loss": 5.9411, + "step": 75450 + }, + { + "epoch": 5.57, + "learning_rate": 2.466173117296662e-05, + "loss": 5.4522, + "step": 75500 + }, + { + "epoch": 5.57, + "learning_rate": 2.4611505660415265e-05, + "loss": 6.0731, + "step": 75550 + }, + { + "epoch": 5.58, + "learning_rate": 2.456128014786391e-05, + "loss": 5.9288, + "step": 75600 + }, + { + "epoch": 5.58, + "learning_rate": 2.4511054635312557e-05, + "loss": 5.7434, + "step": 75650 + }, + { + "epoch": 5.58, + "learning_rate": 2.44608291227612e-05, + "loss": 5.5638, + "step": 75700 + }, + { + "epoch": 5.59, + "learning_rate": 2.441060361020984e-05, + "loss": 6.423, + "step": 75750 + }, + { + "epoch": 5.59, + "learning_rate": 2.436037809765849e-05, + "loss": 5.4612, + "step": 75800 + }, + { + "epoch": 5.59, + "learning_rate": 2.4310152585107133e-05, + "loss": 7.1213, + "step": 75850 + }, + { + "epoch": 5.6, + "learning_rate": 2.4259927072555777e-05, + "loss": 6.366, + "step": 75900 + }, + { + "epoch": 5.6, + "learning_rate": 2.420970156000442e-05, + "loss": 5.8278, + "step": 75950 + }, + { + "epoch": 5.61, + "learning_rate": 2.4159476047453065e-05, + "loss": 6.1465, + "step": 76000 + }, + { + "epoch": 5.61, + "learning_rate": 2.410925053490171e-05, + "loss": 5.7868, + "step": 76050 + }, + { + "epoch": 5.61, + "learning_rate": 2.4059025022350353e-05, + "loss": 5.8116, + "step": 76100 + }, + { + "epoch": 5.62, + "learning_rate": 2.4008799509798997e-05, + "loss": 5.7459, + "step": 
76150 + }, + { + "epoch": 5.62, + "learning_rate": 2.3958573997247645e-05, + "loss": 6.2053, + "step": 76200 + }, + { + "epoch": 5.62, + "learning_rate": 2.390834848469629e-05, + "loss": 5.768, + "step": 76250 + }, + { + "epoch": 5.63, + "learning_rate": 2.385812297214493e-05, + "loss": 5.9021, + "step": 76300 + }, + { + "epoch": 5.63, + "learning_rate": 2.3807897459593577e-05, + "loss": 6.2206, + "step": 76350 + }, + { + "epoch": 5.64, + "learning_rate": 2.375767194704222e-05, + "loss": 6.3088, + "step": 76400 + }, + { + "epoch": 5.64, + "learning_rate": 2.3707446434490865e-05, + "loss": 6.0105, + "step": 76450 + }, + { + "epoch": 5.64, + "learning_rate": 2.365722092193951e-05, + "loss": 5.738, + "step": 76500 + }, + { + "epoch": 5.65, + "learning_rate": 2.3606995409388156e-05, + "loss": 6.2838, + "step": 76550 + }, + { + "epoch": 5.65, + "learning_rate": 2.3556769896836797e-05, + "loss": 5.7041, + "step": 76600 + }, + { + "epoch": 5.65, + "learning_rate": 2.350654438428544e-05, + "loss": 6.7796, + "step": 76650 + }, + { + "epoch": 5.66, + "learning_rate": 2.345631887173409e-05, + "loss": 6.4432, + "step": 76700 + }, + { + "epoch": 5.66, + "learning_rate": 2.3406093359182733e-05, + "loss": 5.5805, + "step": 76750 + }, + { + "epoch": 5.66, + "learning_rate": 2.3355867846631377e-05, + "loss": 5.4049, + "step": 76800 + }, + { + "epoch": 5.67, + "learning_rate": 2.330564233408002e-05, + "loss": 5.0643, + "step": 76850 + }, + { + "epoch": 5.67, + "learning_rate": 2.3255416821528665e-05, + "loss": 5.4007, + "step": 76900 + }, + { + "epoch": 5.68, + "learning_rate": 2.320519130897731e-05, + "loss": 5.3506, + "step": 76950 + }, + { + "epoch": 5.68, + "learning_rate": 2.3154965796425953e-05, + "loss": 6.5889, + "step": 77000 + }, + { + "epoch": 5.68, + "learning_rate": 2.31047402838746e-05, + "loss": 6.206, + "step": 77050 + }, + { + "epoch": 5.69, + "learning_rate": 2.3054514771323244e-05, + "loss": 6.03, + "step": 77100 + }, + { + "epoch": 5.69, + "learning_rate": 
2.3004289258771885e-05, + "loss": 5.6658, + "step": 77150 + }, + { + "epoch": 5.69, + "learning_rate": 2.295406374622053e-05, + "loss": 6.5901, + "step": 77200 + }, + { + "epoch": 5.7, + "learning_rate": 2.2903838233669176e-05, + "loss": 6.3019, + "step": 77250 + }, + { + "epoch": 5.7, + "learning_rate": 2.285361272111782e-05, + "loss": 5.5744, + "step": 77300 + }, + { + "epoch": 5.71, + "learning_rate": 2.2803387208566465e-05, + "loss": 5.8269, + "step": 77350 + }, + { + "epoch": 5.71, + "learning_rate": 2.275316169601511e-05, + "loss": 6.1005, + "step": 77400 + }, + { + "epoch": 5.71, + "learning_rate": 2.2702936183463753e-05, + "loss": 6.0196, + "step": 77450 + }, + { + "epoch": 5.72, + "learning_rate": 2.2652710670912397e-05, + "loss": 5.8475, + "step": 77500 + }, + { + "epoch": 5.72, + "learning_rate": 2.260248515836104e-05, + "loss": 6.4338, + "step": 77550 + }, + { + "epoch": 5.72, + "learning_rate": 2.2552259645809688e-05, + "loss": 4.958, + "step": 77600 + }, + { + "epoch": 5.73, + "learning_rate": 2.2502034133258332e-05, + "loss": 6.4737, + "step": 77650 + }, + { + "epoch": 5.73, + "learning_rate": 2.2451808620706976e-05, + "loss": 6.3223, + "step": 77700 + }, + { + "epoch": 5.74, + "learning_rate": 2.240158310815562e-05, + "loss": 7.171, + "step": 77750 + }, + { + "epoch": 5.74, + "learning_rate": 2.2351357595604264e-05, + "loss": 6.5725, + "step": 77800 + }, + { + "epoch": 5.74, + "learning_rate": 2.230113208305291e-05, + "loss": 5.7644, + "step": 77850 + }, + { + "epoch": 5.75, + "learning_rate": 2.2250906570501553e-05, + "loss": 5.6257, + "step": 77900 + }, + { + "epoch": 5.75, + "learning_rate": 2.22006810579502e-05, + "loss": 6.2325, + "step": 77950 + }, + { + "epoch": 5.75, + "learning_rate": 2.215045554539884e-05, + "loss": 6.7106, + "step": 78000 + }, + { + "epoch": 5.76, + "learning_rate": 2.2100230032847485e-05, + "loss": 5.0113, + "step": 78050 + }, + { + "epoch": 5.76, + "learning_rate": 2.205000452029613e-05, + "loss": 6.1309, + "step": 
78100 + }, + { + "epoch": 5.76, + "learning_rate": 2.1999779007744776e-05, + "loss": 5.5098, + "step": 78150 + }, + { + "epoch": 5.77, + "learning_rate": 2.194955349519342e-05, + "loss": 6.6709, + "step": 78200 + }, + { + "epoch": 5.77, + "learning_rate": 2.1899327982642064e-05, + "loss": 5.8039, + "step": 78250 + }, + { + "epoch": 5.78, + "learning_rate": 2.184910247009071e-05, + "loss": 5.0271, + "step": 78300 + }, + { + "epoch": 5.78, + "learning_rate": 2.1798876957539352e-05, + "loss": 6.1648, + "step": 78350 + }, + { + "epoch": 5.78, + "learning_rate": 2.1748651444987996e-05, + "loss": 5.1838, + "step": 78400 + }, + { + "epoch": 5.79, + "learning_rate": 2.169842593243664e-05, + "loss": 7.1149, + "step": 78450 + }, + { + "epoch": 5.79, + "learning_rate": 2.1648200419885288e-05, + "loss": 5.9544, + "step": 78500 + }, + { + "epoch": 5.79, + "learning_rate": 2.1597974907333932e-05, + "loss": 6.4747, + "step": 78550 + }, + { + "epoch": 5.8, + "learning_rate": 2.1547749394782573e-05, + "loss": 5.6367, + "step": 78600 + }, + { + "epoch": 5.8, + "learning_rate": 2.149752388223122e-05, + "loss": 5.8395, + "step": 78650 + }, + { + "epoch": 5.81, + "learning_rate": 2.1447298369679864e-05, + "loss": 6.8058, + "step": 78700 + }, + { + "epoch": 5.81, + "learning_rate": 2.1397072857128508e-05, + "loss": 6.4977, + "step": 78750 + }, + { + "epoch": 5.81, + "learning_rate": 2.1346847344577152e-05, + "loss": 7.0943, + "step": 78800 + }, + { + "epoch": 5.82, + "learning_rate": 2.12966218320258e-05, + "loss": 6.0009, + "step": 78850 + }, + { + "epoch": 5.82, + "learning_rate": 2.124639631947444e-05, + "loss": 5.8074, + "step": 78900 + }, + { + "epoch": 5.82, + "learning_rate": 2.1196170806923084e-05, + "loss": 6.277, + "step": 78950 + }, + { + "epoch": 5.83, + "learning_rate": 2.114594529437173e-05, + "loss": 5.61, + "step": 79000 + }, + { + "epoch": 5.83, + "learning_rate": 2.1095719781820376e-05, + "loss": 5.6585, + "step": 79050 + }, + { + "epoch": 5.83, + "learning_rate": 
2.104549426926902e-05, + "loss": 4.9836, + "step": 79100 + }, + { + "epoch": 5.84, + "learning_rate": 2.099526875671766e-05, + "loss": 6.1327, + "step": 79150 + }, + { + "epoch": 5.84, + "learning_rate": 2.0945043244166308e-05, + "loss": 6.2281, + "step": 79200 + }, + { + "epoch": 5.85, + "learning_rate": 2.0894817731614952e-05, + "loss": 5.9593, + "step": 79250 + }, + { + "epoch": 5.85, + "learning_rate": 2.0844592219063596e-05, + "loss": 5.1415, + "step": 79300 + }, + { + "epoch": 5.85, + "learning_rate": 2.079436670651224e-05, + "loss": 5.719, + "step": 79350 + }, + { + "epoch": 5.86, + "learning_rate": 2.0744141193960888e-05, + "loss": 6.1617, + "step": 79400 + }, + { + "epoch": 5.86, + "learning_rate": 2.069391568140953e-05, + "loss": 6.3103, + "step": 79450 + }, + { + "epoch": 5.86, + "learning_rate": 2.0643690168858172e-05, + "loss": 5.2091, + "step": 79500 + }, + { + "epoch": 5.87, + "learning_rate": 2.059346465630682e-05, + "loss": 6.1573, + "step": 79550 + }, + { + "epoch": 5.87, + "learning_rate": 2.0543239143755464e-05, + "loss": 6.3729, + "step": 79600 + }, + { + "epoch": 5.88, + "learning_rate": 2.0493013631204108e-05, + "loss": 6.1918, + "step": 79650 + }, + { + "epoch": 5.88, + "learning_rate": 2.0442788118652752e-05, + "loss": 6.7536, + "step": 79700 + }, + { + "epoch": 5.88, + "learning_rate": 2.0392562606101396e-05, + "loss": 5.7906, + "step": 79750 + }, + { + "epoch": 5.89, + "learning_rate": 2.034233709355004e-05, + "loss": 6.3394, + "step": 79800 + }, + { + "epoch": 5.89, + "learning_rate": 2.0292111580998684e-05, + "loss": 5.8182, + "step": 79850 + }, + { + "epoch": 5.89, + "learning_rate": 2.0241886068447328e-05, + "loss": 5.8381, + "step": 79900 + }, + { + "epoch": 5.9, + "learning_rate": 2.0191660555895976e-05, + "loss": 6.4559, + "step": 79950 + }, + { + "epoch": 5.9, + "learning_rate": 2.0141435043344616e-05, + "loss": 5.5812, + "step": 80000 + }, + { + "epoch": 5.9, + "eval_loss": 7.74003791809082, + "eval_runtime": 967.3759, + 
"eval_samples_per_second": 13.54, + "eval_steps_per_second": 3.385, + "eval_wer": 0.19946663306874973, + "step": 80000 + }, + { + "epoch": 5.9, + "learning_rate": 2.009120953079326e-05, + "loss": 5.7727, + "step": 80050 + }, + { + "epoch": 5.91, + "learning_rate": 2.0040984018241908e-05, + "loss": 5.5779, + "step": 80100 + }, + { + "epoch": 5.91, + "learning_rate": 1.9990758505690552e-05, + "loss": 5.9231, + "step": 80150 + }, + { + "epoch": 5.92, + "learning_rate": 1.9940532993139196e-05, + "loss": 5.428, + "step": 80200 + }, + { + "epoch": 5.92, + "learning_rate": 1.989030748058784e-05, + "loss": 5.4097, + "step": 80250 + }, + { + "epoch": 5.92, + "learning_rate": 1.9840081968036484e-05, + "loss": 6.2016, + "step": 80300 + }, + { + "epoch": 5.93, + "learning_rate": 1.9789856455485128e-05, + "loss": 6.3442, + "step": 80350 + }, + { + "epoch": 5.93, + "learning_rate": 1.9739630942933772e-05, + "loss": 5.775, + "step": 80400 + }, + { + "epoch": 5.93, + "learning_rate": 1.968940543038242e-05, + "loss": 6.2853, + "step": 80450 + }, + { + "epoch": 5.94, + "learning_rate": 1.9639179917831064e-05, + "loss": 6.1317, + "step": 80500 + }, + { + "epoch": 5.94, + "learning_rate": 1.9588954405279708e-05, + "loss": 6.1315, + "step": 80550 + }, + { + "epoch": 5.95, + "learning_rate": 1.9538728892728352e-05, + "loss": 5.6609, + "step": 80600 + }, + { + "epoch": 5.95, + "learning_rate": 1.9488503380176996e-05, + "loss": 5.2825, + "step": 80650 + }, + { + "epoch": 5.95, + "learning_rate": 1.943827786762564e-05, + "loss": 6.114, + "step": 80700 + }, + { + "epoch": 5.96, + "learning_rate": 1.9388052355074284e-05, + "loss": 5.5919, + "step": 80750 + }, + { + "epoch": 5.96, + "learning_rate": 1.933782684252293e-05, + "loss": 5.7829, + "step": 80800 + }, + { + "epoch": 5.96, + "learning_rate": 1.9287601329971575e-05, + "loss": 5.2979, + "step": 80850 + }, + { + "epoch": 5.97, + "learning_rate": 1.9237375817420216e-05, + "loss": 6.2099, + "step": 80900 + }, + { + "epoch": 5.97, + 
"learning_rate": 1.918715030486886e-05, + "loss": 6.3945, + "step": 80950 + }, + { + "epoch": 5.97, + "learning_rate": 1.9136924792317508e-05, + "loss": 6.1131, + "step": 81000 + }, + { + "epoch": 5.98, + "learning_rate": 1.908669927976615e-05, + "loss": 5.7025, + "step": 81050 + }, + { + "epoch": 5.98, + "learning_rate": 1.9036473767214796e-05, + "loss": 6.2167, + "step": 81100 + }, + { + "epoch": 5.99, + "learning_rate": 1.898624825466344e-05, + "loss": 4.7241, + "step": 81150 + }, + { + "epoch": 5.99, + "learning_rate": 1.8936022742112084e-05, + "loss": 5.4894, + "step": 81200 + }, + { + "epoch": 5.99, + "learning_rate": 1.8885797229560728e-05, + "loss": 5.5749, + "step": 81250 + }, + { + "epoch": 6.0, + "learning_rate": 1.8835571717009372e-05, + "loss": 6.286, + "step": 81300 + }, + { + "epoch": 6.0, + "learning_rate": 1.878534620445802e-05, + "loss": 5.5281, + "step": 81350 + }, + { + "epoch": 6.0, + "learning_rate": 1.8735120691906663e-05, + "loss": 5.5189, + "step": 81400 + }, + { + "epoch": 6.01, + "learning_rate": 1.8684895179355304e-05, + "loss": 5.5588, + "step": 81450 + }, + { + "epoch": 6.01, + "learning_rate": 1.863466966680395e-05, + "loss": 4.9543, + "step": 81500 + }, + { + "epoch": 6.02, + "learning_rate": 1.8584444154252596e-05, + "loss": 6.1674, + "step": 81550 + }, + { + "epoch": 6.02, + "learning_rate": 1.853421864170124e-05, + "loss": 5.6677, + "step": 81600 + }, + { + "epoch": 6.02, + "learning_rate": 1.8483993129149884e-05, + "loss": 6.0838, + "step": 81650 + }, + { + "epoch": 6.03, + "learning_rate": 1.843376761659853e-05, + "loss": 5.4898, + "step": 81700 + }, + { + "epoch": 6.03, + "learning_rate": 1.8383542104047172e-05, + "loss": 5.2513, + "step": 81750 + }, + { + "epoch": 6.03, + "learning_rate": 1.8333316591495816e-05, + "loss": 5.2589, + "step": 81800 + }, + { + "epoch": 6.04, + "learning_rate": 1.828309107894446e-05, + "loss": 5.8669, + "step": 81850 + }, + { + "epoch": 6.04, + "learning_rate": 1.8232865566393107e-05, + "loss": 
5.1666, + "step": 81900 + }, + { + "epoch": 6.04, + "learning_rate": 1.818264005384175e-05, + "loss": 6.0622, + "step": 81950 + }, + { + "epoch": 6.05, + "learning_rate": 1.8132414541290392e-05, + "loss": 5.9538, + "step": 82000 + }, + { + "epoch": 6.05, + "learning_rate": 1.808218902873904e-05, + "loss": 5.3682, + "step": 82050 + }, + { + "epoch": 6.06, + "learning_rate": 1.8031963516187684e-05, + "loss": 6.2593, + "step": 82100 + }, + { + "epoch": 6.06, + "learning_rate": 1.7981738003636328e-05, + "loss": 5.7516, + "step": 82150 + }, + { + "epoch": 6.06, + "learning_rate": 1.793151249108497e-05, + "loss": 5.034, + "step": 82200 + }, + { + "epoch": 6.07, + "learning_rate": 1.788128697853362e-05, + "loss": 5.9257, + "step": 82250 + }, + { + "epoch": 6.07, + "learning_rate": 1.783106146598226e-05, + "loss": 6.0995, + "step": 82300 + }, + { + "epoch": 6.07, + "learning_rate": 1.7780835953430904e-05, + "loss": 5.6513, + "step": 82350 + }, + { + "epoch": 6.08, + "learning_rate": 1.773061044087955e-05, + "loss": 5.4488, + "step": 82400 + }, + { + "epoch": 6.08, + "learning_rate": 1.7680384928328195e-05, + "loss": 5.9832, + "step": 82450 + }, + { + "epoch": 6.09, + "learning_rate": 1.763015941577684e-05, + "loss": 5.3573, + "step": 82500 + }, + { + "epoch": 6.09, + "learning_rate": 1.7579933903225483e-05, + "loss": 5.7321, + "step": 82550 + }, + { + "epoch": 6.09, + "learning_rate": 1.7529708390674127e-05, + "loss": 6.0602, + "step": 82600 + }, + { + "epoch": 6.1, + "learning_rate": 1.747948287812277e-05, + "loss": 5.6744, + "step": 82650 + }, + { + "epoch": 6.1, + "learning_rate": 1.7429257365571416e-05, + "loss": 4.8128, + "step": 82700 + }, + { + "epoch": 6.1, + "learning_rate": 1.737903185302006e-05, + "loss": 5.271, + "step": 82750 + }, + { + "epoch": 6.11, + "learning_rate": 1.7328806340468707e-05, + "loss": 6.0612, + "step": 82800 + }, + { + "epoch": 6.11, + "learning_rate": 1.727858082791735e-05, + "loss": 6.1573, + "step": 82850 + }, + { + "epoch": 6.11, + 
"learning_rate": 1.7228355315365992e-05, + "loss": 6.6811, + "step": 82900 + }, + { + "epoch": 6.12, + "learning_rate": 1.717812980281464e-05, + "loss": 5.7212, + "step": 82950 + }, + { + "epoch": 6.12, + "learning_rate": 1.7127904290263283e-05, + "loss": 5.7033, + "step": 83000 + }, + { + "epoch": 6.13, + "learning_rate": 1.7077678777711927e-05, + "loss": 5.6656, + "step": 83050 + }, + { + "epoch": 6.13, + "learning_rate": 1.702745326516057e-05, + "loss": 5.6811, + "step": 83100 + }, + { + "epoch": 6.13, + "learning_rate": 1.6977227752609215e-05, + "loss": 5.8367, + "step": 83150 + }, + { + "epoch": 6.14, + "learning_rate": 1.692700224005786e-05, + "loss": 5.2576, + "step": 83200 + }, + { + "epoch": 6.14, + "learning_rate": 1.6876776727506504e-05, + "loss": 5.7272, + "step": 83250 + }, + { + "epoch": 6.14, + "learning_rate": 1.682655121495515e-05, + "loss": 5.1136, + "step": 83300 + }, + { + "epoch": 6.15, + "learning_rate": 1.6776325702403795e-05, + "loss": 5.7522, + "step": 83350 + }, + { + "epoch": 6.15, + "learning_rate": 1.672610018985244e-05, + "loss": 5.89, + "step": 83400 + }, + { + "epoch": 6.16, + "learning_rate": 1.6675874677301083e-05, + "loss": 6.5674, + "step": 83450 + }, + { + "epoch": 6.16, + "learning_rate": 1.6625649164749727e-05, + "loss": 5.241, + "step": 83500 + }, + { + "epoch": 6.16, + "learning_rate": 1.657542365219837e-05, + "loss": 6.0398, + "step": 83550 + }, + { + "epoch": 6.17, + "learning_rate": 1.6525198139647015e-05, + "loss": 5.3322, + "step": 83600 + }, + { + "epoch": 6.17, + "learning_rate": 1.6474972627095663e-05, + "loss": 6.3029, + "step": 83650 + }, + { + "epoch": 6.17, + "learning_rate": 1.6424747114544307e-05, + "loss": 5.1723, + "step": 83700 + }, + { + "epoch": 6.18, + "learning_rate": 1.6374521601992947e-05, + "loss": 5.5713, + "step": 83750 + }, + { + "epoch": 6.18, + "learning_rate": 1.632429608944159e-05, + "loss": 6.0942, + "step": 83800 + }, + { + "epoch": 6.18, + "learning_rate": 1.627407057689024e-05, + "loss": 
5.8515, + "step": 83850 + }, + { + "epoch": 6.19, + "learning_rate": 1.6223845064338883e-05, + "loss": 6.5971, + "step": 83900 + }, + { + "epoch": 6.19, + "learning_rate": 1.6173619551787527e-05, + "loss": 5.87, + "step": 83950 + }, + { + "epoch": 6.2, + "learning_rate": 1.612339403923617e-05, + "loss": 5.1113, + "step": 84000 + }, + { + "epoch": 6.2, + "learning_rate": 1.6073168526684815e-05, + "loss": 5.6073, + "step": 84050 + }, + { + "epoch": 6.2, + "learning_rate": 1.602294301413346e-05, + "loss": 4.9219, + "step": 84100 + }, + { + "epoch": 6.21, + "learning_rate": 1.5972717501582103e-05, + "loss": 5.2715, + "step": 84150 + }, + { + "epoch": 6.21, + "learning_rate": 1.592249198903075e-05, + "loss": 5.2142, + "step": 84200 + }, + { + "epoch": 6.21, + "learning_rate": 1.5872266476479395e-05, + "loss": 5.7238, + "step": 84250 + }, + { + "epoch": 6.22, + "learning_rate": 1.5822040963928035e-05, + "loss": 5.2094, + "step": 84300 + }, + { + "epoch": 6.22, + "learning_rate": 1.5771815451376683e-05, + "loss": 5.6301, + "step": 84350 + }, + { + "epoch": 6.23, + "learning_rate": 1.5721589938825327e-05, + "loss": 6.3736, + "step": 84400 + }, + { + "epoch": 6.23, + "learning_rate": 1.567136442627397e-05, + "loss": 5.3024, + "step": 84450 + }, + { + "epoch": 6.23, + "learning_rate": 1.5621138913722615e-05, + "loss": 5.251, + "step": 84500 + }, + { + "epoch": 6.24, + "learning_rate": 1.5570913401171263e-05, + "loss": 5.0983, + "step": 84550 + }, + { + "epoch": 6.24, + "learning_rate": 1.5520687888619903e-05, + "loss": 5.3273, + "step": 84600 + }, + { + "epoch": 6.24, + "learning_rate": 1.5470462376068547e-05, + "loss": 5.0034, + "step": 84650 + }, + { + "epoch": 6.25, + "learning_rate": 1.542023686351719e-05, + "loss": 6.1106, + "step": 84700 + }, + { + "epoch": 6.25, + "learning_rate": 1.537001135096584e-05, + "loss": 6.0266, + "step": 84750 + }, + { + "epoch": 6.26, + "learning_rate": 1.5319785838414483e-05, + "loss": 5.941, + "step": 84800 + }, + { + "epoch": 6.26, + 
"learning_rate": 1.5269560325863123e-05, + "loss": 6.0568, + "step": 84850 + }, + { + "epoch": 6.26, + "learning_rate": 1.521933481331177e-05, + "loss": 5.2097, + "step": 84900 + }, + { + "epoch": 6.27, + "learning_rate": 1.5169109300760415e-05, + "loss": 5.66, + "step": 84950 + }, + { + "epoch": 6.27, + "learning_rate": 1.5118883788209059e-05, + "loss": 5.1255, + "step": 85000 + }, + { + "epoch": 6.27, + "learning_rate": 1.5068658275657705e-05, + "loss": 4.3753, + "step": 85050 + }, + { + "epoch": 6.28, + "learning_rate": 1.5018432763106349e-05, + "loss": 5.7544, + "step": 85100 + }, + { + "epoch": 6.28, + "learning_rate": 1.4968207250554991e-05, + "loss": 5.0477, + "step": 85150 + }, + { + "epoch": 6.28, + "learning_rate": 1.4917981738003637e-05, + "loss": 4.7382, + "step": 85200 + }, + { + "epoch": 6.29, + "learning_rate": 1.4867756225452281e-05, + "loss": 6.1112, + "step": 85250 + }, + { + "epoch": 6.29, + "learning_rate": 1.4817530712900927e-05, + "loss": 5.9275, + "step": 85300 + }, + { + "epoch": 6.3, + "learning_rate": 1.476730520034957e-05, + "loss": 4.9116, + "step": 85350 + }, + { + "epoch": 6.3, + "learning_rate": 1.4717079687798217e-05, + "loss": 5.3206, + "step": 85400 + }, + { + "epoch": 6.3, + "learning_rate": 1.4666854175246857e-05, + "loss": 5.1605, + "step": 85450 + }, + { + "epoch": 6.31, + "learning_rate": 1.4616628662695503e-05, + "loss": 5.2704, + "step": 85500 + }, + { + "epoch": 6.31, + "learning_rate": 1.4566403150144147e-05, + "loss": 5.4004, + "step": 85550 + }, + { + "epoch": 6.31, + "learning_rate": 1.4516177637592793e-05, + "loss": 5.1312, + "step": 85600 + }, + { + "epoch": 6.32, + "learning_rate": 1.4465952125041437e-05, + "loss": 5.4173, + "step": 85650 + }, + { + "epoch": 6.32, + "learning_rate": 1.4415726612490083e-05, + "loss": 5.5278, + "step": 85700 + }, + { + "epoch": 6.33, + "learning_rate": 1.4365501099938725e-05, + "loss": 6.4264, + "step": 85750 + }, + { + "epoch": 6.33, + "learning_rate": 1.4315275587387369e-05, + 
"loss": 6.233, + "step": 85800 + }, + { + "epoch": 6.33, + "learning_rate": 1.4265050074836015e-05, + "loss": 5.6656, + "step": 85850 + }, + { + "epoch": 6.34, + "learning_rate": 1.4214824562284659e-05, + "loss": 5.3803, + "step": 85900 + }, + { + "epoch": 6.34, + "learning_rate": 1.4164599049733305e-05, + "loss": 6.1274, + "step": 85950 + }, + { + "epoch": 6.34, + "learning_rate": 1.4114373537181947e-05, + "loss": 5.2657, + "step": 86000 + }, + { + "epoch": 6.35, + "learning_rate": 1.4064148024630591e-05, + "loss": 6.0392, + "step": 86050 + }, + { + "epoch": 6.35, + "learning_rate": 1.4013922512079237e-05, + "loss": 5.1692, + "step": 86100 + }, + { + "epoch": 6.35, + "learning_rate": 1.396369699952788e-05, + "loss": 5.1826, + "step": 86150 + }, + { + "epoch": 6.36, + "learning_rate": 1.3913471486976526e-05, + "loss": 5.5917, + "step": 86200 + }, + { + "epoch": 6.36, + "learning_rate": 1.386324597442517e-05, + "loss": 5.1852, + "step": 86250 + }, + { + "epoch": 6.37, + "learning_rate": 1.3813020461873813e-05, + "loss": 5.1779, + "step": 86300 + }, + { + "epoch": 6.37, + "learning_rate": 1.3762794949322459e-05, + "loss": 5.6655, + "step": 86350 + }, + { + "epoch": 6.37, + "learning_rate": 1.3712569436771103e-05, + "loss": 5.6346, + "step": 86400 + }, + { + "epoch": 6.38, + "learning_rate": 1.3662343924219748e-05, + "loss": 6.4509, + "step": 86450 + }, + { + "epoch": 6.38, + "learning_rate": 1.3612118411668392e-05, + "loss": 6.6129, + "step": 86500 + }, + { + "epoch": 6.38, + "learning_rate": 1.3561892899117038e-05, + "loss": 5.1282, + "step": 86550 + }, + { + "epoch": 6.39, + "learning_rate": 1.3511667386565679e-05, + "loss": 6.06, + "step": 86600 + }, + { + "epoch": 6.39, + "learning_rate": 1.3461441874014325e-05, + "loss": 6.2118, + "step": 86650 + }, + { + "epoch": 6.4, + "learning_rate": 1.3411216361462969e-05, + "loss": 5.9067, + "step": 86700 + }, + { + "epoch": 6.4, + "learning_rate": 1.3360990848911614e-05, + "loss": 6.5174, + "step": 86750 + }, + { + 
"epoch": 6.4, + "learning_rate": 1.3310765336360259e-05, + "loss": 6.0514, + "step": 86800 + }, + { + "epoch": 6.41, + "learning_rate": 1.3260539823808901e-05, + "loss": 6.2585, + "step": 86850 + }, + { + "epoch": 6.41, + "learning_rate": 1.3210314311257547e-05, + "loss": 5.8812, + "step": 86900 + }, + { + "epoch": 6.41, + "learning_rate": 1.316008879870619e-05, + "loss": 5.7082, + "step": 86950 + }, + { + "epoch": 6.42, + "learning_rate": 1.3109863286154836e-05, + "loss": 5.9844, + "step": 87000 + }, + { + "epoch": 6.42, + "learning_rate": 1.305963777360348e-05, + "loss": 5.2375, + "step": 87050 + }, + { + "epoch": 6.42, + "learning_rate": 1.3009412261052126e-05, + "loss": 5.8796, + "step": 87100 + }, + { + "epoch": 6.43, + "learning_rate": 1.2959186748500769e-05, + "loss": 5.0831, + "step": 87150 + }, + { + "epoch": 6.43, + "learning_rate": 1.2908961235949413e-05, + "loss": 5.9476, + "step": 87200 + }, + { + "epoch": 6.44, + "learning_rate": 1.2858735723398058e-05, + "loss": 5.9525, + "step": 87250 + }, + { + "epoch": 6.44, + "learning_rate": 1.2808510210846702e-05, + "loss": 4.9682, + "step": 87300 + }, + { + "epoch": 6.44, + "learning_rate": 1.2758284698295348e-05, + "loss": 5.8342, + "step": 87350 + }, + { + "epoch": 6.45, + "learning_rate": 1.2708059185743992e-05, + "loss": 5.2582, + "step": 87400 + }, + { + "epoch": 6.45, + "learning_rate": 1.2657833673192635e-05, + "loss": 5.6833, + "step": 87450 + }, + { + "epoch": 6.45, + "learning_rate": 1.2607608160641279e-05, + "loss": 6.4167, + "step": 87500 + }, + { + "epoch": 6.46, + "learning_rate": 1.2557382648089924e-05, + "loss": 6.0258, + "step": 87550 + }, + { + "epoch": 6.46, + "learning_rate": 1.2507157135538568e-05, + "loss": 6.4125, + "step": 87600 + }, + { + "epoch": 6.47, + "learning_rate": 1.2456931622987213e-05, + "loss": 5.5319, + "step": 87650 + }, + { + "epoch": 6.47, + "learning_rate": 1.2406706110435858e-05, + "loss": 5.169, + "step": 87700 + }, + { + "epoch": 6.47, + "learning_rate": 
1.2356480597884502e-05, + "loss": 4.8537, + "step": 87750 + }, + { + "epoch": 6.48, + "learning_rate": 1.2306255085333146e-05, + "loss": 5.6043, + "step": 87800 + }, + { + "epoch": 6.48, + "learning_rate": 1.225602957278179e-05, + "loss": 5.1859, + "step": 87850 + }, + { + "epoch": 6.48, + "learning_rate": 1.2205804060230436e-05, + "loss": 4.814, + "step": 87900 + }, + { + "epoch": 6.49, + "learning_rate": 1.2155578547679079e-05, + "loss": 5.4293, + "step": 87950 + }, + { + "epoch": 6.49, + "learning_rate": 1.2105353035127724e-05, + "loss": 6.1502, + "step": 88000 + }, + { + "epoch": 6.49, + "learning_rate": 1.2055127522576368e-05, + "loss": 6.1367, + "step": 88050 + }, + { + "epoch": 6.5, + "learning_rate": 1.2004902010025012e-05, + "loss": 6.2755, + "step": 88100 + }, + { + "epoch": 6.5, + "learning_rate": 1.1954676497473658e-05, + "loss": 5.211, + "step": 88150 + }, + { + "epoch": 6.51, + "learning_rate": 1.1904450984922302e-05, + "loss": 5.2505, + "step": 88200 + }, + { + "epoch": 6.51, + "learning_rate": 1.1854225472370946e-05, + "loss": 6.7278, + "step": 88250 + }, + { + "epoch": 6.51, + "learning_rate": 1.180399995981959e-05, + "loss": 6.1237, + "step": 88300 + }, + { + "epoch": 6.52, + "learning_rate": 1.1753774447268234e-05, + "loss": 6.1895, + "step": 88350 + }, + { + "epoch": 6.52, + "learning_rate": 1.1703548934716878e-05, + "loss": 5.1235, + "step": 88400 + }, + { + "epoch": 6.52, + "learning_rate": 1.1653323422165524e-05, + "loss": 5.9953, + "step": 88450 + }, + { + "epoch": 6.53, + "learning_rate": 1.1603097909614168e-05, + "loss": 6.2782, + "step": 88500 + }, + { + "epoch": 6.53, + "learning_rate": 1.1552872397062812e-05, + "loss": 5.3504, + "step": 88550 + }, + { + "epoch": 6.54, + "learning_rate": 1.1502646884511458e-05, + "loss": 5.7504, + "step": 88600 + }, + { + "epoch": 6.54, + "learning_rate": 1.14524213719601e-05, + "loss": 5.3578, + "step": 88650 + }, + { + "epoch": 6.54, + "learning_rate": 1.1402195859408746e-05, + "loss": 5.0383, + 
"step": 88700 + }, + { + "epoch": 6.55, + "learning_rate": 1.135197034685739e-05, + "loss": 5.9042, + "step": 88750 + }, + { + "epoch": 6.55, + "learning_rate": 1.1301744834306034e-05, + "loss": 5.155, + "step": 88800 + }, + { + "epoch": 6.55, + "learning_rate": 1.1251519321754678e-05, + "loss": 5.9763, + "step": 88850 + }, + { + "epoch": 6.56, + "learning_rate": 1.1201293809203324e-05, + "loss": 5.6412, + "step": 88900 + }, + { + "epoch": 6.56, + "learning_rate": 1.1151068296651968e-05, + "loss": 5.4027, + "step": 88950 + }, + { + "epoch": 6.56, + "learning_rate": 1.1100842784100612e-05, + "loss": 5.3177, + "step": 89000 + }, + { + "epoch": 6.57, + "learning_rate": 1.1050617271549258e-05, + "loss": 5.5425, + "step": 89050 + }, + { + "epoch": 6.57, + "learning_rate": 1.10003917589979e-05, + "loss": 5.1199, + "step": 89100 + }, + { + "epoch": 6.58, + "learning_rate": 1.0950166246446546e-05, + "loss": 5.2852, + "step": 89150 + }, + { + "epoch": 6.58, + "learning_rate": 1.089994073389519e-05, + "loss": 5.905, + "step": 89200 + }, + { + "epoch": 6.58, + "learning_rate": 1.0849715221343834e-05, + "loss": 5.7968, + "step": 89250 + }, + { + "epoch": 6.59, + "learning_rate": 1.079948970879248e-05, + "loss": 6.0684, + "step": 89300 + }, + { + "epoch": 6.59, + "learning_rate": 1.0749264196241122e-05, + "loss": 4.8366, + "step": 89350 + }, + { + "epoch": 6.59, + "learning_rate": 1.0699038683689768e-05, + "loss": 6.2994, + "step": 89400 + }, + { + "epoch": 6.6, + "learning_rate": 1.0648813171138412e-05, + "loss": 5.2661, + "step": 89450 + }, + { + "epoch": 6.6, + "learning_rate": 1.0598587658587056e-05, + "loss": 6.3783, + "step": 89500 + }, + { + "epoch": 6.61, + "learning_rate": 1.05483621460357e-05, + "loss": 5.4387, + "step": 89550 + }, + { + "epoch": 6.61, + "learning_rate": 1.0498136633484346e-05, + "loss": 5.3958, + "step": 89600 + }, + { + "epoch": 6.61, + "learning_rate": 1.044791112093299e-05, + "loss": 5.2108, + "step": 89650 + }, + { + "epoch": 6.62, + 
"learning_rate": 1.0397685608381634e-05, + "loss": 5.9092, + "step": 89700 + }, + { + "epoch": 6.62, + "learning_rate": 1.034746009583028e-05, + "loss": 5.8141, + "step": 89750 + }, + { + "epoch": 6.62, + "learning_rate": 1.0297234583278922e-05, + "loss": 6.1024, + "step": 89800 + }, + { + "epoch": 6.63, + "learning_rate": 1.0247009070727568e-05, + "loss": 5.5871, + "step": 89850 + }, + { + "epoch": 6.63, + "learning_rate": 1.0196783558176212e-05, + "loss": 5.323, + "step": 89900 + }, + { + "epoch": 6.63, + "learning_rate": 1.0146558045624856e-05, + "loss": 5.9425, + "step": 89950 + }, + { + "epoch": 6.64, + "learning_rate": 1.00963325330735e-05, + "loss": 5.7013, + "step": 90000 + }, + { + "epoch": 6.64, + "learning_rate": 1.0046107020522146e-05, + "loss": 5.701, + "step": 90050 + }, + { + "epoch": 6.65, + "learning_rate": 9.99588150797079e-06, + "loss": 6.146, + "step": 90100 + }, + { + "epoch": 6.65, + "learning_rate": 9.945655995419434e-06, + "loss": 5.4734, + "step": 90150 + }, + { + "epoch": 6.65, + "learning_rate": 9.89543048286808e-06, + "loss": 5.8372, + "step": 90200 + }, + { + "epoch": 6.66, + "learning_rate": 9.845204970316722e-06, + "loss": 6.0749, + "step": 90250 + }, + { + "epoch": 6.66, + "learning_rate": 9.794979457765368e-06, + "loss": 5.2792, + "step": 90300 + }, + { + "epoch": 6.66, + "learning_rate": 9.74475394521401e-06, + "loss": 5.759, + "step": 90350 + }, + { + "epoch": 6.67, + "learning_rate": 9.694528432662656e-06, + "loss": 4.975, + "step": 90400 + }, + { + "epoch": 6.67, + "learning_rate": 9.6443029201113e-06, + "loss": 6.1866, + "step": 90450 + }, + { + "epoch": 6.68, + "learning_rate": 9.594077407559944e-06, + "loss": 6.1507, + "step": 90500 + }, + { + "epoch": 6.68, + "learning_rate": 9.54385189500859e-06, + "loss": 5.9567, + "step": 90550 + }, + { + "epoch": 6.68, + "learning_rate": 9.493626382457234e-06, + "loss": 5.8165, + "step": 90600 + }, + { + "epoch": 6.69, + "learning_rate": 9.443400869905878e-06, + "loss": 5.3155, + "step": 
90650 + }, + { + "epoch": 6.69, + "learning_rate": 9.393175357354522e-06, + "loss": 5.5206, + "step": 90700 + }, + { + "epoch": 6.69, + "learning_rate": 9.342949844803168e-06, + "loss": 4.7732, + "step": 90750 + }, + { + "epoch": 6.7, + "learning_rate": 9.29272433225181e-06, + "loss": 5.5809, + "step": 90800 + }, + { + "epoch": 6.7, + "learning_rate": 9.242498819700456e-06, + "loss": 5.7061, + "step": 90850 + }, + { + "epoch": 6.71, + "learning_rate": 9.1922733071491e-06, + "loss": 6.1421, + "step": 90900 + }, + { + "epoch": 6.71, + "learning_rate": 9.142047794597744e-06, + "loss": 5.4665, + "step": 90950 + }, + { + "epoch": 6.71, + "learning_rate": 9.09182228204639e-06, + "loss": 5.4408, + "step": 91000 + }, + { + "epoch": 6.72, + "learning_rate": 9.041596769495034e-06, + "loss": 5.3826, + "step": 91050 + }, + { + "epoch": 6.72, + "learning_rate": 8.991371256943678e-06, + "loss": 5.9068, + "step": 91100 + }, + { + "epoch": 6.72, + "learning_rate": 8.941145744392322e-06, + "loss": 5.1764, + "step": 91150 + }, + { + "epoch": 6.73, + "learning_rate": 8.890920231840966e-06, + "loss": 5.2145, + "step": 91200 + }, + { + "epoch": 6.73, + "learning_rate": 8.84069471928961e-06, + "loss": 5.5649, + "step": 91250 + }, + { + "epoch": 6.73, + "learning_rate": 8.790469206738256e-06, + "loss": 6.5099, + "step": 91300 + }, + { + "epoch": 6.74, + "learning_rate": 8.7402436941869e-06, + "loss": 5.3549, + "step": 91350 + }, + { + "epoch": 6.74, + "learning_rate": 8.690018181635544e-06, + "loss": 5.5097, + "step": 91400 + }, + { + "epoch": 6.75, + "learning_rate": 8.63979266908419e-06, + "loss": 6.265, + "step": 91450 + }, + { + "epoch": 6.75, + "learning_rate": 8.589567156532832e-06, + "loss": 5.3955, + "step": 91500 + }, + { + "epoch": 6.75, + "learning_rate": 8.539341643981478e-06, + "loss": 6.4223, + "step": 91550 + }, + { + "epoch": 6.76, + "learning_rate": 8.489116131430122e-06, + "loss": 5.8701, + "step": 91600 + }, + { + "epoch": 6.76, + "learning_rate": 
8.438890618878766e-06, + "loss": 5.7781, + "step": 91650 + }, + { + "epoch": 6.76, + "learning_rate": 8.38866510632741e-06, + "loss": 5.522, + "step": 91700 + }, + { + "epoch": 6.77, + "learning_rate": 8.338439593776055e-06, + "loss": 5.6781, + "step": 91750 + }, + { + "epoch": 6.77, + "learning_rate": 8.2882140812247e-06, + "loss": 5.5512, + "step": 91800 + }, + { + "epoch": 6.78, + "learning_rate": 8.237988568673344e-06, + "loss": 5.4052, + "step": 91850 + }, + { + "epoch": 6.78, + "learning_rate": 8.18776305612199e-06, + "loss": 6.6671, + "step": 91900 + }, + { + "epoch": 6.78, + "learning_rate": 8.137537543570632e-06, + "loss": 5.2754, + "step": 91950 + }, + { + "epoch": 6.79, + "learning_rate": 8.087312031019277e-06, + "loss": 6.2486, + "step": 92000 + }, + { + "epoch": 6.79, + "learning_rate": 8.037086518467921e-06, + "loss": 5.0945, + "step": 92050 + }, + { + "epoch": 6.79, + "learning_rate": 7.986861005916565e-06, + "loss": 5.3328, + "step": 92100 + }, + { + "epoch": 6.8, + "learning_rate": 7.936635493365211e-06, + "loss": 5.1418, + "step": 92150 + }, + { + "epoch": 6.8, + "learning_rate": 7.886409980813854e-06, + "loss": 5.1135, + "step": 92200 + }, + { + "epoch": 6.8, + "learning_rate": 7.8361844682625e-06, + "loss": 5.5731, + "step": 92250 + }, + { + "epoch": 6.81, + "learning_rate": 7.785958955711143e-06, + "loss": 5.4876, + "step": 92300 + }, + { + "epoch": 6.81, + "learning_rate": 7.735733443159787e-06, + "loss": 4.4139, + "step": 92350 + }, + { + "epoch": 6.82, + "learning_rate": 7.685507930608432e-06, + "loss": 5.5843, + "step": 92400 + }, + { + "epoch": 6.82, + "learning_rate": 7.635282418057077e-06, + "loss": 5.8209, + "step": 92450 + }, + { + "epoch": 6.82, + "learning_rate": 7.5850569055057205e-06, + "loss": 5.1681, + "step": 92500 + }, + { + "epoch": 6.83, + "learning_rate": 7.534831392954365e-06, + "loss": 5.2068, + "step": 92550 + }, + { + "epoch": 6.83, + "learning_rate": 7.48460588040301e-06, + "loss": 6.0487, + "step": 92600 + }, + { + 
"epoch": 6.83, + "learning_rate": 7.4343803678516535e-06, + "loss": 5.1479, + "step": 92650 + }, + { + "epoch": 6.84, + "learning_rate": 7.384154855300298e-06, + "loss": 5.7305, + "step": 92700 + }, + { + "epoch": 6.84, + "learning_rate": 7.333929342748943e-06, + "loss": 5.1534, + "step": 92750 + }, + { + "epoch": 6.85, + "learning_rate": 7.283703830197587e-06, + "loss": 6.7006, + "step": 92800 + }, + { + "epoch": 6.85, + "learning_rate": 7.233478317646232e-06, + "loss": 6.093, + "step": 92850 + }, + { + "epoch": 6.85, + "learning_rate": 7.183252805094877e-06, + "loss": 5.3032, + "step": 92900 + }, + { + "epoch": 6.86, + "learning_rate": 7.13302729254352e-06, + "loss": 5.417, + "step": 92950 + }, + { + "epoch": 6.86, + "learning_rate": 7.082801779992165e-06, + "loss": 5.2938, + "step": 93000 + }, + { + "epoch": 6.86, + "learning_rate": 7.03257626744081e-06, + "loss": 5.0712, + "step": 93050 + }, + { + "epoch": 6.87, + "learning_rate": 6.982350754889453e-06, + "loss": 5.4341, + "step": 93100 + }, + { + "epoch": 6.87, + "learning_rate": 6.932125242338098e-06, + "loss": 4.828, + "step": 93150 + }, + { + "epoch": 6.87, + "learning_rate": 6.881899729786742e-06, + "loss": 5.5724, + "step": 93200 + }, + { + "epoch": 6.88, + "learning_rate": 6.831674217235387e-06, + "loss": 4.9367, + "step": 93250 + }, + { + "epoch": 6.88, + "learning_rate": 6.781448704684032e-06, + "loss": 5.3421, + "step": 93300 + }, + { + "epoch": 6.89, + "learning_rate": 6.731223192132675e-06, + "loss": 5.5862, + "step": 93350 + }, + { + "epoch": 6.89, + "learning_rate": 6.68099767958132e-06, + "loss": 4.857, + "step": 93400 + }, + { + "epoch": 6.89, + "learning_rate": 6.630772167029965e-06, + "loss": 5.4573, + "step": 93450 + }, + { + "epoch": 6.9, + "learning_rate": 6.580546654478609e-06, + "loss": 5.536, + "step": 93500 + }, + { + "epoch": 6.9, + "learning_rate": 6.530321141927253e-06, + "loss": 5.818, + "step": 93550 + }, + { + "epoch": 6.9, + "learning_rate": 6.480095629375898e-06, + "loss": 
5.3187, + "step": 93600 + }, + { + "epoch": 6.91, + "learning_rate": 6.429870116824542e-06, + "loss": 5.2464, + "step": 93650 + }, + { + "epoch": 6.91, + "learning_rate": 6.379644604273187e-06, + "loss": 5.4407, + "step": 93700 + }, + { + "epoch": 6.92, + "learning_rate": 6.329419091721832e-06, + "loss": 5.2338, + "step": 93750 + }, + { + "epoch": 6.92, + "learning_rate": 6.279193579170475e-06, + "loss": 5.4482, + "step": 93800 + }, + { + "epoch": 6.92, + "learning_rate": 6.22896806661912e-06, + "loss": 5.1899, + "step": 93850 + }, + { + "epoch": 6.93, + "learning_rate": 6.178742554067764e-06, + "loss": 5.4041, + "step": 93900 + }, + { + "epoch": 6.93, + "learning_rate": 6.128517041516409e-06, + "loss": 6.1085, + "step": 93950 + }, + { + "epoch": 6.93, + "learning_rate": 6.078291528965054e-06, + "loss": 5.363, + "step": 94000 + }, + { + "epoch": 6.94, + "learning_rate": 6.028066016413698e-06, + "loss": 6.0109, + "step": 94050 + }, + { + "epoch": 6.94, + "learning_rate": 5.977840503862342e-06, + "loss": 5.5074, + "step": 94100 + }, + { + "epoch": 6.94, + "learning_rate": 5.927614991310987e-06, + "loss": 5.831, + "step": 94150 + }, + { + "epoch": 6.95, + "learning_rate": 5.877389478759631e-06, + "loss": 6.5099, + "step": 94200 + }, + { + "epoch": 6.95, + "learning_rate": 5.827163966208275e-06, + "loss": 5.213, + "step": 94250 + }, + { + "epoch": 6.96, + "learning_rate": 5.77693845365692e-06, + "loss": 5.3367, + "step": 94300 + }, + { + "epoch": 6.96, + "learning_rate": 5.726712941105564e-06, + "loss": 5.6918, + "step": 94350 + }, + { + "epoch": 6.96, + "learning_rate": 5.676487428554209e-06, + "loss": 5.8001, + "step": 94400 + }, + { + "epoch": 6.97, + "learning_rate": 5.626261916002853e-06, + "loss": 4.8384, + "step": 94450 + }, + { + "epoch": 6.97, + "learning_rate": 5.576036403451498e-06, + "loss": 5.9708, + "step": 94500 + }, + { + "epoch": 6.97, + "learning_rate": 5.525810890900142e-06, + "loss": 5.1562, + "step": 94550 + }, + { + "epoch": 6.98, + 
"learning_rate": 5.475585378348786e-06, + "loss": 6.0821, + "step": 94600 + }, + { + "epoch": 6.98, + "learning_rate": 5.425359865797431e-06, + "loss": 6.4639, + "step": 94650 + }, + { + "epoch": 6.99, + "learning_rate": 5.375134353246075e-06, + "loss": 5.8299, + "step": 94700 + }, + { + "epoch": 6.99, + "learning_rate": 5.324908840694719e-06, + "loss": 5.219, + "step": 94750 + }, + { + "epoch": 6.99, + "learning_rate": 5.274683328143364e-06, + "loss": 4.9022, + "step": 94800 + }, + { + "epoch": 7.0, + "learning_rate": 5.224457815592009e-06, + "loss": 5.8906, + "step": 94850 + }, + { + "epoch": 7.0, + "learning_rate": 5.174232303040653e-06, + "loss": 4.7464, + "step": 94900 + }, + { + "epoch": 7.0, + "learning_rate": 5.124006790489297e-06, + "loss": 5.2756, + "step": 94950 + }, + { + "epoch": 7.01, + "learning_rate": 5.073781277937942e-06, + "loss": 5.7393, + "step": 95000 + }, + { + "epoch": 7.01, + "learning_rate": 5.023555765386586e-06, + "loss": 5.8585, + "step": 95050 + }, + { + "epoch": 7.01, + "learning_rate": 4.97333025283523e-06, + "loss": 5.0746, + "step": 95100 + }, + { + "epoch": 7.02, + "learning_rate": 4.923104740283875e-06, + "loss": 5.2644, + "step": 95150 + }, + { + "epoch": 7.02, + "learning_rate": 4.87287922773252e-06, + "loss": 5.3518, + "step": 95200 + }, + { + "epoch": 7.03, + "learning_rate": 4.822653715181164e-06, + "loss": 5.6309, + "step": 95250 + }, + { + "epoch": 7.03, + "learning_rate": 4.772428202629809e-06, + "loss": 5.0614, + "step": 95300 + }, + { + "epoch": 7.03, + "learning_rate": 4.722202690078453e-06, + "loss": 5.6014, + "step": 95350 + }, + { + "epoch": 7.04, + "learning_rate": 4.671977177527097e-06, + "loss": 5.9759, + "step": 95400 + }, + { + "epoch": 7.04, + "learning_rate": 4.621751664975741e-06, + "loss": 5.6206, + "step": 95450 + }, + { + "epoch": 7.04, + "learning_rate": 4.571526152424386e-06, + "loss": 5.6578, + "step": 95500 + }, + { + "epoch": 7.05, + "learning_rate": 4.52130063987303e-06, + "loss": 4.8626, + "step": 
95550 + }, + { + "epoch": 7.05, + "learning_rate": 4.471075127321675e-06, + "loss": 5.1913, + "step": 95600 + }, + { + "epoch": 7.06, + "learning_rate": 4.4208496147703196e-06, + "loss": 5.3974, + "step": 95650 + }, + { + "epoch": 7.06, + "learning_rate": 4.370624102218964e-06, + "loss": 5.0069, + "step": 95700 + }, + { + "epoch": 7.06, + "learning_rate": 4.320398589667608e-06, + "loss": 5.2273, + "step": 95750 + }, + { + "epoch": 7.07, + "learning_rate": 4.270173077116253e-06, + "loss": 6.3129, + "step": 95800 + }, + { + "epoch": 7.07, + "learning_rate": 4.219947564564897e-06, + "loss": 5.6421, + "step": 95850 + }, + { + "epoch": 7.07, + "learning_rate": 4.169722052013541e-06, + "loss": 5.9209, + "step": 95900 + }, + { + "epoch": 7.08, + "learning_rate": 4.119496539462185e-06, + "loss": 5.3418, + "step": 95950 + }, + { + "epoch": 7.08, + "learning_rate": 4.06927102691083e-06, + "loss": 5.1524, + "step": 96000 + }, + { + "epoch": 7.08, + "learning_rate": 4.0190455143594745e-06, + "loss": 5.8734, + "step": 96050 + }, + { + "epoch": 7.09, + "learning_rate": 3.968820001808119e-06, + "loss": 5.1922, + "step": 96100 + }, + { + "epoch": 7.09, + "learning_rate": 3.9185944892567635e-06, + "loss": 5.5077, + "step": 96150 + }, + { + "epoch": 7.1, + "learning_rate": 3.8683689767054076e-06, + "loss": 5.1205, + "step": 96200 + }, + { + "epoch": 7.1, + "learning_rate": 3.818143464154052e-06, + "loss": 5.6354, + "step": 96250 + }, + { + "epoch": 7.1, + "learning_rate": 3.7679179516026965e-06, + "loss": 5.4505, + "step": 96300 + }, + { + "epoch": 7.11, + "learning_rate": 3.717692439051341e-06, + "loss": 5.5936, + "step": 96350 + }, + { + "epoch": 7.11, + "learning_rate": 3.667466926499985e-06, + "loss": 6.0097, + "step": 96400 + }, + { + "epoch": 7.11, + "learning_rate": 3.617241413948629e-06, + "loss": 6.1105, + "step": 96450 + }, + { + "epoch": 7.12, + "learning_rate": 3.567015901397274e-06, + "loss": 6.4328, + "step": 96500 + }, + { + "epoch": 7.12, + "learning_rate": 
3.5167903888459185e-06, + "loss": 5.0374, + "step": 96550 + }, + { + "epoch": 7.13, + "learning_rate": 3.4665648762945625e-06, + "loss": 5.6259, + "step": 96600 + }, + { + "epoch": 7.13, + "learning_rate": 3.4163393637432074e-06, + "loss": 5.2756, + "step": 96650 + }, + { + "epoch": 7.13, + "learning_rate": 3.3661138511918515e-06, + "loss": 5.4218, + "step": 96700 + }, + { + "epoch": 7.14, + "learning_rate": 3.315888338640496e-06, + "loss": 5.4047, + "step": 96750 + }, + { + "epoch": 7.14, + "learning_rate": 3.265662826089141e-06, + "loss": 5.6143, + "step": 96800 + }, + { + "epoch": 7.14, + "learning_rate": 3.215437313537785e-06, + "loss": 5.3633, + "step": 96850 + }, + { + "epoch": 7.15, + "learning_rate": 3.165211800986429e-06, + "loss": 4.9261, + "step": 96900 + }, + { + "epoch": 7.15, + "learning_rate": 3.114986288435074e-06, + "loss": 5.4425, + "step": 96950 + }, + { + "epoch": 7.15, + "learning_rate": 3.064760775883718e-06, + "loss": 4.9883, + "step": 97000 + }, + { + "epoch": 7.16, + "learning_rate": 3.0145352633323624e-06, + "loss": 4.4753, + "step": 97050 + }, + { + "epoch": 7.16, + "learning_rate": 2.964309750781007e-06, + "loss": 5.3568, + "step": 97100 + }, + { + "epoch": 7.17, + "learning_rate": 2.9140842382296514e-06, + "loss": 5.2898, + "step": 97150 + }, + { + "epoch": 7.17, + "learning_rate": 2.863858725678296e-06, + "loss": 5.2691, + "step": 97200 + }, + { + "epoch": 7.17, + "learning_rate": 2.81363321312694e-06, + "loss": 4.6615, + "step": 97250 + }, + { + "epoch": 7.18, + "learning_rate": 2.7634077005755844e-06, + "loss": 5.2432, + "step": 97300 + }, + { + "epoch": 7.18, + "learning_rate": 2.713182188024229e-06, + "loss": 4.7523, + "step": 97350 + }, + { + "epoch": 7.18, + "learning_rate": 2.6629566754728733e-06, + "loss": 6.4066, + "step": 97400 + }, + { + "epoch": 7.19, + "learning_rate": 2.612731162921518e-06, + "loss": 4.928, + "step": 97450 + }, + { + "epoch": 7.19, + "learning_rate": 2.562505650370162e-06, + "loss": 5.6982, + "step": 
97500 + }, + { + "epoch": 7.2, + "learning_rate": 2.5122801378188068e-06, + "loss": 5.1039, + "step": 97550 + }, + { + "epoch": 7.2, + "learning_rate": 2.4620546252674512e-06, + "loss": 5.573, + "step": 97600 + }, + { + "epoch": 7.2, + "learning_rate": 2.4118291127160953e-06, + "loss": 5.3904, + "step": 97650 + }, + { + "epoch": 7.21, + "learning_rate": 2.3616036001647398e-06, + "loss": 5.0434, + "step": 97700 + }, + { + "epoch": 7.21, + "learning_rate": 2.3113780876133842e-06, + "loss": 5.1437, + "step": 97750 + }, + { + "epoch": 7.21, + "learning_rate": 2.2611525750620287e-06, + "loss": 5.2984, + "step": 97800 + }, + { + "epoch": 7.22, + "learning_rate": 2.210927062510673e-06, + "loss": 4.6668, + "step": 97850 + }, + { + "epoch": 7.22, + "learning_rate": 2.1607015499593172e-06, + "loss": 4.0901, + "step": 97900 + }, + { + "epoch": 7.23, + "learning_rate": 2.1104760374079617e-06, + "loss": 5.1942, + "step": 97950 + }, + { + "epoch": 7.23, + "learning_rate": 2.060250524856606e-06, + "loss": 5.2536, + "step": 98000 + }, + { + "epoch": 7.23, + "learning_rate": 2.0100250123052507e-06, + "loss": 5.6535, + "step": 98050 + }, + { + "epoch": 7.24, + "learning_rate": 1.959799499753895e-06, + "loss": 4.9945, + "step": 98100 + }, + { + "epoch": 7.24, + "learning_rate": 1.9095739872025396e-06, + "loss": 5.3495, + "step": 98150 + }, + { + "epoch": 7.24, + "learning_rate": 1.859348474651184e-06, + "loss": 5.2008, + "step": 98200 + }, + { + "epoch": 7.25, + "learning_rate": 1.8091229620998282e-06, + "loss": 5.2892, + "step": 98250 + }, + { + "epoch": 7.25, + "learning_rate": 1.7588974495484726e-06, + "loss": 5.3336, + "step": 98300 + }, + { + "epoch": 7.25, + "learning_rate": 1.7086719369971173e-06, + "loss": 5.6491, + "step": 98350 + }, + { + "epoch": 7.26, + "learning_rate": 1.6584464244457614e-06, + "loss": 6.371, + "step": 98400 + }, + { + "epoch": 7.26, + "learning_rate": 1.608220911894406e-06, + "loss": 5.6378, + "step": 98450 + }, + { + "epoch": 7.27, + "learning_rate": 
1.5579953993430503e-06, + "loss": 5.3871, + "step": 98500 + }, + { + "epoch": 7.27, + "learning_rate": 1.5077698867916948e-06, + "loss": 5.5656, + "step": 98550 + }, + { + "epoch": 7.27, + "learning_rate": 1.457544374240339e-06, + "loss": 5.2364, + "step": 98600 + }, + { + "epoch": 7.28, + "learning_rate": 1.4073188616889836e-06, + "loss": 4.9547, + "step": 98650 + }, + { + "epoch": 7.28, + "learning_rate": 1.357093349137628e-06, + "loss": 5.0353, + "step": 98700 + }, + { + "epoch": 7.28, + "learning_rate": 1.3068678365862725e-06, + "loss": 5.2683, + "step": 98750 + }, + { + "epoch": 7.29, + "learning_rate": 1.2566423240349168e-06, + "loss": 5.4399, + "step": 98800 + }, + { + "epoch": 7.29, + "learning_rate": 1.2064168114835613e-06, + "loss": 4.6897, + "step": 98850 + }, + { + "epoch": 7.3, + "learning_rate": 1.1561912989322055e-06, + "loss": 5.4834, + "step": 98900 + }, + { + "epoch": 7.3, + "learning_rate": 1.1059657863808502e-06, + "loss": 6.2428, + "step": 98950 + }, + { + "epoch": 7.3, + "learning_rate": 1.0557402738294945e-06, + "loss": 5.3142, + "step": 99000 + }, + { + "epoch": 7.31, + "learning_rate": 1.005514761278139e-06, + "loss": 5.601, + "step": 99050 + }, + { + "epoch": 7.31, + "learning_rate": 9.552892487267832e-07, + "loss": 5.5382, + "step": 99100 + }, + { + "epoch": 7.31, + "learning_rate": 9.050637361754277e-07, + "loss": 6.1838, + "step": 99150 + }, + { + "epoch": 7.32, + "learning_rate": 8.548382236240722e-07, + "loss": 5.4926, + "step": 99200 + }, + { + "epoch": 7.32, + "learning_rate": 8.046127110727166e-07, + "loss": 5.5203, + "step": 99250 + }, + { + "epoch": 7.32, + "learning_rate": 7.543871985213609e-07, + "loss": 5.8525, + "step": 99300 + }, + { + "epoch": 7.33, + "learning_rate": 7.041616859700053e-07, + "loss": 5.5614, + "step": 99350 + }, + { + "epoch": 7.33, + "learning_rate": 6.539361734186498e-07, + "loss": 5.1589, + "step": 99400 + }, + { + "epoch": 7.34, + "learning_rate": 6.037106608672941e-07, + "loss": 4.9074, + "step": 99450 
+ }, + { + "epoch": 7.34, + "learning_rate": 5.534851483159385e-07, + "loss": 5.5361, + "step": 99500 + }, + { + "epoch": 7.34, + "learning_rate": 5.03259635764583e-07, + "loss": 5.632, + "step": 99550 + }, + { + "epoch": 7.35, + "learning_rate": 4.530341232132274e-07, + "loss": 5.3513, + "step": 99600 + }, + { + "epoch": 7.35, + "learning_rate": 4.0280861066187184e-07, + "loss": 5.5489, + "step": 99650 + }, + { + "epoch": 7.35, + "learning_rate": 3.525830981105162e-07, + "loss": 4.6669, + "step": 99700 + }, + { + "epoch": 7.36, + "learning_rate": 3.0235758555916064e-07, + "loss": 5.5143, + "step": 99750 + }, + { + "epoch": 7.36, + "learning_rate": 2.5213207300780506e-07, + "loss": 5.9166, + "step": 99800 + }, + { + "epoch": 7.37, + "learning_rate": 2.0190656045644946e-07, + "loss": 4.9058, + "step": 99850 + }, + { + "epoch": 7.37, + "learning_rate": 1.5168104790509386e-07, + "loss": 5.1908, + "step": 99900 + }, + { + "epoch": 7.37, + "learning_rate": 1.014555353537383e-07, + "loss": 5.2584, + "step": 99950 + }, + { + "epoch": 7.38, + "learning_rate": 5.12300228023827e-08, + "loss": 5.1109, + "step": 100000 + }, + { + "epoch": 7.38, + "eval_loss": 9.184836387634277, + "eval_runtime": 964.8586, + "eval_samples_per_second": 13.575, + "eval_steps_per_second": 3.394, + "eval_wer": 0.21346226533954896, + "step": 100000 + }, + { + "epoch": 7.38, + "learning_rate": 1.0045102510271118e-09, + "loss": 5.2737, + "step": 100050 + }, + { + "epoch": 7.38, + "step": 100051, + "total_flos": 0.0, + "train_loss": 10.025987887954182, + "train_runtime": 56856.134, + "train_samples_per_second": 14.077, + "train_steps_per_second": 1.76 + } + ], + "max_steps": 100051, + "num_train_epochs": 8, + "total_flos": 0.0, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..dbc064046a36220dd960e955c565bc3e2c9e3abd --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:3b64c669f66dd7a2e54d3001ce7e31c26cc60dd58136e8ce90e6055bd0ae15eb +size 3503