{ "best_metric": 0.5241779497098646, "best_model_checkpoint": "/content/drive/MyDrive/datasaur/train-properly/checkpoint-2112", "epoch": 703.8785714285714, "global_step": 2112, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.29, "learning_rate": 0.0001, "loss": 4.6233, "step": 1 }, { "epoch": 1.29, "learning_rate": 0.0001, "loss": 3.6654, "step": 4 }, { "epoch": 2.59, "learning_rate": 0.0001, "loss": 2.3799, "step": 8 }, { "epoch": 3.88, "learning_rate": 0.0001, "loss": 1.7143, "step": 12 }, { "epoch": 5.29, "learning_rate": 0.0001, "loss": 1.4283, "step": 16 }, { "epoch": 6.59, "learning_rate": 0.0001, "loss": 1.0806, "step": 20 }, { "epoch": 7.88, "learning_rate": 0.0001, "loss": 0.9319, "step": 24 }, { "epoch": 9.29, "learning_rate": 0.0001, "loss": 0.8745, "step": 28 }, { "epoch": 10.59, "learning_rate": 0.0001, "loss": 0.7126, "step": 32 }, { "epoch": 11.88, "learning_rate": 0.0001, "loss": 0.655, "step": 36 }, { "epoch": 13.29, "learning_rate": 0.0001, "loss": 0.6493, "step": 40 }, { "epoch": 14.59, "learning_rate": 0.0001, "loss": 0.5586, "step": 44 }, { "epoch": 15.88, "learning_rate": 0.0001, "loss": 0.5208, "step": 48 }, { "epoch": 17.29, "learning_rate": 0.0001, "loss": 0.5401, "step": 52 }, { "epoch": 18.59, "learning_rate": 0.0001, "loss": 0.4716, "step": 56 }, { "epoch": 19.88, "learning_rate": 0.0001, "loss": 0.4334, "step": 60 }, { "epoch": 21.29, "learning_rate": 0.0001, "loss": 0.4422, "step": 64 }, { "epoch": 21.29, "eval_exact_match": 0.22920696324951645, "eval_exec": 0.2572533849129594, "eval_loss": 0.33955129981040955, "eval_runtime": 122.338, "eval_samples_per_second": 8.452, "step": 64 }, { "epoch": 22.59, "learning_rate": 0.0001, "loss": 0.3947, "step": 68 }, { "epoch": 23.88, "learning_rate": 0.0001, "loss": 0.3672, "step": 72 }, { "epoch": 25.29, "learning_rate": 0.0001, "loss": 0.3774, "step": 76 }, { "epoch": 26.59, "learning_rate": 0.0001, "loss": 0.3324, "step": 80 }, { "epoch": 27.88, "learning_rate": 0.0001, "loss": 0.3089, "step": 84 }, { "epoch": 29.29, "learning_rate": 0.0001, "loss": 0.3263, "step": 88 }, { "epoch": 30.59, "learning_rate": 0.0001, "loss": 0.2813, "step": 92 }, { "epoch": 31.88, "learning_rate": 0.0001, "loss": 0.2779, "step": 96 }, { "epoch": 33.29, "learning_rate": 0.0001, "loss": 0.2944, "step": 100 }, { "epoch": 34.59, "learning_rate": 0.0001, "loss": 0.2548, "step": 104 }, { "epoch": 35.88, "learning_rate": 0.0001, "loss": 0.2419, "step": 108 }, { "epoch": 37.29, "learning_rate": 0.0001, "loss": 0.256, "step": 112 }, { "epoch": 38.59, "learning_rate": 0.0001, "loss": 0.2346, "step": 116 }, { "epoch": 39.88, "learning_rate": 0.0001, "loss": 0.2312, "step": 120 }, { "epoch": 41.29, "learning_rate": 0.0001, "loss": 0.2395, "step": 124 }, { "epoch": 42.59, "learning_rate": 0.0001, "loss": 0.2143, "step": 128 }, { "epoch": 42.59, "eval_exact_match": 0.3065764023210832, "eval_exec": 0.3413926499032882, "eval_loss": 0.25483238697052, "eval_runtime": 153.8574, "eval_samples_per_second": 6.721, "step": 128 }, { "epoch": 43.88, "learning_rate": 0.0001, "loss": 0.2018, "step": 132 }, { "epoch": 45.29, "learning_rate": 0.0001, "loss": 0.2105, "step": 136 }, { "epoch": 46.59, "learning_rate": 0.0001, "loss": 0.1906, "step": 140 }, { "epoch": 47.88, "learning_rate": 0.0001, "loss": 0.1876, "step": 144 }, { "epoch": 49.29, "learning_rate": 0.0001, "loss": 0.1949, "step": 148 }, { "epoch": 50.59, "learning_rate": 0.0001, "loss": 0.1777, "step": 152 }, { "epoch": 51.88, "learning_rate": 0.0001, "loss": 0.1672, "step": 156 }, { "epoch": 53.29, "learning_rate": 0.0001, "loss": 0.178, "step": 160 }, { "epoch": 54.59, "learning_rate": 0.0001, "loss": 0.1597, "step": 164 }, { "epoch": 55.88, "learning_rate": 0.0001, "loss": 0.157, "step": 168 }, { "epoch": 57.29, "learning_rate": 0.0001, "loss": 0.1681, "step": 172 }, { "epoch": 58.59, "learning_rate": 0.0001, "loss": 0.1467, "step": 176 }, { "epoch": 59.88, "learning_rate": 0.0001, "loss": 0.1472, "step": 180 }, { "epoch": 61.29, "learning_rate": 0.0001, "loss": 0.1579, "step": 184 }, { "epoch": 62.59, "learning_rate": 0.0001, "loss": 0.1413, "step": 188 }, { "epoch": 63.88, "learning_rate": 0.0001, "loss": 0.1334, "step": 192 }, { "epoch": 63.88, "eval_exact_match": 0.3646034816247582, "eval_exec": 0.390715667311412, "eval_loss": 0.23727674782276154, "eval_runtime": 164.2118, "eval_samples_per_second": 6.297, "step": 192 }, { "epoch": 65.29, "learning_rate": 0.0001, "loss": 0.1412, "step": 196 }, { "epoch": 66.59, "learning_rate": 0.0001, "loss": 0.1256, "step": 200 }, { "epoch": 67.88, "learning_rate": 0.0001, "loss": 0.1243, "step": 204 }, { "epoch": 69.29, "learning_rate": 0.0001, "loss": 0.1325, "step": 208 }, { "epoch": 70.59, "learning_rate": 0.0001, "loss": 0.1194, "step": 212 }, { "epoch": 71.88, "learning_rate": 0.0001, "loss": 0.1159, "step": 216 }, { "epoch": 73.29, "learning_rate": 0.0001, "loss": 0.1242, "step": 220 }, { "epoch": 74.59, "learning_rate": 0.0001, "loss": 0.1106, "step": 224 }, { "epoch": 75.88, "learning_rate": 0.0001, "loss": 0.1104, "step": 228 }, { "epoch": 77.29, "learning_rate": 0.0001, "loss": 0.1121, "step": 232 }, { "epoch": 78.59, "learning_rate": 0.0001, "loss": 0.1021, "step": 236 }, { "epoch": 79.88, "learning_rate": 0.0001, "loss": 0.1021, "step": 240 }, { "epoch": 81.29, "learning_rate": 0.0001, "loss": 0.11, "step": 244 }, { "epoch": 82.59, "learning_rate": 0.0001, "loss": 0.0998, "step": 248 }, { "epoch": 83.88, "learning_rate": 0.0001, "loss": 0.0933, "step": 252 }, { "epoch": 85.29, "learning_rate": 0.0001, "loss": 0.0998, "step": 256 }, { "epoch": 85.29, "eval_exact_match": 0.36847195357833656, "eval_exec": 0.38684719535783363, "eval_loss": 0.24835824966430664, "eval_runtime": 185.4895, "eval_samples_per_second": 5.574, "step": 256 }, { "epoch": 86.59, "learning_rate": 0.0001, "loss": 0.0915, "step": 260 }, { "epoch": 87.88, "learning_rate": 0.0001, "loss": 0.0887, "step": 264 }, { "epoch": 89.29, "learning_rate": 0.0001, "loss": 0.0946, "step": 268 }, { "epoch": 90.59, "learning_rate": 0.0001, "loss": 0.0866, "step": 272 }, { "epoch": 91.88, "learning_rate": 0.0001, "loss": 0.0816, "step": 276 }, { "epoch": 93.29, "learning_rate": 0.0001, "loss": 0.0875, "step": 280 }, { "epoch": 94.59, "learning_rate": 0.0001, "loss": 0.0782, "step": 284 }, { "epoch": 95.88, "learning_rate": 0.0001, "loss": 0.0789, "step": 288 }, { "epoch": 97.29, "learning_rate": 0.0001, "loss": 0.0841, "step": 292 }, { "epoch": 98.59, "learning_rate": 0.0001, "loss": 0.0754, "step": 296 }, { "epoch": 99.88, "learning_rate": 0.0001, "loss": 0.0732, "step": 300 }, { "epoch": 101.29, "learning_rate": 0.0001, "loss": 0.0757, "step": 304 }, { "epoch": 102.59, "learning_rate": 0.0001, "loss": 0.0685, "step": 308 }, { "epoch": 103.88, "learning_rate": 0.0001, "loss": 0.0665, "step": 312 }, { "epoch": 105.29, "learning_rate": 0.0001, "loss": 0.0735, "step": 316 }, { "epoch": 106.59, "learning_rate": 0.0001, "loss": 0.0686, "step": 320 }, { "epoch": 106.59, "eval_exact_match": 0.3945841392649903, "eval_exec": 0.43423597678916825, "eval_loss": 0.25631850957870483, "eval_runtime": 177.6509, "eval_samples_per_second": 5.82, "step": 320 }, { "epoch": 107.88, "learning_rate": 0.0001, "loss": 0.0653, "step": 324 }, { "epoch": 109.29, "learning_rate": 0.0001, "loss": 0.0685, "step": 328 }, { "epoch": 110.59, "learning_rate": 0.0001, "loss": 0.0607, "step": 332 }, { "epoch": 111.88, "learning_rate": 0.0001, "loss": 0.0604, "step": 336 }, { "epoch": 113.29, "learning_rate": 0.0001, "loss": 0.0634, "step": 340 }, { "epoch": 114.59, "learning_rate": 0.0001, "loss": 0.0568, "step": 344 }, { "epoch": 115.88, "learning_rate": 0.0001, "loss": 0.0563, "step": 348 }, { "epoch": 117.29, "learning_rate": 0.0001, "loss": 0.0596, "step": 352 }, { "epoch": 118.59, "learning_rate": 0.0001, "loss": 0.0562, "step": 356 }, { "epoch": 119.88, "learning_rate": 0.0001, "loss": 0.0584, "step": 360 }, { "epoch": 121.29, "learning_rate": 0.0001, "loss": 0.0579, "step": 364 }, { "epoch": 122.59, "learning_rate": 0.0001, "loss": 0.0525, "step": 368 }, { "epoch": 123.88, "learning_rate": 0.0001, "loss": 0.0504, "step": 372 }, { "epoch": 125.29, "learning_rate": 0.0001, "loss": 0.0541, "step": 376 }, { "epoch": 126.59, "learning_rate": 0.0001, "loss": 0.0475, "step": 380 }, { "epoch": 127.88, "learning_rate": 0.0001, "loss": 0.0469, "step": 384 }, { "epoch": 127.88, "eval_exact_match": 0.4796905222437137, "eval_exec": 0.5019342359767892, "eval_loss": 0.27194276452064514, "eval_runtime": 142.4018, "eval_samples_per_second": 7.261, "step": 384 }, { "epoch": 129.29, "learning_rate": 0.0001, "loss": 0.0513, "step": 388 }, { "epoch": 130.59, "learning_rate": 0.0001, "loss": 0.0477, "step": 392 }, { "epoch": 131.88, "learning_rate": 0.0001, "loss": 0.0443, "step": 396 }, { "epoch": 133.29, "learning_rate": 0.0001, "loss": 0.047, "step": 400 }, { "epoch": 134.59, "learning_rate": 0.0001, "loss": 0.0424, "step": 404 }, { "epoch": 135.88, "learning_rate": 0.0001, "loss": 0.0425, "step": 408 }, { "epoch": 137.29, "learning_rate": 0.0001, "loss": 0.0474, "step": 412 }, { "epoch": 138.59, "learning_rate": 0.0001, "loss": 0.0446, "step": 416 }, { "epoch": 139.88, "learning_rate": 0.0001, "loss": 0.0409, "step": 420 }, { "epoch": 141.29, "learning_rate": 0.0001, "loss": 0.0416, "step": 424 }, { "epoch": 142.59, "learning_rate": 0.0001, "loss": 0.0375, "step": 428 }, { "epoch": 143.88, "learning_rate": 0.0001, "loss": 0.0377, "step": 432 }, { "epoch": 145.29, "learning_rate": 0.0001, "loss": 0.0398, "step": 436 }, { "epoch": 146.59, "learning_rate": 0.0001, "loss": 0.0383, "step": 440 }, { "epoch": 147.88, "learning_rate": 0.0001, "loss": 0.0344, "step": 444 }, { "epoch": 149.29, "learning_rate": 0.0001, "loss": 0.0372, "step": 448 }, { "epoch": 149.29, "eval_exact_match": 0.47775628626692457, "eval_exec": 0.5009671179883946, "eval_loss": 0.29459667205810547, "eval_runtime": 133.4155, "eval_samples_per_second": 7.75, "step": 448 }, { "epoch": 150.59, "learning_rate": 0.0001, "loss": 0.034, "step": 452 }, { "epoch": 151.88, "learning_rate": 0.0001, "loss": 0.0345, "step": 456 }, { "epoch": 153.29, "learning_rate": 0.0001, "loss": 0.0394, "step": 460 }, { "epoch": 154.59, "learning_rate": 0.0001, "loss": 0.0348, "step": 464 }, { "epoch": 155.88, "learning_rate": 0.0001, "loss": 0.0316, "step": 468 }, { "epoch": 157.29, "learning_rate": 0.0001, "loss": 0.0361, "step": 472 }, { "epoch": 158.59, "learning_rate": 0.0001, "loss": 0.034, "step": 476 }, { "epoch": 159.88, "learning_rate": 0.0001, "loss": 0.0308, "step": 480 }, { "epoch": 161.29, "learning_rate": 0.0001, "loss": 0.0316, "step": 484 }, { "epoch": 162.59, "learning_rate": 0.0001, "loss": 0.0282, "step": 488 }, { "epoch": 163.88, "learning_rate": 0.0001, "loss": 0.0282, "step": 492 }, { "epoch": 165.29, "learning_rate": 0.0001, "loss": 0.0303, "step": 496 }, { "epoch": 166.59, "learning_rate": 0.0001, "loss": 0.0287, "step": 500 }, { "epoch": 167.88, "learning_rate": 0.0001, "loss": 0.0271, "step": 504 }, { "epoch": 169.29, "learning_rate": 0.0001, "loss": 0.0289, "step": 508 }, { "epoch": 170.59, "learning_rate": 0.0001, "loss": 0.028, "step": 512 }, { "epoch": 170.59, "eval_exact_match": 0.4738878143133462, "eval_exec": 0.49709864603481624, "eval_loss": 0.30677586793899536, "eval_runtime": 151.116, "eval_samples_per_second": 6.842, "step": 512 }, { "epoch": 171.88, "learning_rate": 0.0001, "loss": 0.0295, "step": 516 }, { "epoch": 173.29, "learning_rate": 0.0001, "loss": 0.0277, "step": 520 }, { "epoch": 174.59, "learning_rate": 0.0001, "loss": 0.0243, "step": 524 }, { "epoch": 175.88, "learning_rate": 0.0001, "loss": 0.0241, "step": 528 }, { "epoch": 177.29, "learning_rate": 0.0001, "loss": 0.0272, "step": 532 }, { "epoch": 178.59, "learning_rate": 0.0001, "loss": 0.0247, "step": 536 }, { "epoch": 179.88, "learning_rate": 0.0001, "loss": 0.0238, "step": 540 }, { "epoch": 181.29, "learning_rate": 0.0001, "loss": 0.0247, "step": 544 }, { "epoch": 182.59, "learning_rate": 0.0001, "loss": 0.0222, "step": 548 }, { "epoch": 183.88, "learning_rate": 0.0001, "loss": 0.0224, "step": 552 }, { "epoch": 185.29, "learning_rate": 0.0001, "loss": 0.024, "step": 556 }, { "epoch": 186.59, "learning_rate": 0.0001, "loss": 0.0223, "step": 560 }, { "epoch": 187.88, "learning_rate": 0.0001, "loss": 0.0241, "step": 564 }, { "epoch": 189.29, "learning_rate": 0.0001, "loss": 0.024, "step": 568 }, { "epoch": 190.59, "learning_rate": 0.0001, "loss": 0.0207, "step": 572 }, { "epoch": 191.88, "learning_rate": 0.0001, "loss": 0.0197, "step": 576 }, { "epoch": 191.88, "eval_exact_match": 0.47775628626692457, "eval_exec": 0.5009671179883946, "eval_loss": 0.3227923512458801, "eval_runtime": 165.1002, "eval_samples_per_second": 6.263, "step": 576 }, { "epoch": 193.29, "learning_rate": 0.0001, "loss": 0.0205, "step": 580 }, { "epoch": 194.59, "learning_rate": 0.0001, "loss": 0.0192, "step": 584 }, { "epoch": 195.88, "learning_rate": 0.0001, "loss": 0.0187, "step": 588 }, { "epoch": 197.29, "learning_rate": 0.0001, "loss": 0.0207, "step": 592 }, { "epoch": 198.59, "learning_rate": 0.0001, "loss": 0.0181, "step": 596 }, { "epoch": 199.88, "learning_rate": 0.0001, "loss": 0.018, "step": 600 }, { "epoch": 201.29, "learning_rate": 0.0001, "loss": 0.0193, "step": 604 }, { "epoch": 202.59, "learning_rate": 0.0001, "loss": 0.0173, "step": 608 }, { "epoch": 203.88, "learning_rate": 0.0001, "loss": 0.0176, "step": 612 }, { "epoch": 205.29, "learning_rate": 0.0001, "loss": 0.0186, "step": 616 }, { "epoch": 206.59, "learning_rate": 0.0001, "loss": 0.0195, "step": 620 }, { "epoch": 207.88, "learning_rate": 0.0001, "loss": 0.0174, "step": 624 }, { "epoch": 209.29, "learning_rate": 0.0001, "loss": 0.0174, "step": 628 }, { "epoch": 210.59, "learning_rate": 0.0001, "loss": 0.0163, "step": 632 }, { "epoch": 211.88, "learning_rate": 0.0001, "loss": 0.0162, "step": 636 }, { "epoch": 213.29, "learning_rate": 0.0001, "loss": 0.0175, "step": 640 }, { "epoch": 213.29, "eval_exact_match": 0.4661508704061896, "eval_exec": 0.48452611218568664, "eval_loss": 0.33815476298332214, "eval_runtime": 159.4098, "eval_samples_per_second": 6.486, "step": 640 }, { "epoch": 214.59, "learning_rate": 0.0001, "loss": 0.0157, "step": 644 }, { "epoch": 215.88, "learning_rate": 0.0001, "loss": 0.015, "step": 648 }, { "epoch": 217.29, "learning_rate": 0.0001, "loss": 0.0156, "step": 652 }, { "epoch": 218.59, "learning_rate": 0.0001, "loss": 0.014, "step": 656 }, { "epoch": 219.88, "learning_rate": 0.0001, "loss": 0.0148, "step": 660 }, { "epoch": 221.29, "learning_rate": 0.0001, "loss": 0.0155, "step": 664 }, { "epoch": 222.59, "learning_rate": 0.0001, "loss": 0.0141, "step": 668 }, { "epoch": 223.88, "learning_rate": 0.0001, "loss": 0.0133, "step": 672 }, { "epoch": 225.29, "learning_rate": 0.0001, "loss": 0.0144, "step": 676 }, { "epoch": 226.59, "learning_rate": 0.0001, "loss": 0.0134, "step": 680 }, { "epoch": 227.88, "learning_rate": 0.0001, "loss": 0.0143, "step": 684 }, { "epoch": 229.29, "learning_rate": 0.0001, "loss": 0.0137, "step": 688 }, { "epoch": 230.59, "learning_rate": 0.0001, "loss": 0.0127, "step": 692 }, { "epoch": 231.88, "learning_rate": 0.0001, "loss": 0.0125, "step": 696 }, { "epoch": 233.29, "learning_rate": 0.0001, "loss": 0.0136, "step": 700 }, { "epoch": 234.59, "learning_rate": 0.0001, "loss": 0.0126, "step": 704 }, { "epoch": 234.59, "eval_exact_match": 0.49032882011605416, "eval_exec": 0.5019342359767892, "eval_loss": 0.35344886779785156, "eval_runtime": 194.4016, "eval_samples_per_second": 5.319, "step": 704 }, { "epoch": 235.88, "learning_rate": 0.0001, "loss": 0.0127, "step": 708 }, { "epoch": 237.29, "learning_rate": 0.0001, "loss": 0.0129, "step": 712 }, { "epoch": 238.59, "learning_rate": 0.0001, "loss": 0.0117, "step": 716 }, { "epoch": 239.88, "learning_rate": 0.0001, "loss": 0.0117, "step": 720 }, { "epoch": 241.29, "learning_rate": 0.0001, "loss": 0.0148, "step": 724 }, { "epoch": 242.59, "learning_rate": 0.0001, "loss": 0.0121, "step": 728 }, { "epoch": 243.88, "learning_rate": 0.0001, "loss": 0.0106, "step": 732 }, { "epoch": 245.29, "learning_rate": 0.0001, "loss": 0.0122, "step": 736 }, { "epoch": 246.59, "learning_rate": 0.0001, "loss": 0.0113, "step": 740 }, { "epoch": 247.88, "learning_rate": 0.0001, "loss": 0.01, "step": 744 }, { "epoch": 249.29, "learning_rate": 0.0001, "loss": 0.0109, "step": 748 }, { "epoch": 250.59, "learning_rate": 0.0001, "loss": 0.0103, "step": 752 }, { "epoch": 251.88, "learning_rate": 0.0001, "loss": 0.0095, "step": 756 }, { "epoch": 253.29, "learning_rate": 0.0001, "loss": 0.0104, "step": 760 }, { "epoch": 254.59, "learning_rate": 0.0001, "loss": 0.0091, "step": 764 }, { "epoch": 255.88, "learning_rate": 0.0001, "loss": 0.0093, "step": 768 }, { "epoch": 255.88, "eval_exact_match": 0.4990328820116054, "eval_exec": 0.5096711798839458, "eval_loss": 0.36643365025520325, "eval_runtime": 176.9801, "eval_samples_per_second": 5.842, "step": 768 }, { "epoch": 257.29, "learning_rate": 0.0001, "loss": 0.0106, "step": 772 }, { "epoch": 258.59, "learning_rate": 0.0001, "loss": 0.0089, "step": 776 }, { "epoch": 259.88, "learning_rate": 0.0001, "loss": 0.0094, "step": 780 }, { "epoch": 261.29, "learning_rate": 0.0001, "loss": 0.0106, "step": 784 }, { "epoch": 262.59, "learning_rate": 0.0001, "loss": 0.0096, "step": 788 }, { "epoch": 263.88, "learning_rate": 0.0001, "loss": 0.0086, "step": 792 }, { "epoch": 265.29, "learning_rate": 0.0001, "loss": 0.0094, "step": 796 }, { "epoch": 266.59, "learning_rate": 0.0001, "loss": 0.0086, "step": 800 }, { "epoch": 267.88, "learning_rate": 0.0001, "loss": 0.0085, "step": 804 }, { "epoch": 269.29, "learning_rate": 0.0001, "loss": 0.0092, "step": 808 }, { "epoch": 270.59, "learning_rate": 0.0001, "loss": 0.0084, "step": 812 }, { "epoch": 271.88, "learning_rate": 0.0001, "loss": 0.0078, "step": 816 }, { "epoch": 273.29, "learning_rate": 0.0001, "loss": 0.009, "step": 820 }, { "epoch": 274.59, "learning_rate": 0.0001, "loss": 0.0086, "step": 824 }, { "epoch": 275.88, "learning_rate": 0.0001, "loss": 0.0079, "step": 828 }, { "epoch": 277.29, "learning_rate": 0.0001, "loss": 0.0082, "step": 832 }, { "epoch": 277.29, "eval_exact_match": 0.49806576402321084, "eval_exec": 0.5096711798839458, "eval_loss": 0.3839401602745056, "eval_runtime": 178.4278, "eval_samples_per_second": 5.795, "step": 832 }, { "epoch": 278.59, "learning_rate": 0.0001, "loss": 0.0077, "step": 836 }, { "epoch": 279.88, "learning_rate": 0.0001, "loss": 0.0076, "step": 840 }, { "epoch": 281.29, "learning_rate": 0.0001, "loss": 0.0081, "step": 844 }, { "epoch": 282.59, "learning_rate": 0.0001, "loss": 0.0076, "step": 848 }, { "epoch": 283.88, "learning_rate": 0.0001, "loss": 0.0085, "step": 852 }, { "epoch": 285.29, "learning_rate": 0.0001, "loss": 0.0132, "step": 856 }, { "epoch": 286.59, "learning_rate": 0.0001, "loss": 0.0084, "step": 860 }, { "epoch": 287.88, "learning_rate": 0.0001, "loss": 0.0068, "step": 864 }, { "epoch": 289.29, "learning_rate": 0.0001, "loss": 0.0072, "step": 868 }, { "epoch": 290.59, "learning_rate": 0.0001, "loss": 0.0073, "step": 872 }, { "epoch": 291.88, "learning_rate": 0.0001, "loss": 0.007, "step": 876 }, { "epoch": 293.29, "learning_rate": 0.0001, "loss": 0.0074, "step": 880 }, { "epoch": 294.59, "learning_rate": 0.0001, "loss": 0.0059, "step": 884 }, { "epoch": 295.88, "learning_rate": 0.0001, "loss": 0.0062, "step": 888 }, { "epoch": 297.29, "learning_rate": 0.0001, "loss": 0.0065, "step": 892 }, { "epoch": 298.59, "learning_rate": 0.0001, "loss": 0.0064, "step": 896 }, { "epoch": 298.59, "eval_exact_match": 0.504835589941973, "eval_exec": 0.5241779497098646, "eval_loss": 0.3868894875049591, "eval_runtime": 176.2661, "eval_samples_per_second": 5.866, "step": 896 }, { "epoch": 299.88, "learning_rate": 0.0001, "loss": 0.0064, "step": 900 }, { "epoch": 301.29, "learning_rate": 0.0001, "loss": 0.0066, "step": 904 }, { "epoch": 302.59, "learning_rate": 0.0001, "loss": 0.0063, "step": 908 }, { "epoch": 303.88, "learning_rate": 0.0001, "loss": 0.006, "step": 912 }, { "epoch": 305.29, "learning_rate": 0.0001, "loss": 0.0065, "step": 916 }, { "epoch": 306.59, "learning_rate": 0.0001, "loss": 0.0059, "step": 920 }, { "epoch": 307.88, "learning_rate": 0.0001, "loss": 0.0055, "step": 924 }, { "epoch": 309.29, "learning_rate": 0.0001, "loss": 0.006, "step": 928 }, { "epoch": 310.59, "learning_rate": 0.0001, "loss": 0.0052, "step": 932 }, { "epoch": 311.88, "learning_rate": 0.0001, "loss": 0.0059, "step": 936 }, { "epoch": 313.29, "learning_rate": 0.0001, "loss": 0.0061, "step": 940 }, { "epoch": 314.59, "learning_rate": 0.0001, "loss": 0.0051, "step": 944 }, { "epoch": 315.88, "learning_rate": 0.0001, "loss": 0.0056, "step": 948 }, { "epoch": 317.29, "learning_rate": 0.0001, "loss": 0.0056, "step": 952 }, { "epoch": 318.59, "learning_rate": 0.0001, "loss": 0.005, "step": 956 }, { "epoch": 319.88, "learning_rate": 0.0001, "loss": 0.0049, "step": 960 }, { "epoch": 319.88, "eval_exact_match": 0.5009671179883946, "eval_exec": 0.5135396518375241, "eval_loss": 0.39836612343788147, "eval_runtime": 183.0329, "eval_samples_per_second": 5.649, "step": 960 }, { "epoch": 321.29, "learning_rate": 0.0001, "loss": 0.0054, "step": 964 }, { "epoch": 322.59, "learning_rate": 0.0001, "loss": 0.005, "step": 968 }, { "epoch": 323.88, "learning_rate": 0.0001, "loss": 0.0049, "step": 972 }, { "epoch": 325.29, "learning_rate": 0.0001, "loss": 0.0052, "step": 976 }, { "epoch": 326.59, "learning_rate": 0.0001, "loss": 0.005, "step": 980 }, { "epoch": 327.88, "learning_rate": 0.0001, "loss": 0.0048, "step": 984 }, { "epoch": 329.29, "learning_rate": 0.0001, "loss": 0.0052, "step": 988 }, { "epoch": 330.59, "learning_rate": 0.0001, "loss": 0.0046, "step": 992 }, { "epoch": 331.88, "learning_rate": 0.0001, "loss": 0.0048, "step": 996 }, { "epoch": 333.29, "learning_rate": 0.0001, "loss": 0.0051, "step": 1000 }, { "epoch": 334.59, "learning_rate": 0.0001, "loss": 0.005, "step": 1004 }, { "epoch": 335.88, "learning_rate": 0.0001, "loss": 0.0049, "step": 1008 }, { "epoch": 337.29, "learning_rate": 0.0001, "loss": 0.005, "step": 1012 }, { "epoch": 338.59, "learning_rate": 0.0001, "loss": 0.0046, "step": 1016 }, { "epoch": 339.88, "learning_rate": 0.0001, "loss": 0.0041, "step": 1020 }, { "epoch": 341.29, "learning_rate": 0.0001, "loss": 0.0052, "step": 1024 }, { "epoch": 341.29, "eval_exact_match": 0.5125725338491296, "eval_exec": 0.5193423597678917, "eval_loss": 0.40509259700775146, "eval_runtime": 174.5602, "eval_samples_per_second": 5.923, "step": 1024 }, { "epoch": 342.59, "learning_rate": 0.0001, "loss": 0.0043, "step": 1028 }, { "epoch": 343.88, "learning_rate": 0.0001, "loss": 0.0042, "step": 1032 }, { "epoch": 345.29, "learning_rate": 0.0001, "loss": 0.0043, "step": 1036 }, { "epoch": 346.59, "learning_rate": 0.0001, "loss": 0.0043, "step": 1040 }, { "epoch": 347.88, "learning_rate": 0.0001, "loss": 0.0042, "step": 1044 }, { "epoch": 349.29, "learning_rate": 0.0001, "loss": 0.0044, "step": 1048 }, { "epoch": 350.59, "learning_rate": 0.0001, "loss": 0.0036, "step": 1052 }, { "epoch": 351.88, "learning_rate": 0.0001, "loss": 0.0039, "step": 1056 }, { "epoch": 353.29, "learning_rate": 0.0001, "loss": 0.0045, "step": 1060 }, { "epoch": 354.59, "learning_rate": 0.0001, "loss": 0.0041, "step": 1064 }, { "epoch": 355.88, "learning_rate": 0.0001, "loss": 0.0037, "step": 1068 }, { "epoch": 357.29, "learning_rate": 0.0001, "loss": 0.0043, "step": 1072 }, { "epoch": 358.59, "learning_rate": 0.0001, "loss": 0.0038, "step": 1076 }, { "epoch": 359.88, "learning_rate": 0.0001, "loss": 0.0038, "step": 1080 }, { "epoch": 361.29, "learning_rate": 0.0001, "loss": 0.0039, "step": 1084 }, { "epoch": 362.59, "learning_rate": 0.0001, "loss": 0.0037, "step": 1088 }, { "epoch": 362.59, "eval_exact_match": 0.5038684719535783, "eval_exec": 0.5154738878143134, "eval_loss": 0.41651493310928345, "eval_runtime": 179.3833, "eval_samples_per_second": 5.764, "step": 1088 }, { "epoch": 363.88, "learning_rate": 0.0001, "loss": 0.0038, "step": 1092 }, { "epoch": 365.29, "learning_rate": 0.0001, "loss": 0.0041, "step": 1096 }, { "epoch": 366.59, "learning_rate": 0.0001, "loss": 0.0038, "step": 1100 }, { "epoch": 367.88, "learning_rate": 0.0001, "loss": 0.0039, "step": 1104 }, { "epoch": 369.29, "learning_rate": 0.0001, "loss": 0.0039, "step": 1108 }, { "epoch": 370.59, "learning_rate": 0.0001, "loss": 0.0036, "step": 1112 }, { "epoch": 371.88, "learning_rate": 0.0001, "loss": 0.0034, "step": 1116 }, { "epoch": 373.29, "learning_rate": 0.0001, "loss": 0.0039, "step": 1120 }, { "epoch": 374.59, "learning_rate": 0.0001, "loss": 0.0034, "step": 1124 }, { "epoch": 375.88, "learning_rate": 0.0001, "loss": 0.0033, "step": 1128 }, { "epoch": 377.29, "learning_rate": 0.0001, "loss": 0.0036, "step": 1132 }, { "epoch": 378.59, "learning_rate": 0.0001, "loss": 0.0037, "step": 1136 }, { "epoch": 379.88, "learning_rate": 0.0001, "loss": 0.0032, "step": 1140 }, { "epoch": 381.29, "learning_rate": 0.0001, "loss": 0.0034, "step": 1144 }, { "epoch": 382.59, "learning_rate": 0.0001, "loss": 0.0031, "step": 1148 }, { "epoch": 383.88, "learning_rate": 0.0001, "loss": 0.0032, "step": 1152 }, { "epoch": 383.88, "eval_exact_match": 0.5009671179883946, "eval_exec": 0.5135396518375241, "eval_loss": 0.4194105863571167, "eval_runtime": 187.7814, "eval_samples_per_second": 5.506, "step": 1152 }, { "epoch": 385.29, "learning_rate": 0.0001, "loss": 0.0034, "step": 1156 }, { "epoch": 386.59, "learning_rate": 0.0001, "loss": 0.0029, "step": 1160 }, { "epoch": 387.88, "learning_rate": 0.0001, "loss": 0.0029, "step": 1164 }, { "epoch": 389.29, "learning_rate": 0.0001, "loss": 0.0032, "step": 1168 }, { "epoch": 390.59, "learning_rate": 0.0001, "loss": 0.0029, "step": 1172 }, { "epoch": 391.88, "learning_rate": 0.0001, "loss": 0.0033, "step": 1176 }, { "epoch": 393.29, "learning_rate": 0.0001, "loss": 0.0032, "step": 1180 }, { "epoch": 394.59, "learning_rate": 0.0001, "loss": 0.0033, "step": 1184 }, { "epoch": 395.88, "learning_rate": 0.0001, "loss": 0.003, "step": 1188 }, { "epoch": 397.29, "learning_rate": 0.0001, "loss": 0.0034, "step": 1192 }, { "epoch": 398.59, "learning_rate": 0.0001, "loss": 0.0031, "step": 1196 }, { "epoch": 399.88, "learning_rate": 0.0001, "loss": 0.0028, "step": 1200 }, { "epoch": 401.29, "learning_rate": 0.0001, "loss": 0.0031, "step": 1204 }, { "epoch": 402.59, "learning_rate": 0.0001, "loss": 0.003, "step": 1208 }, { "epoch": 403.88, "learning_rate": 0.0001, "loss": 0.0027, "step": 1212 }, { "epoch": 405.29, "learning_rate": 0.0001, "loss": 0.0032, "step": 1216 }, { "epoch": 405.29, "eval_exact_match": 0.5125725338491296, "eval_exec": 0.5193423597678917, "eval_loss": 0.42960309982299805, "eval_runtime": 173.0051, "eval_samples_per_second": 5.977, "step": 1216 }, { "epoch": 406.59, "learning_rate": 0.0001, "loss": 0.0027, "step": 1220 }, { "epoch": 407.88, "learning_rate": 0.0001, "loss": 0.0028, "step": 1224 }, { "epoch": 409.29, "learning_rate": 0.0001, "loss": 0.003, "step": 1228 }, { "epoch": 410.59, "learning_rate": 0.0001, "loss": 0.0027, "step": 1232 }, { "epoch": 411.88, "learning_rate": 0.0001, "loss": 0.0026, "step": 1236 }, { "epoch": 413.29, "learning_rate": 0.0001, "loss": 0.0028, "step": 1240 }, { "epoch": 414.59, "learning_rate": 0.0001, "loss": 0.0027, "step": 1244 }, { "epoch": 415.88, "learning_rate": 0.0001, "loss": 0.0027, "step": 1248 }, { "epoch": 417.29, "learning_rate": 0.0001, "loss": 0.0029, "step": 1252 }, { "epoch": 418.59, "learning_rate": 0.0001, "loss": 0.0024, "step": 1256 }, { "epoch": 419.88, "learning_rate": 0.0001, "loss": 0.0025, "step": 1260 }, { "epoch": 421.29, "learning_rate": 0.0001, "loss": 0.0028, "step": 1264 }, { "epoch": 422.59, "learning_rate": 0.0001, "loss": 0.0023, "step": 1268 }, { "epoch": 423.88, "learning_rate": 0.0001, "loss": 0.0024, "step": 1272 }, { "epoch": 425.29, "learning_rate": 0.0001, "loss": 0.0026, "step": 1276 }, { "epoch": 426.59, "learning_rate": 0.0001, "loss": 0.0025, "step": 1280 }, { "epoch": 426.59, "eval_exact_match": 0.4922630560928433, "eval_exec": 0.5029013539651838, "eval_loss": 0.42588016390800476, "eval_runtime": 182.0847, "eval_samples_per_second": 5.679, "step": 1280 }, { "epoch": 427.88, "learning_rate": 0.0001, "loss": 0.0024, "step": 1284 }, { "epoch": 429.29, "learning_rate": 0.0001, "loss": 0.0027, "step": 1288 }, { "epoch": 430.59, "learning_rate": 0.0001, "loss": 0.0024, "step": 1292 }, { "epoch": 431.88, "learning_rate": 0.0001, "loss": 0.0023, "step": 1296 }, { "epoch": 433.29, "learning_rate": 0.0001, "loss": 0.0025, "step": 1300 }, { "epoch": 434.59, "learning_rate": 0.0001, "loss": 0.0022, "step": 1304 }, { "epoch": 435.88, "learning_rate": 0.0001, "loss": 0.0023, "step": 1308 }, { "epoch": 437.29, "learning_rate": 0.0001, "loss": 0.0024, "step": 1312 }, { "epoch": 438.59, "learning_rate": 0.0001, "loss": 0.0027, "step": 1316 }, { "epoch": 439.88, "learning_rate": 0.0001, "loss": 0.004, "step": 1320 }, { "epoch": 441.29, "learning_rate": 0.0001, "loss": 0.0055, "step": 1324 }, { "epoch": 442.59, "learning_rate": 0.0001, "loss": 0.0024, "step": 1328 }, { "epoch": 443.88, "learning_rate": 0.0001, "loss": 0.0024, "step": 1332 }, { "epoch": 445.29, "learning_rate": 0.0001, "loss": 0.0026, "step": 1336 }, { "epoch": 446.59, "learning_rate": 0.0001, "loss": 0.0022, "step": 1340 }, { "epoch": 447.88, "learning_rate": 0.0001, "loss": 0.0023, "step": 1344 }, { "epoch": 447.88, "eval_exact_match": 0.4941972920696325, "eval_exec": 0.5058027079303675, "eval_loss": 0.43824511766433716, "eval_runtime": 181.3806, "eval_samples_per_second": 5.701, "step": 1344 }, { "epoch": 449.29, "learning_rate": 0.0001, "loss": 0.0026, "step": 1348 }, { "epoch": 450.59, "learning_rate": 0.0001, "loss": 0.0022, "step": 1352 }, { "epoch": 451.88, "learning_rate": 0.0001, "loss": 0.002, "step": 1356 }, { "epoch": 453.29, "learning_rate": 0.0001, "loss": 0.0021, "step": 1360 }, { "epoch": 454.59, "learning_rate": 0.0001, "loss": 0.0021, "step": 1364 }, { "epoch": 455.88, "learning_rate": 0.0001, "loss": 0.002, "step": 1368 }, { "epoch": 457.29, "learning_rate": 0.0001, "loss": 0.0022, "step": 1372 }, { "epoch": 458.59, "learning_rate": 0.0001, "loss": 0.0021, "step": 1376 }, { "epoch": 459.88, "learning_rate": 0.0001, "loss": 0.002, "step": 1380 }, { "epoch": 461.29, "learning_rate": 0.0001, "loss": 0.0022, "step": 1384 }, { "epoch": 462.59, "learning_rate": 0.0001, "loss": 0.0021, "step": 1388 }, { "epoch": 463.88, "learning_rate": 0.0001, "loss": 0.0019, "step": 1392 }, { "epoch": 465.29, "learning_rate": 0.0001, "loss": 0.0021, "step": 1396 }, { "epoch": 466.59, "learning_rate": 0.0001, "loss": 0.0021, "step": 1400 }, { "epoch": 467.88, "learning_rate": 0.0001, "loss": 0.0019, "step": 1404 }, { "epoch": 469.29, "learning_rate": 0.0001, "loss": 0.0021, "step": 1408 }, { "epoch": 469.29, "eval_exact_match": 0.5106382978723404, "eval_exec": 0.5183752417794971, "eval_loss": 0.4374188780784607, "eval_runtime": 174.2227, "eval_samples_per_second": 5.935, "step": 1408 }, { "epoch": 470.59, "learning_rate": 0.0001, "loss": 0.0019, "step": 1412 }, { "epoch": 471.88, "learning_rate": 0.0001, "loss": 0.002, "step": 1416 }, { "epoch": 473.29, "learning_rate": 0.0001, "loss": 0.0022, "step": 1420 }, { "epoch": 474.59, "learning_rate": 0.0001, "loss": 0.0019, "step": 1424 }, { "epoch": 475.88, "learning_rate": 0.0001, "loss": 0.002, "step": 1428 }, { "epoch": 477.29, "learning_rate": 0.0001, "loss": 0.0019, "step": 1432 }, { "epoch": 478.59, "learning_rate": 0.0001, "loss": 0.0019, "step": 1436 }, { "epoch": 479.88, "learning_rate": 0.0001, "loss": 0.0017, "step": 1440 }, { "epoch": 481.29, "learning_rate": 0.0001, "loss": 0.0019, "step": 1444 }, { "epoch": 482.59, "learning_rate": 0.0001, "loss": 0.0019, "step": 1448 }, { "epoch": 483.88, "learning_rate": 0.0001, "loss": 0.0016, "step": 1452 }, { "epoch": 485.29, "learning_rate": 0.0001, "loss": 0.002, "step": 1456 }, { "epoch": 486.59, "learning_rate": 0.0001, "loss": 0.0019, "step": 1460 }, { "epoch": 487.88, "learning_rate": 0.0001, "loss": 0.0017, "step": 1464 }, { "epoch": 489.29, "learning_rate": 0.0001, "loss": 0.0018, "step": 1468 }, { "epoch": 490.59, "learning_rate": 0.0001, "loss": 0.0018, "step": 1472 }, { "epoch": 490.59, "eval_exact_match": 0.5135396518375241, "eval_exec": 0.5222437137330754, "eval_loss": 0.4480360150337219, "eval_runtime": 173.7903, "eval_samples_per_second": 5.95, "step": 1472 }, { "epoch": 491.88, "learning_rate": 0.0001, "loss": 0.0017, "step": 1476 }, { "epoch": 493.29, "learning_rate": 0.0001, "loss": 0.0019, "step": 1480 }, { "epoch": 494.59, "learning_rate": 0.0001, "loss": 0.0018, "step": 1484 }, { "epoch": 495.88, "learning_rate": 0.0001, "loss": 0.0016, "step": 1488 }, { "epoch": 497.29, "learning_rate": 0.0001, "loss": 0.0018, "step": 1492 }, { "epoch": 498.59, "learning_rate": 0.0001, "loss": 0.0017, "step": 1496 }, { "epoch": 499.88, "learning_rate": 0.0001, "loss": 0.0016, "step": 1500 }, { "epoch": 501.29, "learning_rate": 0.0001, "loss": 0.0019, "step": 1504 }, { "epoch": 502.59, "learning_rate": 0.0001, "loss": 0.002, "step": 1508 }, { "epoch": 503.88, "learning_rate": 0.0001, "loss": 0.0019, "step": 1512 }, { "epoch": 505.29, "learning_rate": 0.0001, "loss": 0.002, "step": 1516 }, { "epoch": 506.59, "learning_rate": 0.0001, "loss": 0.0015, "step": 1520 }, { "epoch": 507.88, "learning_rate": 0.0001, "loss": 0.0016, "step": 1524 }, { "epoch": 509.29, "learning_rate": 0.0001, "loss": 0.0018, "step": 1528 }, { "epoch": 510.59, "learning_rate": 0.0001, "loss": 0.0016, "step": 1532 }, { "epoch": 511.88, "learning_rate": 0.0001, "loss": 0.0017, "step": 1536 }, { "epoch": 511.88, "eval_exact_match": 0.5029013539651838, "eval_exec": 0.5193423597678917, "eval_loss": 0.45330750942230225, "eval_runtime": 174.0196, "eval_samples_per_second": 5.942, "step": 1536 }, { "epoch": 513.29, "learning_rate": 0.0001, "loss": 0.0015, "step": 1540 }, { "epoch": 514.59, "learning_rate": 0.0001, "loss": 0.0016, "step": 1544 }, { "epoch": 515.88, "learning_rate": 0.0001, "loss": 0.0017, "step": 1548 }, { "epoch": 517.29, "learning_rate": 0.0001, "loss": 0.0017, "step": 1552 }, { "epoch": 518.59, "learning_rate": 0.0001, "loss": 0.0015, "step": 1556 }, { "epoch": 519.88, "learning_rate": 0.0001, "loss": 0.0016, "step": 1560 }, { "epoch": 521.29, "learning_rate": 0.0001, "loss": 0.0016, "step": 1564 }, { "epoch": 522.59, "learning_rate": 0.0001, "loss": 0.0015, "step": 1568 }, { "epoch": 523.88, "learning_rate": 0.0001, "loss": 0.0016, "step": 1572 }, { "epoch": 525.29, "learning_rate": 0.0001, "loss": 0.0015, "step": 1576 }, { "epoch": 526.59, "learning_rate": 0.0001, "loss": 0.0014, "step": 1580 }, { "epoch": 527.88, "learning_rate": 0.0001, "loss": 0.0015, "step": 1584 }, { "epoch": 529.29, "learning_rate": 0.0001, "loss": 0.0016, "step": 1588 }, { "epoch": 530.59, "learning_rate": 0.0001, "loss": 0.0015, "step": 1592 }, { "epoch": 531.88, "learning_rate": 0.0001, "loss": 0.0014, "step": 1596 }, { "epoch": 533.29, "learning_rate": 0.0001, "loss": 0.0016, "step": 1600 }, { "epoch": 533.29, "eval_exact_match": 0.5077369439071566, "eval_exec": 0.52321083172147, "eval_loss": 0.45847541093826294, "eval_runtime": 185.2473, "eval_samples_per_second": 5.582, "step": 1600 }, { "epoch": 534.59, "learning_rate": 0.0001, "loss": 0.0016, "step": 1604 }, { "epoch": 535.88, "learning_rate": 0.0001, "loss": 0.0014, "step": 1608 }, { "epoch": 537.29, "learning_rate": 0.0001, "loss": 0.0015, "step": 1612 }, { "epoch": 538.59, "learning_rate": 0.0001, "loss": 0.0013, "step": 1616 }, { "epoch": 539.88, "learning_rate": 0.0001, "loss": 0.0012, "step": 1620 }, { "epoch": 541.29, "learning_rate": 0.0001, "loss": 0.0014, "step": 1624 }, { "epoch": 542.59, "learning_rate": 0.0001, "loss": 0.0016, "step": 1628 }, { "epoch": 543.88, "learning_rate": 0.0001, "loss": 0.0015, "step": 1632 }, { "epoch": 545.29, "learning_rate": 0.0001, "loss": 0.0015, "step": 1636 }, { "epoch": 546.59, "learning_rate": 0.0001, "loss": 0.0014, "step": 1640 }, { "epoch": 547.88, "learning_rate": 0.0001, "loss": 0.0014, "step": 1644 }, { "epoch": 549.29, "learning_rate": 0.0001, "loss": 0.0016, "step": 1648 }, { "epoch": 550.59, "learning_rate": 0.0001, "loss": 0.0014, "step": 1652 }, { "epoch": 551.88, "learning_rate": 0.0001, "loss": 0.0012, "step": 1656 }, { "epoch": 553.29, "learning_rate": 0.0001, "loss": 0.0015, "step": 1660 }, { "epoch": 554.59, "learning_rate": 0.0001, "loss": 0.0012, "step": 1664 }, { "epoch": 554.59, "eval_exact_match": 0.511605415860735, "eval_exec": 0.5241779497098646, "eval_loss": 0.4577661156654358, "eval_runtime": 175.2544, "eval_samples_per_second": 5.9, "step": 1664 }, { "epoch": 555.88, "learning_rate": 0.0001, "loss": 0.0014, "step": 1668 }, { "epoch": 557.29, "learning_rate": 0.0001, "loss": 0.0014, "step": 1672 }, { "epoch": 558.59, "learning_rate": 0.0001, "loss": 0.0013, "step": 1676 }, { "epoch": 559.88, "learning_rate": 0.0001, "loss": 0.0014, "step": 1680 }, { "epoch": 561.29, "learning_rate": 0.0001, "loss": 0.0015, "step": 1684 }, { "epoch": 562.59, "learning_rate": 0.0001, "loss": 0.003, "step": 1688 }, { "epoch": 563.88, "learning_rate": 0.0001, "loss": 0.0022, "step": 1692 }, { "epoch": 565.29, "learning_rate": 0.0001, "loss": 0.0016, "step": 1696 }, { "epoch": 566.59, "learning_rate": 0.0001, "loss": 0.0013, "step": 1700 }, { "epoch": 567.88, "learning_rate": 0.0001, "loss": 0.0014, "step": 1704 }, { "epoch": 569.29, "learning_rate": 0.0001, "loss": 0.0014, "step": 1708 }, { "epoch": 570.59, "learning_rate": 0.0001, "loss": 0.0014, "step": 1712 }, { "epoch": 571.88, "learning_rate": 0.0001, "loss": 0.0013, "step": 1716 }, { "epoch": 573.29, "learning_rate": 0.0001, "loss": 0.0016, "step": 1720 }, { "epoch": 574.59, "learning_rate": 0.0001, "loss": 0.0011, "step": 1724 }, { "epoch": 575.88, "learning_rate": 0.0001, "loss": 0.0011, "step": 1728 }, { "epoch": 575.88, "eval_exact_match": 0.511605415860735, "eval_exec": 0.52321083172147, "eval_loss": 0.4673304259777069, "eval_runtime": 178.9293, "eval_samples_per_second": 5.779, "step": 1728 }, { "epoch": 577.29, "learning_rate": 0.0001, "loss": 0.0013, "step": 1732 }, { "epoch": 578.59, "learning_rate": 0.0001, "loss": 0.0011, "step": 1736 }, { "epoch": 579.88, "learning_rate": 0.0001, "loss": 0.001, "step": 1740 }, { "epoch": 581.29, "learning_rate": 0.0001, "loss": 0.0012, "step": 1744 }, { "epoch": 582.59, "learning_rate": 0.0001, "loss": 0.0012, "step": 1748 }, { "epoch": 583.88, "learning_rate": 0.0001, "loss": 0.0013, "step": 1752 }, { "epoch": 585.29, "learning_rate": 0.0001, "loss": 0.0015, "step": 1756 }, { "epoch": 586.59, "learning_rate": 0.0001, "loss": 0.0012, "step": 1760 }, { "epoch": 587.88, "learning_rate": 0.0001, "loss": 0.0012, "step": 1764 }, { "epoch": 589.29, "learning_rate": 0.0001, "loss": 0.0012, "step": 1768 }, { "epoch": 590.59, "learning_rate": 0.0001, "loss": 0.0013, "step": 1772 }, { "epoch": 591.88, "learning_rate": 0.0001, "loss": 0.001, "step": 1776 }, { "epoch": 593.29, "learning_rate": 0.0001, "loss": 0.0011, "step": 1780 }, { "epoch": 594.59, "learning_rate": 0.0001, "loss": 0.0011, "step": 1784 }, { "epoch": 595.88, "learning_rate": 0.0001, "loss": 0.0011, "step": 1788 }, { "epoch": 597.29, "learning_rate": 0.0001, "loss": 0.001, "step": 1792 }, { "epoch": 597.29, "eval_exact_match": 0.5087040618955513, "eval_exec": 0.5145067698259188, "eval_loss": 0.4705192744731903, "eval_runtime": 176.1126, "eval_samples_per_second": 5.871, "step": 1792 }, { "epoch": 598.59, "learning_rate": 0.0001, "loss": 0.0012, "step": 1796 }, { "epoch": 599.88, "learning_rate": 0.0001, "loss": 0.0012, "step": 1800 }, { "epoch": 601.29, "learning_rate": 0.0001, "loss": 0.0013, "step": 1804 }, { "epoch": 602.59, "learning_rate": 0.0001, "loss": 0.0011, "step": 1808 }, { "epoch": 603.88, "learning_rate": 0.0001, "loss": 0.0011, "step": 1812 }, { "epoch": 605.29, "learning_rate": 0.0001, "loss": 0.0012, "step": 1816 }, { "epoch": 606.59, "learning_rate": 0.0001, "loss": 0.0011, "step": 1820 }, { "epoch": 607.88, "learning_rate": 0.0001, "loss": 0.001, "step": 1824 }, { "epoch": 609.29, "learning_rate": 0.0001, "loss": 0.0012, "step": 1828 }, { "epoch": 610.59, "learning_rate": 0.0001, "loss": 0.0011, "step": 1832 }, { "epoch": 611.88, "learning_rate": 0.0001, "loss": 0.0014, "step": 1836 }, { "epoch": 613.29, "learning_rate": 0.0001, "loss": 0.0014, "step": 1840 }, { "epoch": 614.59, "learning_rate": 0.0001, "loss": 0.0016, "step": 1844 }, { "epoch": 615.88, "learning_rate": 0.0001, "loss": 0.0041, "step": 1848 }, { "epoch": 617.29, "learning_rate": 0.0001, "loss": 0.0017, "step": 1852 }, { "epoch": 618.59, "learning_rate": 0.0001, "loss": 0.001, "step": 1856 }, { "epoch": 618.59, "eval_exact_match": 0.5038684719535783, "eval_exec": 0.511605415860735, "eval_loss": 0.4638679027557373, "eval_runtime": 178.5071, "eval_samples_per_second": 5.792, "step": 1856 }, { "epoch": 619.88, "learning_rate": 0.0001, "loss": 0.001, "step": 1860 }, { "epoch": 621.29, "learning_rate": 0.0001, "loss": 0.001, "step": 1864 }, { "epoch": 622.59, "learning_rate": 0.0001, "loss": 0.0011, "step": 1868 }, { "epoch": 623.88, "learning_rate": 0.0001, "loss": 0.0011, "step": 1872 }, { "epoch": 625.29, "learning_rate": 0.0001, "loss": 0.001, "step": 1876 }, { "epoch": 626.59, "learning_rate": 0.0001, "loss": 0.0009, "step": 1880 }, { "epoch": 627.88, "learning_rate": 0.0001, "loss": 0.001, "step": 1884 }, { "epoch": 629.29, "learning_rate": 0.0001, "loss": 0.0009, "step": 1888 }, { "epoch": 630.59, "learning_rate": 0.0001, "loss": 0.001, "step": 1892 }, { "epoch": 631.88, "learning_rate": 0.0001, "loss": 0.0009, "step": 1896 }, { "epoch": 633.29, "learning_rate": 0.0001, "loss": 0.0013, "step": 1900 }, { "epoch": 634.59, "learning_rate": 0.0001, "loss": 0.0009, "step": 1904 }, { "epoch": 635.88, "learning_rate": 0.0001, "loss": 0.0008, "step": 1908 }, { "epoch": 637.29, "learning_rate": 0.0001, "loss": 0.0009, "step": 1912 }, { "epoch": 638.59, "learning_rate": 0.0001, "loss": 0.0009, "step": 1916 }, { "epoch": 639.88, "learning_rate": 0.0001, "loss": 0.0009, "step": 1920 }, { "epoch": 639.88, "eval_exact_match": 0.5096711798839458, "eval_exec": 0.5193423597678917, "eval_loss": 0.47883152961730957, "eval_runtime": 183.0234, "eval_samples_per_second": 5.65, "step": 1920 }, { "epoch": 641.29, "learning_rate": 0.0001, "loss": 0.0009, "step": 1924 }, { "epoch": 642.59, "learning_rate": 0.0001, "loss": 0.0007, "step": 1928 }, { "epoch": 643.88, "learning_rate": 0.0001, "loss": 0.0009, "step": 1932 }, { "epoch": 645.29, "learning_rate": 0.0001, "loss": 0.001, "step": 1936 }, { "epoch": 646.59, "learning_rate": 0.0001, "loss": 0.0009, "step": 1940 }, { "epoch": 647.88, "learning_rate": 0.0001, "loss": 0.0013, "step": 1944 }, { "epoch": 649.29, "learning_rate": 0.0001, "loss": 0.002, "step": 1948 }, { "epoch": 650.59, "learning_rate": 0.0001, "loss": 0.001, "step": 1952 }, { "epoch": 651.88, "learning_rate": 0.0001, "loss": 0.001, "step": 1956 }, { "epoch": 653.29, "learning_rate": 0.0001, "loss": 0.0011, "step": 1960 }, { "epoch": 654.59, "learning_rate": 0.0001, "loss": 0.0011, "step": 1964 }, { "epoch": 655.88, "learning_rate": 0.0001, "loss": 0.001, "step": 1968 }, { "epoch": 657.29, "learning_rate": 0.0001, "loss": 0.0011, "step": 1972 }, { "epoch": 658.59, "learning_rate": 0.0001, "loss": 0.001, "step": 1976 }, { "epoch": 659.88, "learning_rate": 0.0001, "loss": 0.001, "step": 1980 }, { "epoch": 661.29, "learning_rate": 0.0001, "loss": 0.001, "step": 1984 }, { "epoch": 661.29, "eval_exact_match": 0.504835589941973, "eval_exec": 0.5203094777562862, "eval_loss": 0.4922162890434265, "eval_runtime": 181.7058, "eval_samples_per_second": 5.691, "step": 1984 }, { "epoch": 662.59, "learning_rate": 0.0001, "loss": 0.0008, "step": 1988 }, { "epoch": 663.88, "learning_rate": 0.0001, "loss": 0.0009, "step": 1992 }, { "epoch": 665.29, "learning_rate": 0.0001, "loss": 0.0009, "step": 1996 }, { "epoch": 666.59, "learning_rate": 0.0001, "loss": 0.0009, "step": 2000 }, { "epoch": 667.88, "learning_rate": 0.0001, "loss": 0.0011, "step": 2004 }, { "epoch": 669.29, "learning_rate": 0.0001, "loss": 0.0008, "step": 2008 }, { "epoch": 670.59, "learning_rate": 0.0001, "loss": 0.0009, "step": 2012 }, { "epoch": 671.88, "learning_rate": 0.0001, "loss": 0.0011, "step": 2016 }, { "epoch": 673.29, "learning_rate": 0.0001, "loss": 0.001, "step": 2020 }, { "epoch": 674.59, "learning_rate": 0.0001, "loss": 0.0008, "step": 2024 }, { "epoch": 675.88, "learning_rate": 0.0001, "loss": 0.0008, "step": 2028 }, { "epoch": 677.29, "learning_rate": 0.0001, "loss": 0.0009, "step": 2032 }, { "epoch": 678.59, "learning_rate": 0.0001, "loss": 0.0009, "step": 2036 }, { "epoch": 679.88, "learning_rate": 0.0001, "loss": 0.001, "step": 2040 }, { "epoch": 681.29, "learning_rate": 0.0001, "loss": 0.0009, "step": 2044 }, { "epoch": 682.59, "learning_rate": 0.0001, "loss": 0.0009, "step": 2048 }, { "epoch": 682.59, "eval_exact_match": 0.5164410058027079, "eval_exec": 0.5290135396518375, "eval_loss": 0.47683951258659363, "eval_runtime": 185.6081, "eval_samples_per_second": 5.571, "step": 2048 }, { "epoch": 683.88, "learning_rate": 0.0001, "loss": 0.0009, "step": 2052 }, { "epoch": 685.29, "learning_rate": 0.0001, "loss": 0.0011, "step": 2056 }, { "epoch": 686.59, "learning_rate": 0.0001, "loss": 0.0008, "step": 2060 }, { "epoch": 687.88, "learning_rate": 0.0001, "loss": 0.0007, "step": 2064 }, { "epoch": 689.29, "learning_rate": 0.0001, "loss": 0.0008, "step": 2068 }, { "epoch": 690.59, "learning_rate": 0.0001, "loss": 0.0007, "step": 2072 }, { "epoch": 691.88, "learning_rate": 0.0001, "loss": 0.0008, "step": 2076 }, { "epoch": 693.29, "learning_rate": 0.0001, "loss": 0.001, "step": 2080 }, { "epoch": 694.59, "learning_rate": 0.0001, "loss": 0.0009, "step": 2084 }, { "epoch": 695.88, "learning_rate": 0.0001, "loss": 0.0008, "step": 2088 }, { "epoch": 697.29, "learning_rate": 0.0001, "loss": 0.0008, "step": 2092 }, { "epoch": 698.59, "learning_rate": 0.0001, "loss": 0.0007, "step": 2096 }, { "epoch": 699.88, "learning_rate": 0.0001, "loss": 0.0008, "step": 2100 }, { "epoch": 701.29, "learning_rate": 0.0001, "loss": 0.0009, "step": 2104 }, { "epoch": 702.59, "learning_rate": 0.0001, "loss": 0.0046, "step": 2108 }, { "epoch": 703.88, "learning_rate": 0.0001, "loss": 0.0015, "step": 2112 }, { "epoch": 703.88, "eval_exact_match": 0.5241779497098646, "eval_exec": 0.5338491295938105, "eval_loss": 0.47757720947265625, "eval_runtime": 180.9881, "eval_samples_per_second": 5.713, "step": 2112 } ], "max_steps": 9216, "num_train_epochs": 3072, "total_flos": 2.1502023256705536e+18, "trial_name": null, "trial_params": null }