{ "best_metric": 0.049902621656656265, "best_model_checkpoint": "/content/drive/MyDrive/Hkth_data/WNS Hackathon/model/checkpoint-1400", "epoch": 3.953488372093023, "eval_steps": 100, "global_step": 1700, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 0.00019895348837209301, "loss": 0.3296, "step": 10 }, { "epoch": 0.05, "learning_rate": 0.00019779069767441863, "loss": 0.3506, "step": 20 }, { "epoch": 0.07, "learning_rate": 0.0001966279069767442, "loss": 0.3365, "step": 30 }, { "epoch": 0.09, "learning_rate": 0.0001954651162790698, "loss": 0.1444, "step": 40 }, { "epoch": 0.12, "learning_rate": 0.00019430232558139535, "loss": 0.2242, "step": 50 }, { "epoch": 0.14, "learning_rate": 0.00019313953488372094, "loss": 0.1912, "step": 60 }, { "epoch": 0.16, "learning_rate": 0.00019197674418604653, "loss": 0.1604, "step": 70 }, { "epoch": 0.19, "learning_rate": 0.0001908139534883721, "loss": 0.2392, "step": 80 }, { "epoch": 0.21, "learning_rate": 0.0001896511627906977, "loss": 0.1543, "step": 90 }, { "epoch": 0.23, "learning_rate": 0.00018848837209302325, "loss": 0.3238, "step": 100 }, { "epoch": 0.23, "eval_accuracy": 0.9471947194719472, "eval_loss": 0.1719779074192047, "eval_runtime": 418.2527, "eval_samples_per_second": 2.898, "eval_steps_per_second": 0.363, "step": 100 }, { "epoch": 0.26, "learning_rate": 0.00018732558139534884, "loss": 0.2097, "step": 110 }, { "epoch": 0.28, "learning_rate": 0.00018616279069767443, "loss": 0.1673, "step": 120 }, { "epoch": 0.3, "learning_rate": 0.00018500000000000002, "loss": 0.2386, "step": 130 }, { "epoch": 0.33, "learning_rate": 0.0001838372093023256, "loss": 0.3442, "step": 140 }, { "epoch": 0.35, "learning_rate": 0.00018267441860465118, "loss": 0.2316, "step": 150 }, { "epoch": 0.37, "learning_rate": 0.00018151162790697674, "loss": 0.1904, "step": 160 }, { "epoch": 0.4, "learning_rate": 0.00018034883720930233, "loss": 0.1186, "step": 170 }, { "epoch": 0.42, "learning_rate": 0.00017918604651162793, "loss": 0.145, "step": 180 }, { "epoch": 0.44, "learning_rate": 0.0001780232558139535, "loss": 0.1418, "step": 190 }, { "epoch": 0.47, "learning_rate": 0.00017686046511627908, "loss": 0.1078, "step": 200 }, { "epoch": 0.47, "eval_accuracy": 0.9562706270627063, "eval_loss": 0.14491133391857147, "eval_runtime": 43.7761, "eval_samples_per_second": 27.686, "eval_steps_per_second": 3.472, "step": 200 }, { "epoch": 0.49, "learning_rate": 0.00017569767441860464, "loss": 0.0595, "step": 210 }, { "epoch": 0.51, "learning_rate": 0.00017453488372093023, "loss": 0.0677, "step": 220 }, { "epoch": 0.53, "learning_rate": 0.00017337209302325583, "loss": 0.1667, "step": 230 }, { "epoch": 0.56, "learning_rate": 0.00017220930232558142, "loss": 0.199, "step": 240 }, { "epoch": 0.58, "learning_rate": 0.00017104651162790698, "loss": 0.2125, "step": 250 }, { "epoch": 0.6, "learning_rate": 0.00016988372093023257, "loss": 0.2511, "step": 260 }, { "epoch": 0.63, "learning_rate": 0.00016872093023255816, "loss": 0.1656, "step": 270 }, { "epoch": 0.65, "learning_rate": 0.00016755813953488373, "loss": 0.1384, "step": 280 }, { "epoch": 0.67, "learning_rate": 0.00016639534883720932, "loss": 0.0628, "step": 290 }, { "epoch": 0.7, "learning_rate": 0.00016534883720930235, "loss": 0.2622, "step": 300 }, { "epoch": 0.7, "eval_accuracy": 0.9249174917491749, "eval_loss": 0.18467029929161072, "eval_runtime": 43.6859, "eval_samples_per_second": 27.743, "eval_steps_per_second": 3.479, "step": 300 }, { "epoch": 0.72, "learning_rate": 0.0001641860465116279, "loss": 0.1592, "step": 310 }, { "epoch": 0.74, "learning_rate": 0.0001630232558139535, "loss": 0.1944, "step": 320 }, { "epoch": 0.77, "learning_rate": 0.00016186046511627907, "loss": 0.0672, "step": 330 }, { "epoch": 0.79, "learning_rate": 0.00016069767441860466, "loss": 0.0579, "step": 340 }, { "epoch": 0.81, "learning_rate": 0.00015953488372093025, "loss": 0.0437, "step": 350 }, { "epoch": 0.84, "learning_rate": 0.0001583720930232558, "loss": 0.308, "step": 360 }, { "epoch": 0.86, "learning_rate": 0.0001572093023255814, "loss": 0.2108, "step": 370 }, { "epoch": 0.88, "learning_rate": 0.00015604651162790697, "loss": 0.1127, "step": 380 }, { "epoch": 0.91, "learning_rate": 0.00015488372093023256, "loss": 0.1465, "step": 390 }, { "epoch": 0.93, "learning_rate": 0.00015372093023255815, "loss": 0.0433, "step": 400 }, { "epoch": 0.93, "eval_accuracy": 0.9702970297029703, "eval_loss": 0.110055111348629, "eval_runtime": 44.1236, "eval_samples_per_second": 27.468, "eval_steps_per_second": 3.445, "step": 400 }, { "epoch": 0.95, "learning_rate": 0.00015255813953488374, "loss": 0.0939, "step": 410 }, { "epoch": 0.98, "learning_rate": 0.0001513953488372093, "loss": 0.2253, "step": 420 }, { "epoch": 1.0, "learning_rate": 0.0001502325581395349, "loss": 0.0982, "step": 430 }, { "epoch": 1.02, "learning_rate": 0.0001490697674418605, "loss": 0.0136, "step": 440 }, { "epoch": 1.05, "learning_rate": 0.00014790697674418605, "loss": 0.0822, "step": 450 }, { "epoch": 1.07, "learning_rate": 0.00014674418604651164, "loss": 0.0479, "step": 460 }, { "epoch": 1.09, "learning_rate": 0.0001455813953488372, "loss": 0.1399, "step": 470 }, { "epoch": 1.12, "learning_rate": 0.0001444186046511628, "loss": 0.0975, "step": 480 }, { "epoch": 1.14, "learning_rate": 0.00014325581395348836, "loss": 0.0308, "step": 490 }, { "epoch": 1.16, "learning_rate": 0.00014209302325581395, "loss": 0.0962, "step": 500 }, { "epoch": 1.16, "eval_accuracy": 0.9702970297029703, "eval_loss": 0.09571434557437897, "eval_runtime": 44.8921, "eval_samples_per_second": 26.998, "eval_steps_per_second": 3.386, "step": 500 }, { "epoch": 1.19, "learning_rate": 0.00014093023255813954, "loss": 0.12, "step": 510 }, { "epoch": 1.21, "learning_rate": 0.00013976744186046513, "loss": 0.0626, "step": 520 }, { "epoch": 1.23, "learning_rate": 0.00013860465116279072, "loss": 0.0878, "step": 530 }, { "epoch": 1.26, "learning_rate": 0.0001374418604651163, "loss": 0.1058, "step": 540 }, { "epoch": 1.28, "learning_rate": 0.00013627906976744188, "loss": 0.0938, "step": 550 }, { "epoch": 1.3, "learning_rate": 0.00013511627906976744, "loss": 0.0142, "step": 560 }, { "epoch": 1.33, "learning_rate": 0.00013395348837209303, "loss": 0.0696, "step": 570 }, { "epoch": 1.35, "learning_rate": 0.0001327906976744186, "loss": 0.0015, "step": 580 }, { "epoch": 1.37, "learning_rate": 0.0001316279069767442, "loss": 0.0736, "step": 590 }, { "epoch": 1.4, "learning_rate": 0.00013046511627906975, "loss": 0.0479, "step": 600 }, { "epoch": 1.4, "eval_accuracy": 0.9678217821782178, "eval_loss": 0.1245083436369896, "eval_runtime": 44.2549, "eval_samples_per_second": 27.387, "eval_steps_per_second": 3.435, "step": 600 }, { "epoch": 1.42, "learning_rate": 0.00012930232558139534, "loss": 0.0414, "step": 610 }, { "epoch": 1.44, "learning_rate": 0.00012813953488372093, "loss": 0.0234, "step": 620 }, { "epoch": 1.47, "learning_rate": 0.00012697674418604653, "loss": 0.0148, "step": 630 }, { "epoch": 1.49, "learning_rate": 0.00012581395348837212, "loss": 0.0723, "step": 640 }, { "epoch": 1.51, "learning_rate": 0.00012465116279069768, "loss": 0.1016, "step": 650 }, { "epoch": 1.53, "learning_rate": 0.00012348837209302327, "loss": 0.0057, "step": 660 }, { "epoch": 1.56, "learning_rate": 0.00012232558139534883, "loss": 0.0512, "step": 670 }, { "epoch": 1.58, "learning_rate": 0.00012116279069767443, "loss": 0.0263, "step": 680 }, { "epoch": 1.6, "learning_rate": 0.00012, "loss": 0.0562, "step": 690 }, { "epoch": 1.63, "learning_rate": 0.00011883720930232558, "loss": 0.0594, "step": 700 }, { "epoch": 1.63, "eval_accuracy": 0.985973597359736, "eval_loss": 0.06257853657007217, "eval_runtime": 44.5792, "eval_samples_per_second": 27.188, "eval_steps_per_second": 3.41, "step": 700 }, { "epoch": 1.65, "learning_rate": 0.00011767441860465116, "loss": 0.0027, "step": 710 }, { "epoch": 1.67, "learning_rate": 0.00011651162790697674, "loss": 0.0164, "step": 720 }, { "epoch": 1.7, "learning_rate": 0.00011534883720930234, "loss": 0.0406, "step": 730 }, { "epoch": 1.72, "learning_rate": 0.00011418604651162792, "loss": 0.0317, "step": 740 }, { "epoch": 1.74, "learning_rate": 0.0001130232558139535, "loss": 0.0539, "step": 750 }, { "epoch": 1.77, "learning_rate": 0.00011186046511627907, "loss": 0.0158, "step": 760 }, { "epoch": 1.79, "learning_rate": 0.00011069767441860466, "loss": 0.1054, "step": 770 }, { "epoch": 1.81, "learning_rate": 0.00010953488372093024, "loss": 0.0112, "step": 780 }, { "epoch": 1.84, "learning_rate": 0.00010837209302325582, "loss": 0.019, "step": 790 }, { "epoch": 1.86, "learning_rate": 0.0001072093023255814, "loss": 0.027, "step": 800 }, { "epoch": 1.86, "eval_accuracy": 0.985973597359736, "eval_loss": 0.056669652462005615, "eval_runtime": 44.7355, "eval_samples_per_second": 27.093, "eval_steps_per_second": 3.398, "step": 800 }, { "epoch": 1.88, "learning_rate": 0.00010604651162790697, "loss": 0.0206, "step": 810 }, { "epoch": 1.91, "learning_rate": 0.00010488372093023255, "loss": 0.0296, "step": 820 }, { "epoch": 1.93, "learning_rate": 0.00010372093023255815, "loss": 0.0775, "step": 830 }, { "epoch": 1.95, "learning_rate": 0.00010255813953488373, "loss": 0.0278, "step": 840 }, { "epoch": 1.98, "learning_rate": 0.00010139534883720931, "loss": 0.0091, "step": 850 }, { "epoch": 2.0, "learning_rate": 0.0001002325581395349, "loss": 0.0019, "step": 860 }, { "epoch": 2.02, "learning_rate": 9.906976744186048e-05, "loss": 0.0014, "step": 870 }, { "epoch": 2.05, "learning_rate": 9.790697674418605e-05, "loss": 0.0044, "step": 880 }, { "epoch": 2.07, "learning_rate": 9.674418604651163e-05, "loss": 0.0002, "step": 890 }, { "epoch": 2.09, "learning_rate": 9.558139534883721e-05, "loss": 0.0003, "step": 900 }, { "epoch": 2.09, "eval_accuracy": 0.985973597359736, "eval_loss": 0.0684327706694603, "eval_runtime": 44.3211, "eval_samples_per_second": 27.346, "eval_steps_per_second": 3.43, "step": 900 }, { "epoch": 2.12, "learning_rate": 9.441860465116279e-05, "loss": 0.0243, "step": 910 }, { "epoch": 2.14, "learning_rate": 9.325581395348838e-05, "loss": 0.0383, "step": 920 }, { "epoch": 2.16, "learning_rate": 9.209302325581396e-05, "loss": 0.0046, "step": 930 }, { "epoch": 2.19, "learning_rate": 9.093023255813953e-05, "loss": 0.0955, "step": 940 }, { "epoch": 2.21, "learning_rate": 8.976744186046512e-05, "loss": 0.0122, "step": 950 }, { "epoch": 2.23, "learning_rate": 8.86046511627907e-05, "loss": 0.0197, "step": 960 }, { "epoch": 2.26, "learning_rate": 8.744186046511629e-05, "loss": 0.0197, "step": 970 }, { "epoch": 2.28, "learning_rate": 8.627906976744187e-05, "loss": 0.0025, "step": 980 }, { "epoch": 2.3, "learning_rate": 8.511627906976745e-05, "loss": 0.0001, "step": 990 }, { "epoch": 2.33, "learning_rate": 8.395348837209302e-05, "loss": 0.0272, "step": 1000 }, { "epoch": 2.33, "eval_accuracy": 0.9851485148514851, "eval_loss": 0.07581602782011032, "eval_runtime": 44.565, "eval_samples_per_second": 27.196, "eval_steps_per_second": 3.411, "step": 1000 }, { "epoch": 2.35, "learning_rate": 8.27906976744186e-05, "loss": 0.0516, "step": 1010 }, { "epoch": 2.37, "learning_rate": 8.162790697674419e-05, "loss": 0.001, "step": 1020 }, { "epoch": 2.4, "learning_rate": 8.046511627906977e-05, "loss": 0.0027, "step": 1030 }, { "epoch": 2.42, "learning_rate": 7.930232558139535e-05, "loss": 0.0002, "step": 1040 }, { "epoch": 2.44, "learning_rate": 7.813953488372094e-05, "loss": 0.0683, "step": 1050 }, { "epoch": 2.47, "learning_rate": 7.697674418604652e-05, "loss": 0.0016, "step": 1060 }, { "epoch": 2.49, "learning_rate": 7.58139534883721e-05, "loss": 0.0195, "step": 1070 }, { "epoch": 2.51, "learning_rate": 7.465116279069768e-05, "loss": 0.0818, "step": 1080 }, { "epoch": 2.53, "learning_rate": 7.348837209302326e-05, "loss": 0.0016, "step": 1090 }, { "epoch": 2.56, "learning_rate": 7.232558139534884e-05, "loss": 0.0275, "step": 1100 }, { "epoch": 2.56, "eval_accuracy": 0.9876237623762376, "eval_loss": 0.06026723235845566, "eval_runtime": 44.5699, "eval_samples_per_second": 27.193, "eval_steps_per_second": 3.41, "step": 1100 }, { "epoch": 2.58, "learning_rate": 7.116279069767442e-05, "loss": 0.0005, "step": 1110 }, { "epoch": 2.6, "learning_rate": 7e-05, "loss": 0.0017, "step": 1120 }, { "epoch": 2.63, "learning_rate": 6.883720930232558e-05, "loss": 0.0004, "step": 1130 }, { "epoch": 2.65, "learning_rate": 6.767441860465116e-05, "loss": 0.0023, "step": 1140 }, { "epoch": 2.67, "learning_rate": 6.651162790697675e-05, "loss": 0.0004, "step": 1150 }, { "epoch": 2.7, "learning_rate": 6.534883720930233e-05, "loss": 0.0017, "step": 1160 }, { "epoch": 2.72, "learning_rate": 6.418604651162791e-05, "loss": 0.0005, "step": 1170 }, { "epoch": 2.74, "learning_rate": 6.30232558139535e-05, "loss": 0.0001, "step": 1180 }, { "epoch": 2.77, "learning_rate": 6.186046511627908e-05, "loss": 0.0013, "step": 1190 }, { "epoch": 2.79, "learning_rate": 6.0697674418604654e-05, "loss": 0.0007, "step": 1200 }, { "epoch": 2.79, "eval_accuracy": 0.9900990099009901, "eval_loss": 0.05105312541127205, "eval_runtime": 44.7068, "eval_samples_per_second": 27.11, "eval_steps_per_second": 3.4, "step": 1200 }, { "epoch": 2.81, "learning_rate": 5.953488372093024e-05, "loss": 0.0002, "step": 1210 }, { "epoch": 2.84, "learning_rate": 5.8372093023255815e-05, "loss": 0.0002, "step": 1220 }, { "epoch": 2.86, "learning_rate": 5.720930232558139e-05, "loss": 0.0024, "step": 1230 }, { "epoch": 2.88, "learning_rate": 5.6046511627906984e-05, "loss": 0.0009, "step": 1240 }, { "epoch": 2.91, "learning_rate": 5.488372093023256e-05, "loss": 0.0002, "step": 1250 }, { "epoch": 2.93, "learning_rate": 5.3720930232558145e-05, "loss": 0.0497, "step": 1260 }, { "epoch": 2.95, "learning_rate": 5.255813953488372e-05, "loss": 0.0006, "step": 1270 }, { "epoch": 2.98, "learning_rate": 5.13953488372093e-05, "loss": 0.0004, "step": 1280 }, { "epoch": 3.0, "learning_rate": 5.023255813953489e-05, "loss": 0.0001, "step": 1290 }, { "epoch": 3.02, "learning_rate": 4.906976744186046e-05, "loss": 0.0001, "step": 1300 }, { "epoch": 3.02, "eval_accuracy": 0.9900990099009901, "eval_loss": 0.061022818088531494, "eval_runtime": 44.3547, "eval_samples_per_second": 27.325, "eval_steps_per_second": 3.427, "step": 1300 }, { "epoch": 3.05, "learning_rate": 4.790697674418605e-05, "loss": 0.0241, "step": 1310 }, { "epoch": 3.07, "learning_rate": 4.674418604651163e-05, "loss": 0.0005, "step": 1320 }, { "epoch": 3.09, "learning_rate": 4.5581395348837214e-05, "loss": 0.0003, "step": 1330 }, { "epoch": 3.12, "learning_rate": 4.441860465116279e-05, "loss": 0.0002, "step": 1340 }, { "epoch": 3.14, "learning_rate": 4.325581395348837e-05, "loss": 0.0006, "step": 1350 }, { "epoch": 3.16, "learning_rate": 4.209302325581396e-05, "loss": 0.0001, "step": 1360 }, { "epoch": 3.19, "learning_rate": 4.093023255813954e-05, "loss": 0.0001, "step": 1370 }, { "epoch": 3.21, "learning_rate": 3.9767441860465115e-05, "loss": 0.0005, "step": 1380 }, { "epoch": 3.23, "learning_rate": 3.86046511627907e-05, "loss": 0.0001, "step": 1390 }, { "epoch": 3.26, "learning_rate": 3.7441860465116276e-05, "loss": 0.0001, "step": 1400 }, { "epoch": 3.26, "eval_accuracy": 0.990924092409241, "eval_loss": 0.049902621656656265, "eval_runtime": 44.1915, "eval_samples_per_second": 27.426, "eval_steps_per_second": 3.44, "step": 1400 }, { "epoch": 3.28, "learning_rate": 3.627906976744187e-05, "loss": 0.0002, "step": 1410 }, { "epoch": 3.3, "learning_rate": 3.5116279069767445e-05, "loss": 0.0001, "step": 1420 }, { "epoch": 3.33, "learning_rate": 3.395348837209302e-05, "loss": 0.0001, "step": 1430 }, { "epoch": 3.35, "learning_rate": 3.2790697674418606e-05, "loss": 0.0001, "step": 1440 }, { "epoch": 3.37, "learning_rate": 3.162790697674419e-05, "loss": 0.0001, "step": 1450 }, { "epoch": 3.4, "learning_rate": 3.0465116279069768e-05, "loss": 0.0001, "step": 1460 }, { "epoch": 3.42, "learning_rate": 2.9302325581395352e-05, "loss": 0.0001, "step": 1470 }, { "epoch": 3.44, "learning_rate": 2.813953488372093e-05, "loss": 0.0001, "step": 1480 }, { "epoch": 3.47, "learning_rate": 2.6976744186046517e-05, "loss": 0.0002, "step": 1490 }, { "epoch": 3.49, "learning_rate": 2.5813953488372094e-05, "loss": 0.0001, "step": 1500 }, { "epoch": 3.49, "eval_accuracy": 0.990924092409241, "eval_loss": 0.05676552653312683, "eval_runtime": 44.3782, "eval_samples_per_second": 27.311, "eval_steps_per_second": 3.425, "step": 1500 }, { "epoch": 3.51, "learning_rate": 2.4651162790697675e-05, "loss": 0.0001, "step": 1510 }, { "epoch": 3.53, "learning_rate": 2.3488372093023256e-05, "loss": 0.0, "step": 1520 }, { "epoch": 3.56, "learning_rate": 2.2325581395348837e-05, "loss": 0.0, "step": 1530 }, { "epoch": 3.58, "learning_rate": 2.116279069767442e-05, "loss": 0.0001, "step": 1540 }, { "epoch": 3.6, "learning_rate": 2e-05, "loss": 0.0, "step": 1550 }, { "epoch": 3.63, "learning_rate": 1.8837209302325582e-05, "loss": 0.0001, "step": 1560 }, { "epoch": 3.65, "learning_rate": 1.7674418604651163e-05, "loss": 0.0, "step": 1570 }, { "epoch": 3.67, "learning_rate": 1.6511627906976744e-05, "loss": 0.0, "step": 1580 }, { "epoch": 3.7, "learning_rate": 1.5348837209302328e-05, "loss": 0.0004, "step": 1590 }, { "epoch": 3.72, "learning_rate": 1.4186046511627907e-05, "loss": 0.0001, "step": 1600 }, { "epoch": 3.72, "eval_accuracy": 0.9917491749174917, "eval_loss": 0.05982881784439087, "eval_runtime": 44.2442, "eval_samples_per_second": 27.393, "eval_steps_per_second": 3.435, "step": 1600 }, { "epoch": 3.74, "learning_rate": 1.3023255813953488e-05, "loss": 0.0001, "step": 1610 }, { "epoch": 3.77, "learning_rate": 1.186046511627907e-05, "loss": 0.0055, "step": 1620 }, { "epoch": 3.79, "learning_rate": 1.0697674418604651e-05, "loss": 0.0001, "step": 1630 }, { "epoch": 3.81, "learning_rate": 9.534883720930234e-06, "loss": 0.0, "step": 1640 }, { "epoch": 3.84, "learning_rate": 8.372093023255815e-06, "loss": 0.0001, "step": 1650 }, { "epoch": 3.86, "learning_rate": 7.209302325581396e-06, "loss": 0.0001, "step": 1660 }, { "epoch": 3.88, "learning_rate": 6.046511627906977e-06, "loss": 0.0, "step": 1670 }, { "epoch": 3.91, "learning_rate": 4.883720930232559e-06, "loss": 0.0, "step": 1680 }, { "epoch": 3.93, "learning_rate": 3.72093023255814e-06, "loss": 0.0, "step": 1690 }, { "epoch": 3.95, "learning_rate": 2.558139534883721e-06, "loss": 0.0, "step": 1700 }, { "epoch": 3.95, "eval_accuracy": 0.990924092409241, "eval_loss": 0.05301262065768242, "eval_runtime": 44.0827, "eval_samples_per_second": 27.494, "eval_steps_per_second": 3.448, "step": 1700 } ], "logging_steps": 10, "max_steps": 1720, "num_train_epochs": 4, "save_steps": 100, "total_flos": 6.205770439364837e+18, "trial_name": null, "trial_params": null }