{ "best_metric": 0.16060182452201843, "best_model_checkpoint": "AlexWang99/byt5_add/checkpoint-650", "epoch": 50.0, "eval_steps": 500, "global_step": 650, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 4.225934982299805, "eval_runtime": 10.7417, "eval_samples_per_second": 930.95, "eval_steps_per_second": 1.21, "step": 13 }, { "epoch": 2.0, "eval_loss": 2.4177823066711426, "eval_runtime": 10.8153, "eval_samples_per_second": 924.617, "eval_steps_per_second": 1.202, "step": 26 }, { "epoch": 3.0, "eval_loss": 1.9256454706192017, "eval_runtime": 10.7408, "eval_samples_per_second": 931.032, "eval_steps_per_second": 1.21, "step": 39 }, { "epoch": 4.0, "eval_loss": 1.7310363054275513, "eval_runtime": 11.0217, "eval_samples_per_second": 907.305, "eval_steps_per_second": 1.179, "step": 52 }, { "epoch": 5.0, "eval_loss": 1.6576673984527588, "eval_runtime": 10.8631, "eval_samples_per_second": 920.548, "eval_steps_per_second": 1.197, "step": 65 }, { "epoch": 6.0, "eval_loss": 1.6385396718978882, "eval_runtime": 10.9023, "eval_samples_per_second": 917.236, "eval_steps_per_second": 1.192, "step": 78 }, { "epoch": 7.0, "eval_loss": 1.6109799146652222, "eval_runtime": 11.0728, "eval_samples_per_second": 903.112, "eval_steps_per_second": 1.174, "step": 91 }, { "epoch": 8.0, "eval_loss": 1.5810920000076294, "eval_runtime": 10.807, "eval_samples_per_second": 925.329, "eval_steps_per_second": 1.203, "step": 104 }, { "epoch": 9.0, "eval_loss": 1.523699402809143, "eval_runtime": 10.8206, "eval_samples_per_second": 924.164, "eval_steps_per_second": 1.201, "step": 117 }, { "epoch": 10.0, "eval_loss": 1.480878472328186, "eval_runtime": 10.8089, "eval_samples_per_second": 925.161, "eval_steps_per_second": 1.203, "step": 130 }, { "epoch": 11.0, "eval_loss": 1.437759280204773, "eval_runtime": 10.9724, "eval_samples_per_second": 911.378, "eval_steps_per_second": 1.185, "step": 143 }, { "epoch": 12.0, "eval_loss": 1.3976322412490845, "eval_runtime": 10.8773, "eval_samples_per_second": 919.349, "eval_steps_per_second": 1.195, "step": 156 }, { "epoch": 13.0, "eval_loss": 1.346189260482788, "eval_runtime": 10.8878, "eval_samples_per_second": 918.455, "eval_steps_per_second": 1.194, "step": 169 }, { "epoch": 14.0, "eval_loss": 1.2587027549743652, "eval_runtime": 10.7933, "eval_samples_per_second": 926.498, "eval_steps_per_second": 1.204, "step": 182 }, { "epoch": 15.0, "eval_loss": 1.2259936332702637, "eval_runtime": 10.7904, "eval_samples_per_second": 926.753, "eval_steps_per_second": 1.205, "step": 195 }, { "epoch": 16.0, "eval_loss": 1.1018248796463013, "eval_runtime": 11.0439, "eval_samples_per_second": 905.473, "eval_steps_per_second": 1.177, "step": 208 }, { "epoch": 17.0, "eval_loss": 1.0272711515426636, "eval_runtime": 10.8037, "eval_samples_per_second": 925.605, "eval_steps_per_second": 1.203, "step": 221 }, { "epoch": 18.0, "eval_loss": 0.9436440467834473, "eval_runtime": 10.8135, "eval_samples_per_second": 924.773, "eval_steps_per_second": 1.202, "step": 234 }, { "epoch": 19.0, "eval_loss": 0.8007376194000244, "eval_runtime": 10.8029, "eval_samples_per_second": 925.679, "eval_steps_per_second": 1.203, "step": 247 }, { "epoch": 20.0, "eval_loss": 0.6918877959251404, "eval_runtime": 10.8958, "eval_samples_per_second": 917.781, "eval_steps_per_second": 1.193, "step": 260 }, { "epoch": 21.0, "eval_loss": 0.6201093792915344, "eval_runtime": 10.7944, "eval_samples_per_second": 926.41, "eval_steps_per_second": 1.204, "step": 273 }, { "epoch": 22.0, "eval_loss": 0.5485938191413879, "eval_runtime": 10.7982, "eval_samples_per_second": 926.077, "eval_steps_per_second": 1.204, "step": 286 }, { "epoch": 23.0, "eval_loss": 0.480381041765213, "eval_runtime": 10.7925, "eval_samples_per_second": 926.572, "eval_steps_per_second": 1.205, "step": 299 }, { "epoch": 24.0, "eval_loss": 0.40801915526390076, "eval_runtime": 10.7939, "eval_samples_per_second": 926.447, "eval_steps_per_second": 1.204, "step": 312 }, { "epoch": 25.0, "eval_loss": 0.386056512594223, "eval_runtime": 11.0386, "eval_samples_per_second": 905.916, "eval_steps_per_second": 1.178, "step": 325 }, { "epoch": 26.0, "eval_loss": 0.34766075015068054, "eval_runtime": 10.8947, "eval_samples_per_second": 917.876, "eval_steps_per_second": 1.193, "step": 338 }, { "epoch": 27.0, "eval_loss": 0.3181471526622772, "eval_runtime": 10.7999, "eval_samples_per_second": 925.935, "eval_steps_per_second": 1.204, "step": 351 }, { "epoch": 28.0, "eval_loss": 0.2921164631843567, "eval_runtime": 10.8802, "eval_samples_per_second": 919.099, "eval_steps_per_second": 1.195, "step": 364 }, { "epoch": 29.0, "eval_loss": 0.2831590473651886, "eval_runtime": 10.9022, "eval_samples_per_second": 917.242, "eval_steps_per_second": 1.192, "step": 377 }, { "epoch": 30.0, "eval_loss": 0.2693236470222473, "eval_runtime": 10.8973, "eval_samples_per_second": 917.655, "eval_steps_per_second": 1.193, "step": 390 }, { "epoch": 31.0, "eval_loss": 0.24693337082862854, "eval_runtime": 10.8754, "eval_samples_per_second": 919.507, "eval_steps_per_second": 1.195, "step": 403 }, { "epoch": 32.0, "eval_loss": 0.2452966272830963, "eval_runtime": 10.8896, "eval_samples_per_second": 918.306, "eval_steps_per_second": 1.194, "step": 416 }, { "epoch": 33.0, "eval_loss": 0.2312968224287033, "eval_runtime": 10.8133, "eval_samples_per_second": 924.783, "eval_steps_per_second": 1.202, "step": 429 }, { "epoch": 34.0, "eval_loss": 0.213392972946167, "eval_runtime": 10.7935, "eval_samples_per_second": 926.482, "eval_steps_per_second": 1.204, "step": 442 }, { "epoch": 35.0, "eval_loss": 0.21386706829071045, "eval_runtime": 10.809, "eval_samples_per_second": 925.156, "eval_steps_per_second": 1.203, "step": 455 }, { "epoch": 36.0, "eval_loss": 0.20882292091846466, "eval_runtime": 10.7998, "eval_samples_per_second": 925.947, "eval_steps_per_second": 1.204, "step": 468 }, { "epoch": 37.0, "eval_loss": 0.2006647139787674, "eval_runtime": 10.7969, "eval_samples_per_second": 926.189, "eval_steps_per_second": 1.204, "step": 481 }, { "epoch": 38.0, "eval_loss": 0.19596967101097107, "eval_runtime": 10.8081, "eval_samples_per_second": 925.232, "eval_steps_per_second": 1.203, "step": 494 }, { "epoch": 38.46, "learning_rate": 1.153846153846154e-05, "loss": 1.3, "step": 500 }, { "epoch": 39.0, "eval_loss": 0.18297980725765228, "eval_runtime": 10.8038, "eval_samples_per_second": 925.597, "eval_steps_per_second": 1.203, "step": 507 }, { "epoch": 40.0, "eval_loss": 0.17824265360832214, "eval_runtime": 11.0511, "eval_samples_per_second": 904.887, "eval_steps_per_second": 1.176, "step": 520 }, { "epoch": 41.0, "eval_loss": 0.17460107803344727, "eval_runtime": 10.8972, "eval_samples_per_second": 917.67, "eval_steps_per_second": 1.193, "step": 533 }, { "epoch": 42.0, "eval_loss": 0.17409959435462952, "eval_runtime": 10.7994, "eval_samples_per_second": 925.978, "eval_steps_per_second": 1.204, "step": 546 }, { "epoch": 43.0, "eval_loss": 0.17077761888504028, "eval_runtime": 10.8856, "eval_samples_per_second": 918.641, "eval_steps_per_second": 1.194, "step": 559 }, { "epoch": 44.0, "eval_loss": 0.16676855087280273, "eval_runtime": 10.8901, "eval_samples_per_second": 918.263, "eval_steps_per_second": 1.194, "step": 572 }, { "epoch": 45.0, "eval_loss": 0.16503272950649261, "eval_runtime": 10.8935, "eval_samples_per_second": 917.976, "eval_steps_per_second": 1.193, "step": 585 }, { "epoch": 46.0, "eval_loss": 0.1650797426700592, "eval_runtime": 10.8935, "eval_samples_per_second": 917.978, "eval_steps_per_second": 1.193, "step": 598 }, { "epoch": 47.0, "eval_loss": 0.1628640592098236, "eval_runtime": 10.8953, "eval_samples_per_second": 917.828, "eval_steps_per_second": 1.193, "step": 611 }, { "epoch": 48.0, "eval_loss": 0.1627463847398758, "eval_runtime": 10.8324, "eval_samples_per_second": 923.155, "eval_steps_per_second": 1.2, "step": 624 }, { "epoch": 49.0, "eval_loss": 0.16095298528671265, "eval_runtime": 10.8256, "eval_samples_per_second": 923.738, "eval_steps_per_second": 1.201, "step": 637 }, { "epoch": 50.0, "eval_loss": 0.16060182452201843, "eval_runtime": 10.7991, "eval_samples_per_second": 926.006, "eval_steps_per_second": 1.204, "step": 650 } ], "logging_steps": 500, "max_steps": 650, "num_train_epochs": 50, "save_steps": 500, "total_flos": 1.4355462144e+16, "trial_name": null, "trial_params": null }