{ "best_metric": 0.27745380997657776, "best_model_checkpoint": "uk-mt5-base-gec/checkpoint-1350", "epoch": 3.0177409816676524, "global_step": 2550, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18, "learning_rate": 0.000970414201183432, "loss": 2.8559, "step": 150 }, { "epoch": 0.18, "eval_google_bleu": 0.4161949252310698, "eval_loss": 0.3385647237300873, "eval_runtime": 574.9325, "eval_samples_per_second": 5.227, "eval_steps_per_second": 0.327, "step": 150 }, { "epoch": 0.35, "learning_rate": 0.000940828402366864, "loss": 0.353, "step": 300 }, { "epoch": 0.35, "eval_google_bleu": 0.4197968462318859, "eval_loss": 0.30946752429008484, "eval_runtime": 574.235, "eval_samples_per_second": 5.233, "eval_steps_per_second": 0.327, "step": 300 }, { "epoch": 0.53, "learning_rate": 0.0009112426035502958, "loss": 0.3433, "step": 450 }, { "epoch": 0.53, "eval_google_bleu": 0.42082026039416087, "eval_loss": 0.30230990052223206, "eval_runtime": 573.7714, "eval_samples_per_second": 5.237, "eval_steps_per_second": 0.328, "step": 450 }, { "epoch": 0.71, "learning_rate": 0.0008816568047337278, "loss": 0.3248, "step": 600 }, { "epoch": 0.71, "eval_google_bleu": 0.4194576852971206, "eval_loss": 0.2984682619571686, "eval_runtime": 574.3927, "eval_samples_per_second": 5.232, "eval_steps_per_second": 0.327, "step": 600 }, { "epoch": 0.89, "learning_rate": 0.0008520710059171598, "loss": 0.3046, "step": 750 }, { "epoch": 0.89, "eval_google_bleu": 0.4217920913982863, "eval_loss": 0.28489378094673157, "eval_runtime": 574.7617, "eval_samples_per_second": 5.228, "eval_steps_per_second": 0.327, "step": 750 }, { "epoch": 1.07, "learning_rate": 0.0008224852071005917, "loss": 0.2625, "step": 900 }, { "epoch": 1.07, "eval_google_bleu": 0.4213197969543147, "eval_loss": 0.29553136229515076, "eval_runtime": 573.965, "eval_samples_per_second": 5.236, "eval_steps_per_second": 0.328, "step": 900 }, { "epoch": 1.24, "learning_rate": 0.0007928994082840238, "loss": 0.2127, "step": 1050 }, { "epoch": 1.24, "eval_google_bleu": 0.4211682670038433, "eval_loss": 0.30292925238609314, "eval_runtime": 574.6793, "eval_samples_per_second": 5.229, "eval_steps_per_second": 0.327, "step": 1050 }, { "epoch": 1.42, "learning_rate": 0.0007633136094674556, "loss": 0.224, "step": 1200 }, { "epoch": 1.42, "eval_google_bleu": 0.4224696723929531, "eval_loss": 0.3068563640117645, "eval_runtime": 574.5278, "eval_samples_per_second": 5.23, "eval_steps_per_second": 0.327, "step": 1200 }, { "epoch": 1.6, "learning_rate": 0.0007337278106508876, "loss": 0.2332, "step": 1350 }, { "epoch": 1.6, "eval_google_bleu": 0.41897146578336036, "eval_loss": 0.27745380997657776, "eval_runtime": 575.5901, "eval_samples_per_second": 5.221, "eval_steps_per_second": 0.327, "step": 1350 }, { "epoch": 1.77, "learning_rate": 0.0007041420118343196, "loss": 0.238, "step": 1500 }, { "epoch": 1.77, "eval_google_bleu": 0.4164072628882445, "eval_loss": 0.2903579771518707, "eval_runtime": 573.985, "eval_samples_per_second": 5.235, "eval_steps_per_second": 0.328, "step": 1500 }, { "epoch": 1.95, "learning_rate": 0.0006745562130177515, "loss": 0.2297, "step": 1650 }, { "epoch": 1.95, "eval_google_bleu": 0.41891172732452214, "eval_loss": 0.2825988233089447, "eval_runtime": 574.7646, "eval_samples_per_second": 5.228, "eval_steps_per_second": 0.327, "step": 1650 }, { "epoch": 2.13, "learning_rate": 0.0006449704142011834, "loss": 0.1649, "step": 1800 }, { "epoch": 2.13, "eval_google_bleu": 0.41988701131139533, "eval_loss": 0.304867148399353, "eval_runtime": 575.4878, "eval_samples_per_second": 5.222, "eval_steps_per_second": 0.327, "step": 1800 }, { "epoch": 2.31, "learning_rate": 0.0006153846153846154, "loss": 0.1458, "step": 1950 }, { "epoch": 2.31, "eval_google_bleu": 0.4201902185823143, "eval_loss": 0.3138478994369507, "eval_runtime": 575.2695, "eval_samples_per_second": 5.224, "eval_steps_per_second": 0.327, "step": 1950 }, { "epoch": 2.48, "learning_rate": 0.0005857988165680473, "loss": 0.1564, "step": 2100 }, { "epoch": 2.48, "eval_google_bleu": 0.415792735992587, "eval_loss": 0.3027360141277313, "eval_runtime": 566.4822, "eval_samples_per_second": 5.305, "eval_steps_per_second": 0.332, "step": 2100 }, { "epoch": 2.66, "learning_rate": 0.0005562130177514793, "loss": 0.1572, "step": 2250 }, { "epoch": 2.66, "eval_google_bleu": 0.4212085345156907, "eval_loss": 0.3020596504211426, "eval_runtime": 574.5482, "eval_samples_per_second": 5.23, "eval_steps_per_second": 0.327, "step": 2250 }, { "epoch": 2.84, "learning_rate": 0.0005266272189349113, "loss": 0.159, "step": 2400 }, { "epoch": 2.84, "eval_google_bleu": 0.41866493031642477, "eval_loss": 0.29025933146476746, "eval_runtime": 574.442, "eval_samples_per_second": 5.231, "eval_steps_per_second": 0.327, "step": 2400 }, { "epoch": 3.02, "learning_rate": 0.0004970414201183431, "loss": 0.159, "step": 2550 }, { "epoch": 3.02, "eval_google_bleu": 0.4191764521684672, "eval_loss": 0.3248673677444458, "eval_runtime": 573.9254, "eval_samples_per_second": 5.236, "eval_steps_per_second": 0.328, "step": 2550 } ], "max_steps": 5070, "num_train_epochs": 6, "total_flos": 2.984077609323725e+16, "trial_name": null, "trial_params": null }