{ "best_metric": 7.84664249420166, "best_model_checkpoint": "/mmfs1/gscratch/stf/abhinavp/corpus-filtering/outputs/fict-full-lstm-42/checkpoints/checkpoint-120", "epoch": 0.96, "eval_steps": 10, "global_step": 120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 4.600000000000001e-05, "loss": 10.8142, "step": 10 }, { "epoch": 0.08, "eval_loss": 10.805203437805176, "eval_runtime": 2.7628, "eval_samples_per_second": 361.95, "eval_steps_per_second": 45.244, "step": 10 }, { "epoch": 0.16, "learning_rate": 4.2e-05, "loss": 10.7959, "step": 20 }, { "epoch": 0.16, "eval_loss": 10.783019065856934, "eval_runtime": 2.9141, "eval_samples_per_second": 343.159, "eval_steps_per_second": 42.895, "step": 20 }, { "epoch": 0.24, "learning_rate": 3.8e-05, "loss": 10.7637, "step": 30 }, { "epoch": 0.24, "eval_loss": 10.725115776062012, "eval_runtime": 2.7934, "eval_samples_per_second": 357.988, "eval_steps_per_second": 44.748, "step": 30 }, { "epoch": 0.32, "learning_rate": 3.4000000000000007e-05, "loss": 10.62, "step": 40 }, { "epoch": 0.32, "eval_loss": 10.306303977966309, "eval_runtime": 2.7909, "eval_samples_per_second": 358.31, "eval_steps_per_second": 44.789, "step": 40 }, { "epoch": 0.4, "learning_rate": 3e-05, "loss": 9.8971, "step": 50 }, { "epoch": 0.4, "eval_loss": 9.273224830627441, "eval_runtime": 2.8125, "eval_samples_per_second": 355.55, "eval_steps_per_second": 44.444, "step": 50 }, { "epoch": 0.48, "learning_rate": 2.6000000000000002e-05, "loss": 9.0782, "step": 60 }, { "epoch": 0.48, "eval_loss": 8.667804718017578, "eval_runtime": 2.8983, "eval_samples_per_second": 345.031, "eval_steps_per_second": 43.129, "step": 60 }, { "epoch": 0.56, "learning_rate": 2.2000000000000003e-05, "loss": 8.5836, "step": 70 }, { "epoch": 0.56, "eval_loss": 8.331869125366211, "eval_runtime": 2.7654, "eval_samples_per_second": 361.611, "eval_steps_per_second": 45.201, "step": 70 }, { "epoch": 0.64, "learning_rate": 1.8e-05, "loss": 8.4033, "step": 80 }, { "epoch": 0.64, "eval_loss": 8.129607200622559, "eval_runtime": 2.7665, "eval_samples_per_second": 361.474, "eval_steps_per_second": 45.184, "step": 80 }, { "epoch": 0.72, "learning_rate": 1.4000000000000001e-05, "loss": 8.2454, "step": 90 }, { "epoch": 0.72, "eval_loss": 8.00214672088623, "eval_runtime": 2.7731, "eval_samples_per_second": 360.605, "eval_steps_per_second": 45.076, "step": 90 }, { "epoch": 0.8, "learning_rate": 1e-05, "loss": 8.1265, "step": 100 }, { "epoch": 0.8, "eval_loss": 7.9211931228637695, "eval_runtime": 2.7627, "eval_samples_per_second": 361.96, "eval_steps_per_second": 45.245, "step": 100 }, { "epoch": 0.88, "learning_rate": 6e-06, "loss": 8.0655, "step": 110 }, { "epoch": 0.88, "eval_loss": 7.871333122253418, "eval_runtime": 2.7896, "eval_samples_per_second": 358.475, "eval_steps_per_second": 44.809, "step": 110 }, { "epoch": 0.96, "learning_rate": 2.0000000000000003e-06, "loss": 8.1315, "step": 120 }, { "epoch": 0.96, "eval_loss": 7.84664249420166, "eval_runtime": 2.9029, "eval_samples_per_second": 344.487, "eval_steps_per_second": 43.061, "step": 120 } ], "logging_steps": 10, "max_steps": 125, "num_train_epochs": 1, "save_steps": 10, "total_flos": 8440665834240.0, "trial_name": null, "trial_params": null }