{ "best_metric": 1.675691843032837, "best_model_checkpoint": "Pricer-FineTune-OpenSource-2024-10-23_08.48.15/checkpoint-1250", "epoch": 3.0, "eval_steps": 50, "global_step": 1875, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "grad_norm": 1.3769932985305786, "learning_rate": 8.771929824561403e-05, "loss": 2.0308, "step": 50 }, { "epoch": 0.08, "eval_loss": 1.740922212600708, "eval_runtime": 4.9828, "eval_samples_per_second": 20.069, "eval_steps_per_second": 5.017, "step": 50 }, { "epoch": 0.16, "grad_norm": 1.283903956413269, "learning_rate": 9.986202859963424e-05, "loss": 1.7637, "step": 100 }, { "epoch": 0.16, "eval_loss": 1.7477470636367798, "eval_runtime": 4.9882, "eval_samples_per_second": 20.047, "eval_steps_per_second": 5.012, "step": 100 }, { "epoch": 0.24, "grad_norm": 1.5214710235595703, "learning_rate": 9.935570765205927e-05, "loss": 1.7386, "step": 150 }, { "epoch": 0.24, "eval_loss": 1.69858980178833, "eval_runtime": 4.988, "eval_samples_per_second": 20.048, "eval_steps_per_second": 5.012, "step": 150 }, { "epoch": 0.32, "grad_norm": 0.9870548248291016, "learning_rate": 9.848115669304158e-05, "loss": 1.7239, "step": 200 }, { "epoch": 0.32, "eval_loss": 1.7267831563949585, "eval_runtime": 4.9814, "eval_samples_per_second": 20.075, "eval_steps_per_second": 5.019, "step": 200 }, { "epoch": 0.4, "grad_norm": 1.197464108467102, "learning_rate": 9.724490051829306e-05, "loss": 1.7004, "step": 250 }, { "epoch": 0.4, "eval_loss": 1.7107303142547607, "eval_runtime": 4.9953, "eval_samples_per_second": 20.019, "eval_steps_per_second": 5.005, "step": 250 }, { "epoch": 0.48, "grad_norm": 1.062972903251648, "learning_rate": 9.565616251143094e-05, "loss": 1.7174, "step": 300 }, { "epoch": 0.48, "eval_loss": 1.7235748767852783, "eval_runtime": 4.9773, "eval_samples_per_second": 20.091, "eval_steps_per_second": 5.023, "step": 300 }, { "epoch": 0.56, "grad_norm": 0.8863743543624878, "learning_rate": 9.372679583072762e-05, "loss": 1.7156, "step": 350 }, { "epoch": 0.56, "eval_loss": 1.705377459526062, "eval_runtime": 4.9832, "eval_samples_per_second": 20.067, "eval_steps_per_second": 5.017, "step": 350 }, { "epoch": 0.64, "grad_norm": 1.1850109100341797, "learning_rate": 9.147119497580047e-05, "loss": 1.7013, "step": 400 }, { "epoch": 0.64, "eval_loss": 1.7099401950836182, "eval_runtime": 4.9875, "eval_samples_per_second": 20.05, "eval_steps_per_second": 5.013, "step": 400 }, { "epoch": 0.72, "grad_norm": 1.9806734323501587, "learning_rate": 8.890618839401924e-05, "loss": 1.6887, "step": 450 }, { "epoch": 0.72, "eval_loss": 1.6758233308792114, "eval_runtime": 4.9812, "eval_samples_per_second": 20.075, "eval_steps_per_second": 5.019, "step": 450 }, { "epoch": 0.8, "grad_norm": 1.8731200695037842, "learning_rate": 8.605091292786664e-05, "loss": 1.6963, "step": 500 }, { "epoch": 0.8, "eval_loss": 1.6987907886505127, "eval_runtime": 4.9844, "eval_samples_per_second": 20.063, "eval_steps_per_second": 5.016, "step": 500 }, { "epoch": 0.88, "grad_norm": 1.2332689762115479, "learning_rate": 8.292667103996738e-05, "loss": 1.7313, "step": 550 }, { "epoch": 0.88, "eval_loss": 1.7032952308654785, "eval_runtime": 4.9781, "eval_samples_per_second": 20.088, "eval_steps_per_second": 5.022, "step": 550 }, { "epoch": 0.96, "grad_norm": 1.2375348806381226, "learning_rate": 7.955677188099235e-05, "loss": 1.6986, "step": 600 }, { "epoch": 0.96, "eval_loss": 1.6838198900222778, "eval_runtime": 4.9789, "eval_samples_per_second": 20.085, "eval_steps_per_second": 5.021, "step": 600 }, { "epoch": 1.04, "grad_norm": 1.6269843578338623, "learning_rate": 7.59663573861888e-05, "loss": 1.6478, "step": 650 }, { "epoch": 1.04, "eval_loss": 1.717869520187378, "eval_runtime": 4.9837, "eval_samples_per_second": 20.065, "eval_steps_per_second": 5.016, "step": 650 }, { "epoch": 1.12, "grad_norm": 1.6488033533096313, "learning_rate": 7.218221469798465e-05, "loss": 1.6154, "step": 700 }, { "epoch": 1.12, "eval_loss": 1.671476125717163, "eval_runtime": 5.0061, "eval_samples_per_second": 19.976, "eval_steps_per_second": 4.994, "step": 700 }, { "epoch": 1.2, "grad_norm": 2.118487596511841, "learning_rate": 6.823257631413276e-05, "loss": 1.5951, "step": 750 }, { "epoch": 1.2, "eval_loss": 1.6910121440887451, "eval_runtime": 4.9788, "eval_samples_per_second": 20.085, "eval_steps_per_second": 5.021, "step": 750 }, { "epoch": 1.28, "grad_norm": 1.9570444822311401, "learning_rate": 6.414690945243768e-05, "loss": 1.6109, "step": 800 }, { "epoch": 1.28, "eval_loss": 1.7327255010604858, "eval_runtime": 4.9795, "eval_samples_per_second": 20.082, "eval_steps_per_second": 5.021, "step": 800 }, { "epoch": 1.3599999999999999, "grad_norm": 1.9022583961486816, "learning_rate": 5.9955696203559285e-05, "loss": 1.615, "step": 850 }, { "epoch": 1.3599999999999999, "eval_loss": 1.7244207859039307, "eval_runtime": 4.9869, "eval_samples_per_second": 20.052, "eval_steps_per_second": 5.013, "step": 850 }, { "epoch": 1.44, "grad_norm": 1.445749044418335, "learning_rate": 5.5690206112115884e-05, "loss": 1.6122, "step": 900 }, { "epoch": 1.44, "eval_loss": 1.689263939857483, "eval_runtime": 4.9757, "eval_samples_per_second": 20.098, "eval_steps_per_second": 5.024, "step": 900 }, { "epoch": 1.52, "grad_norm": 2.5496785640716553, "learning_rate": 5.1382262882799395e-05, "loss": 1.6248, "step": 950 }, { "epoch": 1.52, "eval_loss": 1.6721502542495728, "eval_runtime": 4.9817, "eval_samples_per_second": 20.073, "eval_steps_per_second": 5.018, "step": 950 }, { "epoch": 1.6, "grad_norm": 1.7256929874420166, "learning_rate": 4.706400695204749e-05, "loss": 1.5938, "step": 1000 }, { "epoch": 1.6, "eval_loss": 1.698430061340332, "eval_runtime": 4.9786, "eval_samples_per_second": 20.086, "eval_steps_per_second": 5.021, "step": 1000 }, { "epoch": 1.6800000000000002, "grad_norm": 2.525702714920044, "learning_rate": 4.276765569666291e-05, "loss": 1.616, "step": 1050 }, { "epoch": 1.6800000000000002, "eval_loss": 1.6906436681747437, "eval_runtime": 4.9808, "eval_samples_per_second": 20.077, "eval_steps_per_second": 5.019, "step": 1050 }, { "epoch": 1.76, "grad_norm": 2.1740636825561523, "learning_rate": 3.8525263068401055e-05, "loss": 1.5903, "step": 1100 }, { "epoch": 1.76, "eval_loss": 1.6799463033676147, "eval_runtime": 4.9781, "eval_samples_per_second": 20.088, "eval_steps_per_second": 5.022, "step": 1100 }, { "epoch": 1.8399999999999999, "grad_norm": 2.1382858753204346, "learning_rate": 3.436848044782893e-05, "loss": 1.583, "step": 1150 }, { "epoch": 1.8399999999999999, "eval_loss": 1.6741628646850586, "eval_runtime": 4.9792, "eval_samples_per_second": 20.084, "eval_steps_per_second": 5.021, "step": 1150 }, { "epoch": 1.92, "grad_norm": 1.8631138801574707, "learning_rate": 3.032832050166239e-05, "loss": 1.5876, "step": 1200 }, { "epoch": 1.92, "eval_loss": 1.6653131246566772, "eval_runtime": 4.9832, "eval_samples_per_second": 20.067, "eval_steps_per_second": 5.017, "step": 1200 }, { "epoch": 2.0, "grad_norm": 2.0618934631347656, "learning_rate": 2.6434925805380144e-05, "loss": 1.5982, "step": 1250 }, { "epoch": 2.0, "eval_loss": 1.675691843032837, "eval_runtime": 4.9832, "eval_samples_per_second": 20.067, "eval_steps_per_second": 5.017, "step": 1250 }, { "epoch": 2.08, "grad_norm": 2.440321207046509, "learning_rate": 2.2717343957360653e-05, "loss": 1.4717, "step": 1300 }, { "epoch": 2.08, "eval_loss": 1.7430232763290405, "eval_runtime": 4.9759, "eval_samples_per_second": 20.097, "eval_steps_per_second": 5.024, "step": 1300 }, { "epoch": 2.16, "grad_norm": 3.0594778060913086, "learning_rate": 1.9203310862356577e-05, "loss": 1.4536, "step": 1350 }, { "epoch": 2.16, "eval_loss": 1.765580177307129, "eval_runtime": 4.9891, "eval_samples_per_second": 20.044, "eval_steps_per_second": 5.011, "step": 1350 }, { "epoch": 2.24, "grad_norm": 3.2999825477600098, "learning_rate": 1.5919043801171672e-05, "loss": 1.4503, "step": 1400 }, { "epoch": 2.24, "eval_loss": 1.7877963781356812, "eval_runtime": 4.98, "eval_samples_per_second": 20.08, "eval_steps_per_second": 5.02, "step": 1400 }, { "epoch": 2.32, "grad_norm": 4.290762901306152, "learning_rate": 1.288904583039358e-05, "loss": 1.4655, "step": 1450 }, { "epoch": 2.32, "eval_loss": 1.7701205015182495, "eval_runtime": 4.986, "eval_samples_per_second": 20.056, "eval_steps_per_second": 5.014, "step": 1450 }, { "epoch": 2.4, "grad_norm": 3.0238780975341797, "learning_rate": 1.013592297150449e-05, "loss": 1.4481, "step": 1500 }, { "epoch": 2.4, "eval_loss": 1.7533233165740967, "eval_runtime": 4.9911, "eval_samples_per_second": 20.036, "eval_steps_per_second": 5.009, "step": 1500 }, { "epoch": 2.48, "grad_norm": 3.3997039794921875, "learning_rate": 7.680215553274045e-06, "loss": 1.4335, "step": 1550 }, { "epoch": 2.48, "eval_loss": 1.7479755878448486, "eval_runtime": 4.9816, "eval_samples_per_second": 20.074, "eval_steps_per_second": 5.018, "step": 1550 }, { "epoch": 2.56, "grad_norm": 4.031099319458008, "learning_rate": 5.5402449657446956e-06, "loss": 1.4505, "step": 1600 }, { "epoch": 2.56, "eval_loss": 1.7693780660629272, "eval_runtime": 4.9807, "eval_samples_per_second": 20.078, "eval_steps_per_second": 5.019, "step": 1600 }, { "epoch": 2.64, "grad_norm": 4.104003429412842, "learning_rate": 3.731976969137929e-06, "loss": 1.4537, "step": 1650 }, { "epoch": 2.64, "eval_loss": 1.7657314538955688, "eval_runtime": 4.9778, "eval_samples_per_second": 20.089, "eval_steps_per_second": 5.022, "step": 1650 }, { "epoch": 2.7199999999999998, "grad_norm": 2.632106304168701, "learning_rate": 2.268902577497639e-06, "loss": 1.4496, "step": 1700 }, { "epoch": 2.7199999999999998, "eval_loss": 1.7661254405975342, "eval_runtime": 4.9885, "eval_samples_per_second": 20.046, "eval_steps_per_second": 5.012, "step": 1700 }, { "epoch": 2.8, "grad_norm": 2.8151047229766846, "learning_rate": 1.1619374057669662e-06, "loss": 1.4586, "step": 1750 }, { "epoch": 2.8, "eval_loss": 1.7688201665878296, "eval_runtime": 4.9887, "eval_samples_per_second": 20.045, "eval_steps_per_second": 5.011, "step": 1750 }, { "epoch": 2.88, "grad_norm": 3.2328884601593018, "learning_rate": 4.1934023124329257e-07, "loss": 1.4583, "step": 1800 }, { "epoch": 2.88, "eval_loss": 1.7641412019729614, "eval_runtime": 4.9803, "eval_samples_per_second": 20.079, "eval_steps_per_second": 5.02, "step": 1800 }, { "epoch": 2.96, "grad_norm": 4.39493465423584, "learning_rate": 4.665137700333166e-08, "loss": 1.4495, "step": 1850 }, { "epoch": 2.96, "eval_loss": 1.765012502670288, "eval_runtime": 4.9788, "eval_samples_per_second": 20.085, "eval_steps_per_second": 5.021, "step": 1850 } ], "logging_steps": 50, "max_steps": 1875, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 250, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.819416637396746e+17, "train_batch_size": 16, "trial_name": null, "trial_params": null }