jeffrey03's picture
Training in progress, step 1875, checkpoint
740b1b6 verified
{
"best_metric": 1.675691843032837,
"best_model_checkpoint": "Pricer-FineTune-OpenSource-2024-10-23_08.48.15/checkpoint-1250",
"epoch": 3.0,
"eval_steps": 50,
"global_step": 1875,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08,
"grad_norm": 1.3769932985305786,
"learning_rate": 8.771929824561403e-05,
"loss": 2.0308,
"step": 50
},
{
"epoch": 0.08,
"eval_loss": 1.740922212600708,
"eval_runtime": 4.9828,
"eval_samples_per_second": 20.069,
"eval_steps_per_second": 5.017,
"step": 50
},
{
"epoch": 0.16,
"grad_norm": 1.283903956413269,
"learning_rate": 9.986202859963424e-05,
"loss": 1.7637,
"step": 100
},
{
"epoch": 0.16,
"eval_loss": 1.7477470636367798,
"eval_runtime": 4.9882,
"eval_samples_per_second": 20.047,
"eval_steps_per_second": 5.012,
"step": 100
},
{
"epoch": 0.24,
"grad_norm": 1.5214710235595703,
"learning_rate": 9.935570765205927e-05,
"loss": 1.7386,
"step": 150
},
{
"epoch": 0.24,
"eval_loss": 1.69858980178833,
"eval_runtime": 4.988,
"eval_samples_per_second": 20.048,
"eval_steps_per_second": 5.012,
"step": 150
},
{
"epoch": 0.32,
"grad_norm": 0.9870548248291016,
"learning_rate": 9.848115669304158e-05,
"loss": 1.7239,
"step": 200
},
{
"epoch": 0.32,
"eval_loss": 1.7267831563949585,
"eval_runtime": 4.9814,
"eval_samples_per_second": 20.075,
"eval_steps_per_second": 5.019,
"step": 200
},
{
"epoch": 0.4,
"grad_norm": 1.197464108467102,
"learning_rate": 9.724490051829306e-05,
"loss": 1.7004,
"step": 250
},
{
"epoch": 0.4,
"eval_loss": 1.7107303142547607,
"eval_runtime": 4.9953,
"eval_samples_per_second": 20.019,
"eval_steps_per_second": 5.005,
"step": 250
},
{
"epoch": 0.48,
"grad_norm": 1.062972903251648,
"learning_rate": 9.565616251143094e-05,
"loss": 1.7174,
"step": 300
},
{
"epoch": 0.48,
"eval_loss": 1.7235748767852783,
"eval_runtime": 4.9773,
"eval_samples_per_second": 20.091,
"eval_steps_per_second": 5.023,
"step": 300
},
{
"epoch": 0.56,
"grad_norm": 0.8863743543624878,
"learning_rate": 9.372679583072762e-05,
"loss": 1.7156,
"step": 350
},
{
"epoch": 0.56,
"eval_loss": 1.705377459526062,
"eval_runtime": 4.9832,
"eval_samples_per_second": 20.067,
"eval_steps_per_second": 5.017,
"step": 350
},
{
"epoch": 0.64,
"grad_norm": 1.1850109100341797,
"learning_rate": 9.147119497580047e-05,
"loss": 1.7013,
"step": 400
},
{
"epoch": 0.64,
"eval_loss": 1.7099401950836182,
"eval_runtime": 4.9875,
"eval_samples_per_second": 20.05,
"eval_steps_per_second": 5.013,
"step": 400
},
{
"epoch": 0.72,
"grad_norm": 1.9806734323501587,
"learning_rate": 8.890618839401924e-05,
"loss": 1.6887,
"step": 450
},
{
"epoch": 0.72,
"eval_loss": 1.6758233308792114,
"eval_runtime": 4.9812,
"eval_samples_per_second": 20.075,
"eval_steps_per_second": 5.019,
"step": 450
},
{
"epoch": 0.8,
"grad_norm": 1.8731200695037842,
"learning_rate": 8.605091292786664e-05,
"loss": 1.6963,
"step": 500
},
{
"epoch": 0.8,
"eval_loss": 1.6987907886505127,
"eval_runtime": 4.9844,
"eval_samples_per_second": 20.063,
"eval_steps_per_second": 5.016,
"step": 500
},
{
"epoch": 0.88,
"grad_norm": 1.2332689762115479,
"learning_rate": 8.292667103996738e-05,
"loss": 1.7313,
"step": 550
},
{
"epoch": 0.88,
"eval_loss": 1.7032952308654785,
"eval_runtime": 4.9781,
"eval_samples_per_second": 20.088,
"eval_steps_per_second": 5.022,
"step": 550
},
{
"epoch": 0.96,
"grad_norm": 1.2375348806381226,
"learning_rate": 7.955677188099235e-05,
"loss": 1.6986,
"step": 600
},
{
"epoch": 0.96,
"eval_loss": 1.6838198900222778,
"eval_runtime": 4.9789,
"eval_samples_per_second": 20.085,
"eval_steps_per_second": 5.021,
"step": 600
},
{
"epoch": 1.04,
"grad_norm": 1.6269843578338623,
"learning_rate": 7.59663573861888e-05,
"loss": 1.6478,
"step": 650
},
{
"epoch": 1.04,
"eval_loss": 1.717869520187378,
"eval_runtime": 4.9837,
"eval_samples_per_second": 20.065,
"eval_steps_per_second": 5.016,
"step": 650
},
{
"epoch": 1.12,
"grad_norm": 1.6488033533096313,
"learning_rate": 7.218221469798465e-05,
"loss": 1.6154,
"step": 700
},
{
"epoch": 1.12,
"eval_loss": 1.671476125717163,
"eval_runtime": 5.0061,
"eval_samples_per_second": 19.976,
"eval_steps_per_second": 4.994,
"step": 700
},
{
"epoch": 1.2,
"grad_norm": 2.118487596511841,
"learning_rate": 6.823257631413276e-05,
"loss": 1.5951,
"step": 750
},
{
"epoch": 1.2,
"eval_loss": 1.6910121440887451,
"eval_runtime": 4.9788,
"eval_samples_per_second": 20.085,
"eval_steps_per_second": 5.021,
"step": 750
},
{
"epoch": 1.28,
"grad_norm": 1.9570444822311401,
"learning_rate": 6.414690945243768e-05,
"loss": 1.6109,
"step": 800
},
{
"epoch": 1.28,
"eval_loss": 1.7327255010604858,
"eval_runtime": 4.9795,
"eval_samples_per_second": 20.082,
"eval_steps_per_second": 5.021,
"step": 800
},
{
"epoch": 1.3599999999999999,
"grad_norm": 1.9022583961486816,
"learning_rate": 5.9955696203559285e-05,
"loss": 1.615,
"step": 850
},
{
"epoch": 1.3599999999999999,
"eval_loss": 1.7244207859039307,
"eval_runtime": 4.9869,
"eval_samples_per_second": 20.052,
"eval_steps_per_second": 5.013,
"step": 850
},
{
"epoch": 1.44,
"grad_norm": 1.445749044418335,
"learning_rate": 5.5690206112115884e-05,
"loss": 1.6122,
"step": 900
},
{
"epoch": 1.44,
"eval_loss": 1.689263939857483,
"eval_runtime": 4.9757,
"eval_samples_per_second": 20.098,
"eval_steps_per_second": 5.024,
"step": 900
},
{
"epoch": 1.52,
"grad_norm": 2.5496785640716553,
"learning_rate": 5.1382262882799395e-05,
"loss": 1.6248,
"step": 950
},
{
"epoch": 1.52,
"eval_loss": 1.6721502542495728,
"eval_runtime": 4.9817,
"eval_samples_per_second": 20.073,
"eval_steps_per_second": 5.018,
"step": 950
},
{
"epoch": 1.6,
"grad_norm": 1.7256929874420166,
"learning_rate": 4.706400695204749e-05,
"loss": 1.5938,
"step": 1000
},
{
"epoch": 1.6,
"eval_loss": 1.698430061340332,
"eval_runtime": 4.9786,
"eval_samples_per_second": 20.086,
"eval_steps_per_second": 5.021,
"step": 1000
},
{
"epoch": 1.6800000000000002,
"grad_norm": 2.525702714920044,
"learning_rate": 4.276765569666291e-05,
"loss": 1.616,
"step": 1050
},
{
"epoch": 1.6800000000000002,
"eval_loss": 1.6906436681747437,
"eval_runtime": 4.9808,
"eval_samples_per_second": 20.077,
"eval_steps_per_second": 5.019,
"step": 1050
},
{
"epoch": 1.76,
"grad_norm": 2.1740636825561523,
"learning_rate": 3.8525263068401055e-05,
"loss": 1.5903,
"step": 1100
},
{
"epoch": 1.76,
"eval_loss": 1.6799463033676147,
"eval_runtime": 4.9781,
"eval_samples_per_second": 20.088,
"eval_steps_per_second": 5.022,
"step": 1100
},
{
"epoch": 1.8399999999999999,
"grad_norm": 2.1382858753204346,
"learning_rate": 3.436848044782893e-05,
"loss": 1.583,
"step": 1150
},
{
"epoch": 1.8399999999999999,
"eval_loss": 1.6741628646850586,
"eval_runtime": 4.9792,
"eval_samples_per_second": 20.084,
"eval_steps_per_second": 5.021,
"step": 1150
},
{
"epoch": 1.92,
"grad_norm": 1.8631138801574707,
"learning_rate": 3.032832050166239e-05,
"loss": 1.5876,
"step": 1200
},
{
"epoch": 1.92,
"eval_loss": 1.6653131246566772,
"eval_runtime": 4.9832,
"eval_samples_per_second": 20.067,
"eval_steps_per_second": 5.017,
"step": 1200
},
{
"epoch": 2.0,
"grad_norm": 2.0618934631347656,
"learning_rate": 2.6434925805380144e-05,
"loss": 1.5982,
"step": 1250
},
{
"epoch": 2.0,
"eval_loss": 1.675691843032837,
"eval_runtime": 4.9832,
"eval_samples_per_second": 20.067,
"eval_steps_per_second": 5.017,
"step": 1250
},
{
"epoch": 2.08,
"grad_norm": 2.440321207046509,
"learning_rate": 2.2717343957360653e-05,
"loss": 1.4717,
"step": 1300
},
{
"epoch": 2.08,
"eval_loss": 1.7430232763290405,
"eval_runtime": 4.9759,
"eval_samples_per_second": 20.097,
"eval_steps_per_second": 5.024,
"step": 1300
},
{
"epoch": 2.16,
"grad_norm": 3.0594778060913086,
"learning_rate": 1.9203310862356577e-05,
"loss": 1.4536,
"step": 1350
},
{
"epoch": 2.16,
"eval_loss": 1.765580177307129,
"eval_runtime": 4.9891,
"eval_samples_per_second": 20.044,
"eval_steps_per_second": 5.011,
"step": 1350
},
{
"epoch": 2.24,
"grad_norm": 3.2999825477600098,
"learning_rate": 1.5919043801171672e-05,
"loss": 1.4503,
"step": 1400
},
{
"epoch": 2.24,
"eval_loss": 1.7877963781356812,
"eval_runtime": 4.98,
"eval_samples_per_second": 20.08,
"eval_steps_per_second": 5.02,
"step": 1400
},
{
"epoch": 2.32,
"grad_norm": 4.290762901306152,
"learning_rate": 1.288904583039358e-05,
"loss": 1.4655,
"step": 1450
},
{
"epoch": 2.32,
"eval_loss": 1.7701205015182495,
"eval_runtime": 4.986,
"eval_samples_per_second": 20.056,
"eval_steps_per_second": 5.014,
"step": 1450
},
{
"epoch": 2.4,
"grad_norm": 3.0238780975341797,
"learning_rate": 1.013592297150449e-05,
"loss": 1.4481,
"step": 1500
},
{
"epoch": 2.4,
"eval_loss": 1.7533233165740967,
"eval_runtime": 4.9911,
"eval_samples_per_second": 20.036,
"eval_steps_per_second": 5.009,
"step": 1500
},
{
"epoch": 2.48,
"grad_norm": 3.3997039794921875,
"learning_rate": 7.680215553274045e-06,
"loss": 1.4335,
"step": 1550
},
{
"epoch": 2.48,
"eval_loss": 1.7479755878448486,
"eval_runtime": 4.9816,
"eval_samples_per_second": 20.074,
"eval_steps_per_second": 5.018,
"step": 1550
},
{
"epoch": 2.56,
"grad_norm": 4.031099319458008,
"learning_rate": 5.5402449657446956e-06,
"loss": 1.4505,
"step": 1600
},
{
"epoch": 2.56,
"eval_loss": 1.7693780660629272,
"eval_runtime": 4.9807,
"eval_samples_per_second": 20.078,
"eval_steps_per_second": 5.019,
"step": 1600
},
{
"epoch": 2.64,
"grad_norm": 4.104003429412842,
"learning_rate": 3.731976969137929e-06,
"loss": 1.4537,
"step": 1650
},
{
"epoch": 2.64,
"eval_loss": 1.7657314538955688,
"eval_runtime": 4.9778,
"eval_samples_per_second": 20.089,
"eval_steps_per_second": 5.022,
"step": 1650
},
{
"epoch": 2.7199999999999998,
"grad_norm": 2.632106304168701,
"learning_rate": 2.268902577497639e-06,
"loss": 1.4496,
"step": 1700
},
{
"epoch": 2.7199999999999998,
"eval_loss": 1.7661254405975342,
"eval_runtime": 4.9885,
"eval_samples_per_second": 20.046,
"eval_steps_per_second": 5.012,
"step": 1700
},
{
"epoch": 2.8,
"grad_norm": 2.8151047229766846,
"learning_rate": 1.1619374057669662e-06,
"loss": 1.4586,
"step": 1750
},
{
"epoch": 2.8,
"eval_loss": 1.7688201665878296,
"eval_runtime": 4.9887,
"eval_samples_per_second": 20.045,
"eval_steps_per_second": 5.011,
"step": 1750
},
{
"epoch": 2.88,
"grad_norm": 3.2328884601593018,
"learning_rate": 4.1934023124329257e-07,
"loss": 1.4583,
"step": 1800
},
{
"epoch": 2.88,
"eval_loss": 1.7641412019729614,
"eval_runtime": 4.9803,
"eval_samples_per_second": 20.079,
"eval_steps_per_second": 5.02,
"step": 1800
},
{
"epoch": 2.96,
"grad_norm": 4.39493465423584,
"learning_rate": 4.665137700333166e-08,
"loss": 1.4495,
"step": 1850
},
{
"epoch": 2.96,
"eval_loss": 1.765012502670288,
"eval_runtime": 4.9788,
"eval_samples_per_second": 20.085,
"eval_steps_per_second": 5.021,
"step": 1850
}
],
"logging_steps": 50,
"max_steps": 1875,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 250,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.819416637396746e+17,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}