|
{ |
|
"best_metric": 1.675691843032837, |
|
"best_model_checkpoint": "Pricer-FineTune-OpenSource-2024-10-23_08.48.15/checkpoint-1250", |
|
"epoch": 3.0, |
|
"eval_steps": 50, |
|
"global_step": 1875, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.3769932985305786, |
|
"learning_rate": 8.771929824561403e-05, |
|
"loss": 2.0308, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_loss": 1.740922212600708, |
|
"eval_runtime": 4.9828, |
|
"eval_samples_per_second": 20.069, |
|
"eval_steps_per_second": 5.017, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 1.283903956413269, |
|
"learning_rate": 9.986202859963424e-05, |
|
"loss": 1.7637, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_loss": 1.7477470636367798, |
|
"eval_runtime": 4.9882, |
|
"eval_samples_per_second": 20.047, |
|
"eval_steps_per_second": 5.012, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 1.5214710235595703, |
|
"learning_rate": 9.935570765205927e-05, |
|
"loss": 1.7386, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 1.69858980178833, |
|
"eval_runtime": 4.988, |
|
"eval_samples_per_second": 20.048, |
|
"eval_steps_per_second": 5.012, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.9870548248291016, |
|
"learning_rate": 9.848115669304158e-05, |
|
"loss": 1.7239, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 1.7267831563949585, |
|
"eval_runtime": 4.9814, |
|
"eval_samples_per_second": 20.075, |
|
"eval_steps_per_second": 5.019, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.197464108467102, |
|
"learning_rate": 9.724490051829306e-05, |
|
"loss": 1.7004, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 1.7107303142547607, |
|
"eval_runtime": 4.9953, |
|
"eval_samples_per_second": 20.019, |
|
"eval_steps_per_second": 5.005, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 1.062972903251648, |
|
"learning_rate": 9.565616251143094e-05, |
|
"loss": 1.7174, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 1.7235748767852783, |
|
"eval_runtime": 4.9773, |
|
"eval_samples_per_second": 20.091, |
|
"eval_steps_per_second": 5.023, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 0.8863743543624878, |
|
"learning_rate": 9.372679583072762e-05, |
|
"loss": 1.7156, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_loss": 1.705377459526062, |
|
"eval_runtime": 4.9832, |
|
"eval_samples_per_second": 20.067, |
|
"eval_steps_per_second": 5.017, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 1.1850109100341797, |
|
"learning_rate": 9.147119497580047e-05, |
|
"loss": 1.7013, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 1.7099401950836182, |
|
"eval_runtime": 4.9875, |
|
"eval_samples_per_second": 20.05, |
|
"eval_steps_per_second": 5.013, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.9806734323501587, |
|
"learning_rate": 8.890618839401924e-05, |
|
"loss": 1.6887, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_loss": 1.6758233308792114, |
|
"eval_runtime": 4.9812, |
|
"eval_samples_per_second": 20.075, |
|
"eval_steps_per_second": 5.019, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 1.8731200695037842, |
|
"learning_rate": 8.605091292786664e-05, |
|
"loss": 1.6963, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.6987907886505127, |
|
"eval_runtime": 4.9844, |
|
"eval_samples_per_second": 20.063, |
|
"eval_steps_per_second": 5.016, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.2332689762115479, |
|
"learning_rate": 8.292667103996738e-05, |
|
"loss": 1.7313, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 1.7032952308654785, |
|
"eval_runtime": 4.9781, |
|
"eval_samples_per_second": 20.088, |
|
"eval_steps_per_second": 5.022, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.2375348806381226, |
|
"learning_rate": 7.955677188099235e-05, |
|
"loss": 1.6986, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 1.6838198900222778, |
|
"eval_runtime": 4.9789, |
|
"eval_samples_per_second": 20.085, |
|
"eval_steps_per_second": 5.021, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 1.6269843578338623, |
|
"learning_rate": 7.59663573861888e-05, |
|
"loss": 1.6478, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_loss": 1.717869520187378, |
|
"eval_runtime": 4.9837, |
|
"eval_samples_per_second": 20.065, |
|
"eval_steps_per_second": 5.016, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 1.6488033533096313, |
|
"learning_rate": 7.218221469798465e-05, |
|
"loss": 1.6154, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_loss": 1.671476125717163, |
|
"eval_runtime": 5.0061, |
|
"eval_samples_per_second": 19.976, |
|
"eval_steps_per_second": 4.994, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 2.118487596511841, |
|
"learning_rate": 6.823257631413276e-05, |
|
"loss": 1.5951, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_loss": 1.6910121440887451, |
|
"eval_runtime": 4.9788, |
|
"eval_samples_per_second": 20.085, |
|
"eval_steps_per_second": 5.021, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 1.9570444822311401, |
|
"learning_rate": 6.414690945243768e-05, |
|
"loss": 1.6109, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 1.7327255010604858, |
|
"eval_runtime": 4.9795, |
|
"eval_samples_per_second": 20.082, |
|
"eval_steps_per_second": 5.021, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.3599999999999999, |
|
"grad_norm": 1.9022583961486816, |
|
"learning_rate": 5.9955696203559285e-05, |
|
"loss": 1.615, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.3599999999999999, |
|
"eval_loss": 1.7244207859039307, |
|
"eval_runtime": 4.9869, |
|
"eval_samples_per_second": 20.052, |
|
"eval_steps_per_second": 5.013, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 1.445749044418335, |
|
"learning_rate": 5.5690206112115884e-05, |
|
"loss": 1.6122, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 1.689263939857483, |
|
"eval_runtime": 4.9757, |
|
"eval_samples_per_second": 20.098, |
|
"eval_steps_per_second": 5.024, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 2.5496785640716553, |
|
"learning_rate": 5.1382262882799395e-05, |
|
"loss": 1.6248, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_loss": 1.6721502542495728, |
|
"eval_runtime": 4.9817, |
|
"eval_samples_per_second": 20.073, |
|
"eval_steps_per_second": 5.018, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 1.7256929874420166, |
|
"learning_rate": 4.706400695204749e-05, |
|
"loss": 1.5938, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 1.698430061340332, |
|
"eval_runtime": 4.9786, |
|
"eval_samples_per_second": 20.086, |
|
"eval_steps_per_second": 5.021, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.6800000000000002, |
|
"grad_norm": 2.525702714920044, |
|
"learning_rate": 4.276765569666291e-05, |
|
"loss": 1.616, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.6800000000000002, |
|
"eval_loss": 1.6906436681747437, |
|
"eval_runtime": 4.9808, |
|
"eval_samples_per_second": 20.077, |
|
"eval_steps_per_second": 5.019, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 2.1740636825561523, |
|
"learning_rate": 3.8525263068401055e-05, |
|
"loss": 1.5903, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_loss": 1.6799463033676147, |
|
"eval_runtime": 4.9781, |
|
"eval_samples_per_second": 20.088, |
|
"eval_steps_per_second": 5.022, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.8399999999999999, |
|
"grad_norm": 2.1382858753204346, |
|
"learning_rate": 3.436848044782893e-05, |
|
"loss": 1.583, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.8399999999999999, |
|
"eval_loss": 1.6741628646850586, |
|
"eval_runtime": 4.9792, |
|
"eval_samples_per_second": 20.084, |
|
"eval_steps_per_second": 5.021, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 1.8631138801574707, |
|
"learning_rate": 3.032832050166239e-05, |
|
"loss": 1.5876, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_loss": 1.6653131246566772, |
|
"eval_runtime": 4.9832, |
|
"eval_samples_per_second": 20.067, |
|
"eval_steps_per_second": 5.017, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 2.0618934631347656, |
|
"learning_rate": 2.6434925805380144e-05, |
|
"loss": 1.5982, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.675691843032837, |
|
"eval_runtime": 4.9832, |
|
"eval_samples_per_second": 20.067, |
|
"eval_steps_per_second": 5.017, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 2.440321207046509, |
|
"learning_rate": 2.2717343957360653e-05, |
|
"loss": 1.4717, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_loss": 1.7430232763290405, |
|
"eval_runtime": 4.9759, |
|
"eval_samples_per_second": 20.097, |
|
"eval_steps_per_second": 5.024, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 3.0594778060913086, |
|
"learning_rate": 1.9203310862356577e-05, |
|
"loss": 1.4536, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_loss": 1.765580177307129, |
|
"eval_runtime": 4.9891, |
|
"eval_samples_per_second": 20.044, |
|
"eval_steps_per_second": 5.011, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 3.2999825477600098, |
|
"learning_rate": 1.5919043801171672e-05, |
|
"loss": 1.4503, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"eval_loss": 1.7877963781356812, |
|
"eval_runtime": 4.98, |
|
"eval_samples_per_second": 20.08, |
|
"eval_steps_per_second": 5.02, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 4.290762901306152, |
|
"learning_rate": 1.288904583039358e-05, |
|
"loss": 1.4655, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_loss": 1.7701205015182495, |
|
"eval_runtime": 4.986, |
|
"eval_samples_per_second": 20.056, |
|
"eval_steps_per_second": 5.014, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 3.0238780975341797, |
|
"learning_rate": 1.013592297150449e-05, |
|
"loss": 1.4481, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 1.7533233165740967, |
|
"eval_runtime": 4.9911, |
|
"eval_samples_per_second": 20.036, |
|
"eval_steps_per_second": 5.009, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 3.3997039794921875, |
|
"learning_rate": 7.680215553274045e-06, |
|
"loss": 1.4335, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_loss": 1.7479755878448486, |
|
"eval_runtime": 4.9816, |
|
"eval_samples_per_second": 20.074, |
|
"eval_steps_per_second": 5.018, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 4.031099319458008, |
|
"learning_rate": 5.5402449657446956e-06, |
|
"loss": 1.4505, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_loss": 1.7693780660629272, |
|
"eval_runtime": 4.9807, |
|
"eval_samples_per_second": 20.078, |
|
"eval_steps_per_second": 5.019, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"grad_norm": 4.104003429412842, |
|
"learning_rate": 3.731976969137929e-06, |
|
"loss": 1.4537, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"eval_loss": 1.7657314538955688, |
|
"eval_runtime": 4.9778, |
|
"eval_samples_per_second": 20.089, |
|
"eval_steps_per_second": 5.022, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.7199999999999998, |
|
"grad_norm": 2.632106304168701, |
|
"learning_rate": 2.268902577497639e-06, |
|
"loss": 1.4496, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.7199999999999998, |
|
"eval_loss": 1.7661254405975342, |
|
"eval_runtime": 4.9885, |
|
"eval_samples_per_second": 20.046, |
|
"eval_steps_per_second": 5.012, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 2.8151047229766846, |
|
"learning_rate": 1.1619374057669662e-06, |
|
"loss": 1.4586, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_loss": 1.7688201665878296, |
|
"eval_runtime": 4.9887, |
|
"eval_samples_per_second": 20.045, |
|
"eval_steps_per_second": 5.011, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"grad_norm": 3.2328884601593018, |
|
"learning_rate": 4.1934023124329257e-07, |
|
"loss": 1.4583, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"eval_loss": 1.7641412019729614, |
|
"eval_runtime": 4.9803, |
|
"eval_samples_per_second": 20.079, |
|
"eval_steps_per_second": 5.02, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"grad_norm": 4.39493465423584, |
|
"learning_rate": 4.665137700333166e-08, |
|
"loss": 1.4495, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_loss": 1.765012502670288, |
|
"eval_runtime": 4.9788, |
|
"eval_samples_per_second": 20.085, |
|
"eval_steps_per_second": 5.021, |
|
"step": 1850 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 1875, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 250, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.819416637396746e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|