{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.04498931503767855, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00022494657518839275, "eval_loss": 1.3146981000900269, "eval_runtime": 67.0104, "eval_samples_per_second": 13.968, "eval_steps_per_second": 6.984, "step": 1 }, { "epoch": 0.0022494657518839275, "grad_norm": 1.1338897943496704, "learning_rate": 0.0002, "loss": 2.5636, "step": 10 }, { "epoch": 0.004498931503767855, "grad_norm": 1.8249558210372925, "learning_rate": 0.0002, "loss": 2.5914, "step": 20 }, { "epoch": 0.0067483972556517825, "grad_norm": 1.2825453281402588, "learning_rate": 0.0002, "loss": 2.4154, "step": 30 }, { "epoch": 0.00899786300753571, "grad_norm": 2.0352253913879395, "learning_rate": 0.0002, "loss": 2.4877, "step": 40 }, { "epoch": 0.011247328759419637, "grad_norm": 0.9270350337028503, "learning_rate": 0.0002, "loss": 2.0619, "step": 50 }, { "epoch": 0.011247328759419637, "eval_loss": 1.2109270095825195, "eval_runtime": 67.0303, "eval_samples_per_second": 13.964, "eval_steps_per_second": 6.982, "step": 50 }, { "epoch": 0.013496794511303565, "grad_norm": 1.5693869590759277, "learning_rate": 0.0002, "loss": 2.3889, "step": 60 }, { "epoch": 0.015746260263187494, "grad_norm": 1.03313148021698, "learning_rate": 0.0002, "loss": 2.2495, "step": 70 }, { "epoch": 0.01799572601507142, "grad_norm": 1.0436208248138428, "learning_rate": 0.0002, "loss": 2.6946, "step": 80 }, { "epoch": 0.02024519176695535, "grad_norm": 1.4744815826416016, "learning_rate": 0.0002, "loss": 2.2621, "step": 90 }, { "epoch": 0.022494657518839275, "grad_norm": 2.6527371406555176, "learning_rate": 0.0002, "loss": 2.5709, "step": 100 }, { "epoch": 0.022494657518839275, "eval_loss": 1.192226529121399, "eval_runtime": 67.1155, "eval_samples_per_second": 13.946, "eval_steps_per_second": 6.973, "step": 100 }, { "epoch": 0.024744123270723204, "grad_norm": 1.56536066532135, "learning_rate": 0.0002, "loss": 2.2297, "step": 110 }, { "epoch": 0.02699358902260713, "grad_norm": 1.3520946502685547, "learning_rate": 0.0002, "loss": 2.2107, "step": 120 }, { "epoch": 0.02924305477449106, "grad_norm": 0.8982442021369934, "learning_rate": 0.0002, "loss": 2.5156, "step": 130 }, { "epoch": 0.03149252052637499, "grad_norm": 2.5134899616241455, "learning_rate": 0.0002, "loss": 1.9862, "step": 140 }, { "epoch": 0.03374198627825891, "grad_norm": 1.1740270853042603, "learning_rate": 0.0002, "loss": 2.0565, "step": 150 }, { "epoch": 0.03374198627825891, "eval_loss": 1.182604432106018, "eval_runtime": 67.1743, "eval_samples_per_second": 13.934, "eval_steps_per_second": 6.967, "step": 150 }, { "epoch": 0.03599145203014284, "grad_norm": 0.9191089272499084, "learning_rate": 0.0002, "loss": 2.2336, "step": 160 }, { "epoch": 0.03824091778202677, "grad_norm": 3.097604274749756, "learning_rate": 0.0002, "loss": 2.3238, "step": 170 }, { "epoch": 0.0404903835339107, "grad_norm": 1.0487302541732788, "learning_rate": 0.0002, "loss": 2.3465, "step": 180 }, { "epoch": 0.04273984928579462, "grad_norm": 1.4951366186141968, "learning_rate": 0.0002, "loss": 2.3439, "step": 190 }, { "epoch": 0.04498931503767855, "grad_norm": 2.0182831287384033, "learning_rate": 0.0002, "loss": 2.3684, "step": 200 }, { "epoch": 0.04498931503767855, "eval_loss": 1.179027795791626, "eval_runtime": 67.2842, "eval_samples_per_second": 13.911, "eval_steps_per_second": 6.956, "step": 200 } ], "logging_steps": 10, "max_steps": 200, 
"num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.97508017504256e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }