{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 147, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.10204081632653061, "grad_norm": 4.106373310089111, "learning_rate": 2e-05, "loss": 1.2854, "step": 5 }, { "epoch": 0.20408163265306123, "grad_norm": 1.1787455081939697, "learning_rate": 1.9938879040295508e-05, "loss": 0.5714, "step": 10 }, { "epoch": 0.30612244897959184, "grad_norm": 1.0353399515151978, "learning_rate": 1.975626331552507e-05, "loss": 0.424, "step": 15 }, { "epoch": 0.40816326530612246, "grad_norm": 0.9334375262260437, "learning_rate": 1.9454385155359704e-05, "loss": 0.3665, "step": 20 }, { "epoch": 0.5102040816326531, "grad_norm": 0.8077430725097656, "learning_rate": 1.903693477637204e-05, "loss": 0.3533, "step": 25 }, { "epoch": 0.6122448979591837, "grad_norm": 0.786541759967804, "learning_rate": 1.850901517212062e-05, "loss": 0.3211, "step": 30 }, { "epoch": 0.7142857142857143, "grad_norm": 0.6843534708023071, "learning_rate": 1.7877079733177185e-05, "loss": 0.3147, "step": 35 }, { "epoch": 0.8163265306122449, "grad_norm": 0.6673664450645447, "learning_rate": 1.7148853359641627e-05, "loss": 0.2972, "step": 40 }, { "epoch": 0.9183673469387755, "grad_norm": 0.6665471792221069, "learning_rate": 1.6333238030480473e-05, "loss": 0.2947, "step": 45 }, { "epoch": 1.0204081632653061, "grad_norm": 0.6398372650146484, "learning_rate": 1.5440203984027323e-05, "loss": 0.2689, "step": 50 }, { "epoch": 1.1224489795918366, "grad_norm": 0.5915564894676208, "learning_rate": 1.4480667839875786e-05, "loss": 0.2341, "step": 55 }, { "epoch": 1.2244897959183674, "grad_norm": 0.5560858845710754, "learning_rate": 1.3466359152026197e-05, "loss": 0.23, "step": 60 }, { "epoch": 1.3265306122448979, "grad_norm": 0.6363187432289124, "learning_rate": 1.2409677024566145e-05, "loss": 0.2209, "step": 65 }, { "epoch": 1.4285714285714286, "grad_norm": 0.5852870941162109, "learning_rate": 1.1323538542642227e-05, "loss": 0.2096, "step": 70 }, { "epoch": 1.5306122448979593, "grad_norm": 0.5593776702880859, "learning_rate": 1.022122087153187e-05, "loss": 0.2155, "step": 75 }, { "epoch": 1.6326530612244898, "grad_norm": 0.5509619116783142, "learning_rate": 9.116198954026577e-06, "loss": 0.2104, "step": 80 }, { "epoch": 1.7346938775510203, "grad_norm": 0.5514574646949768, "learning_rate": 8.021980790144828e-06, "loss": 0.2097, "step": 85 }, { "epoch": 1.836734693877551, "grad_norm": 0.6069679856300354, "learning_rate": 6.951942312747135e-06, "loss": 0.2062, "step": 90 }, { "epoch": 1.9387755102040818, "grad_norm": 0.6014193296432495, "learning_rate": 5.919163877565351e-06, "loss": 0.2075, "step": 95 }, { "epoch": 2.0408163265306123, "grad_norm": 0.5056990385055542, "learning_rate": 4.936270366423563e-06, "loss": 0.1778, "step": 100 }, { "epoch": 2.142857142857143, "grad_norm": 0.6073999404907227, "learning_rate": 4.015276858259427e-06, "loss": 0.1457, "step": 105 }, { "epoch": 2.2448979591836733, "grad_norm": 0.5745469331741333, "learning_rate": 3.167441754493066e-06, "loss": 0.1441, "step": 110 }, { "epoch": 2.3469387755102042, "grad_norm": 0.5980499386787415, "learning_rate": 2.403129154167153e-06, "loss": 0.1426, "step": 115 }, { "epoch": 2.4489795918367347, "grad_norm": 0.5385629534721375, "learning_rate": 1.7316821612109136e-06, "loss": 0.1341, "step": 120 }, { "epoch": 2.5510204081632653, "grad_norm": 0.5529075860977173, "learning_rate": 1.161308672544389e-06, "loss": 0.1367, "step": 125 }, { "epoch": 2.6530612244897958, "grad_norm": 0.5598864555358887, "learning_rate": 6.989810431710375e-07, "loss": 0.1356, "step": 130 }, { "epoch": 2.7551020408163263, "grad_norm": 0.5336365699768066, "learning_rate": 3.5035085477190143e-07, "loss": 0.1352, "step": 135 }, { "epoch": 2.857142857142857, "grad_norm": 0.516101598739624, "learning_rate": 1.1967982968635994e-07, "loss": 0.1373, "step": 140 }, { "epoch": 2.9591836734693877, "grad_norm": 0.5510626435279846, "learning_rate": 9.78773480026396e-09, "loss": 0.1373, "step": 145 }, { "epoch": 3.0, "step": 147, "total_flos": 1.9096132999184384e+17, "train_loss": 0.26934566870838605, "train_runtime": 589.2508, "train_samples_per_second": 15.879, "train_steps_per_second": 0.249 } ], "logging_steps": 5, "max_steps": 147, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.9096132999184384e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }