{ "best_metric": 2.814382314682007, "best_model_checkpoint": "miner_id_24/checkpoint-25", "epoch": 3.0617283950617282, "eval_steps": 25, "global_step": 31, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09876543209876543, "grad_norm": 6.233891010284424, "learning_rate": 5e-05, "loss": 5.1325, "step": 1 }, { "epoch": 0.09876543209876543, "eval_loss": 6.649930000305176, "eval_runtime": 0.4865, "eval_samples_per_second": 139.77, "eval_steps_per_second": 18.499, "step": 1 }, { "epoch": 0.19753086419753085, "grad_norm": 12.484169960021973, "learning_rate": 0.0001, "loss": 6.673, "step": 2 }, { "epoch": 0.2962962962962963, "grad_norm": 5.378587245941162, "learning_rate": 9.970689785771798e-05, "loss": 5.1172, "step": 3 }, { "epoch": 0.3950617283950617, "grad_norm": 6.7046661376953125, "learning_rate": 9.883102778550434e-05, "loss": 4.9902, "step": 4 }, { "epoch": 0.49382716049382713, "grad_norm": 14.028969764709473, "learning_rate": 9.738265855914013e-05, "loss": 5.5544, "step": 5 }, { "epoch": 0.5925925925925926, "grad_norm": 4.125074863433838, "learning_rate": 9.537877098354786e-05, "loss": 4.0607, "step": 6 }, { "epoch": 0.691358024691358, "grad_norm": 6.239777565002441, "learning_rate": 9.284285880837946e-05, "loss": 4.1057, "step": 7 }, { "epoch": 0.7901234567901234, "grad_norm": 4.690567970275879, "learning_rate": 8.980465328528219e-05, "loss": 3.8628, "step": 8 }, { "epoch": 0.8888888888888888, "grad_norm": 5.21114444732666, "learning_rate": 8.629977459615655e-05, "loss": 3.5969, "step": 9 }, { "epoch": 0.9876543209876543, "grad_norm": 9.486303329467773, "learning_rate": 8.236931423909138e-05, "loss": 3.5655, "step": 10 }, { "epoch": 1.0864197530864197, "grad_norm": 7.3250861167907715, "learning_rate": 7.805935326811912e-05, "loss": 6.019, "step": 11 }, { "epoch": 1.1851851851851851, "grad_norm": 6.11453914642334, "learning_rate": 7.342042203498951e-05, "loss": 3.0734, "step": 12 }, { "epoch": 1.2839506172839505, "grad_norm": 4.089361190795898, "learning_rate": 6.850690776699573e-05, "loss": 3.147, "step": 13 }, { "epoch": 1.382716049382716, "grad_norm": 4.826321125030518, "learning_rate": 6.337641692646106e-05, "loss": 3.099, "step": 14 }, { "epoch": 1.4814814814814814, "grad_norm": 9.735791206359863, "learning_rate": 5.808909982763825e-05, "loss": 3.2368, "step": 15 }, { "epoch": 1.5802469135802468, "grad_norm": 3.000741481781006, "learning_rate": 5.270694542927088e-05, "loss": 2.879, "step": 16 }, { "epoch": 1.6790123456790123, "grad_norm": 5.924734115600586, "learning_rate": 4.729305457072913e-05, "loss": 2.8802, "step": 17 }, { "epoch": 1.7777777777777777, "grad_norm": 3.4073195457458496, "learning_rate": 4.1910900172361764e-05, "loss": 2.6786, "step": 18 }, { "epoch": 1.876543209876543, "grad_norm": 5.1113362312316895, "learning_rate": 3.6623583073538966e-05, "loss": 2.9771, "step": 19 }, { "epoch": 1.9753086419753085, "grad_norm": 9.201824188232422, "learning_rate": 3.149309223300428e-05, "loss": 2.9374, "step": 20 }, { "epoch": 2.074074074074074, "grad_norm": 5.898633003234863, "learning_rate": 2.65795779650105e-05, "loss": 4.9788, "step": 21 }, { "epoch": 2.1728395061728394, "grad_norm": 5.2269511222839355, "learning_rate": 2.194064673188089e-05, "loss": 2.5893, "step": 22 }, { "epoch": 2.271604938271605, "grad_norm": 2.710216760635376, "learning_rate": 1.7630685760908622e-05, "loss": 2.1731, "step": 23 }, { "epoch": 2.3703703703703702, "grad_norm": 5.270141124725342, "learning_rate": 1.3700225403843469e-05, "loss": 2.9662, "step": 24 }, { "epoch": 2.4691358024691357, "grad_norm": 6.2401933670043945, "learning_rate": 1.0195346714717813e-05, "loss": 2.7014, "step": 25 }, { "epoch": 2.4691358024691357, "eval_loss": 2.814382314682007, "eval_runtime": 0.4823, "eval_samples_per_second": 140.979, "eval_steps_per_second": 18.659, "step": 25 }, { "epoch": 2.567901234567901, "grad_norm": 3.632655143737793, "learning_rate": 7.157141191620548e-06, "loss": 2.9517, "step": 26 }, { "epoch": 2.6666666666666665, "grad_norm": 5.402245998382568, "learning_rate": 4.621229016452156e-06, "loss": 2.4943, "step": 27 }, { "epoch": 2.765432098765432, "grad_norm": 2.5133705139160156, "learning_rate": 2.6173414408598827e-06, "loss": 2.0811, "step": 28 }, { "epoch": 2.8641975308641974, "grad_norm": 5.495011329650879, "learning_rate": 1.1689722144956671e-06, "loss": 3.0137, "step": 29 }, { "epoch": 2.962962962962963, "grad_norm": 6.708992004394531, "learning_rate": 2.9310214228202013e-07, "loss": 2.7247, "step": 30 }, { "epoch": 3.0617283950617282, "grad_norm": 5.700857162475586, "learning_rate": 0.0, "loss": 5.0588, "step": 31 } ], "logging_steps": 1, "max_steps": 31, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.5775615131058176e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }