{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.08870490833826139, "eval_steps": 500, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5.4166666666666664e-05, "loss": 2.8488, "step": 1 }, { "epoch": 0.0, "learning_rate": 0.00010833333333333333, "loss": 2.7815, "step": 2 }, { "epoch": 0.01, "learning_rate": 0.0001625, "loss": 2.8002, "step": 3 }, { "epoch": 0.01, "learning_rate": 0.00021666666666666666, "loss": 2.7473, "step": 4 }, { "epoch": 0.01, "learning_rate": 0.0002708333333333333, "loss": 2.4233, "step": 5 }, { "epoch": 0.01, "learning_rate": 0.000325, "loss": 1.9676, "step": 6 }, { "epoch": 0.01, "learning_rate": 0.00037916666666666665, "loss": 1.7562, "step": 7 }, { "epoch": 0.01, "learning_rate": 0.0004333333333333333, "loss": 1.3949, "step": 8 }, { "epoch": 0.02, "learning_rate": 0.0004875, "loss": 1.2908, "step": 9 }, { "epoch": 0.02, "learning_rate": 0.0005416666666666666, "loss": 1.2542, "step": 10 }, { "epoch": 0.02, "learning_rate": 0.0005958333333333333, "loss": 1.2959, "step": 11 }, { "epoch": 0.02, "learning_rate": 0.00065, "loss": 1.1706, "step": 12 }, { "epoch": 0.02, "learning_rate": 0.0006499947173877214, "loss": 1.0829, "step": 13 }, { "epoch": 0.02, "learning_rate": 0.0006499788697226147, "loss": 1.128, "step": 14 }, { "epoch": 0.03, "learning_rate": 0.0006499524575198621, "loss": 1.0847, "step": 15 }, { "epoch": 0.03, "learning_rate": 0.0006499154816380815, "loss": 1.1143, "step": 16 }, { "epoch": 0.03, "learning_rate": 0.0006498679432792988, "loss": 1.0751, "step": 17 }, { "epoch": 0.03, "learning_rate": 0.0006498098439889095, "loss": 1.179, "step": 18 }, { "epoch": 0.03, "learning_rate": 0.0006497411856556275, "loss": 1.0327, "step": 19 }, { "epoch": 0.04, "learning_rate": 0.0006496619705114241, "loss": 1.0672, "step": 20 }, { "epoch": 0.04, "learning_rate": 0.0006495722011314557, "loss": 1.1625, "step": 21 }, { "epoch": 0.04, "learning_rate": 0.0006494718804339797, "loss": 1.0751, "step": 22 }, { "epoch": 0.04, "learning_rate": 0.0006493610116802598, "loss": 0.996, "step": 23 }, { "epoch": 0.04, "learning_rate": 0.0006492395984744599, "loss": 1.0478, "step": 24 }, { "epoch": 0.04, "learning_rate": 0.0006491076447635269, "loss": 1.064, "step": 25 }, { "epoch": 0.05, "learning_rate": 0.0006489651548370628, "loss": 0.9393, "step": 26 }, { "epoch": 0.05, "learning_rate": 0.0006488121333271846, "loss": 0.9282, "step": 27 }, { "epoch": 0.05, "learning_rate": 0.0006486485852083744, "loss": 1.0558, "step": 28 }, { "epoch": 0.05, "learning_rate": 0.0006484745157973169, "loss": 1.0015, "step": 29 }, { "epoch": 0.05, "learning_rate": 0.0006482899307527272, "loss": 1.0261, "step": 30 }, { "epoch": 0.05, "learning_rate": 0.0006480948360751669, "loss": 1.0507, "step": 31 }, { "epoch": 0.06, "learning_rate": 0.0006478892381068483, "loss": 1.0225, "step": 32 }, { "epoch": 0.06, "learning_rate": 0.0006476731435314292, "loss": 0.9411, "step": 33 }, { "epoch": 0.06, "learning_rate": 0.0006474465593737948, "loss": 0.9884, "step": 34 }, { "epoch": 0.06, "learning_rate": 0.0006472094929998295, "loss": 0.9892, "step": 35 }, { "epoch": 0.06, "learning_rate": 0.0006469619521161782, "loss": 1.0527, "step": 36 }, { "epoch": 0.07, "learning_rate": 0.0006467039447699945, "loss": 0.969, "step": 37 }, { "epoch": 0.07, "learning_rate": 0.0006464354793486803, "loss": 1.0009, "step": 38 }, { "epoch": 0.07, "learning_rate": 0.0006461565645796124, "loss": 1.0068, "step": 39 }, { "epoch": 0.07, "learning_rate": 0.0006458672095298589, "loss": 0.9626, "step": 40 }, { "epoch": 0.07, "learning_rate": 0.0006455674236058847, "loss": 0.934, "step": 41 }, { "epoch": 0.07, "learning_rate": 0.0006452572165532456, "loss": 1.0217, "step": 42 }, { "epoch": 0.08, "learning_rate": 0.0006449365984562712, "loss": 1.0036, "step": 43 }, { "epoch": 0.08, "learning_rate": 0.0006446055797377376, "loss": 0.9234, "step": 44 }, { "epoch": 0.08, "learning_rate": 0.000644264171158528, "loss": 0.9771, "step": 45 }, { "epoch": 0.08, "learning_rate": 0.0006439123838172836, "loss": 1.013, "step": 46 }, { "epoch": 0.08, "learning_rate": 0.0006435502291500418, "loss": 0.9154, "step": 47 }, { "epoch": 0.09, "learning_rate": 0.0006431777189298656, "loss": 0.9098, "step": 48 }, { "epoch": 0.09, "learning_rate": 0.0006427948652664599, "loss": 0.9243, "step": 49 }, { "epoch": 0.09, "learning_rate": 0.0006424016806057781, "loss": 0.9162, "step": 50 } ], "logging_steps": 1, "max_steps": 563, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "total_flos": 7.5793276624896e+16, "train_batch_size": 3, "trial_name": null, "trial_params": null }