{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 5, "global_step": 2588, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.019319938176197836, "grad_norm": 0.00098650180734694, "learning_rate": 0.009615384615384616, "loss": 0.9907, "step": 50 }, { "epoch": 0.03863987635239567, "grad_norm": 0.000779022928327322, "learning_rate": 0.019230769230769232, "loss": 0.9647, "step": 100 }, { "epoch": 0.05795981452859351, "grad_norm": 0.000611725845374167, "learning_rate": 0.028846153846153844, "loss": 0.9412, "step": 150 }, { "epoch": 0.07727975270479134, "grad_norm": 0.0005838441429659724, "learning_rate": 0.029457236842105262, "loss": 0.9322, "step": 200 }, { "epoch": 0.09659969088098919, "grad_norm": 0.0007691067876294255, "learning_rate": 0.028840460526315788, "loss": 0.9131, "step": 250 }, { "epoch": 0.11591962905718702, "grad_norm": 0.0005935626104474068, "learning_rate": 0.028223684210526314, "loss": 0.9104, "step": 300 }, { "epoch": 0.13523956723338484, "grad_norm": 0.0006890599033795297, "learning_rate": 0.02760690789473684, "loss": 0.9214, "step": 350 }, { "epoch": 0.1545595054095827, "grad_norm": 0.0006042916793376207, "learning_rate": 0.02699013157894737, "loss": 0.9, "step": 400 }, { "epoch": 0.17387944358578053, "grad_norm": 0.0005447549629025161, "learning_rate": 0.026373355263157892, "loss": 0.9097, "step": 450 }, { "epoch": 0.19319938176197837, "grad_norm": 0.0004888740368187428, "learning_rate": 0.02575657894736842, "loss": 0.9037, "step": 500 }, { "epoch": 0.2125193199381762, "grad_norm": 0.0008238813607022166, "learning_rate": 0.025139802631578945, "loss": 0.899, "step": 550 }, { "epoch": 0.23183925811437403, "grad_norm": 0.000727724633179605, "learning_rate": 0.024523026315789474, "loss": 0.923, "step": 600 }, { "epoch": 0.2511591962905719, "grad_norm": 0.0005605846527032554, "learning_rate": 0.02390625, "loss": 0.9031, "step": 650 }, { "epoch": 0.2704791344667697, "grad_norm": 0.0007705381722189486, "learning_rate": 0.023289473684210523, "loss": 0.9013, "step": 700 }, { "epoch": 0.28979907264296756, "grad_norm": 0.0007164838025346398, "learning_rate": 0.022672697368421053, "loss": 0.8971, "step": 750 }, { "epoch": 0.3091190108191654, "grad_norm": 0.000717374321538955, "learning_rate": 0.02205592105263158, "loss": 0.8866, "step": 800 }, { "epoch": 0.3284389489953632, "grad_norm": 0.0006394012016244233, "learning_rate": 0.021439144736842105, "loss": 0.899, "step": 850 }, { "epoch": 0.34775888717156106, "grad_norm": 0.0006252205348573625, "learning_rate": 0.02082236842105263, "loss": 0.894, "step": 900 }, { "epoch": 0.3670788253477589, "grad_norm": 0.0006903470493853092, "learning_rate": 0.020205592105263157, "loss": 0.8858, "step": 950 }, { "epoch": 0.38639876352395675, "grad_norm": 0.0008341589127667248, "learning_rate": 0.019588815789473683, "loss": 0.9168, "step": 1000 }, { "epoch": 0.40571870170015456, "grad_norm": 0.0005771280848421156, "learning_rate": 0.01897203947368421, "loss": 0.9117, "step": 1050 }, { "epoch": 0.4250386398763524, "grad_norm": 0.000522978079970926, "learning_rate": 0.018355263157894736, "loss": 0.8939, "step": 1100 }, { "epoch": 0.44435857805255025, "grad_norm": 0.0005450574099086225, "learning_rate": 0.017738486842105265, "loss": 0.9049, "step": 1150 }, { "epoch": 0.46367851622874806, "grad_norm": 0.0005660468013957143, "learning_rate": 0.017121710526315788, "loss": 0.8944, "step": 1200 }, { "epoch": 0.48299845440494593, "grad_norm": 
0.0006663696258328855, "learning_rate": 0.016504934210526314, "loss": 0.8971, "step": 1250 }, { "epoch": 0.5023183925811437, "grad_norm": 0.0005968479672446847, "learning_rate": 0.01588815789473684, "loss": 0.8917, "step": 1300 }, { "epoch": 0.5216383307573416, "grad_norm": 0.0007491153082810342, "learning_rate": 0.01527138157894737, "loss": 0.8829, "step": 1350 }, { "epoch": 0.5409582689335394, "grad_norm": 0.0006275599589571357, "learning_rate": 0.014654605263157894, "loss": 0.9058, "step": 1400 }, { "epoch": 0.5602782071097373, "grad_norm": 0.0007617810624651611, "learning_rate": 0.01403782894736842, "loss": 0.9051, "step": 1450 }, { "epoch": 0.5795981452859351, "grad_norm": 0.0006214394234120846, "learning_rate": 0.013421052631578946, "loss": 0.8879, "step": 1500 }, { "epoch": 0.5989180834621329, "grad_norm": 0.0006560624460689723, "learning_rate": 0.012804276315789473, "loss": 0.8991, "step": 1550 }, { "epoch": 0.6182380216383307, "grad_norm": 0.0007683933363296092, "learning_rate": 0.0121875, "loss": 0.9081, "step": 1600 }, { "epoch": 0.6375579598145286, "grad_norm": 0.0005783849046565592, "learning_rate": 0.011570723684210527, "loss": 0.9067, "step": 1650 }, { "epoch": 0.6568778979907264, "grad_norm": 0.0007958198548294604, "learning_rate": 0.010953947368421053, "loss": 0.885, "step": 1700 }, { "epoch": 0.6761978361669243, "grad_norm": 0.0006095783319324255, "learning_rate": 0.010337171052631579, "loss": 0.8928, "step": 1750 }, { "epoch": 0.6955177743431221, "grad_norm": 0.000699816329870373, "learning_rate": 0.009720394736842105, "loss": 0.903, "step": 1800 }, { "epoch": 0.7148377125193199, "grad_norm": 0.0008128538611344993, "learning_rate": 0.009103618421052631, "loss": 0.9036, "step": 1850 }, { "epoch": 0.7341576506955177, "grad_norm": 0.0006495247362181544, "learning_rate": 0.008486842105263157, "loss": 0.8907, "step": 1900 }, { "epoch": 0.7534775888717156, "grad_norm": 0.0005265743238851428, "learning_rate": 0.007870065789473685, "loss": 0.8843, "step": 1950 }, { "epoch": 0.7727975270479135, "grad_norm": 0.0006601494387723505, "learning_rate": 0.0072532894736842095, "loss": 0.8925, "step": 2000 }, { "epoch": 0.7921174652241113, "grad_norm": 0.0005823367391712964, "learning_rate": 0.0066365131578947365, "loss": 0.8954, "step": 2050 }, { "epoch": 0.8114374034003091, "grad_norm": 0.0005229181842878461, "learning_rate": 0.0060197368421052635, "loss": 0.903, "step": 2100 }, { "epoch": 0.8307573415765069, "grad_norm": 0.0005145368631929159, "learning_rate": 0.00540296052631579, "loss": 0.8923, "step": 2150 }, { "epoch": 0.8500772797527048, "grad_norm": 0.0006071292445994914, "learning_rate": 0.004786184210526316, "loss": 0.8804, "step": 2200 }, { "epoch": 0.8693972179289027, "grad_norm": 0.0006730407476425171, "learning_rate": 0.004169407894736842, "loss": 0.8919, "step": 2250 }, { "epoch": 0.8887171561051005, "grad_norm": 0.0006455178954638541, "learning_rate": 0.003552631578947368, "loss": 0.896, "step": 2300 }, { "epoch": 0.9080370942812983, "grad_norm": 0.0004997382056899369, "learning_rate": 0.002935855263157895, "loss": 0.8921, "step": 2350 }, { "epoch": 0.9273570324574961, "grad_norm": 0.00045192165998741984, "learning_rate": 0.002319078947368421, "loss": 0.8839, "step": 2400 }, { "epoch": 0.9466769706336939, "grad_norm": 0.0004822098126169294, "learning_rate": 0.0017023026315789475, "loss": 0.8988, "step": 2450 }, { "epoch": 0.9659969088098919, "grad_norm": 0.0005721400957554579, "learning_rate": 0.0010855263157894736, "loss": 0.9045, "step": 2500 }, { "epoch": 
0.9853168469860897, "grad_norm": 0.0005698847235180438, "learning_rate": 0.00046875, "loss": 0.893, "step": 2550 }, { "epoch": 1.0, "eval_loss": 0.8954795002937317, "eval_runtime": 1619.6937, "eval_samples_per_second": 6.391, "eval_steps_per_second": 0.799, "step": 2588 } ], "logging_steps": 50, "max_steps": 2588, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.394319248976609e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }
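The object above follows the layout of a Hugging Face Trainer `trainer_state.json` checkpoint file: `log_history` holds one record per `logging_steps` (here every 50 steps) plus a final evaluation record at step 2588. As a minimal sketch of how the structure can be consumed (assuming the JSON is saved as `trainer_state.json`; the file and output names are illustrative, not taken from the source), the snippet below reads `log_history` and plots the logged training loss and learning-rate schedule against `step`:

```python
import json

import matplotlib.pyplot as plt

# Load the trainer state shown above (the path is an assumption for illustration).
with open("trainer_state.json") as f:
    state = json.load(f)

# Training entries carry "loss"; the final evaluation entry carries "eval_loss" instead,
# so filtering on "loss" keeps it out of the training curve.
train_logs = [entry for entry in state["log_history"] if "loss" in entry]

steps = [entry["step"] for entry in train_logs]
losses = [entry["loss"] for entry in train_logs]
lrs = [entry["learning_rate"] for entry in train_logs]

fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True)
ax_loss.plot(steps, losses)
ax_loss.set_ylabel("training loss")
ax_lr.plot(steps, lrs)
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("step")
fig.savefig("trainer_state_curves.png")  # output file name is illustrative
```

Read this way, the state records a short warm-up (learning rate rising to about 0.0295 by step 200) followed by a roughly linear decay toward zero, with training loss levelling off near 0.89 and an `eval_loss` of 0.8955 at the end of the single epoch.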