{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 1225, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04081632653061224, "grad_norm": 16.731555938720703, "learning_rate": 9.918367346938776e-06, "loss": 0.2616, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 10, "total_memory_available (GB)": 94.62 }, { "epoch": 0.08163265306122448, "grad_norm": 7.052234649658203, "learning_rate": 9.836734693877552e-06, "loss": 0.1555, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 20, "total_memory_available (GB)": 94.62 }, { "epoch": 0.12244897959183673, "grad_norm": 6.5298075675964355, "learning_rate": 9.755102040816327e-06, "loss": 0.1251, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 30, "total_memory_available (GB)": 94.62 }, { "epoch": 0.16326530612244897, "grad_norm": 4.405805587768555, "learning_rate": 9.673469387755103e-06, "loss": 0.1102, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 40, "total_memory_available (GB)": 94.62 }, { "epoch": 0.20408163265306123, "grad_norm": 4.870044708251953, "learning_rate": 9.591836734693878e-06, "loss": 0.1232, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 50, "total_memory_available (GB)": 94.62 }, { "epoch": 0.24489795918367346, "grad_norm": 1.6433866024017334, "learning_rate": 9.510204081632653e-06, "loss": 0.0797, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 60, "total_memory_available (GB)": 94.62 }, { "epoch": 0.2857142857142857, "grad_norm": 4.2432074546813965, "learning_rate": 9.42857142857143e-06, "loss": 0.1031, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 70, "total_memory_available (GB)": 94.62 }, { "epoch": 0.32653061224489793, "grad_norm": 2.0352487564086914, "learning_rate": 9.346938775510204e-06, "loss": 0.1115, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 80, "total_memory_available (GB)": 94.62 }, { "epoch": 0.3673469387755102, "grad_norm": 4.201560020446777, "learning_rate": 9.26530612244898e-06, "loss": 0.0817, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 90, "total_memory_available (GB)": 94.62 }, { "epoch": 0.40816326530612246, "grad_norm": 3.618368625640869, "learning_rate": 9.183673469387756e-06, "loss": 0.0768, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 100, "total_memory_available (GB)": 94.62 }, { "epoch": 0.4489795918367347, "grad_norm": 4.793916702270508, "learning_rate": 9.102040816326532e-06, "loss": 0.071, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 110, "total_memory_available (GB)": 94.62 }, { "epoch": 0.4897959183673469, "grad_norm": 2.3223495483398438, "learning_rate": 9.020408163265307e-06, "loss": 0.0707, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 120, "total_memory_available (GB)": 94.62 }, { "epoch": 0.5306122448979592, "grad_norm": 3.5389153957366943, "learning_rate": 8.938775510204082e-06, "loss": 0.0599, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 130, "total_memory_available (GB)": 94.62 }, { "epoch": 0.5714285714285714, "grad_norm": 1.586653232574463, "learning_rate": 8.857142857142858e-06, "loss": 0.0491, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 140, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6122448979591837, "grad_norm": 1.5236841440200806, "learning_rate": 8.775510204081633e-06, "loss": 0.0632, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 150, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6530612244897959, "grad_norm": 2.752020835876465, "learning_rate": 8.69387755102041e-06, "loss": 0.0722, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 160, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6938775510204082, "grad_norm": 7.606927394866943, "learning_rate": 8.612244897959184e-06, "loss": 0.0756, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 170, "total_memory_available (GB)": 94.62 }, { "epoch": 0.7346938775510204, "grad_norm": 1.5622702836990356, "learning_rate": 8.530612244897961e-06, "loss": 0.0617, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 180, "total_memory_available (GB)": 94.62 }, { "epoch": 0.7755102040816326, "grad_norm": 0.9614956378936768, "learning_rate": 8.448979591836736e-06, "loss": 0.0572, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 190, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8163265306122449, "grad_norm": 0.7814755439758301, "learning_rate": 8.36734693877551e-06, "loss": 0.0636, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 200, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8571428571428571, "grad_norm": 1.352851390838623, "learning_rate": 8.285714285714287e-06, "loss": 0.0648, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 210, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8979591836734694, "grad_norm": 1.6814969778060913, "learning_rate": 8.204081632653062e-06, "loss": 0.0604, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 220, "total_memory_available (GB)": 94.62 }, { "epoch": 0.9387755102040817, "grad_norm": 0.859993040561676, "learning_rate": 8.122448979591837e-06, "loss": 0.0549, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 230, "total_memory_available (GB)": 94.62 }, { "epoch": 0.9795918367346939, "grad_norm": 0.6439819931983948, "learning_rate": 8.040816326530613e-06, "loss": 0.0493, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 240, "total_memory_available (GB)": 94.62 }, { "epoch": 1.0204081632653061, "grad_norm": 0.8465150594711304, "learning_rate": 7.959183673469388e-06, "loss": 0.0624, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 250, "total_memory_available (GB)": 94.62 }, { "epoch": 1.0612244897959184, "grad_norm": 1.0257333517074585, "learning_rate": 7.877551020408164e-06, "loss": 0.056, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 260, "total_memory_available (GB)": 94.62 }, { "epoch": 1.1020408163265305, "grad_norm": 2.619938850402832, "learning_rate": 7.79591836734694e-06, "loss": 0.0648, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 270, "total_memory_available (GB)": 94.62 }, { "epoch": 1.1428571428571428, "grad_norm": 0.4946042001247406, "learning_rate": 7.714285714285716e-06, "loss": 0.0586, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 280, "total_memory_available (GB)": 94.62 }, { "epoch": 1.183673469387755, "grad_norm": 1.0154733657836914, "learning_rate": 7.63265306122449e-06, "loss": 0.0505, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 290, "total_memory_available (GB)": 94.62 }, { "epoch": 1.2244897959183674, "grad_norm": 1.0347952842712402, "learning_rate": 7.551020408163265e-06, "loss": 0.0646, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 300, "total_memory_available (GB)": 94.62 }, { "epoch": 1.2653061224489797, "grad_norm": 0.7844366431236267, "learning_rate": 7.469387755102041e-06, "loss": 0.0676, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 310, "total_memory_available (GB)": 94.62 }, { "epoch": 1.306122448979592, "grad_norm": 1.1971337795257568, "learning_rate": 7.387755102040817e-06, "loss": 0.0499, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 320, "total_memory_available (GB)": 94.62 }, { "epoch": 1.346938775510204, "grad_norm": 0.6674404740333557, "learning_rate": 7.306122448979592e-06, "loss": 0.0602, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 330, "total_memory_available (GB)": 94.62 }, { "epoch": 1.3877551020408163, "grad_norm": 1.511208415031433, "learning_rate": 7.224489795918368e-06, "loss": 0.0547, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 340, "total_memory_available (GB)": 94.62 }, { "epoch": 1.4285714285714286, "grad_norm": 0.5328841209411621, "learning_rate": 7.1428571428571436e-06, "loss": 0.0486, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 350, "total_memory_available (GB)": 94.62 }, { "epoch": 1.469387755102041, "grad_norm": 1.464439034461975, "learning_rate": 7.061224489795919e-06, "loss": 0.0464, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 360, "total_memory_available (GB)": 94.62 }, { "epoch": 1.510204081632653, "grad_norm": 0.834863543510437, "learning_rate": 6.979591836734695e-06, "loss": 0.0591, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 370, "total_memory_available (GB)": 94.62 }, { "epoch": 1.5510204081632653, "grad_norm": 0.5399609208106995, "learning_rate": 6.8979591836734705e-06, "loss": 0.0464, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 380, "total_memory_available (GB)": 94.62 }, { "epoch": 1.5918367346938775, "grad_norm": 0.8577661514282227, "learning_rate": 6.816326530612245e-06, "loss": 0.0654, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 390, "total_memory_available (GB)": 94.62 }, { "epoch": 1.6326530612244898, "grad_norm": 0.5057955384254456, "learning_rate": 6.734693877551021e-06, "loss": 0.0609, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 400, "total_memory_available (GB)": 94.62 }, { "epoch": 1.6734693877551021, "grad_norm": 0.9135333895683289, "learning_rate": 6.653061224489797e-06, "loss": 0.0607, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 410, "total_memory_available (GB)": 94.62 }, { "epoch": 1.7142857142857144, "grad_norm": 2.9697179794311523, "learning_rate": 6.571428571428572e-06, "loss": 0.054, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 420, "total_memory_available (GB)": 94.62 }, { "epoch": 1.7551020408163265, "grad_norm": 0.3473312556743622, "learning_rate": 6.489795918367348e-06, "loss": 0.0685, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 430, "total_memory_available (GB)": 94.62 }, { "epoch": 1.7959183673469388, "grad_norm": 1.4528335332870483, "learning_rate": 6.408163265306124e-06, "loss": 0.0611, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 440, "total_memory_available (GB)": 94.62 }, { "epoch": 1.836734693877551, "grad_norm": 0.48578280210494995, "learning_rate": 6.326530612244899e-06, "loss": 0.0438, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 450, "total_memory_available (GB)": 94.62 }, { "epoch": 1.8775510204081631, "grad_norm": 0.3472760021686554, "learning_rate": 6.244897959183675e-06, "loss": 0.0544, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 460, "total_memory_available (GB)": 94.62 }, { "epoch": 1.9183673469387754, "grad_norm": 1.0984327793121338, "learning_rate": 6.163265306122449e-06, "loss": 0.0438, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 470, "total_memory_available (GB)": 94.62 }, { "epoch": 1.9591836734693877, "grad_norm": 0.20147933065891266, "learning_rate": 6.0816326530612245e-06, "loss": 0.0518, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 480, "total_memory_available (GB)": 94.62 }, { "epoch": 2.0, "grad_norm": 1.1583309173583984, "learning_rate": 6e-06, "loss": 0.0637, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 490, "total_memory_available (GB)": 94.62 }, { "epoch": 2.0408163265306123, "grad_norm": 0.6601622104644775, "learning_rate": 5.918367346938776e-06, "loss": 0.0596, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.0816326530612246, "grad_norm": 0.5227305293083191, "learning_rate": 5.8367346938775515e-06, "loss": 0.0493, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 510, "total_memory_available (GB)": 94.62 }, { "epoch": 2.122448979591837, "grad_norm": 0.8996191620826721, "learning_rate": 5.755102040816327e-06, "loss": 0.0461, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 520, "total_memory_available (GB)": 94.62 }, { "epoch": 2.163265306122449, "grad_norm": 1.0684189796447754, "learning_rate": 5.673469387755103e-06, "loss": 0.0629, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 530, "total_memory_available (GB)": 94.62 }, { "epoch": 2.204081632653061, "grad_norm": 0.5558530688285828, "learning_rate": 5.591836734693878e-06, "loss": 0.0581, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 540, "total_memory_available (GB)": 94.62 }, { "epoch": 2.2448979591836733, "grad_norm": 1.1996757984161377, "learning_rate": 5.510204081632653e-06, "loss": 0.0626, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 550, "total_memory_available (GB)": 94.62 }, { "epoch": 2.2857142857142856, "grad_norm": 1.2928632497787476, "learning_rate": 5.428571428571429e-06, "loss": 0.0575, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 560, "total_memory_available (GB)": 94.62 }, { "epoch": 2.326530612244898, "grad_norm": 0.7934871912002563, "learning_rate": 5.3469387755102045e-06, "loss": 0.0577, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 570, "total_memory_available (GB)": 94.62 }, { "epoch": 2.36734693877551, "grad_norm": 3.946485757827759, "learning_rate": 5.26530612244898e-06, "loss": 0.0663, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 580, "total_memory_available (GB)": 94.62 }, { "epoch": 2.4081632653061225, "grad_norm": 0.43567588925361633, "learning_rate": 5.183673469387756e-06, "loss": 0.0539, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 590, "total_memory_available (GB)": 94.62 }, { "epoch": 2.4489795918367347, "grad_norm": 0.5725533962249756, "learning_rate": 5.1020408163265315e-06, "loss": 0.0438, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 600, "total_memory_available (GB)": 94.62 }, { "epoch": 2.489795918367347, "grad_norm": 0.44328320026397705, "learning_rate": 5.020408163265307e-06, "loss": 0.041, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 610, "total_memory_available (GB)": 94.62 }, { "epoch": 2.5306122448979593, "grad_norm": 1.338100790977478, "learning_rate": 4.938775510204082e-06, "loss": 0.0424, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 620, "total_memory_available (GB)": 94.62 }, { "epoch": 2.571428571428571, "grad_norm": 0.92643803358078, "learning_rate": 4.857142857142858e-06, "loss": 0.0811, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 630, "total_memory_available (GB)": 94.62 }, { "epoch": 2.612244897959184, "grad_norm": 1.1147398948669434, "learning_rate": 4.775510204081633e-06, "loss": 0.0492, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 640, "total_memory_available (GB)": 94.62 }, { "epoch": 2.6530612244897958, "grad_norm": 0.6104307174682617, "learning_rate": 4.693877551020409e-06, "loss": 0.0468, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 650, "total_memory_available (GB)": 94.62 }, { "epoch": 2.693877551020408, "grad_norm": 0.9826134443283081, "learning_rate": 4.612244897959184e-06, "loss": 0.0471, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 660, "total_memory_available (GB)": 94.62 }, { "epoch": 2.7346938775510203, "grad_norm": 0.7680672407150269, "learning_rate": 4.530612244897959e-06, "loss": 0.0858, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 670, "total_memory_available (GB)": 94.62 }, { "epoch": 2.7755102040816326, "grad_norm": 0.9682340025901794, "learning_rate": 4.448979591836735e-06, "loss": 0.0484, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 680, "total_memory_available (GB)": 94.62 }, { "epoch": 2.816326530612245, "grad_norm": 0.37712323665618896, "learning_rate": 4.367346938775511e-06, "loss": 0.0443, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 690, "total_memory_available (GB)": 94.62 }, { "epoch": 2.857142857142857, "grad_norm": 0.34970754384994507, "learning_rate": 4.2857142857142855e-06, "loss": 0.0434, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 700, "total_memory_available (GB)": 94.62 }, { "epoch": 2.8979591836734695, "grad_norm": 0.9949877262115479, "learning_rate": 4.204081632653061e-06, "loss": 0.0553, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 710, "total_memory_available (GB)": 94.62 }, { "epoch": 2.938775510204082, "grad_norm": 1.4436949491500854, "learning_rate": 4.122448979591837e-06, "loss": 0.0583, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 720, "total_memory_available (GB)": 94.62 }, { "epoch": 2.979591836734694, "grad_norm": 0.1619979739189148, "learning_rate": 4.040816326530612e-06, "loss": 0.0336, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 730, "total_memory_available (GB)": 94.62 }, { "epoch": 3.020408163265306, "grad_norm": 1.2799049615859985, "learning_rate": 3.959183673469388e-06, "loss": 0.0536, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 740, "total_memory_available (GB)": 94.62 }, { "epoch": 3.061224489795918, "grad_norm": 0.5613189935684204, "learning_rate": 3.877551020408164e-06, "loss": 0.062, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 750, "total_memory_available (GB)": 94.62 }, { "epoch": 3.1020408163265305, "grad_norm": 0.827383279800415, "learning_rate": 3.795918367346939e-06, "loss": 0.0527, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 760, "total_memory_available (GB)": 94.62 }, { "epoch": 3.142857142857143, "grad_norm": 0.6983201503753662, "learning_rate": 3.7142857142857146e-06, "loss": 0.0691, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 770, "total_memory_available (GB)": 94.62 }, { "epoch": 3.183673469387755, "grad_norm": 1.0466923713684082, "learning_rate": 3.6326530612244903e-06, "loss": 0.0644, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 780, "total_memory_available (GB)": 94.62 }, { "epoch": 3.2244897959183674, "grad_norm": 0.3068871796131134, "learning_rate": 3.5510204081632655e-06, "loss": 0.0524, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 790, "total_memory_available (GB)": 94.62 }, { "epoch": 3.2653061224489797, "grad_norm": 0.40160393714904785, "learning_rate": 3.469387755102041e-06, "loss": 0.0434, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 800, "total_memory_available (GB)": 94.62 }, { "epoch": 3.306122448979592, "grad_norm": 0.880214512348175, "learning_rate": 3.3877551020408168e-06, "loss": 0.056, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 810, "total_memory_available (GB)": 94.62 }, { "epoch": 3.3469387755102042, "grad_norm": 0.9539953470230103, "learning_rate": 3.3061224489795924e-06, "loss": 0.0464, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 820, "total_memory_available (GB)": 94.62 }, { "epoch": 3.387755102040816, "grad_norm": 0.24522298574447632, "learning_rate": 3.2244897959183672e-06, "loss": 0.0485, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 830, "total_memory_available (GB)": 94.62 }, { "epoch": 3.4285714285714284, "grad_norm": 0.4946345388889313, "learning_rate": 3.142857142857143e-06, "loss": 0.0527, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 840, "total_memory_available (GB)": 94.62 }, { "epoch": 3.4693877551020407, "grad_norm": 0.4724675416946411, "learning_rate": 3.0612244897959185e-06, "loss": 0.0813, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 850, "total_memory_available (GB)": 94.62 }, { "epoch": 3.510204081632653, "grad_norm": 0.9907402396202087, "learning_rate": 2.979591836734694e-06, "loss": 0.0447, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 860, "total_memory_available (GB)": 94.62 }, { "epoch": 3.5510204081632653, "grad_norm": 0.19696560502052307, "learning_rate": 2.8979591836734694e-06, "loss": 0.0635, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 870, "total_memory_available (GB)": 94.62 }, { "epoch": 3.5918367346938775, "grad_norm": 0.7972800135612488, "learning_rate": 2.816326530612245e-06, "loss": 0.0438, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 880, "total_memory_available (GB)": 94.62 }, { "epoch": 3.63265306122449, "grad_norm": 0.21193134784698486, "learning_rate": 2.7346938775510207e-06, "loss": 0.029, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 890, "total_memory_available (GB)": 94.62 }, { "epoch": 3.673469387755102, "grad_norm": 0.6128103137016296, "learning_rate": 2.6530612244897964e-06, "loss": 0.0514, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 900, "total_memory_available (GB)": 94.62 }, { "epoch": 3.7142857142857144, "grad_norm": 0.8112168312072754, "learning_rate": 2.571428571428571e-06, "loss": 0.061, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 910, "total_memory_available (GB)": 94.62 }, { "epoch": 3.7551020408163263, "grad_norm": 0.18730562925338745, "learning_rate": 2.489795918367347e-06, "loss": 0.0546, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 920, "total_memory_available (GB)": 94.62 }, { "epoch": 3.795918367346939, "grad_norm": 0.3866801857948303, "learning_rate": 2.4081632653061225e-06, "loss": 0.0501, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 930, "total_memory_available (GB)": 94.62 }, { "epoch": 3.836734693877551, "grad_norm": 0.8816384077072144, "learning_rate": 2.326530612244898e-06, "loss": 0.0489, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 940, "total_memory_available (GB)": 94.62 }, { "epoch": 3.877551020408163, "grad_norm": 0.5572797656059265, "learning_rate": 2.244897959183674e-06, "loss": 0.0599, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 950, "total_memory_available (GB)": 94.62 }, { "epoch": 3.9183673469387754, "grad_norm": 0.38238489627838135, "learning_rate": 2.1632653061224495e-06, "loss": 0.0497, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 960, "total_memory_available (GB)": 94.62 }, { "epoch": 3.9591836734693877, "grad_norm": 0.6144959926605225, "learning_rate": 2.0816326530612247e-06, "loss": 0.0741, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 970, "total_memory_available (GB)": 94.62 }, { "epoch": 4.0, "grad_norm": 0.6087101697921753, "learning_rate": 2.0000000000000003e-06, "loss": 0.0703, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 980, "total_memory_available (GB)": 94.62 }, { "epoch": 4.040816326530612, "grad_norm": 0.5187469720840454, "learning_rate": 1.9183673469387756e-06, "loss": 0.0482, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 990, "total_memory_available (GB)": 94.62 }, { "epoch": 4.081632653061225, "grad_norm": 1.248850703239441, "learning_rate": 1.8367346938775512e-06, "loss": 0.0631, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1000, "total_memory_available (GB)": 94.62 }, { "epoch": 4.122448979591836, "grad_norm": 0.5806276798248291, "learning_rate": 1.7551020408163267e-06, "loss": 0.0629, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1010, "total_memory_available (GB)": 94.62 }, { "epoch": 4.163265306122449, "grad_norm": 0.3565673828125, "learning_rate": 1.6734693877551023e-06, "loss": 0.0407, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1020, "total_memory_available (GB)": 94.62 }, { "epoch": 4.204081632653061, "grad_norm": 0.6948438882827759, "learning_rate": 1.5918367346938775e-06, "loss": 0.053, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1030, "total_memory_available (GB)": 94.62 }, { "epoch": 4.244897959183674, "grad_norm": 0.5245764851570129, "learning_rate": 1.5102040816326532e-06, "loss": 0.0399, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1040, "total_memory_available (GB)": 94.62 }, { "epoch": 4.285714285714286, "grad_norm": 0.7932385802268982, "learning_rate": 1.4285714285714286e-06, "loss": 0.0502, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1050, "total_memory_available (GB)": 94.62 }, { "epoch": 4.326530612244898, "grad_norm": 0.30140048265457153, "learning_rate": 1.3469387755102043e-06, "loss": 0.046, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1060, "total_memory_available (GB)": 94.62 }, { "epoch": 4.36734693877551, "grad_norm": 0.570467472076416, "learning_rate": 1.2653061224489795e-06, "loss": 0.0487, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1070, "total_memory_available (GB)": 94.62 }, { "epoch": 4.408163265306122, "grad_norm": 0.43690067529678345, "learning_rate": 1.1836734693877552e-06, "loss": 0.0521, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1080, "total_memory_available (GB)": 94.62 }, { "epoch": 4.448979591836735, "grad_norm": 0.5298590660095215, "learning_rate": 1.1020408163265308e-06, "loss": 0.0506, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1090, "total_memory_available (GB)": 94.62 }, { "epoch": 4.489795918367347, "grad_norm": 0.2310735136270523, "learning_rate": 1.0204081632653063e-06, "loss": 0.036, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1100, "total_memory_available (GB)": 94.62 }, { "epoch": 4.530612244897959, "grad_norm": 0.13128583133220673, "learning_rate": 9.387755102040817e-07, "loss": 0.0463, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1110, "total_memory_available (GB)": 94.62 }, { "epoch": 4.571428571428571, "grad_norm": 0.7682464122772217, "learning_rate": 8.571428571428572e-07, "loss": 0.0403, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1120, "total_memory_available (GB)": 94.62 }, { "epoch": 4.612244897959184, "grad_norm": 0.6608971953392029, "learning_rate": 7.755102040816327e-07, "loss": 0.0543, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1130, "total_memory_available (GB)": 94.62 }, { "epoch": 4.653061224489796, "grad_norm": 0.8803687691688538, "learning_rate": 6.938775510204082e-07, "loss": 0.0728, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1140, "total_memory_available (GB)": 94.62 }, { "epoch": 4.6938775510204085, "grad_norm": 4.121662139892578, "learning_rate": 6.122448979591837e-07, "loss": 0.0514, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1150, "total_memory_available (GB)": 94.62 }, { "epoch": 4.73469387755102, "grad_norm": 0.7500938773155212, "learning_rate": 5.306122448979592e-07, "loss": 0.0612, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1160, "total_memory_available (GB)": 94.62 }, { "epoch": 4.775510204081632, "grad_norm": 0.6001973748207092, "learning_rate": 4.489795918367347e-07, "loss": 0.0549, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1170, "total_memory_available (GB)": 94.62 }, { "epoch": 4.816326530612245, "grad_norm": 0.7522645592689514, "learning_rate": 3.6734693877551025e-07, "loss": 0.0445, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1180, "total_memory_available (GB)": 94.62 }, { "epoch": 4.857142857142857, "grad_norm": 0.6640497446060181, "learning_rate": 2.8571428571428575e-07, "loss": 0.0542, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1190, "total_memory_available (GB)": 94.62 }, { "epoch": 4.8979591836734695, "grad_norm": 0.8031227588653564, "learning_rate": 2.0408163265306121e-07, "loss": 0.0728, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1200, "total_memory_available (GB)": 94.62 }, { "epoch": 4.938775510204081, "grad_norm": 0.39187708497047424, "learning_rate": 1.2244897959183673e-07, "loss": 0.065, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1210, "total_memory_available (GB)": 94.62 }, { "epoch": 4.979591836734694, "grad_norm": 3.809382915496826, "learning_rate": 4.0816326530612253e-08, "loss": 0.0417, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1220, "total_memory_available (GB)": 94.62 }, { "epoch": 5.0, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 1225, "total_flos": 3.0598946525952e+16, "total_memory_available (GB)": 94.62, "train_loss": 0.06098026679486644, "train_runtime": 1192.2443, "train_samples_per_second": 46.607, "train_steps_per_second": 1.166 } ], "logging_steps": 10, "max_steps": 1225, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.0598946525952e+16, "train_batch_size": 40, "trial_name": null, "trial_params": null }