{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 1225, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04081632653061224, "grad_norm": 9.589848518371582, "learning_rate": 9.918367346938776e-06, "loss": 0.2612, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 10, "total_memory_available (GB)": 94.62 }, { "epoch": 0.08163265306122448, "grad_norm": 8.167236328125, "learning_rate": 9.836734693877552e-06, "loss": 0.1542, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 20, "total_memory_available (GB)": 94.62 }, { "epoch": 0.12244897959183673, "grad_norm": 5.958656311035156, "learning_rate": 9.755102040816327e-06, "loss": 0.1236, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 30, "total_memory_available (GB)": 94.62 }, { "epoch": 0.16326530612244897, "grad_norm": 4.233438968658447, "learning_rate": 9.673469387755103e-06, "loss": 0.1105, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 40, "total_memory_available (GB)": 94.62 }, { "epoch": 0.20408163265306123, "grad_norm": 4.4948201179504395, "learning_rate": 9.591836734693878e-06, "loss": 0.1195, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 50, "total_memory_available (GB)": 94.62 }, { "epoch": 0.24489795918367346, "grad_norm": 1.827812910079956, "learning_rate": 9.510204081632653e-06, "loss": 0.0797, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 60, "total_memory_available (GB)": 94.62 }, { "epoch": 0.2857142857142857, "grad_norm": 3.889023780822754, "learning_rate": 9.42857142857143e-06, "loss": 0.0989, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 70, "total_memory_available (GB)": 94.62 }, { "epoch": 0.32653061224489793, "grad_norm": 1.9397954940795898, "learning_rate": 9.346938775510204e-06, "loss": 0.1102, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 80, "total_memory_available (GB)": 94.62 }, { "epoch": 0.3673469387755102, "grad_norm": 3.2782671451568604, "learning_rate": 9.26530612244898e-06, "loss": 0.0824, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 90, "total_memory_available (GB)": 94.62 }, { "epoch": 0.40816326530612246, "grad_norm": 5.711858749389648, "learning_rate": 9.183673469387756e-06, "loss": 0.0762, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 100, "total_memory_available (GB)": 94.62 }, { "epoch": 0.4489795918367347, "grad_norm": 3.395564317703247, "learning_rate": 9.102040816326532e-06, "loss": 0.0726, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 110, "total_memory_available (GB)": 94.62 }, { "epoch": 0.4897959183673469, "grad_norm": 2.2739310264587402, "learning_rate": 9.020408163265307e-06, "loss": 0.0705, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 120, "total_memory_available (GB)": 94.62 }, { "epoch": 0.5306122448979592, "grad_norm": 2.418794870376587, "learning_rate": 8.938775510204082e-06, "loss": 0.0595, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 130, "total_memory_available (GB)": 94.62 }, { "epoch": 0.5714285714285714, "grad_norm": 2.2896311283111572, "learning_rate": 8.857142857142858e-06, "loss": 0.0498, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 140, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6122448979591837, "grad_norm": 1.7899913787841797, "learning_rate": 8.775510204081633e-06, "loss": 0.0629, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 150, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6530612244897959, "grad_norm": 1.9983731508255005, "learning_rate": 8.69387755102041e-06, "loss": 0.071, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 160, "total_memory_available (GB)": 94.62 }, { "epoch": 0.6938775510204082, "grad_norm": 1.7236266136169434, "learning_rate": 8.612244897959184e-06, "loss": 0.0748, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 170, "total_memory_available (GB)": 94.62 }, { "epoch": 0.7346938775510204, "grad_norm": 1.1180106401443481, "learning_rate": 8.530612244897961e-06, "loss": 0.0599, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 180, "total_memory_available (GB)": 94.62 }, { "epoch": 0.7755102040816326, "grad_norm": 2.3057782649993896, "learning_rate": 8.448979591836736e-06, "loss": 0.0588, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 190, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8163265306122449, "grad_norm": 0.8334403038024902, "learning_rate": 8.36734693877551e-06, "loss": 0.0622, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 200, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8571428571428571, "grad_norm": 1.0548275709152222, "learning_rate": 8.285714285714287e-06, "loss": 0.0623, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 210, "total_memory_available (GB)": 94.62 }, { "epoch": 0.8979591836734694, "grad_norm": 1.3381606340408325, "learning_rate": 8.204081632653062e-06, "loss": 0.0605, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 220, "total_memory_available (GB)": 94.62 }, { "epoch": 0.9387755102040817, "grad_norm": 0.809412956237793, "learning_rate": 8.122448979591837e-06, "loss": 0.0566, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 230, "total_memory_available (GB)": 94.62 }, { "epoch": 0.9795918367346939, "grad_norm": 0.7182928323745728, "learning_rate": 8.040816326530613e-06, "loss": 0.0496, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 240, "total_memory_available (GB)": 94.62 }, { "epoch": 1.0204081632653061, "grad_norm": 1.1081018447875977, "learning_rate": 7.959183673469388e-06, "loss": 0.0601, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 250, "total_memory_available (GB)": 94.62 }, { "epoch": 1.0612244897959184, "grad_norm": 3.9478495121002197, "learning_rate": 7.877551020408164e-06, "loss": 0.0541, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 260, "total_memory_available (GB)": 94.62 }, { "epoch": 1.1020408163265305, "grad_norm": 0.8079606294631958, "learning_rate": 7.79591836734694e-06, "loss": 0.063, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 270, "total_memory_available (GB)": 94.62 }, { "epoch": 1.1428571428571428, "grad_norm": 1.4108704328536987, "learning_rate": 7.714285714285716e-06, "loss": 0.0579, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 280, "total_memory_available (GB)": 94.62 }, { "epoch": 1.183673469387755, "grad_norm": 1.544438123703003, "learning_rate": 7.63265306122449e-06, "loss": 0.0518, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 290, "total_memory_available (GB)": 94.62 }, { "epoch": 1.2244897959183674, "grad_norm": 1.0270023345947266, "learning_rate": 7.551020408163265e-06, "loss": 0.0658, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 300, "total_memory_available (GB)": 94.62 }, { "epoch": 1.2653061224489797, "grad_norm": 1.0347421169281006, "learning_rate": 7.469387755102041e-06, "loss": 0.0667, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 310, "total_memory_available (GB)": 94.62 }, { "epoch": 1.306122448979592, "grad_norm": 1.5818060636520386, "learning_rate": 7.387755102040817e-06, "loss": 0.0502, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 320, "total_memory_available (GB)": 94.62 }, { "epoch": 1.346938775510204, "grad_norm": 1.2030609846115112, "learning_rate": 7.306122448979592e-06, "loss": 0.0616, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 330, "total_memory_available (GB)": 94.62 }, { "epoch": 1.3877551020408163, "grad_norm": 1.2504222393035889, "learning_rate": 7.224489795918368e-06, "loss": 0.0543, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 340, "total_memory_available (GB)": 94.62 }, { "epoch": 1.4285714285714286, "grad_norm": 0.8420921564102173, "learning_rate": 7.1428571428571436e-06, "loss": 0.0488, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 350, "total_memory_available (GB)": 94.62 }, { "epoch": 1.469387755102041, "grad_norm": 1.517096757888794, "learning_rate": 7.061224489795919e-06, "loss": 0.0467, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 360, "total_memory_available (GB)": 94.62 }, { "epoch": 1.510204081632653, "grad_norm": 1.4490768909454346, "learning_rate": 6.979591836734695e-06, "loss": 0.0585, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 370, "total_memory_available (GB)": 94.62 }, { "epoch": 1.5510204081632653, "grad_norm": 0.5360353589057922, "learning_rate": 6.8979591836734705e-06, "loss": 0.046, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 380, "total_memory_available (GB)": 94.62 }, { "epoch": 1.5918367346938775, "grad_norm": 0.8193866610527039, "learning_rate": 6.816326530612245e-06, "loss": 0.0657, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 390, "total_memory_available (GB)": 94.62 }, { "epoch": 1.6326530612244898, "grad_norm": 0.5883302092552185, "learning_rate": 6.734693877551021e-06, "loss": 0.0609, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 400, "total_memory_available (GB)": 94.62 }, { "epoch": 1.6734693877551021, "grad_norm": 0.6720415949821472, "learning_rate": 6.653061224489797e-06, "loss": 0.0603, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 410, "total_memory_available (GB)": 94.62 }, { "epoch": 1.7142857142857144, "grad_norm": 1.368994951248169, "learning_rate": 6.571428571428572e-06, "loss": 0.0528, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 420, "total_memory_available (GB)": 94.62 }, { "epoch": 1.7551020408163265, "grad_norm": 0.25535887479782104, "learning_rate": 6.489795918367348e-06, "loss": 0.0674, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 430, "total_memory_available (GB)": 94.62 }, { "epoch": 1.7959183673469388, "grad_norm": 0.8839388489723206, "learning_rate": 6.408163265306124e-06, "loss": 0.0612, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 440, "total_memory_available (GB)": 94.62 }, { "epoch": 1.836734693877551, "grad_norm": 0.4119959771633148, "learning_rate": 6.326530612244899e-06, "loss": 0.0435, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 450, "total_memory_available (GB)": 94.62 }, { "epoch": 1.8775510204081631, "grad_norm": 0.45877301692962646, "learning_rate": 6.244897959183675e-06, "loss": 0.0543, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 460, "total_memory_available (GB)": 94.62 }, { "epoch": 1.9183673469387754, "grad_norm": 0.9594807624816895, "learning_rate": 6.163265306122449e-06, "loss": 0.0433, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 470, "total_memory_available (GB)": 94.62 }, { "epoch": 1.9591836734693877, "grad_norm": 0.168818861246109, "learning_rate": 6.0816326530612245e-06, "loss": 0.0497, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 480, "total_memory_available (GB)": 94.62 }, { "epoch": 2.0, "grad_norm": 1.1468336582183838, "learning_rate": 6e-06, "loss": 0.0651, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 490, "total_memory_available (GB)": 94.62 }, { "epoch": 2.0408163265306123, "grad_norm": 0.6001573204994202, "learning_rate": 5.918367346938776e-06, "loss": 0.0591, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 500, "total_memory_available (GB)": 94.62 }, { "epoch": 2.0816326530612246, "grad_norm": 0.8006247878074646, "learning_rate": 5.8367346938775515e-06, "loss": 0.0489, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 510, "total_memory_available (GB)": 94.62 }, { "epoch": 2.122448979591837, "grad_norm": 0.6839123964309692, "learning_rate": 5.755102040816327e-06, "loss": 0.0456, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 520, "total_memory_available (GB)": 94.62 }, { "epoch": 2.163265306122449, "grad_norm": 1.020135521888733, "learning_rate": 5.673469387755103e-06, "loss": 0.0627, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 530, "total_memory_available (GB)": 94.62 }, { "epoch": 2.204081632653061, "grad_norm": 0.7156575322151184, "learning_rate": 5.591836734693878e-06, "loss": 0.0584, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 540, "total_memory_available (GB)": 94.62 }, { "epoch": 2.2448979591836733, "grad_norm": 1.195730447769165, "learning_rate": 5.510204081632653e-06, "loss": 0.0628, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 550, "total_memory_available (GB)": 94.62 }, { "epoch": 2.2857142857142856, "grad_norm": 0.8824738264083862, "learning_rate": 5.428571428571429e-06, "loss": 0.0572, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 560, "total_memory_available (GB)": 94.62 }, { "epoch": 2.326530612244898, "grad_norm": 1.3354676961898804, "learning_rate": 5.3469387755102045e-06, "loss": 0.0585, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 570, "total_memory_available (GB)": 94.62 }, { "epoch": 2.36734693877551, "grad_norm": 1.9097795486450195, "learning_rate": 5.26530612244898e-06, "loss": 0.0668, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 580, "total_memory_available (GB)": 94.62 }, { "epoch": 2.4081632653061225, "grad_norm": 0.8386860489845276, "learning_rate": 5.183673469387756e-06, "loss": 0.0542, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 590, "total_memory_available (GB)": 94.62 }, { "epoch": 2.4489795918367347, "grad_norm": 1.2374165058135986, "learning_rate": 5.1020408163265315e-06, "loss": 0.0436, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 600, "total_memory_available (GB)": 94.62 }, { "epoch": 2.489795918367347, "grad_norm": 0.3719439208507538, "learning_rate": 5.020408163265307e-06, "loss": 0.0408, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 610, "total_memory_available (GB)": 94.62 }, { "epoch": 2.5306122448979593, "grad_norm": 0.905327558517456, "learning_rate": 4.938775510204082e-06, "loss": 0.0429, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 620, "total_memory_available (GB)": 94.62 }, { "epoch": 2.571428571428571, "grad_norm": 0.628597617149353, "learning_rate": 4.857142857142858e-06, "loss": 0.0812, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 630, "total_memory_available (GB)": 94.62 }, { "epoch": 2.612244897959184, "grad_norm": 1.3098090887069702, "learning_rate": 4.775510204081633e-06, "loss": 0.0495, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 640, "total_memory_available (GB)": 94.62 }, { "epoch": 2.6530612244897958, "grad_norm": 0.5635781288146973, "learning_rate": 4.693877551020409e-06, "loss": 0.0466, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 650, "total_memory_available (GB)": 94.62 }, { "epoch": 2.693877551020408, "grad_norm": 0.6197735071182251, "learning_rate": 4.612244897959184e-06, "loss": 0.0474, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 660, "total_memory_available (GB)": 94.62 }, { "epoch": 2.7346938775510203, "grad_norm": 0.6390748620033264, "learning_rate": 4.530612244897959e-06, "loss": 0.0863, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 670, "total_memory_available (GB)": 94.62 }, { "epoch": 2.7755102040816326, "grad_norm": 1.6307971477508545, "learning_rate": 4.448979591836735e-06, "loss": 0.0489, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 680, "total_memory_available (GB)": 94.62 }, { "epoch": 2.816326530612245, "grad_norm": 0.3477366864681244, "learning_rate": 4.367346938775511e-06, "loss": 0.0448, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 690, "total_memory_available (GB)": 94.62 }, { "epoch": 2.857142857142857, "grad_norm": 0.41136085987091064, "learning_rate": 4.2857142857142855e-06, "loss": 0.0431, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 700, "total_memory_available (GB)": 94.62 }, { "epoch": 2.8979591836734695, "grad_norm": 1.1029525995254517, "learning_rate": 4.204081632653061e-06, "loss": 0.0551, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 710, "total_memory_available (GB)": 94.62 }, { "epoch": 2.938775510204082, "grad_norm": 0.8994241952896118, "learning_rate": 4.122448979591837e-06, "loss": 0.0581, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 720, "total_memory_available (GB)": 94.62 }, { "epoch": 2.979591836734694, "grad_norm": 0.1889757364988327, "learning_rate": 4.040816326530612e-06, "loss": 0.034, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 730, "total_memory_available (GB)": 94.62 }, { "epoch": 3.020408163265306, "grad_norm": 1.7815334796905518, "learning_rate": 3.959183673469388e-06, "loss": 0.0536, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 740, "total_memory_available (GB)": 94.62 }, { "epoch": 3.061224489795918, "grad_norm": 0.4372510612010956, "learning_rate": 3.877551020408164e-06, "loss": 0.0617, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 750, "total_memory_available (GB)": 94.62 }, { "epoch": 3.1020408163265305, "grad_norm": 5.120749473571777, "learning_rate": 3.795918367346939e-06, "loss": 0.0518, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 760, "total_memory_available (GB)": 94.62 }, { "epoch": 3.142857142857143, "grad_norm": 6.453648090362549, "learning_rate": 3.7142857142857146e-06, "loss": 0.069, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 770, "total_memory_available (GB)": 94.62 }, { "epoch": 3.183673469387755, "grad_norm": 0.6512885093688965, "learning_rate": 3.6326530612244903e-06, "loss": 0.0649, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 780, "total_memory_available (GB)": 94.62 }, { "epoch": 3.2244897959183674, "grad_norm": 0.3266737759113312, "learning_rate": 3.5510204081632655e-06, "loss": 0.0523, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 790, "total_memory_available (GB)": 94.62 }, { "epoch": 3.2653061224489797, "grad_norm": 0.4506176710128784, "learning_rate": 3.469387755102041e-06, "loss": 0.0432, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 800, "total_memory_available (GB)": 94.62 }, { "epoch": 3.306122448979592, "grad_norm": 0.8929914236068726, "learning_rate": 3.3877551020408168e-06, "loss": 0.0554, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 810, "total_memory_available (GB)": 94.62 }, { "epoch": 3.3469387755102042, "grad_norm": 0.7046924233436584, "learning_rate": 3.3061224489795924e-06, "loss": 0.0453, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 820, "total_memory_available (GB)": 94.62 }, { "epoch": 3.387755102040816, "grad_norm": 0.29230576753616333, "learning_rate": 3.2244897959183672e-06, "loss": 0.0491, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 830, "total_memory_available (GB)": 94.62 }, { "epoch": 3.4285714285714284, "grad_norm": 0.4533096253871918, "learning_rate": 3.142857142857143e-06, "loss": 0.0529, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 840, "total_memory_available (GB)": 94.62 }, { "epoch": 3.4693877551020407, "grad_norm": 0.5383632183074951, "learning_rate": 3.0612244897959185e-06, "loss": 0.0823, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 850, "total_memory_available (GB)": 94.62 }, { "epoch": 3.510204081632653, "grad_norm": 2.8597779273986816, "learning_rate": 2.979591836734694e-06, "loss": 0.0456, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 860, "total_memory_available (GB)": 94.62 }, { "epoch": 3.5510204081632653, "grad_norm": 0.26686975359916687, "learning_rate": 2.8979591836734694e-06, "loss": 0.064, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 870, "total_memory_available (GB)": 94.62 }, { "epoch": 3.5918367346938775, "grad_norm": 0.7789614796638489, "learning_rate": 2.816326530612245e-06, "loss": 0.0441, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 880, "total_memory_available (GB)": 94.62 }, { "epoch": 3.63265306122449, "grad_norm": 0.21532948315143585, "learning_rate": 2.7346938775510207e-06, "loss": 0.0291, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 890, "total_memory_available (GB)": 94.62 }, { "epoch": 3.673469387755102, "grad_norm": 0.741765558719635, "learning_rate": 2.6530612244897964e-06, "loss": 0.0512, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 900, "total_memory_available (GB)": 94.62 }, { "epoch": 3.7142857142857144, "grad_norm": 0.6416855454444885, "learning_rate": 2.571428571428571e-06, "loss": 0.0606, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 910, "total_memory_available (GB)": 94.62 }, { "epoch": 3.7551020408163263, "grad_norm": 0.14841973781585693, "learning_rate": 2.489795918367347e-06, "loss": 0.0542, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 920, "total_memory_available (GB)": 94.62 }, { "epoch": 3.795918367346939, "grad_norm": 0.4417996108531952, "learning_rate": 2.4081632653061225e-06, "loss": 0.0498, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 930, "total_memory_available (GB)": 94.62 }, { "epoch": 3.836734693877551, "grad_norm": 0.9759775400161743, "learning_rate": 2.326530612244898e-06, "loss": 0.0491, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 940, "total_memory_available (GB)": 94.62 }, { "epoch": 3.877551020408163, "grad_norm": 1.020371913909912, "learning_rate": 2.244897959183674e-06, "loss": 0.0597, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 950, "total_memory_available (GB)": 94.62 }, { "epoch": 3.9183673469387754, "grad_norm": 0.3064863085746765, "learning_rate": 2.1632653061224495e-06, "loss": 0.0499, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 960, "total_memory_available (GB)": 94.62 }, { "epoch": 3.9591836734693877, "grad_norm": 0.7580925226211548, "learning_rate": 2.0816326530612247e-06, "loss": 0.0742, "max_memory_allocated (GB)": 57.18, "memory_allocated (GB)": 50.57, "step": 970, "total_memory_available (GB)": 94.62 }, { "epoch": 4.0, "grad_norm": 0.6833075881004333, "learning_rate": 2.0000000000000003e-06, "loss": 0.0708, "max_memory_allocated (GB)": 63.75, "memory_allocated (GB)": 50.57, "step": 980, "total_memory_available (GB)": 94.62 }, { "epoch": 4.040816326530612, "grad_norm": 0.5641142725944519, "learning_rate": 1.9183673469387756e-06, "loss": 0.0481, "max_memory_allocated (GB)": 63.75, "memory_allocated (GB)": 50.57, "step": 990, "total_memory_available (GB)": 94.62 }, { "epoch": 4.081632653061225, "grad_norm": 0.8568029403686523, "learning_rate": 1.8367346938775512e-06, "loss": 0.0626, "max_memory_allocated (GB)": 63.75, "memory_allocated (GB)": 50.57, "step": 1000, "total_memory_available (GB)": 94.62 }, { "epoch": 4.122448979591836, "grad_norm": 0.5912718772888184, "learning_rate": 1.7551020408163267e-06, "loss": 0.0628, "max_memory_allocated (GB)": 63.75, "memory_allocated (GB)": 50.57, "step": 1010, "total_memory_available (GB)": 94.62 }, { "epoch": 4.163265306122449, "grad_norm": 0.3173392713069916, "learning_rate": 1.6734693877551023e-06, "loss": 0.0402, "max_memory_allocated (GB)": 63.75, "memory_allocated (GB)": 50.57, "step": 1020, "total_memory_available (GB)": 94.62 }, { "epoch": 4.204081632653061, "grad_norm": 0.8902315497398376, "learning_rate": 1.5918367346938775e-06, "loss": 0.0536, "max_memory_allocated (GB)": 63.75, "memory_allocated (GB)": 50.57, "step": 1030, "total_memory_available (GB)": 94.62 }, { "epoch": 4.244897959183674, "grad_norm": 0.5009722113609314, "learning_rate": 1.5102040816326532e-06, "loss": 0.0399, "max_memory_allocated (GB)": 63.75, "memory_allocated (GB)": 50.57, "step": 1040, "total_memory_available (GB)": 94.62 }, { "epoch": 4.285714285714286, "grad_norm": 1.8656221628189087, "learning_rate": 1.4285714285714286e-06, "loss": 0.0499, "max_memory_allocated (GB)": 63.75, "memory_allocated (GB)": 50.57, "step": 1050, "total_memory_available (GB)": 94.62 }, { "epoch": 4.326530612244898, "grad_norm": 0.4257819950580597, "learning_rate": 1.3469387755102043e-06, "loss": 0.0459, "max_memory_allocated (GB)": 63.75, "memory_allocated (GB)": 50.57, "step": 1060, "total_memory_available (GB)": 94.62 }, { "epoch": 4.36734693877551, "grad_norm": 0.5823583006858826, "learning_rate": 1.2653061224489795e-06, "loss": 0.0488, "max_memory_allocated (GB)": 63.75, "memory_allocated (GB)": 50.57, "step": 1070, "total_memory_available (GB)": 94.62 }, { "epoch": 4.408163265306122, "grad_norm": 0.40693071484565735, "learning_rate": 1.1836734693877552e-06, "loss": 0.0525, "max_memory_allocated (GB)": 63.75, "memory_allocated (GB)": 50.57, "step": 1080, "total_memory_available (GB)": 94.62 }, { "epoch": 4.448979591836735, "grad_norm": 1.8890392780303955, "learning_rate": 1.1020408163265308e-06, "loss": 0.0512, "max_memory_allocated (GB)": 63.75, "memory_allocated (GB)": 50.57, "step": 1090, "total_memory_available (GB)": 94.62 }, { "epoch": 4.489795918367347, "grad_norm": 0.3593562841415405, "learning_rate": 1.0204081632653063e-06, "loss": 0.0364, "max_memory_allocated (GB)": 63.75, "memory_allocated (GB)": 50.57, "step": 1100, "total_memory_available (GB)": 94.62 }, { "epoch": 4.530612244897959, "grad_norm": 0.1553877741098404, "learning_rate": 9.387755102040817e-07, "loss": 0.0465, "max_memory_allocated (GB)": 63.75, "memory_allocated (GB)": 50.57, "step": 1110, "total_memory_available (GB)": 94.62 }, { "epoch": 4.571428571428571, "grad_norm": 0.6775248050689697, "learning_rate": 8.571428571428572e-07, "loss": 0.0406, "max_memory_allocated (GB)": 63.75, "memory_allocated (GB)": 50.57, "step": 1120, "total_memory_available (GB)": 94.62 }, { "epoch": 4.612244897959184, "grad_norm": 0.5735678672790527, "learning_rate": 7.755102040816327e-07, "loss": 0.0539, "max_memory_allocated (GB)": 63.75, "memory_allocated (GB)": 50.57, "step": 1130, "total_memory_available (GB)": 94.62 }, { "epoch": 4.653061224489796, "grad_norm": 0.7891528606414795, "learning_rate": 6.938775510204082e-07, "loss": 0.0732, "max_memory_allocated (GB)": 63.75, "memory_allocated (GB)": 50.57, "step": 1140, "total_memory_available (GB)": 94.62 }, { "epoch": 4.6938775510204085, "grad_norm": 0.7845800518989563, "learning_rate": 6.122448979591837e-07, "loss": 0.0515, "max_memory_allocated (GB)": 63.75, "memory_allocated (GB)": 50.57, "step": 1150, "total_memory_available (GB)": 94.62 }, { "epoch": 4.73469387755102, "grad_norm": 1.0361818075180054, "learning_rate": 5.306122448979592e-07, "loss": 0.0608, "max_memory_allocated (GB)": 63.75, "memory_allocated (GB)": 50.57, "step": 1160, "total_memory_available (GB)": 94.62 }, { "epoch": 4.775510204081632, "grad_norm": 0.42603600025177, "learning_rate": 4.489795918367347e-07, "loss": 0.0549, "max_memory_allocated (GB)": 63.75, "memory_allocated (GB)": 50.57, "step": 1170, "total_memory_available (GB)": 94.62 }, { "epoch": 4.816326530612245, "grad_norm": 0.6727630496025085, "learning_rate": 3.6734693877551025e-07, "loss": 0.0441, "max_memory_allocated (GB)": 63.75, "memory_allocated (GB)": 50.57, "step": 1180, "total_memory_available (GB)": 94.62 }, { "epoch": 4.857142857142857, "grad_norm": 0.8499141335487366, "learning_rate": 2.8571428571428575e-07, "loss": 0.0544, "max_memory_allocated (GB)": 63.75, "memory_allocated (GB)": 50.57, "step": 1190, "total_memory_available (GB)": 94.62 }, { "epoch": 4.8979591836734695, "grad_norm": 0.7604736685752869, "learning_rate": 2.0408163265306121e-07, "loss": 0.0728, "max_memory_allocated (GB)": 63.75, "memory_allocated (GB)": 50.57, "step": 1200, "total_memory_available (GB)": 94.62 }, { "epoch": 4.938775510204081, "grad_norm": 1.0298157930374146, "learning_rate": 1.2244897959183673e-07, "loss": 0.0654, "max_memory_allocated (GB)": 63.75, "memory_allocated (GB)": 50.57, "step": 1210, "total_memory_available (GB)": 94.62 }, { "epoch": 4.979591836734694, "grad_norm": 1.1890877485275269, "learning_rate": 4.0816326530612253e-08, "loss": 0.0413, "max_memory_allocated (GB)": 63.75, "memory_allocated (GB)": 50.57, "step": 1220, "total_memory_available (GB)": 94.62 }, { "epoch": 5.0, "max_memory_allocated (GB)": 63.75, "memory_allocated (GB)": 50.57, "step": 1225, "total_flos": 3.0598946525952e+16, "total_memory_available (GB)": 94.62, "train_loss": 0.06080986156755564, "train_runtime": 1168.6251, "train_samples_per_second": 48.37, "train_steps_per_second": 1.21 } ], "logging_steps": 10, "max_steps": 1225, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.0598946525952e+16, "train_batch_size": 40, "trial_name": null, "trial_params": null }