|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 500, |
|
"global_step": 1225, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04081632653061224, |
|
"grad_norm": 16.731555938720703, |
|
"learning_rate": 9.918367346938776e-06, |
|
"loss": 0.2616, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 10, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.08163265306122448, |
|
"grad_norm": 7.052234649658203, |
|
"learning_rate": 9.836734693877552e-06, |
|
"loss": 0.1555, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 20, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.12244897959183673, |
|
"grad_norm": 6.5298075675964355, |
|
"learning_rate": 9.755102040816327e-06, |
|
"loss": 0.1251, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 30, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.16326530612244897, |
|
"grad_norm": 4.405805587768555, |
|
"learning_rate": 9.673469387755103e-06, |
|
"loss": 0.1102, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 40, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.20408163265306123, |
|
"grad_norm": 4.870044708251953, |
|
"learning_rate": 9.591836734693878e-06, |
|
"loss": 0.1232, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 50, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.24489795918367346, |
|
"grad_norm": 1.6433866024017334, |
|
"learning_rate": 9.510204081632653e-06, |
|
"loss": 0.0797, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 60, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.2857142857142857, |
|
"grad_norm": 4.2432074546813965, |
|
"learning_rate": 9.42857142857143e-06, |
|
"loss": 0.1031, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 70, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.32653061224489793, |
|
"grad_norm": 2.0352487564086914, |
|
"learning_rate": 9.346938775510204e-06, |
|
"loss": 0.1115, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 80, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.3673469387755102, |
|
"grad_norm": 4.201560020446777, |
|
"learning_rate": 9.26530612244898e-06, |
|
"loss": 0.0817, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 90, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.40816326530612246, |
|
"grad_norm": 3.618368625640869, |
|
"learning_rate": 9.183673469387756e-06, |
|
"loss": 0.0768, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 100, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.4489795918367347, |
|
"grad_norm": 4.793916702270508, |
|
"learning_rate": 9.102040816326532e-06, |
|
"loss": 0.071, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 110, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.4897959183673469, |
|
"grad_norm": 2.3223495483398438, |
|
"learning_rate": 9.020408163265307e-06, |
|
"loss": 0.0707, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 120, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.5306122448979592, |
|
"grad_norm": 3.5389153957366943, |
|
"learning_rate": 8.938775510204082e-06, |
|
"loss": 0.0599, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 130, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 1.586653232574463, |
|
"learning_rate": 8.857142857142858e-06, |
|
"loss": 0.0491, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 140, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.6122448979591837, |
|
"grad_norm": 1.5236841440200806, |
|
"learning_rate": 8.775510204081633e-06, |
|
"loss": 0.0632, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 150, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.6530612244897959, |
|
"grad_norm": 2.752020835876465, |
|
"learning_rate": 8.69387755102041e-06, |
|
"loss": 0.0722, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 160, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.6938775510204082, |
|
"grad_norm": 7.606927394866943, |
|
"learning_rate": 8.612244897959184e-06, |
|
"loss": 0.0756, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 170, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.7346938775510204, |
|
"grad_norm": 1.5622702836990356, |
|
"learning_rate": 8.530612244897961e-06, |
|
"loss": 0.0617, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 180, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.7755102040816326, |
|
"grad_norm": 0.9614956378936768, |
|
"learning_rate": 8.448979591836736e-06, |
|
"loss": 0.0572, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 190, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 0.7814755439758301, |
|
"learning_rate": 8.36734693877551e-06, |
|
"loss": 0.0636, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 200, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.8571428571428571, |
|
"grad_norm": 1.352851390838623, |
|
"learning_rate": 8.285714285714287e-06, |
|
"loss": 0.0648, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 210, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.8979591836734694, |
|
"grad_norm": 1.6814969778060913, |
|
"learning_rate": 8.204081632653062e-06, |
|
"loss": 0.0604, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 220, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.9387755102040817, |
|
"grad_norm": 0.859993040561676, |
|
"learning_rate": 8.122448979591837e-06, |
|
"loss": 0.0549, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 230, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 0.9795918367346939, |
|
"grad_norm": 0.6439819931983948, |
|
"learning_rate": 8.040816326530613e-06, |
|
"loss": 0.0493, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 240, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.0204081632653061, |
|
"grad_norm": 0.8465150594711304, |
|
"learning_rate": 7.959183673469388e-06, |
|
"loss": 0.0624, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 250, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.0612244897959184, |
|
"grad_norm": 1.0257333517074585, |
|
"learning_rate": 7.877551020408164e-06, |
|
"loss": 0.056, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 260, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.1020408163265305, |
|
"grad_norm": 2.619938850402832, |
|
"learning_rate": 7.79591836734694e-06, |
|
"loss": 0.0648, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 270, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.1428571428571428, |
|
"grad_norm": 0.4946042001247406, |
|
"learning_rate": 7.714285714285716e-06, |
|
"loss": 0.0586, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 280, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.183673469387755, |
|
"grad_norm": 1.0154733657836914, |
|
"learning_rate": 7.63265306122449e-06, |
|
"loss": 0.0505, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 290, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.2244897959183674, |
|
"grad_norm": 1.0347952842712402, |
|
"learning_rate": 7.551020408163265e-06, |
|
"loss": 0.0646, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 300, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.2653061224489797, |
|
"grad_norm": 0.7844366431236267, |
|
"learning_rate": 7.469387755102041e-06, |
|
"loss": 0.0676, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 310, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.306122448979592, |
|
"grad_norm": 1.1971337795257568, |
|
"learning_rate": 7.387755102040817e-06, |
|
"loss": 0.0499, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 320, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.346938775510204, |
|
"grad_norm": 0.6674404740333557, |
|
"learning_rate": 7.306122448979592e-06, |
|
"loss": 0.0602, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 330, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.3877551020408163, |
|
"grad_norm": 1.511208415031433, |
|
"learning_rate": 7.224489795918368e-06, |
|
"loss": 0.0547, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 340, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.4285714285714286, |
|
"grad_norm": 0.5328841209411621, |
|
"learning_rate": 7.1428571428571436e-06, |
|
"loss": 0.0486, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 350, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.469387755102041, |
|
"grad_norm": 1.464439034461975, |
|
"learning_rate": 7.061224489795919e-06, |
|
"loss": 0.0464, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 360, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.510204081632653, |
|
"grad_norm": 0.834863543510437, |
|
"learning_rate": 6.979591836734695e-06, |
|
"loss": 0.0591, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 370, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.5510204081632653, |
|
"grad_norm": 0.5399609208106995, |
|
"learning_rate": 6.8979591836734705e-06, |
|
"loss": 0.0464, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 380, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.5918367346938775, |
|
"grad_norm": 0.8577661514282227, |
|
"learning_rate": 6.816326530612245e-06, |
|
"loss": 0.0654, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 390, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.6326530612244898, |
|
"grad_norm": 0.5057955384254456, |
|
"learning_rate": 6.734693877551021e-06, |
|
"loss": 0.0609, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 400, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.6734693877551021, |
|
"grad_norm": 0.9135333895683289, |
|
"learning_rate": 6.653061224489797e-06, |
|
"loss": 0.0607, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 410, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.7142857142857144, |
|
"grad_norm": 2.9697179794311523, |
|
"learning_rate": 6.571428571428572e-06, |
|
"loss": 0.054, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 420, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.7551020408163265, |
|
"grad_norm": 0.3473312556743622, |
|
"learning_rate": 6.489795918367348e-06, |
|
"loss": 0.0685, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 430, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.7959183673469388, |
|
"grad_norm": 1.4528335332870483, |
|
"learning_rate": 6.408163265306124e-06, |
|
"loss": 0.0611, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 440, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.836734693877551, |
|
"grad_norm": 0.48578280210494995, |
|
"learning_rate": 6.326530612244899e-06, |
|
"loss": 0.0438, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 450, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.8775510204081631, |
|
"grad_norm": 0.3472760021686554, |
|
"learning_rate": 6.244897959183675e-06, |
|
"loss": 0.0544, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 460, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.9183673469387754, |
|
"grad_norm": 1.0984327793121338, |
|
"learning_rate": 6.163265306122449e-06, |
|
"loss": 0.0438, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 470, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 1.9591836734693877, |
|
"grad_norm": 0.20147933065891266, |
|
"learning_rate": 6.0816326530612245e-06, |
|
"loss": 0.0518, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 480, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.1583309173583984, |
|
"learning_rate": 6e-06, |
|
"loss": 0.0637, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 490, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.0408163265306123, |
|
"grad_norm": 0.6601622104644775, |
|
"learning_rate": 5.918367346938776e-06, |
|
"loss": 0.0596, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 500, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.0816326530612246, |
|
"grad_norm": 0.5227305293083191, |
|
"learning_rate": 5.8367346938775515e-06, |
|
"loss": 0.0493, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 510, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.122448979591837, |
|
"grad_norm": 0.8996191620826721, |
|
"learning_rate": 5.755102040816327e-06, |
|
"loss": 0.0461, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 520, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.163265306122449, |
|
"grad_norm": 1.0684189796447754, |
|
"learning_rate": 5.673469387755103e-06, |
|
"loss": 0.0629, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 530, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.204081632653061, |
|
"grad_norm": 0.5558530688285828, |
|
"learning_rate": 5.591836734693878e-06, |
|
"loss": 0.0581, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 540, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.2448979591836733, |
|
"grad_norm": 1.1996757984161377, |
|
"learning_rate": 5.510204081632653e-06, |
|
"loss": 0.0626, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 550, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.2857142857142856, |
|
"grad_norm": 1.2928632497787476, |
|
"learning_rate": 5.428571428571429e-06, |
|
"loss": 0.0575, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 560, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.326530612244898, |
|
"grad_norm": 0.7934871912002563, |
|
"learning_rate": 5.3469387755102045e-06, |
|
"loss": 0.0577, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 570, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.36734693877551, |
|
"grad_norm": 3.946485757827759, |
|
"learning_rate": 5.26530612244898e-06, |
|
"loss": 0.0663, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 580, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.4081632653061225, |
|
"grad_norm": 0.43567588925361633, |
|
"learning_rate": 5.183673469387756e-06, |
|
"loss": 0.0539, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 590, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.4489795918367347, |
|
"grad_norm": 0.5725533962249756, |
|
"learning_rate": 5.1020408163265315e-06, |
|
"loss": 0.0438, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 600, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.489795918367347, |
|
"grad_norm": 0.44328320026397705, |
|
"learning_rate": 5.020408163265307e-06, |
|
"loss": 0.041, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 610, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.5306122448979593, |
|
"grad_norm": 1.338100790977478, |
|
"learning_rate": 4.938775510204082e-06, |
|
"loss": 0.0424, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 620, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.571428571428571, |
|
"grad_norm": 0.92643803358078, |
|
"learning_rate": 4.857142857142858e-06, |
|
"loss": 0.0811, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 630, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.612244897959184, |
|
"grad_norm": 1.1147398948669434, |
|
"learning_rate": 4.775510204081633e-06, |
|
"loss": 0.0492, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 640, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.6530612244897958, |
|
"grad_norm": 0.6104307174682617, |
|
"learning_rate": 4.693877551020409e-06, |
|
"loss": 0.0468, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 650, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.693877551020408, |
|
"grad_norm": 0.9826134443283081, |
|
"learning_rate": 4.612244897959184e-06, |
|
"loss": 0.0471, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 660, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.7346938775510203, |
|
"grad_norm": 0.7680672407150269, |
|
"learning_rate": 4.530612244897959e-06, |
|
"loss": 0.0858, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 670, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.7755102040816326, |
|
"grad_norm": 0.9682340025901794, |
|
"learning_rate": 4.448979591836735e-06, |
|
"loss": 0.0484, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 680, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.816326530612245, |
|
"grad_norm": 0.37712323665618896, |
|
"learning_rate": 4.367346938775511e-06, |
|
"loss": 0.0443, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 690, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 0.34970754384994507, |
|
"learning_rate": 4.2857142857142855e-06, |
|
"loss": 0.0434, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 700, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.8979591836734695, |
|
"grad_norm": 0.9949877262115479, |
|
"learning_rate": 4.204081632653061e-06, |
|
"loss": 0.0553, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 710, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.938775510204082, |
|
"grad_norm": 1.4436949491500854, |
|
"learning_rate": 4.122448979591837e-06, |
|
"loss": 0.0583, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 720, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 2.979591836734694, |
|
"grad_norm": 0.1619979739189148, |
|
"learning_rate": 4.040816326530612e-06, |
|
"loss": 0.0336, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 730, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.020408163265306, |
|
"grad_norm": 1.2799049615859985, |
|
"learning_rate": 3.959183673469388e-06, |
|
"loss": 0.0536, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 740, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.061224489795918, |
|
"grad_norm": 0.5613189935684204, |
|
"learning_rate": 3.877551020408164e-06, |
|
"loss": 0.062, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 750, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.1020408163265305, |
|
"grad_norm": 0.827383279800415, |
|
"learning_rate": 3.795918367346939e-06, |
|
"loss": 0.0527, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 760, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.142857142857143, |
|
"grad_norm": 0.6983201503753662, |
|
"learning_rate": 3.7142857142857146e-06, |
|
"loss": 0.0691, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 770, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.183673469387755, |
|
"grad_norm": 1.0466923713684082, |
|
"learning_rate": 3.6326530612244903e-06, |
|
"loss": 0.0644, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 780, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.2244897959183674, |
|
"grad_norm": 0.3068871796131134, |
|
"learning_rate": 3.5510204081632655e-06, |
|
"loss": 0.0524, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 790, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.2653061224489797, |
|
"grad_norm": 0.40160393714904785, |
|
"learning_rate": 3.469387755102041e-06, |
|
"loss": 0.0434, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 800, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.306122448979592, |
|
"grad_norm": 0.880214512348175, |
|
"learning_rate": 3.3877551020408168e-06, |
|
"loss": 0.056, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 810, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.3469387755102042, |
|
"grad_norm": 0.9539953470230103, |
|
"learning_rate": 3.3061224489795924e-06, |
|
"loss": 0.0464, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 820, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.387755102040816, |
|
"grad_norm": 0.24522298574447632, |
|
"learning_rate": 3.2244897959183672e-06, |
|
"loss": 0.0485, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 830, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.4285714285714284, |
|
"grad_norm": 0.4946345388889313, |
|
"learning_rate": 3.142857142857143e-06, |
|
"loss": 0.0527, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 840, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.4693877551020407, |
|
"grad_norm": 0.4724675416946411, |
|
"learning_rate": 3.0612244897959185e-06, |
|
"loss": 0.0813, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 850, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.510204081632653, |
|
"grad_norm": 0.9907402396202087, |
|
"learning_rate": 2.979591836734694e-06, |
|
"loss": 0.0447, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 860, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.5510204081632653, |
|
"grad_norm": 0.19696560502052307, |
|
"learning_rate": 2.8979591836734694e-06, |
|
"loss": 0.0635, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 870, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.5918367346938775, |
|
"grad_norm": 0.7972800135612488, |
|
"learning_rate": 2.816326530612245e-06, |
|
"loss": 0.0438, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 880, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.63265306122449, |
|
"grad_norm": 0.21193134784698486, |
|
"learning_rate": 2.7346938775510207e-06, |
|
"loss": 0.029, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 890, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.673469387755102, |
|
"grad_norm": 0.6128103137016296, |
|
"learning_rate": 2.6530612244897964e-06, |
|
"loss": 0.0514, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 900, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.7142857142857144, |
|
"grad_norm": 0.8112168312072754, |
|
"learning_rate": 2.571428571428571e-06, |
|
"loss": 0.061, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 910, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.7551020408163263, |
|
"grad_norm": 0.18730562925338745, |
|
"learning_rate": 2.489795918367347e-06, |
|
"loss": 0.0546, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 920, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.795918367346939, |
|
"grad_norm": 0.3866801857948303, |
|
"learning_rate": 2.4081632653061225e-06, |
|
"loss": 0.0501, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 930, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.836734693877551, |
|
"grad_norm": 0.8816384077072144, |
|
"learning_rate": 2.326530612244898e-06, |
|
"loss": 0.0489, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 940, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.877551020408163, |
|
"grad_norm": 0.5572797656059265, |
|
"learning_rate": 2.244897959183674e-06, |
|
"loss": 0.0599, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 950, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.9183673469387754, |
|
"grad_norm": 0.38238489627838135, |
|
"learning_rate": 2.1632653061224495e-06, |
|
"loss": 0.0497, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 960, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 3.9591836734693877, |
|
"grad_norm": 0.6144959926605225, |
|
"learning_rate": 2.0816326530612247e-06, |
|
"loss": 0.0741, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 970, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.6087101697921753, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.0703, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 980, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.040816326530612, |
|
"grad_norm": 0.5187469720840454, |
|
"learning_rate": 1.9183673469387756e-06, |
|
"loss": 0.0482, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 990, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.081632653061225, |
|
"grad_norm": 1.248850703239441, |
|
"learning_rate": 1.8367346938775512e-06, |
|
"loss": 0.0631, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1000, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.122448979591836, |
|
"grad_norm": 0.5806276798248291, |
|
"learning_rate": 1.7551020408163267e-06, |
|
"loss": 0.0629, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1010, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.163265306122449, |
|
"grad_norm": 0.3565673828125, |
|
"learning_rate": 1.6734693877551023e-06, |
|
"loss": 0.0407, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1020, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.204081632653061, |
|
"grad_norm": 0.6948438882827759, |
|
"learning_rate": 1.5918367346938775e-06, |
|
"loss": 0.053, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1030, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.244897959183674, |
|
"grad_norm": 0.5245764851570129, |
|
"learning_rate": 1.5102040816326532e-06, |
|
"loss": 0.0399, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1040, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.285714285714286, |
|
"grad_norm": 0.7932385802268982, |
|
"learning_rate": 1.4285714285714286e-06, |
|
"loss": 0.0502, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1050, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.326530612244898, |
|
"grad_norm": 0.30140048265457153, |
|
"learning_rate": 1.3469387755102043e-06, |
|
"loss": 0.046, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1060, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.36734693877551, |
|
"grad_norm": 0.570467472076416, |
|
"learning_rate": 1.2653061224489795e-06, |
|
"loss": 0.0487, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1070, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.408163265306122, |
|
"grad_norm": 0.43690067529678345, |
|
"learning_rate": 1.1836734693877552e-06, |
|
"loss": 0.0521, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1080, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.448979591836735, |
|
"grad_norm": 0.5298590660095215, |
|
"learning_rate": 1.1020408163265308e-06, |
|
"loss": 0.0506, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1090, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.489795918367347, |
|
"grad_norm": 0.2310735136270523, |
|
"learning_rate": 1.0204081632653063e-06, |
|
"loss": 0.036, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1100, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.530612244897959, |
|
"grad_norm": 0.13128583133220673, |
|
"learning_rate": 9.387755102040817e-07, |
|
"loss": 0.0463, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1110, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.571428571428571, |
|
"grad_norm": 0.7682464122772217, |
|
"learning_rate": 8.571428571428572e-07, |
|
"loss": 0.0403, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1120, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.612244897959184, |
|
"grad_norm": 0.6608971953392029, |
|
"learning_rate": 7.755102040816327e-07, |
|
"loss": 0.0543, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1130, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.653061224489796, |
|
"grad_norm": 0.8803687691688538, |
|
"learning_rate": 6.938775510204082e-07, |
|
"loss": 0.0728, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1140, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.6938775510204085, |
|
"grad_norm": 4.121662139892578, |
|
"learning_rate": 6.122448979591837e-07, |
|
"loss": 0.0514, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1150, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.73469387755102, |
|
"grad_norm": 0.7500938773155212, |
|
"learning_rate": 5.306122448979592e-07, |
|
"loss": 0.0612, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1160, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.775510204081632, |
|
"grad_norm": 0.6001973748207092, |
|
"learning_rate": 4.489795918367347e-07, |
|
"loss": 0.0549, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1170, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.816326530612245, |
|
"grad_norm": 0.7522645592689514, |
|
"learning_rate": 3.6734693877551025e-07, |
|
"loss": 0.0445, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1180, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.857142857142857, |
|
"grad_norm": 0.6640497446060181, |
|
"learning_rate": 2.8571428571428575e-07, |
|
"loss": 0.0542, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1190, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.8979591836734695, |
|
"grad_norm": 0.8031227588653564, |
|
"learning_rate": 2.0408163265306121e-07, |
|
"loss": 0.0728, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1200, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.938775510204081, |
|
"grad_norm": 0.39187708497047424, |
|
"learning_rate": 1.2244897959183673e-07, |
|
"loss": 0.065, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1210, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 4.979591836734694, |
|
"grad_norm": 3.809382915496826, |
|
"learning_rate": 4.0816326530612253e-08, |
|
"loss": 0.0417, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1220, |
|
"total_memory_available (GB)": 94.62 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"max_memory_allocated (GB)": 57.18, |
|
"memory_allocated (GB)": 50.57, |
|
"step": 1225, |
|
"total_flos": 3.0598946525952e+16, |
|
"total_memory_available (GB)": 94.62, |
|
"train_loss": 0.06098026679486644, |
|
"train_runtime": 1192.2443, |
|
"train_samples_per_second": 46.607, |
|
"train_steps_per_second": 1.166 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1225, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.0598946525952e+16, |
|
"train_batch_size": 40, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|