bridgetower / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 1225,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04081632653061224,
"grad_norm": 16.731555938720703,
"learning_rate": 9.918367346938776e-06,
"loss": 0.2616,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 10,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.08163265306122448,
"grad_norm": 7.052234649658203,
"learning_rate": 9.836734693877552e-06,
"loss": 0.1555,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 20,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.12244897959183673,
"grad_norm": 6.5298075675964355,
"learning_rate": 9.755102040816327e-06,
"loss": 0.1251,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 30,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.16326530612244897,
"grad_norm": 4.405805587768555,
"learning_rate": 9.673469387755103e-06,
"loss": 0.1102,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 40,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.20408163265306123,
"grad_norm": 4.870044708251953,
"learning_rate": 9.591836734693878e-06,
"loss": 0.1232,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 50,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.24489795918367346,
"grad_norm": 1.6433866024017334,
"learning_rate": 9.510204081632653e-06,
"loss": 0.0797,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 60,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.2857142857142857,
"grad_norm": 4.2432074546813965,
"learning_rate": 9.42857142857143e-06,
"loss": 0.1031,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 70,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.32653061224489793,
"grad_norm": 2.0352487564086914,
"learning_rate": 9.346938775510204e-06,
"loss": 0.1115,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 80,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.3673469387755102,
"grad_norm": 4.201560020446777,
"learning_rate": 9.26530612244898e-06,
"loss": 0.0817,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 90,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.40816326530612246,
"grad_norm": 3.618368625640869,
"learning_rate": 9.183673469387756e-06,
"loss": 0.0768,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 100,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.4489795918367347,
"grad_norm": 4.793916702270508,
"learning_rate": 9.102040816326532e-06,
"loss": 0.071,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 110,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.4897959183673469,
"grad_norm": 2.3223495483398438,
"learning_rate": 9.020408163265307e-06,
"loss": 0.0707,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 120,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.5306122448979592,
"grad_norm": 3.5389153957366943,
"learning_rate": 8.938775510204082e-06,
"loss": 0.0599,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 130,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.5714285714285714,
"grad_norm": 1.586653232574463,
"learning_rate": 8.857142857142858e-06,
"loss": 0.0491,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 140,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.6122448979591837,
"grad_norm": 1.5236841440200806,
"learning_rate": 8.775510204081633e-06,
"loss": 0.0632,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 150,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.6530612244897959,
"grad_norm": 2.752020835876465,
"learning_rate": 8.69387755102041e-06,
"loss": 0.0722,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 160,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.6938775510204082,
"grad_norm": 7.606927394866943,
"learning_rate": 8.612244897959184e-06,
"loss": 0.0756,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 170,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.7346938775510204,
"grad_norm": 1.5622702836990356,
"learning_rate": 8.530612244897961e-06,
"loss": 0.0617,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 180,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.7755102040816326,
"grad_norm": 0.9614956378936768,
"learning_rate": 8.448979591836736e-06,
"loss": 0.0572,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 190,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.8163265306122449,
"grad_norm": 0.7814755439758301,
"learning_rate": 8.36734693877551e-06,
"loss": 0.0636,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 200,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.8571428571428571,
"grad_norm": 1.352851390838623,
"learning_rate": 8.285714285714287e-06,
"loss": 0.0648,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 210,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.8979591836734694,
"grad_norm": 1.6814969778060913,
"learning_rate": 8.204081632653062e-06,
"loss": 0.0604,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 220,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.9387755102040817,
"grad_norm": 0.859993040561676,
"learning_rate": 8.122448979591837e-06,
"loss": 0.0549,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 230,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.9795918367346939,
"grad_norm": 0.6439819931983948,
"learning_rate": 8.040816326530613e-06,
"loss": 0.0493,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 240,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.0204081632653061,
"grad_norm": 0.8465150594711304,
"learning_rate": 7.959183673469388e-06,
"loss": 0.0624,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 250,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.0612244897959184,
"grad_norm": 1.0257333517074585,
"learning_rate": 7.877551020408164e-06,
"loss": 0.056,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 260,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.1020408163265305,
"grad_norm": 2.619938850402832,
"learning_rate": 7.79591836734694e-06,
"loss": 0.0648,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 270,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.1428571428571428,
"grad_norm": 0.4946042001247406,
"learning_rate": 7.714285714285716e-06,
"loss": 0.0586,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 280,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.183673469387755,
"grad_norm": 1.0154733657836914,
"learning_rate": 7.63265306122449e-06,
"loss": 0.0505,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 290,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.2244897959183674,
"grad_norm": 1.0347952842712402,
"learning_rate": 7.551020408163265e-06,
"loss": 0.0646,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 300,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.2653061224489797,
"grad_norm": 0.7844366431236267,
"learning_rate": 7.469387755102041e-06,
"loss": 0.0676,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 310,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.306122448979592,
"grad_norm": 1.1971337795257568,
"learning_rate": 7.387755102040817e-06,
"loss": 0.0499,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 320,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.346938775510204,
"grad_norm": 0.6674404740333557,
"learning_rate": 7.306122448979592e-06,
"loss": 0.0602,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 330,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.3877551020408163,
"grad_norm": 1.511208415031433,
"learning_rate": 7.224489795918368e-06,
"loss": 0.0547,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 340,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.4285714285714286,
"grad_norm": 0.5328841209411621,
"learning_rate": 7.1428571428571436e-06,
"loss": 0.0486,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 350,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.469387755102041,
"grad_norm": 1.464439034461975,
"learning_rate": 7.061224489795919e-06,
"loss": 0.0464,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 360,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.510204081632653,
"grad_norm": 0.834863543510437,
"learning_rate": 6.979591836734695e-06,
"loss": 0.0591,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 370,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.5510204081632653,
"grad_norm": 0.5399609208106995,
"learning_rate": 6.8979591836734705e-06,
"loss": 0.0464,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 380,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.5918367346938775,
"grad_norm": 0.8577661514282227,
"learning_rate": 6.816326530612245e-06,
"loss": 0.0654,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 390,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.6326530612244898,
"grad_norm": 0.5057955384254456,
"learning_rate": 6.734693877551021e-06,
"loss": 0.0609,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 400,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.6734693877551021,
"grad_norm": 0.9135333895683289,
"learning_rate": 6.653061224489797e-06,
"loss": 0.0607,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 410,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.7142857142857144,
"grad_norm": 2.9697179794311523,
"learning_rate": 6.571428571428572e-06,
"loss": 0.054,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 420,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.7551020408163265,
"grad_norm": 0.3473312556743622,
"learning_rate": 6.489795918367348e-06,
"loss": 0.0685,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 430,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.7959183673469388,
"grad_norm": 1.4528335332870483,
"learning_rate": 6.408163265306124e-06,
"loss": 0.0611,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 440,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.836734693877551,
"grad_norm": 0.48578280210494995,
"learning_rate": 6.326530612244899e-06,
"loss": 0.0438,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 450,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.8775510204081631,
"grad_norm": 0.3472760021686554,
"learning_rate": 6.244897959183675e-06,
"loss": 0.0544,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 460,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.9183673469387754,
"grad_norm": 1.0984327793121338,
"learning_rate": 6.163265306122449e-06,
"loss": 0.0438,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 470,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.9591836734693877,
"grad_norm": 0.20147933065891266,
"learning_rate": 6.0816326530612245e-06,
"loss": 0.0518,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 480,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.0,
"grad_norm": 1.1583309173583984,
"learning_rate": 6e-06,
"loss": 0.0637,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 490,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.0408163265306123,
"grad_norm": 0.6601622104644775,
"learning_rate": 5.918367346938776e-06,
"loss": 0.0596,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.0816326530612246,
"grad_norm": 0.5227305293083191,
"learning_rate": 5.8367346938775515e-06,
"loss": 0.0493,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 510,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.122448979591837,
"grad_norm": 0.8996191620826721,
"learning_rate": 5.755102040816327e-06,
"loss": 0.0461,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 520,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.163265306122449,
"grad_norm": 1.0684189796447754,
"learning_rate": 5.673469387755103e-06,
"loss": 0.0629,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 530,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.204081632653061,
"grad_norm": 0.5558530688285828,
"learning_rate": 5.591836734693878e-06,
"loss": 0.0581,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 540,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.2448979591836733,
"grad_norm": 1.1996757984161377,
"learning_rate": 5.510204081632653e-06,
"loss": 0.0626,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 550,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.2857142857142856,
"grad_norm": 1.2928632497787476,
"learning_rate": 5.428571428571429e-06,
"loss": 0.0575,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 560,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.326530612244898,
"grad_norm": 0.7934871912002563,
"learning_rate": 5.3469387755102045e-06,
"loss": 0.0577,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 570,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.36734693877551,
"grad_norm": 3.946485757827759,
"learning_rate": 5.26530612244898e-06,
"loss": 0.0663,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 580,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.4081632653061225,
"grad_norm": 0.43567588925361633,
"learning_rate": 5.183673469387756e-06,
"loss": 0.0539,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 590,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.4489795918367347,
"grad_norm": 0.5725533962249756,
"learning_rate": 5.1020408163265315e-06,
"loss": 0.0438,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 600,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.489795918367347,
"grad_norm": 0.44328320026397705,
"learning_rate": 5.020408163265307e-06,
"loss": 0.041,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 610,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.5306122448979593,
"grad_norm": 1.338100790977478,
"learning_rate": 4.938775510204082e-06,
"loss": 0.0424,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 620,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.571428571428571,
"grad_norm": 0.92643803358078,
"learning_rate": 4.857142857142858e-06,
"loss": 0.0811,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 630,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.612244897959184,
"grad_norm": 1.1147398948669434,
"learning_rate": 4.775510204081633e-06,
"loss": 0.0492,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 640,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.6530612244897958,
"grad_norm": 0.6104307174682617,
"learning_rate": 4.693877551020409e-06,
"loss": 0.0468,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 650,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.693877551020408,
"grad_norm": 0.9826134443283081,
"learning_rate": 4.612244897959184e-06,
"loss": 0.0471,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 660,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.7346938775510203,
"grad_norm": 0.7680672407150269,
"learning_rate": 4.530612244897959e-06,
"loss": 0.0858,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 670,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.7755102040816326,
"grad_norm": 0.9682340025901794,
"learning_rate": 4.448979591836735e-06,
"loss": 0.0484,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 680,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.816326530612245,
"grad_norm": 0.37712323665618896,
"learning_rate": 4.367346938775511e-06,
"loss": 0.0443,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 690,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.857142857142857,
"grad_norm": 0.34970754384994507,
"learning_rate": 4.2857142857142855e-06,
"loss": 0.0434,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 700,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.8979591836734695,
"grad_norm": 0.9949877262115479,
"learning_rate": 4.204081632653061e-06,
"loss": 0.0553,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 710,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.938775510204082,
"grad_norm": 1.4436949491500854,
"learning_rate": 4.122448979591837e-06,
"loss": 0.0583,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 720,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.979591836734694,
"grad_norm": 0.1619979739189148,
"learning_rate": 4.040816326530612e-06,
"loss": 0.0336,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 730,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.020408163265306,
"grad_norm": 1.2799049615859985,
"learning_rate": 3.959183673469388e-06,
"loss": 0.0536,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 740,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.061224489795918,
"grad_norm": 0.5613189935684204,
"learning_rate": 3.877551020408164e-06,
"loss": 0.062,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 750,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.1020408163265305,
"grad_norm": 0.827383279800415,
"learning_rate": 3.795918367346939e-06,
"loss": 0.0527,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 760,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.142857142857143,
"grad_norm": 0.6983201503753662,
"learning_rate": 3.7142857142857146e-06,
"loss": 0.0691,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 770,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.183673469387755,
"grad_norm": 1.0466923713684082,
"learning_rate": 3.6326530612244903e-06,
"loss": 0.0644,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 780,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.2244897959183674,
"grad_norm": 0.3068871796131134,
"learning_rate": 3.5510204081632655e-06,
"loss": 0.0524,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 790,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.2653061224489797,
"grad_norm": 0.40160393714904785,
"learning_rate": 3.469387755102041e-06,
"loss": 0.0434,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 800,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.306122448979592,
"grad_norm": 0.880214512348175,
"learning_rate": 3.3877551020408168e-06,
"loss": 0.056,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 810,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.3469387755102042,
"grad_norm": 0.9539953470230103,
"learning_rate": 3.3061224489795924e-06,
"loss": 0.0464,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 820,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.387755102040816,
"grad_norm": 0.24522298574447632,
"learning_rate": 3.2244897959183672e-06,
"loss": 0.0485,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 830,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.4285714285714284,
"grad_norm": 0.4946345388889313,
"learning_rate": 3.142857142857143e-06,
"loss": 0.0527,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 840,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.4693877551020407,
"grad_norm": 0.4724675416946411,
"learning_rate": 3.0612244897959185e-06,
"loss": 0.0813,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 850,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.510204081632653,
"grad_norm": 0.9907402396202087,
"learning_rate": 2.979591836734694e-06,
"loss": 0.0447,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 860,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.5510204081632653,
"grad_norm": 0.19696560502052307,
"learning_rate": 2.8979591836734694e-06,
"loss": 0.0635,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 870,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.5918367346938775,
"grad_norm": 0.7972800135612488,
"learning_rate": 2.816326530612245e-06,
"loss": 0.0438,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 880,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.63265306122449,
"grad_norm": 0.21193134784698486,
"learning_rate": 2.7346938775510207e-06,
"loss": 0.029,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 890,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.673469387755102,
"grad_norm": 0.6128103137016296,
"learning_rate": 2.6530612244897964e-06,
"loss": 0.0514,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 900,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.7142857142857144,
"grad_norm": 0.8112168312072754,
"learning_rate": 2.571428571428571e-06,
"loss": 0.061,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 910,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.7551020408163263,
"grad_norm": 0.18730562925338745,
"learning_rate": 2.489795918367347e-06,
"loss": 0.0546,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 920,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.795918367346939,
"grad_norm": 0.3866801857948303,
"learning_rate": 2.4081632653061225e-06,
"loss": 0.0501,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 930,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.836734693877551,
"grad_norm": 0.8816384077072144,
"learning_rate": 2.326530612244898e-06,
"loss": 0.0489,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 940,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.877551020408163,
"grad_norm": 0.5572797656059265,
"learning_rate": 2.244897959183674e-06,
"loss": 0.0599,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 950,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.9183673469387754,
"grad_norm": 0.38238489627838135,
"learning_rate": 2.1632653061224495e-06,
"loss": 0.0497,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 960,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.9591836734693877,
"grad_norm": 0.6144959926605225,
"learning_rate": 2.0816326530612247e-06,
"loss": 0.0741,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 970,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.0,
"grad_norm": 0.6087101697921753,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0703,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 980,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.040816326530612,
"grad_norm": 0.5187469720840454,
"learning_rate": 1.9183673469387756e-06,
"loss": 0.0482,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 990,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.081632653061225,
"grad_norm": 1.248850703239441,
"learning_rate": 1.8367346938775512e-06,
"loss": 0.0631,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.122448979591836,
"grad_norm": 0.5806276798248291,
"learning_rate": 1.7551020408163267e-06,
"loss": 0.0629,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1010,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.163265306122449,
"grad_norm": 0.3565673828125,
"learning_rate": 1.6734693877551023e-06,
"loss": 0.0407,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1020,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.204081632653061,
"grad_norm": 0.6948438882827759,
"learning_rate": 1.5918367346938775e-06,
"loss": 0.053,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1030,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.244897959183674,
"grad_norm": 0.5245764851570129,
"learning_rate": 1.5102040816326532e-06,
"loss": 0.0399,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1040,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.285714285714286,
"grad_norm": 0.7932385802268982,
"learning_rate": 1.4285714285714286e-06,
"loss": 0.0502,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1050,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.326530612244898,
"grad_norm": 0.30140048265457153,
"learning_rate": 1.3469387755102043e-06,
"loss": 0.046,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1060,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.36734693877551,
"grad_norm": 0.570467472076416,
"learning_rate": 1.2653061224489795e-06,
"loss": 0.0487,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1070,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.408163265306122,
"grad_norm": 0.43690067529678345,
"learning_rate": 1.1836734693877552e-06,
"loss": 0.0521,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1080,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.448979591836735,
"grad_norm": 0.5298590660095215,
"learning_rate": 1.1020408163265308e-06,
"loss": 0.0506,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1090,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.489795918367347,
"grad_norm": 0.2310735136270523,
"learning_rate": 1.0204081632653063e-06,
"loss": 0.036,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1100,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.530612244897959,
"grad_norm": 0.13128583133220673,
"learning_rate": 9.387755102040817e-07,
"loss": 0.0463,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1110,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.571428571428571,
"grad_norm": 0.7682464122772217,
"learning_rate": 8.571428571428572e-07,
"loss": 0.0403,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1120,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.612244897959184,
"grad_norm": 0.6608971953392029,
"learning_rate": 7.755102040816327e-07,
"loss": 0.0543,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1130,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.653061224489796,
"grad_norm": 0.8803687691688538,
"learning_rate": 6.938775510204082e-07,
"loss": 0.0728,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1140,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.6938775510204085,
"grad_norm": 4.121662139892578,
"learning_rate": 6.122448979591837e-07,
"loss": 0.0514,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1150,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.73469387755102,
"grad_norm": 0.7500938773155212,
"learning_rate": 5.306122448979592e-07,
"loss": 0.0612,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1160,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.775510204081632,
"grad_norm": 0.6001973748207092,
"learning_rate": 4.489795918367347e-07,
"loss": 0.0549,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1170,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.816326530612245,
"grad_norm": 0.7522645592689514,
"learning_rate": 3.6734693877551025e-07,
"loss": 0.0445,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1180,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.857142857142857,
"grad_norm": 0.6640497446060181,
"learning_rate": 2.8571428571428575e-07,
"loss": 0.0542,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1190,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.8979591836734695,
"grad_norm": 0.8031227588653564,
"learning_rate": 2.0408163265306121e-07,
"loss": 0.0728,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1200,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.938775510204081,
"grad_norm": 0.39187708497047424,
"learning_rate": 1.2244897959183673e-07,
"loss": 0.065,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1210,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.979591836734694,
"grad_norm": 3.809382915496826,
"learning_rate": 4.0816326530612253e-08,
"loss": 0.0417,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1220,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.0,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 1225,
"total_flos": 3.0598946525952e+16,
"total_memory_available (GB)": 94.62,
"train_loss": 0.06098026679486644,
"train_runtime": 1192.2443,
"train_samples_per_second": 46.607,
"train_steps_per_second": 1.166
}
],
"logging_steps": 10,
"max_steps": 1225,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.0598946525952e+16,
"train_batch_size": 40,
"trial_name": null,
"trial_params": null
}
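
For reference, a minimal sketch of reading this log in Python, assuming the file above is saved locally as "trainer_state.json"; it uses only the standard json module, and the field names are taken from the log_history entries shown above.

import json

# Load the trainer state written by transformers.Trainer
# (assumed to be saved locally as "trainer_state.json").
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the periodic logging entries, i.e. those that report a "loss";
# the final summary entry carries train_runtime / train_loss instead.
logged = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in logged]
losses = [e["loss"] for e in logged]

print(f"logged points: {len(steps)}")
print(f"first loss @ step {steps[0]}: {losses[0]}")
print(f"last  loss @ step {steps[-1]}: {losses[-1]}")

# The end-of-training summary is the entry containing "train_loss".
summary = next(e for e in state["log_history"] if "train_loss" in e)
print(f"mean training loss: {summary['train_loss']:.4f}")
print(f"train runtime (s):  {summary['train_runtime']:.1f}")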