bridgetower / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 5.0,
"eval_steps": 500,
"global_step": 1225,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04081632653061224,
"grad_norm": 9.589848518371582,
"learning_rate": 9.918367346938776e-06,
"loss": 0.2612,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 10,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.08163265306122448,
"grad_norm": 8.167236328125,
"learning_rate": 9.836734693877552e-06,
"loss": 0.1542,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 20,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.12244897959183673,
"grad_norm": 5.958656311035156,
"learning_rate": 9.755102040816327e-06,
"loss": 0.1236,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 30,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.16326530612244897,
"grad_norm": 4.233438968658447,
"learning_rate": 9.673469387755103e-06,
"loss": 0.1105,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 40,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.20408163265306123,
"grad_norm": 4.4948201179504395,
"learning_rate": 9.591836734693878e-06,
"loss": 0.1195,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 50,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.24489795918367346,
"grad_norm": 1.827812910079956,
"learning_rate": 9.510204081632653e-06,
"loss": 0.0797,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 60,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.2857142857142857,
"grad_norm": 3.889023780822754,
"learning_rate": 9.42857142857143e-06,
"loss": 0.0989,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 70,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.32653061224489793,
"grad_norm": 1.9397954940795898,
"learning_rate": 9.346938775510204e-06,
"loss": 0.1102,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 80,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.3673469387755102,
"grad_norm": 3.2782671451568604,
"learning_rate": 9.26530612244898e-06,
"loss": 0.0824,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 90,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.40816326530612246,
"grad_norm": 5.711858749389648,
"learning_rate": 9.183673469387756e-06,
"loss": 0.0762,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 100,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.4489795918367347,
"grad_norm": 3.395564317703247,
"learning_rate": 9.102040816326532e-06,
"loss": 0.0726,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 110,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.4897959183673469,
"grad_norm": 2.2739310264587402,
"learning_rate": 9.020408163265307e-06,
"loss": 0.0705,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 120,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.5306122448979592,
"grad_norm": 2.418794870376587,
"learning_rate": 8.938775510204082e-06,
"loss": 0.0595,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 130,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.5714285714285714,
"grad_norm": 2.2896311283111572,
"learning_rate": 8.857142857142858e-06,
"loss": 0.0498,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 140,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.6122448979591837,
"grad_norm": 1.7899913787841797,
"learning_rate": 8.775510204081633e-06,
"loss": 0.0629,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 150,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.6530612244897959,
"grad_norm": 1.9983731508255005,
"learning_rate": 8.69387755102041e-06,
"loss": 0.071,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 160,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.6938775510204082,
"grad_norm": 1.7236266136169434,
"learning_rate": 8.612244897959184e-06,
"loss": 0.0748,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 170,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.7346938775510204,
"grad_norm": 1.1180106401443481,
"learning_rate": 8.530612244897961e-06,
"loss": 0.0599,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 180,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.7755102040816326,
"grad_norm": 2.3057782649993896,
"learning_rate": 8.448979591836736e-06,
"loss": 0.0588,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 190,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.8163265306122449,
"grad_norm": 0.8334403038024902,
"learning_rate": 8.36734693877551e-06,
"loss": 0.0622,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 200,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.8571428571428571,
"grad_norm": 1.0548275709152222,
"learning_rate": 8.285714285714287e-06,
"loss": 0.0623,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 210,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.8979591836734694,
"grad_norm": 1.3381606340408325,
"learning_rate": 8.204081632653062e-06,
"loss": 0.0605,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 220,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.9387755102040817,
"grad_norm": 0.809412956237793,
"learning_rate": 8.122448979591837e-06,
"loss": 0.0566,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 230,
"total_memory_available (GB)": 94.62
},
{
"epoch": 0.9795918367346939,
"grad_norm": 0.7182928323745728,
"learning_rate": 8.040816326530613e-06,
"loss": 0.0496,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 240,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.0204081632653061,
"grad_norm": 1.1081018447875977,
"learning_rate": 7.959183673469388e-06,
"loss": 0.0601,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 250,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.0612244897959184,
"grad_norm": 3.9478495121002197,
"learning_rate": 7.877551020408164e-06,
"loss": 0.0541,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 260,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.1020408163265305,
"grad_norm": 0.8079606294631958,
"learning_rate": 7.79591836734694e-06,
"loss": 0.063,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 270,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.1428571428571428,
"grad_norm": 1.4108704328536987,
"learning_rate": 7.714285714285716e-06,
"loss": 0.0579,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 280,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.183673469387755,
"grad_norm": 1.544438123703003,
"learning_rate": 7.63265306122449e-06,
"loss": 0.0518,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 290,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.2244897959183674,
"grad_norm": 1.0270023345947266,
"learning_rate": 7.551020408163265e-06,
"loss": 0.0658,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 300,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.2653061224489797,
"grad_norm": 1.0347421169281006,
"learning_rate": 7.469387755102041e-06,
"loss": 0.0667,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 310,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.306122448979592,
"grad_norm": 1.5818060636520386,
"learning_rate": 7.387755102040817e-06,
"loss": 0.0502,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 320,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.346938775510204,
"grad_norm": 1.2030609846115112,
"learning_rate": 7.306122448979592e-06,
"loss": 0.0616,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 330,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.3877551020408163,
"grad_norm": 1.2504222393035889,
"learning_rate": 7.224489795918368e-06,
"loss": 0.0543,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 340,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.4285714285714286,
"grad_norm": 0.8420921564102173,
"learning_rate": 7.1428571428571436e-06,
"loss": 0.0488,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 350,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.469387755102041,
"grad_norm": 1.517096757888794,
"learning_rate": 7.061224489795919e-06,
"loss": 0.0467,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 360,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.510204081632653,
"grad_norm": 1.4490768909454346,
"learning_rate": 6.979591836734695e-06,
"loss": 0.0585,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 370,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.5510204081632653,
"grad_norm": 0.5360353589057922,
"learning_rate": 6.8979591836734705e-06,
"loss": 0.046,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 380,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.5918367346938775,
"grad_norm": 0.8193866610527039,
"learning_rate": 6.816326530612245e-06,
"loss": 0.0657,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 390,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.6326530612244898,
"grad_norm": 0.5883302092552185,
"learning_rate": 6.734693877551021e-06,
"loss": 0.0609,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 400,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.6734693877551021,
"grad_norm": 0.6720415949821472,
"learning_rate": 6.653061224489797e-06,
"loss": 0.0603,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 410,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.7142857142857144,
"grad_norm": 1.368994951248169,
"learning_rate": 6.571428571428572e-06,
"loss": 0.0528,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 420,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.7551020408163265,
"grad_norm": 0.25535887479782104,
"learning_rate": 6.489795918367348e-06,
"loss": 0.0674,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 430,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.7959183673469388,
"grad_norm": 0.8839388489723206,
"learning_rate": 6.408163265306124e-06,
"loss": 0.0612,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 440,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.836734693877551,
"grad_norm": 0.4119959771633148,
"learning_rate": 6.326530612244899e-06,
"loss": 0.0435,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 450,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.8775510204081631,
"grad_norm": 0.45877301692962646,
"learning_rate": 6.244897959183675e-06,
"loss": 0.0543,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 460,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.9183673469387754,
"grad_norm": 0.9594807624816895,
"learning_rate": 6.163265306122449e-06,
"loss": 0.0433,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 470,
"total_memory_available (GB)": 94.62
},
{
"epoch": 1.9591836734693877,
"grad_norm": 0.168818861246109,
"learning_rate": 6.0816326530612245e-06,
"loss": 0.0497,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 480,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.0,
"grad_norm": 1.1468336582183838,
"learning_rate": 6e-06,
"loss": 0.0651,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 490,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.0408163265306123,
"grad_norm": 0.6001573204994202,
"learning_rate": 5.918367346938776e-06,
"loss": 0.0591,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 500,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.0816326530612246,
"grad_norm": 0.8006247878074646,
"learning_rate": 5.8367346938775515e-06,
"loss": 0.0489,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 510,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.122448979591837,
"grad_norm": 0.6839123964309692,
"learning_rate": 5.755102040816327e-06,
"loss": 0.0456,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 520,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.163265306122449,
"grad_norm": 1.020135521888733,
"learning_rate": 5.673469387755103e-06,
"loss": 0.0627,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 530,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.204081632653061,
"grad_norm": 0.7156575322151184,
"learning_rate": 5.591836734693878e-06,
"loss": 0.0584,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 540,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.2448979591836733,
"grad_norm": 1.195730447769165,
"learning_rate": 5.510204081632653e-06,
"loss": 0.0628,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 550,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.2857142857142856,
"grad_norm": 0.8824738264083862,
"learning_rate": 5.428571428571429e-06,
"loss": 0.0572,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 560,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.326530612244898,
"grad_norm": 1.3354676961898804,
"learning_rate": 5.3469387755102045e-06,
"loss": 0.0585,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 570,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.36734693877551,
"grad_norm": 1.9097795486450195,
"learning_rate": 5.26530612244898e-06,
"loss": 0.0668,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 580,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.4081632653061225,
"grad_norm": 0.8386860489845276,
"learning_rate": 5.183673469387756e-06,
"loss": 0.0542,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 590,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.4489795918367347,
"grad_norm": 1.2374165058135986,
"learning_rate": 5.1020408163265315e-06,
"loss": 0.0436,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 600,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.489795918367347,
"grad_norm": 0.3719439208507538,
"learning_rate": 5.020408163265307e-06,
"loss": 0.0408,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 610,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.5306122448979593,
"grad_norm": 0.905327558517456,
"learning_rate": 4.938775510204082e-06,
"loss": 0.0429,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 620,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.571428571428571,
"grad_norm": 0.628597617149353,
"learning_rate": 4.857142857142858e-06,
"loss": 0.0812,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 630,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.612244897959184,
"grad_norm": 1.3098090887069702,
"learning_rate": 4.775510204081633e-06,
"loss": 0.0495,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 640,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.6530612244897958,
"grad_norm": 0.5635781288146973,
"learning_rate": 4.693877551020409e-06,
"loss": 0.0466,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 650,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.693877551020408,
"grad_norm": 0.6197735071182251,
"learning_rate": 4.612244897959184e-06,
"loss": 0.0474,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 660,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.7346938775510203,
"grad_norm": 0.6390748620033264,
"learning_rate": 4.530612244897959e-06,
"loss": 0.0863,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 670,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.7755102040816326,
"grad_norm": 1.6307971477508545,
"learning_rate": 4.448979591836735e-06,
"loss": 0.0489,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 680,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.816326530612245,
"grad_norm": 0.3477366864681244,
"learning_rate": 4.367346938775511e-06,
"loss": 0.0448,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 690,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.857142857142857,
"grad_norm": 0.41136085987091064,
"learning_rate": 4.2857142857142855e-06,
"loss": 0.0431,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 700,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.8979591836734695,
"grad_norm": 1.1029525995254517,
"learning_rate": 4.204081632653061e-06,
"loss": 0.0551,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 710,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.938775510204082,
"grad_norm": 0.8994241952896118,
"learning_rate": 4.122448979591837e-06,
"loss": 0.0581,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 720,
"total_memory_available (GB)": 94.62
},
{
"epoch": 2.979591836734694,
"grad_norm": 0.1889757364988327,
"learning_rate": 4.040816326530612e-06,
"loss": 0.034,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 730,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.020408163265306,
"grad_norm": 1.7815334796905518,
"learning_rate": 3.959183673469388e-06,
"loss": 0.0536,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 740,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.061224489795918,
"grad_norm": 0.4372510612010956,
"learning_rate": 3.877551020408164e-06,
"loss": 0.0617,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 750,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.1020408163265305,
"grad_norm": 5.120749473571777,
"learning_rate": 3.795918367346939e-06,
"loss": 0.0518,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 760,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.142857142857143,
"grad_norm": 6.453648090362549,
"learning_rate": 3.7142857142857146e-06,
"loss": 0.069,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 770,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.183673469387755,
"grad_norm": 0.6512885093688965,
"learning_rate": 3.6326530612244903e-06,
"loss": 0.0649,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 780,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.2244897959183674,
"grad_norm": 0.3266737759113312,
"learning_rate": 3.5510204081632655e-06,
"loss": 0.0523,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 790,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.2653061224489797,
"grad_norm": 0.4506176710128784,
"learning_rate": 3.469387755102041e-06,
"loss": 0.0432,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 800,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.306122448979592,
"grad_norm": 0.8929914236068726,
"learning_rate": 3.3877551020408168e-06,
"loss": 0.0554,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 810,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.3469387755102042,
"grad_norm": 0.7046924233436584,
"learning_rate": 3.3061224489795924e-06,
"loss": 0.0453,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 820,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.387755102040816,
"grad_norm": 0.29230576753616333,
"learning_rate": 3.2244897959183672e-06,
"loss": 0.0491,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 830,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.4285714285714284,
"grad_norm": 0.4533096253871918,
"learning_rate": 3.142857142857143e-06,
"loss": 0.0529,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 840,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.4693877551020407,
"grad_norm": 0.5383632183074951,
"learning_rate": 3.0612244897959185e-06,
"loss": 0.0823,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 850,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.510204081632653,
"grad_norm": 2.8597779273986816,
"learning_rate": 2.979591836734694e-06,
"loss": 0.0456,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 860,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.5510204081632653,
"grad_norm": 0.26686975359916687,
"learning_rate": 2.8979591836734694e-06,
"loss": 0.064,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 870,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.5918367346938775,
"grad_norm": 0.7789614796638489,
"learning_rate": 2.816326530612245e-06,
"loss": 0.0441,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 880,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.63265306122449,
"grad_norm": 0.21532948315143585,
"learning_rate": 2.7346938775510207e-06,
"loss": 0.0291,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 890,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.673469387755102,
"grad_norm": 0.741765558719635,
"learning_rate": 2.6530612244897964e-06,
"loss": 0.0512,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 900,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.7142857142857144,
"grad_norm": 0.6416855454444885,
"learning_rate": 2.571428571428571e-06,
"loss": 0.0606,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 910,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.7551020408163263,
"grad_norm": 0.14841973781585693,
"learning_rate": 2.489795918367347e-06,
"loss": 0.0542,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 920,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.795918367346939,
"grad_norm": 0.4417996108531952,
"learning_rate": 2.4081632653061225e-06,
"loss": 0.0498,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 930,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.836734693877551,
"grad_norm": 0.9759775400161743,
"learning_rate": 2.326530612244898e-06,
"loss": 0.0491,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 940,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.877551020408163,
"grad_norm": 1.020371913909912,
"learning_rate": 2.244897959183674e-06,
"loss": 0.0597,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 950,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.9183673469387754,
"grad_norm": 0.3064863085746765,
"learning_rate": 2.1632653061224495e-06,
"loss": 0.0499,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 960,
"total_memory_available (GB)": 94.62
},
{
"epoch": 3.9591836734693877,
"grad_norm": 0.7580925226211548,
"learning_rate": 2.0816326530612247e-06,
"loss": 0.0742,
"max_memory_allocated (GB)": 57.18,
"memory_allocated (GB)": 50.57,
"step": 970,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.0,
"grad_norm": 0.6833075881004333,
"learning_rate": 2.0000000000000003e-06,
"loss": 0.0708,
"max_memory_allocated (GB)": 63.75,
"memory_allocated (GB)": 50.57,
"step": 980,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.040816326530612,
"grad_norm": 0.5641142725944519,
"learning_rate": 1.9183673469387756e-06,
"loss": 0.0481,
"max_memory_allocated (GB)": 63.75,
"memory_allocated (GB)": 50.57,
"step": 990,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.081632653061225,
"grad_norm": 0.8568029403686523,
"learning_rate": 1.8367346938775512e-06,
"loss": 0.0626,
"max_memory_allocated (GB)": 63.75,
"memory_allocated (GB)": 50.57,
"step": 1000,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.122448979591836,
"grad_norm": 0.5912718772888184,
"learning_rate": 1.7551020408163267e-06,
"loss": 0.0628,
"max_memory_allocated (GB)": 63.75,
"memory_allocated (GB)": 50.57,
"step": 1010,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.163265306122449,
"grad_norm": 0.3173392713069916,
"learning_rate": 1.6734693877551023e-06,
"loss": 0.0402,
"max_memory_allocated (GB)": 63.75,
"memory_allocated (GB)": 50.57,
"step": 1020,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.204081632653061,
"grad_norm": 0.8902315497398376,
"learning_rate": 1.5918367346938775e-06,
"loss": 0.0536,
"max_memory_allocated (GB)": 63.75,
"memory_allocated (GB)": 50.57,
"step": 1030,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.244897959183674,
"grad_norm": 0.5009722113609314,
"learning_rate": 1.5102040816326532e-06,
"loss": 0.0399,
"max_memory_allocated (GB)": 63.75,
"memory_allocated (GB)": 50.57,
"step": 1040,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.285714285714286,
"grad_norm": 1.8656221628189087,
"learning_rate": 1.4285714285714286e-06,
"loss": 0.0499,
"max_memory_allocated (GB)": 63.75,
"memory_allocated (GB)": 50.57,
"step": 1050,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.326530612244898,
"grad_norm": 0.4257819950580597,
"learning_rate": 1.3469387755102043e-06,
"loss": 0.0459,
"max_memory_allocated (GB)": 63.75,
"memory_allocated (GB)": 50.57,
"step": 1060,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.36734693877551,
"grad_norm": 0.5823583006858826,
"learning_rate": 1.2653061224489795e-06,
"loss": 0.0488,
"max_memory_allocated (GB)": 63.75,
"memory_allocated (GB)": 50.57,
"step": 1070,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.408163265306122,
"grad_norm": 0.40693071484565735,
"learning_rate": 1.1836734693877552e-06,
"loss": 0.0525,
"max_memory_allocated (GB)": 63.75,
"memory_allocated (GB)": 50.57,
"step": 1080,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.448979591836735,
"grad_norm": 1.8890392780303955,
"learning_rate": 1.1020408163265308e-06,
"loss": 0.0512,
"max_memory_allocated (GB)": 63.75,
"memory_allocated (GB)": 50.57,
"step": 1090,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.489795918367347,
"grad_norm": 0.3593562841415405,
"learning_rate": 1.0204081632653063e-06,
"loss": 0.0364,
"max_memory_allocated (GB)": 63.75,
"memory_allocated (GB)": 50.57,
"step": 1100,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.530612244897959,
"grad_norm": 0.1553877741098404,
"learning_rate": 9.387755102040817e-07,
"loss": 0.0465,
"max_memory_allocated (GB)": 63.75,
"memory_allocated (GB)": 50.57,
"step": 1110,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.571428571428571,
"grad_norm": 0.6775248050689697,
"learning_rate": 8.571428571428572e-07,
"loss": 0.0406,
"max_memory_allocated (GB)": 63.75,
"memory_allocated (GB)": 50.57,
"step": 1120,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.612244897959184,
"grad_norm": 0.5735678672790527,
"learning_rate": 7.755102040816327e-07,
"loss": 0.0539,
"max_memory_allocated (GB)": 63.75,
"memory_allocated (GB)": 50.57,
"step": 1130,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.653061224489796,
"grad_norm": 0.7891528606414795,
"learning_rate": 6.938775510204082e-07,
"loss": 0.0732,
"max_memory_allocated (GB)": 63.75,
"memory_allocated (GB)": 50.57,
"step": 1140,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.6938775510204085,
"grad_norm": 0.7845800518989563,
"learning_rate": 6.122448979591837e-07,
"loss": 0.0515,
"max_memory_allocated (GB)": 63.75,
"memory_allocated (GB)": 50.57,
"step": 1150,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.73469387755102,
"grad_norm": 1.0361818075180054,
"learning_rate": 5.306122448979592e-07,
"loss": 0.0608,
"max_memory_allocated (GB)": 63.75,
"memory_allocated (GB)": 50.57,
"step": 1160,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.775510204081632,
"grad_norm": 0.42603600025177,
"learning_rate": 4.489795918367347e-07,
"loss": 0.0549,
"max_memory_allocated (GB)": 63.75,
"memory_allocated (GB)": 50.57,
"step": 1170,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.816326530612245,
"grad_norm": 0.6727630496025085,
"learning_rate": 3.6734693877551025e-07,
"loss": 0.0441,
"max_memory_allocated (GB)": 63.75,
"memory_allocated (GB)": 50.57,
"step": 1180,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.857142857142857,
"grad_norm": 0.8499141335487366,
"learning_rate": 2.8571428571428575e-07,
"loss": 0.0544,
"max_memory_allocated (GB)": 63.75,
"memory_allocated (GB)": 50.57,
"step": 1190,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.8979591836734695,
"grad_norm": 0.7604736685752869,
"learning_rate": 2.0408163265306121e-07,
"loss": 0.0728,
"max_memory_allocated (GB)": 63.75,
"memory_allocated (GB)": 50.57,
"step": 1200,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.938775510204081,
"grad_norm": 1.0298157930374146,
"learning_rate": 1.2244897959183673e-07,
"loss": 0.0654,
"max_memory_allocated (GB)": 63.75,
"memory_allocated (GB)": 50.57,
"step": 1210,
"total_memory_available (GB)": 94.62
},
{
"epoch": 4.979591836734694,
"grad_norm": 1.1890877485275269,
"learning_rate": 4.0816326530612253e-08,
"loss": 0.0413,
"max_memory_allocated (GB)": 63.75,
"memory_allocated (GB)": 50.57,
"step": 1220,
"total_memory_available (GB)": 94.62
},
{
"epoch": 5.0,
"max_memory_allocated (GB)": 63.75,
"memory_allocated (GB)": 50.57,
"step": 1225,
"total_flos": 3.0598946525952e+16,
"total_memory_available (GB)": 94.62,
"train_loss": 0.06080986156755564,
"train_runtime": 1168.6251,
"train_samples_per_second": 48.37,
"train_steps_per_second": 1.21
}
],
"logging_steps": 10,
"max_steps": 1225,
"num_input_tokens_seen": 0,
"num_train_epochs": 5,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.0598946525952e+16,
"train_batch_size": 40,
"trial_name": null,
"trial_params": null
}