|
{ |
|
"best_metric": 1.6187845468521118, |
|
"best_model_checkpoint": "./Sustainability_model/checkpoint-2000", |
|
"epoch": 1.220703125, |
|
"eval_steps": 100, |
|
"global_step": 2500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01220703125, |
|
"grad_norm": 3.0088555812835693, |
|
"learning_rate": 2e-05, |
|
"loss": 2.1582, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0244140625, |
|
"grad_norm": 5.197660446166992, |
|
"learning_rate": 2e-05, |
|
"loss": 2.0856, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03662109375, |
|
"grad_norm": 3.234564781188965, |
|
"learning_rate": 2e-05, |
|
"loss": 1.9269, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.048828125, |
|
"grad_norm": 7.08390474319458, |
|
"learning_rate": 2e-05, |
|
"loss": 1.888, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.048828125, |
|
"eval_loss": 1.8261231184005737, |
|
"eval_runtime": 590.9102, |
|
"eval_samples_per_second": 3.468, |
|
"eval_steps_per_second": 0.435, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06103515625, |
|
"grad_norm": 3.1646361351013184, |
|
"learning_rate": 2e-05, |
|
"loss": 1.8649, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.0732421875, |
|
"grad_norm": 6.104555130004883, |
|
"learning_rate": 2e-05, |
|
"loss": 1.742, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.08544921875, |
|
"grad_norm": 2.9724113941192627, |
|
"learning_rate": 2e-05, |
|
"loss": 1.7567, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.09765625, |
|
"grad_norm": 6.2468791007995605, |
|
"learning_rate": 2e-05, |
|
"loss": 1.7452, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.09765625, |
|
"eval_loss": 1.7315690517425537, |
|
"eval_runtime": 590.974, |
|
"eval_samples_per_second": 3.467, |
|
"eval_steps_per_second": 0.435, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.10986328125, |
|
"grad_norm": 2.97963285446167, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6694, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.1220703125, |
|
"grad_norm": 4.771264553070068, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6833, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.13427734375, |
|
"grad_norm": 2.825491428375244, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6958, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.146484375, |
|
"grad_norm": 4.647068977355957, |
|
"learning_rate": 2e-05, |
|
"loss": 1.7428, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.146484375, |
|
"eval_loss": 1.6999598741531372, |
|
"eval_runtime": 590.2857, |
|
"eval_samples_per_second": 3.471, |
|
"eval_steps_per_second": 0.435, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.15869140625, |
|
"grad_norm": 3.1953535079956055, |
|
"learning_rate": 2e-05, |
|
"loss": 1.7458, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.1708984375, |
|
"grad_norm": 5.5873799324035645, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6244, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.18310546875, |
|
"grad_norm": 2.5425360202789307, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6862, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.1953125, |
|
"grad_norm": 4.082971572875977, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6836, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.1953125, |
|
"eval_loss": 1.6864606142044067, |
|
"eval_runtime": 589.1989, |
|
"eval_samples_per_second": 3.478, |
|
"eval_steps_per_second": 0.436, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.20751953125, |
|
"grad_norm": 2.6709253787994385, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6939, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.2197265625, |
|
"grad_norm": 5.410455703735352, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5974, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.23193359375, |
|
"grad_norm": 2.8631389141082764, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6609, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.244140625, |
|
"grad_norm": 3.2581229209899902, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6251, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.244140625, |
|
"eval_loss": 1.67488431930542, |
|
"eval_runtime": 589.2638, |
|
"eval_samples_per_second": 3.477, |
|
"eval_steps_per_second": 0.436, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.25634765625, |
|
"grad_norm": 2.8811697959899902, |
|
"learning_rate": 2e-05, |
|
"loss": 1.7135, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.2685546875, |
|
"grad_norm": 5.96162748336792, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6709, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.28076171875, |
|
"grad_norm": 2.4651806354522705, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6504, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.29296875, |
|
"grad_norm": 4.032615661621094, |
|
"learning_rate": 2e-05, |
|
"loss": 1.7128, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.29296875, |
|
"eval_loss": 1.668798565864563, |
|
"eval_runtime": 589.1105, |
|
"eval_samples_per_second": 3.478, |
|
"eval_steps_per_second": 0.436, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.30517578125, |
|
"grad_norm": 2.694554328918457, |
|
"learning_rate": 2e-05, |
|
"loss": 1.7093, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.3173828125, |
|
"grad_norm": 4.213258743286133, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6899, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.32958984375, |
|
"grad_norm": 2.69679594039917, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6451, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.341796875, |
|
"grad_norm": 3.6988604068756104, |
|
"learning_rate": 2e-05, |
|
"loss": 1.631, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.341796875, |
|
"eval_loss": 1.662984013557434, |
|
"eval_runtime": 588.5535, |
|
"eval_samples_per_second": 3.481, |
|
"eval_steps_per_second": 0.437, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.35400390625, |
|
"grad_norm": 2.6815237998962402, |
|
"learning_rate": 2e-05, |
|
"loss": 1.688, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.3662109375, |
|
"grad_norm": 5.819088459014893, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6649, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.37841796875, |
|
"grad_norm": 2.524092674255371, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6305, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.390625, |
|
"grad_norm": 4.0569963455200195, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6493, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.390625, |
|
"eval_loss": 1.6568603515625, |
|
"eval_runtime": 588.2081, |
|
"eval_samples_per_second": 3.483, |
|
"eval_steps_per_second": 0.437, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.40283203125, |
|
"grad_norm": 2.565763473510742, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6983, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.4150390625, |
|
"grad_norm": 6.5800676345825195, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6565, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.42724609375, |
|
"grad_norm": 2.1741669178009033, |
|
"learning_rate": 2e-05, |
|
"loss": 1.7585, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.439453125, |
|
"grad_norm": 3.838252305984497, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6141, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.439453125, |
|
"eval_loss": 1.6529587507247925, |
|
"eval_runtime": 588.0827, |
|
"eval_samples_per_second": 3.484, |
|
"eval_steps_per_second": 0.437, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.45166015625, |
|
"grad_norm": 4.486364841461182, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6489, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.4638671875, |
|
"grad_norm": 3.693453311920166, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6026, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.47607421875, |
|
"grad_norm": 2.4286513328552246, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5639, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.48828125, |
|
"grad_norm": 3.9820656776428223, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6621, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.48828125, |
|
"eval_loss": 1.6506658792495728, |
|
"eval_runtime": 588.1468, |
|
"eval_samples_per_second": 3.484, |
|
"eval_steps_per_second": 0.437, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.50048828125, |
|
"grad_norm": 2.915191411972046, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6281, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.5126953125, |
|
"grad_norm": 4.406491756439209, |
|
"learning_rate": 2e-05, |
|
"loss": 1.7108, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.52490234375, |
|
"grad_norm": 2.6505398750305176, |
|
"learning_rate": 2e-05, |
|
"loss": 1.7151, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.537109375, |
|
"grad_norm": 3.872833728790283, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5925, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.537109375, |
|
"eval_loss": 1.6442919969558716, |
|
"eval_runtime": 588.2624, |
|
"eval_samples_per_second": 3.483, |
|
"eval_steps_per_second": 0.437, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.54931640625, |
|
"grad_norm": 2.210282802581787, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5845, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.5615234375, |
|
"grad_norm": 3.7344298362731934, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5994, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.57373046875, |
|
"grad_norm": 2.3247945308685303, |
|
"learning_rate": 2e-05, |
|
"loss": 1.622, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.5859375, |
|
"grad_norm": 4.974765300750732, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6571, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.5859375, |
|
"eval_loss": 1.6453276872634888, |
|
"eval_runtime": 588.5916, |
|
"eval_samples_per_second": 3.481, |
|
"eval_steps_per_second": 0.437, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.59814453125, |
|
"grad_norm": 2.6029038429260254, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6854, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.6103515625, |
|
"grad_norm": 3.8252599239349365, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6875, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.62255859375, |
|
"grad_norm": 2.5335938930511475, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5917, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.634765625, |
|
"grad_norm": 3.6627395153045654, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6078, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.634765625, |
|
"eval_loss": 1.638580322265625, |
|
"eval_runtime": 588.7972, |
|
"eval_samples_per_second": 3.48, |
|
"eval_steps_per_second": 0.436, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.64697265625, |
|
"grad_norm": 2.5015482902526855, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6793, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.6591796875, |
|
"grad_norm": 3.70072340965271, |
|
"learning_rate": 2e-05, |
|
"loss": 1.661, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.67138671875, |
|
"grad_norm": 2.6039609909057617, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6349, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.68359375, |
|
"grad_norm": 3.3291618824005127, |
|
"learning_rate": 2e-05, |
|
"loss": 1.616, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.68359375, |
|
"eval_loss": 1.6347644329071045, |
|
"eval_runtime": 588.5837, |
|
"eval_samples_per_second": 3.481, |
|
"eval_steps_per_second": 0.437, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.69580078125, |
|
"grad_norm": 2.6853315830230713, |
|
"learning_rate": 2e-05, |
|
"loss": 1.7087, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.7080078125, |
|
"grad_norm": 3.296851396560669, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6676, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.72021484375, |
|
"grad_norm": 2.3841185569763184, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6212, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.732421875, |
|
"grad_norm": 3.612088441848755, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6473, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.732421875, |
|
"eval_loss": 1.6339186429977417, |
|
"eval_runtime": 588.3073, |
|
"eval_samples_per_second": 3.483, |
|
"eval_steps_per_second": 0.437, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.74462890625, |
|
"grad_norm": 2.6555330753326416, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6643, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.7568359375, |
|
"grad_norm": 4.533504486083984, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6236, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.76904296875, |
|
"grad_norm": 2.2276220321655273, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6783, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.78125, |
|
"grad_norm": 3.533113956451416, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6123, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.78125, |
|
"eval_loss": 1.628023386001587, |
|
"eval_runtime": 588.6386, |
|
"eval_samples_per_second": 3.481, |
|
"eval_steps_per_second": 0.437, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.79345703125, |
|
"grad_norm": 2.2332117557525635, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6795, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.8056640625, |
|
"grad_norm": 4.059207916259766, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5915, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.81787109375, |
|
"grad_norm": 2.46692156791687, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6456, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.830078125, |
|
"grad_norm": 3.602611780166626, |
|
"learning_rate": 2e-05, |
|
"loss": 1.564, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.830078125, |
|
"eval_loss": 1.6274890899658203, |
|
"eval_runtime": 588.2617, |
|
"eval_samples_per_second": 3.483, |
|
"eval_steps_per_second": 0.437, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.84228515625, |
|
"grad_norm": 2.20896315574646, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6469, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.8544921875, |
|
"grad_norm": 4.329638481140137, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5571, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.86669921875, |
|
"grad_norm": 1.9945570230484009, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6461, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.87890625, |
|
"grad_norm": 3.428687334060669, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6564, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.87890625, |
|
"eval_loss": 1.6232744455337524, |
|
"eval_runtime": 588.0784, |
|
"eval_samples_per_second": 3.484, |
|
"eval_steps_per_second": 0.437, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.89111328125, |
|
"grad_norm": 2.5266592502593994, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5607, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.9033203125, |
|
"grad_norm": 3.4067883491516113, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6394, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.91552734375, |
|
"grad_norm": 2.0028152465820312, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6908, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.927734375, |
|
"grad_norm": 2.8983733654022217, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5646, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.927734375, |
|
"eval_loss": 1.6202832460403442, |
|
"eval_runtime": 587.8115, |
|
"eval_samples_per_second": 3.486, |
|
"eval_steps_per_second": 0.437, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.93994140625, |
|
"grad_norm": 2.6408419609069824, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5905, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.9521484375, |
|
"grad_norm": 3.899275302886963, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6138, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.96435546875, |
|
"grad_norm": 2.338137149810791, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6963, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.9765625, |
|
"grad_norm": 3.6352951526641846, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5849, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.9765625, |
|
"eval_loss": 1.6187845468521118, |
|
"eval_runtime": 587.8791, |
|
"eval_samples_per_second": 3.485, |
|
"eval_steps_per_second": 0.437, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.98876953125, |
|
"grad_norm": 2.4254846572875977, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6391, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 1.0009765625, |
|
"grad_norm": 2.079317569732666, |
|
"learning_rate": 2e-05, |
|
"loss": 1.6238, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 1.01318359375, |
|
"grad_norm": 2.1677002906799316, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5543, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 1.025390625, |
|
"grad_norm": 2.4266505241394043, |
|
"learning_rate": 2e-05, |
|
"loss": 1.4812, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.025390625, |
|
"eval_loss": 1.6256210803985596, |
|
"eval_runtime": 585.954, |
|
"eval_samples_per_second": 3.497, |
|
"eval_steps_per_second": 0.439, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.03759765625, |
|
"grad_norm": 2.4697976112365723, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5147, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 1.0498046875, |
|
"grad_norm": 2.3185527324676514, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5198, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 1.06201171875, |
|
"grad_norm": 2.7304463386535645, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5237, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 1.07421875, |
|
"grad_norm": 2.616072177886963, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5598, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.07421875, |
|
"eval_loss": 1.623382568359375, |
|
"eval_runtime": 586.1381, |
|
"eval_samples_per_second": 3.496, |
|
"eval_steps_per_second": 0.438, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.08642578125, |
|
"grad_norm": 2.7308809757232666, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5691, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 1.0986328125, |
|
"grad_norm": 2.6916451454162598, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5102, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 1.11083984375, |
|
"grad_norm": 2.960580348968506, |
|
"learning_rate": 2e-05, |
|
"loss": 1.539, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 1.123046875, |
|
"grad_norm": 2.5936009883880615, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5657, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.123046875, |
|
"eval_loss": 1.6226788759231567, |
|
"eval_runtime": 586.4284, |
|
"eval_samples_per_second": 3.494, |
|
"eval_steps_per_second": 0.438, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.13525390625, |
|
"grad_norm": 2.8930952548980713, |
|
"learning_rate": 2e-05, |
|
"loss": 1.4579, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 1.1474609375, |
|
"grad_norm": 2.8736538887023926, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5127, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 1.15966796875, |
|
"grad_norm": 4.384296894073486, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5988, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 1.171875, |
|
"grad_norm": 2.728992223739624, |
|
"learning_rate": 2e-05, |
|
"loss": 1.51, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.171875, |
|
"eval_loss": 1.6226541996002197, |
|
"eval_runtime": 586.345, |
|
"eval_samples_per_second": 3.495, |
|
"eval_steps_per_second": 0.438, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.18408203125, |
|
"grad_norm": 2.651820421218872, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5226, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 1.1962890625, |
|
"grad_norm": 2.717193126678467, |
|
"learning_rate": 2e-05, |
|
"loss": 1.4966, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 1.20849609375, |
|
"grad_norm": 2.9759628772735596, |
|
"learning_rate": 2e-05, |
|
"loss": 1.526, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 1.220703125, |
|
"grad_norm": 2.8832080364227295, |
|
"learning_rate": 2e-05, |
|
"loss": 1.5452, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.220703125, |
|
"eval_loss": 1.6226392984390259, |
|
"eval_runtime": 586.3744, |
|
"eval_samples_per_second": 3.494, |
|
"eval_steps_per_second": 0.438, |
|
"step": 2500 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 4096, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 6, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.924062136972083e+16, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|