adapters-gemma-bf16-QLORA-super_glue-copa
/
trainer_state-gemma-bf16-QLORA-super_glue-copa-sequence_classification.json
{ | |
"best_metric": null, | |
"best_model_checkpoint": null, | |
"epoch": 10.0, | |
"eval_steps": 1, | |
"global_step": 50, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.2, | |
"grad_norm": 181.0, | |
"learning_rate": 2.5e-05, | |
"loss": 1.1894, | |
"step": 1 | |
}, | |
{ | |
"epoch": 0.2, | |
"eval_accuracy": 0.45, | |
"eval_loss": 1.370854377746582, | |
"eval_runtime": 0.9284, | |
"eval_samples_per_second": 107.713, | |
"eval_steps_per_second": 3.231, | |
"step": 1 | |
}, | |
{ | |
"epoch": 0.4, | |
"grad_norm": 214.0, | |
"learning_rate": 5e-05, | |
"loss": 1.3321, | |
"step": 2 | |
}, | |
{ | |
"epoch": 0.4, | |
"eval_accuracy": 0.6, | |
"eval_loss": 0.7611915469169617, | |
"eval_runtime": 1.0247, | |
"eval_samples_per_second": 97.591, | |
"eval_steps_per_second": 2.928, | |
"step": 2 | |
}, | |
{ | |
"epoch": 0.6, | |
"grad_norm": 99.0, | |
"learning_rate": 4.8958333333333335e-05, | |
"loss": 0.864, | |
"step": 3 | |
}, | |
{ | |
"epoch": 0.6, | |
"eval_accuracy": 0.63, | |
"eval_loss": 0.8467197418212891, | |
"eval_runtime": 1.0232, | |
"eval_samples_per_second": 97.729, | |
"eval_steps_per_second": 2.932, | |
"step": 3 | |
}, | |
{ | |
"epoch": 0.8, | |
"grad_norm": 207.0, | |
"learning_rate": 4.791666666666667e-05, | |
"loss": 0.9293, | |
"step": 4 | |
}, | |
{ | |
"epoch": 0.8, | |
"eval_accuracy": 0.91, | |
"eval_loss": 0.411680668592453, | |
"eval_runtime": 1.0225, | |
"eval_samples_per_second": 97.8, | |
"eval_steps_per_second": 2.934, | |
"step": 4 | |
}, | |
{ | |
"epoch": 1.0, | |
"grad_norm": 76.5, | |
"learning_rate": 4.6875e-05, | |
"loss": 0.3438, | |
"step": 5 | |
}, | |
{ | |
"epoch": 1.0, | |
"eval_accuracy": 0.92, | |
"eval_loss": 0.2434273660182953, | |
"eval_runtime": 0.9724, | |
"eval_samples_per_second": 102.835, | |
"eval_steps_per_second": 3.085, | |
"step": 5 | |
}, | |
{ | |
"epoch": 1.2, | |
"grad_norm": 12.3125, | |
"learning_rate": 4.5833333333333334e-05, | |
"loss": 0.377, | |
"step": 6 | |
}, | |
{ | |
"epoch": 1.2, | |
"eval_accuracy": 0.87, | |
"eval_loss": 0.3455248177051544, | |
"eval_runtime": 1.0215, | |
"eval_samples_per_second": 97.895, | |
"eval_steps_per_second": 2.937, | |
"step": 6 | |
}, | |
{ | |
"epoch": 1.4, | |
"grad_norm": 111.0, | |
"learning_rate": 4.4791666666666673e-05, | |
"loss": 0.6136, | |
"step": 7 | |
}, | |
{ | |
"epoch": 1.4, | |
"eval_accuracy": 0.9, | |
"eval_loss": 0.324820876121521, | |
"eval_runtime": 1.0222, | |
"eval_samples_per_second": 97.829, | |
"eval_steps_per_second": 2.935, | |
"step": 7 | |
}, | |
{ | |
"epoch": 1.6, | |
"grad_norm": 37.25, | |
"learning_rate": 4.375e-05, | |
"loss": 0.135, | |
"step": 8 | |
}, | |
{ | |
"epoch": 1.6, | |
"eval_accuracy": 0.92, | |
"eval_loss": 0.24112339317798615, | |
"eval_runtime": 0.9739, | |
"eval_samples_per_second": 102.682, | |
"eval_steps_per_second": 3.08, | |
"step": 8 | |
}, | |
{ | |
"epoch": 1.8, | |
"grad_norm": 8.3125, | |
"learning_rate": 4.270833333333333e-05, | |
"loss": 0.2028, | |
"step": 9 | |
}, | |
{ | |
"epoch": 1.8, | |
"eval_accuracy": 0.92, | |
"eval_loss": 0.2563403844833374, | |
"eval_runtime": 1.0204, | |
"eval_samples_per_second": 97.997, | |
"eval_steps_per_second": 2.94, | |
"step": 9 | |
}, | |
{ | |
"epoch": 2.0, | |
"grad_norm": 10.5, | |
"learning_rate": 4.166666666666667e-05, | |
"loss": 0.0607, | |
"step": 10 | |
}, | |
{ | |
"epoch": 2.0, | |
"eval_accuracy": 0.92, | |
"eval_loss": 0.2764730155467987, | |
"eval_runtime": 1.0225, | |
"eval_samples_per_second": 97.804, | |
"eval_steps_per_second": 2.934, | |
"step": 10 | |
}, | |
{ | |
"epoch": 2.2, | |
"grad_norm": 15.6875, | |
"learning_rate": 4.0625000000000005e-05, | |
"loss": 0.0602, | |
"step": 11 | |
}, | |
{ | |
"epoch": 2.2, | |
"eval_accuracy": 0.93, | |
"eval_loss": 0.26724421977996826, | |
"eval_runtime": 1.0214, | |
"eval_samples_per_second": 97.907, | |
"eval_steps_per_second": 2.937, | |
"step": 11 | |
}, | |
{ | |
"epoch": 2.4, | |
"grad_norm": 5.84375, | |
"learning_rate": 3.958333333333333e-05, | |
"loss": 0.0881, | |
"step": 12 | |
}, | |
{ | |
"epoch": 2.4, | |
"eval_accuracy": 0.93, | |
"eval_loss": 0.2725869417190552, | |
"eval_runtime": 1.0234, | |
"eval_samples_per_second": 97.712, | |
"eval_steps_per_second": 2.931, | |
"step": 12 | |
}, | |
{ | |
"epoch": 2.6, | |
"grad_norm": 9.0625, | |
"learning_rate": 3.854166666666667e-05, | |
"loss": 0.0385, | |
"step": 13 | |
}, | |
{ | |
"epoch": 2.6, | |
"eval_accuracy": 0.93, | |
"eval_loss": 0.2831783592700958, | |
"eval_runtime": 1.023, | |
"eval_samples_per_second": 97.753, | |
"eval_steps_per_second": 2.933, | |
"step": 13 | |
}, | |
{ | |
"epoch": 2.8, | |
"grad_norm": 2.203125, | |
"learning_rate": 3.7500000000000003e-05, | |
"loss": 0.0081, | |
"step": 14 | |
}, | |
{ | |
"epoch": 2.8, | |
"eval_accuracy": 0.93, | |
"eval_loss": 0.27984124422073364, | |
"eval_runtime": 1.0215, | |
"eval_samples_per_second": 97.894, | |
"eval_steps_per_second": 2.937, | |
"step": 14 | |
}, | |
{ | |
"epoch": 3.0, | |
"grad_norm": 5.46875, | |
"learning_rate": 3.6458333333333336e-05, | |
"loss": 0.0815, | |
"step": 15 | |
}, | |
{ | |
"epoch": 3.0, | |
"eval_accuracy": 0.93, | |
"eval_loss": 0.2797885239124298, | |
"eval_runtime": 1.0235, | |
"eval_samples_per_second": 97.705, | |
"eval_steps_per_second": 2.931, | |
"step": 15 | |
}, | |
{ | |
"epoch": 3.2, | |
"grad_norm": 0.625, | |
"learning_rate": 3.541666666666667e-05, | |
"loss": 0.0029, | |
"step": 16 | |
}, | |
{ | |
"epoch": 3.2, | |
"eval_accuracy": 0.93, | |
"eval_loss": 0.29290881752967834, | |
"eval_runtime": 1.0228, | |
"eval_samples_per_second": 97.77, | |
"eval_steps_per_second": 2.933, | |
"step": 16 | |
}, | |
{ | |
"epoch": 3.4, | |
"grad_norm": 0.5390625, | |
"learning_rate": 3.4375e-05, | |
"loss": 0.0027, | |
"step": 17 | |
}, | |
{ | |
"epoch": 3.4, | |
"eval_accuracy": 0.93, | |
"eval_loss": 0.3011457622051239, | |
"eval_runtime": 1.0225, | |
"eval_samples_per_second": 97.796, | |
"eval_steps_per_second": 2.934, | |
"step": 17 | |
}, | |
{ | |
"epoch": 3.6, | |
"grad_norm": 0.42578125, | |
"learning_rate": 3.3333333333333335e-05, | |
"loss": 0.0013, | |
"step": 18 | |
}, | |
{ | |
"epoch": 3.6, | |
"eval_accuracy": 0.93, | |
"eval_loss": 0.31652283668518066, | |
"eval_runtime": 1.023, | |
"eval_samples_per_second": 97.748, | |
"eval_steps_per_second": 2.932, | |
"step": 18 | |
}, | |
{ | |
"epoch": 3.8, | |
"grad_norm": 5.8125, | |
"learning_rate": 3.229166666666667e-05, | |
"loss": 0.0151, | |
"step": 19 | |
}, | |
{ | |
"epoch": 3.8, | |
"eval_accuracy": 0.95, | |
"eval_loss": 0.31321465969085693, | |
"eval_runtime": 1.022, | |
"eval_samples_per_second": 97.844, | |
"eval_steps_per_second": 2.935, | |
"step": 19 | |
}, | |
{ | |
"epoch": 4.0, | |
"grad_norm": 0.2216796875, | |
"learning_rate": 3.125e-05, | |
"loss": 0.0005, | |
"step": 20 | |
}, | |
{ | |
"epoch": 4.0, | |
"eval_accuracy": 0.95, | |
"eval_loss": 0.3194425702095032, | |
"eval_runtime": 1.0227, | |
"eval_samples_per_second": 97.78, | |
"eval_steps_per_second": 2.933, | |
"step": 20 | |
}, | |
{ | |
"epoch": 4.2, | |
"grad_norm": 0.52734375, | |
"learning_rate": 3.0208333333333334e-05, | |
"loss": 0.0008, | |
"step": 21 | |
}, | |
{ | |
"epoch": 4.2, | |
"eval_accuracy": 0.95, | |
"eval_loss": 0.33766308426856995, | |
"eval_runtime": 1.0213, | |
"eval_samples_per_second": 97.913, | |
"eval_steps_per_second": 2.937, | |
"step": 21 | |
}, | |
{ | |
"epoch": 4.4, | |
"grad_norm": 0.052734375, | |
"learning_rate": 2.916666666666667e-05, | |
"loss": 0.0001, | |
"step": 22 | |
}, | |
{ | |
"epoch": 4.4, | |
"eval_accuracy": 0.95, | |
"eval_loss": 0.34586212038993835, | |
"eval_runtime": 1.0224, | |
"eval_samples_per_second": 97.805, | |
"eval_steps_per_second": 2.934, | |
"step": 22 | |
}, | |
{ | |
"epoch": 4.6, | |
"grad_norm": 0.043212890625, | |
"learning_rate": 2.8125000000000003e-05, | |
"loss": 0.0001, | |
"step": 23 | |
}, | |
{ | |
"epoch": 4.6, | |
"eval_accuracy": 0.95, | |
"eval_loss": 0.3585689067840576, | |
"eval_runtime": 0.9713, | |
"eval_samples_per_second": 102.955, | |
"eval_steps_per_second": 3.089, | |
"step": 23 | |
}, | |
{ | |
"epoch": 4.8, | |
"grad_norm": 0.00946044921875, | |
"learning_rate": 2.7083333333333332e-05, | |
"loss": 0.0, | |
"step": 24 | |
}, | |
{ | |
"epoch": 4.8, | |
"eval_accuracy": 0.96, | |
"eval_loss": 0.3643890619277954, | |
"eval_runtime": 1.0221, | |
"eval_samples_per_second": 97.84, | |
"eval_steps_per_second": 2.935, | |
"step": 24 | |
}, | |
{ | |
"epoch": 5.0, | |
"grad_norm": 0.0556640625, | |
"learning_rate": 2.604166666666667e-05, | |
"loss": 0.0001, | |
"step": 25 | |
}, | |
{ | |
"epoch": 5.0, | |
"eval_accuracy": 0.96, | |
"eval_loss": 0.3764663636684418, | |
"eval_runtime": 1.0239, | |
"eval_samples_per_second": 97.666, | |
"eval_steps_per_second": 2.93, | |
"step": 25 | |
}, | |
{ | |
"epoch": 5.2, | |
"grad_norm": 0.0042724609375, | |
"learning_rate": 2.5e-05, | |
"loss": 0.0, | |
"step": 26 | |
}, | |
{ | |
"epoch": 5.2, | |
"eval_accuracy": 0.96, | |
"eval_loss": 0.3840982913970947, | |
"eval_runtime": 1.0218, | |
"eval_samples_per_second": 97.864, | |
"eval_steps_per_second": 2.936, | |
"step": 26 | |
}, | |
{ | |
"epoch": 5.4, | |
"grad_norm": 0.004913330078125, | |
"learning_rate": 2.3958333333333334e-05, | |
"loss": 0.0, | |
"step": 27 | |
}, | |
{ | |
"epoch": 5.4, | |
"eval_accuracy": 0.96, | |
"eval_loss": 0.38938531279563904, | |
"eval_runtime": 1.0225, | |
"eval_samples_per_second": 97.795, | |
"eval_steps_per_second": 2.934, | |
"step": 27 | |
}, | |
{ | |
"epoch": 5.6, | |
"grad_norm": 0.05419921875, | |
"learning_rate": 2.2916666666666667e-05, | |
"loss": 0.0001, | |
"step": 28 | |
}, | |
{ | |
"epoch": 5.6, | |
"eval_accuracy": 0.96, | |
"eval_loss": 0.39918196201324463, | |
"eval_runtime": 1.0221, | |
"eval_samples_per_second": 97.835, | |
"eval_steps_per_second": 2.935, | |
"step": 28 | |
}, | |
{ | |
"epoch": 5.8, | |
"grad_norm": 0.0015106201171875, | |
"learning_rate": 2.1875e-05, | |
"loss": 0.0, | |
"step": 29 | |
}, | |
{ | |
"epoch": 5.8, | |
"eval_accuracy": 0.96, | |
"eval_loss": 0.4027714431285858, | |
"eval_runtime": 0.9742, | |
"eval_samples_per_second": 102.648, | |
"eval_steps_per_second": 3.079, | |
"step": 29 | |
}, | |
{ | |
"epoch": 6.0, | |
"grad_norm": 0.00124359130859375, | |
"learning_rate": 2.0833333333333336e-05, | |
"loss": 0.0, | |
"step": 30 | |
}, | |
{ | |
"epoch": 6.0, | |
"eval_accuracy": 0.96, | |
"eval_loss": 0.4107368588447571, | |
"eval_runtime": 1.0223, | |
"eval_samples_per_second": 97.82, | |
"eval_steps_per_second": 2.935, | |
"step": 30 | |
}, | |
{ | |
"epoch": 6.2, | |
"grad_norm": 0.00616455078125, | |
"learning_rate": 1.9791666666666665e-05, | |
"loss": 0.0, | |
"step": 31 | |
}, | |
{ | |
"epoch": 6.2, | |
"eval_accuracy": 0.96, | |
"eval_loss": 0.417328417301178, | |
"eval_runtime": 1.0242, | |
"eval_samples_per_second": 97.634, | |
"eval_steps_per_second": 2.929, | |
"step": 31 | |
}, | |
{ | |
"epoch": 6.4, | |
"grad_norm": 0.0005950927734375, | |
"learning_rate": 1.8750000000000002e-05, | |
"loss": 0.0, | |
"step": 32 | |
}, | |
{ | |
"epoch": 6.4, | |
"eval_accuracy": 0.96, | |
"eval_loss": 0.41879531741142273, | |
"eval_runtime": 1.0235, | |
"eval_samples_per_second": 97.705, | |
"eval_steps_per_second": 2.931, | |
"step": 32 | |
}, | |
{ | |
"epoch": 6.6, | |
"grad_norm": 0.000667572021484375, | |
"learning_rate": 1.7708333333333335e-05, | |
"loss": 0.0, | |
"step": 33 | |
}, | |
{ | |
"epoch": 6.6, | |
"eval_accuracy": 0.96, | |
"eval_loss": 0.42711684107780457, | |
"eval_runtime": 1.0243, | |
"eval_samples_per_second": 97.627, | |
"eval_steps_per_second": 2.929, | |
"step": 33 | |
}, | |
{ | |
"epoch": 6.8, | |
"grad_norm": 0.0023651123046875, | |
"learning_rate": 1.6666666666666667e-05, | |
"loss": 0.0, | |
"step": 34 | |
}, | |
{ | |
"epoch": 6.8, | |
"eval_accuracy": 0.96, | |
"eval_loss": 0.42084625363349915, | |
"eval_runtime": 1.021, | |
"eval_samples_per_second": 97.941, | |
"eval_steps_per_second": 2.938, | |
"step": 34 | |
}, | |
{ | |
"epoch": 7.0, | |
"grad_norm": 0.000560760498046875, | |
"learning_rate": 1.5625e-05, | |
"loss": 0.0, | |
"step": 35 | |
}, | |
{ | |
"epoch": 7.0, | |
"eval_accuracy": 0.96, | |
"eval_loss": 0.42686185240745544, | |
"eval_runtime": 1.0218, | |
"eval_samples_per_second": 97.868, | |
"eval_steps_per_second": 2.936, | |
"step": 35 | |
}, | |
{ | |
"epoch": 7.2, | |
"grad_norm": 0.0002498626708984375, | |
"learning_rate": 1.4583333333333335e-05, | |
"loss": 0.0, | |
"step": 36 | |
}, | |
{ | |
"epoch": 7.2, | |
"eval_accuracy": 0.96, | |
"eval_loss": 0.4289308190345764, | |
"eval_runtime": 1.0218, | |
"eval_samples_per_second": 97.865, | |
"eval_steps_per_second": 2.936, | |
"step": 36 | |
}, | |
{ | |
"epoch": 7.4, | |
"grad_norm": 0.00040435791015625, | |
"learning_rate": 1.3541666666666666e-05, | |
"loss": 0.0, | |
"step": 37 | |
}, | |
{ | |
"epoch": 7.4, | |
"eval_accuracy": 0.96, | |
"eval_loss": 0.43295109272003174, | |
"eval_runtime": 1.0228, | |
"eval_samples_per_second": 97.775, | |
"eval_steps_per_second": 2.933, | |
"step": 37 | |
}, | |
{ | |
"epoch": 7.6, | |
"grad_norm": 0.00019741058349609375, | |
"learning_rate": 1.25e-05, | |
"loss": 0.0, | |
"step": 38 | |
}, | |
{ | |
"epoch": 7.6, | |
"eval_accuracy": 0.96, | |
"eval_loss": 0.43248245120048523, | |
"eval_runtime": 1.0208, | |
"eval_samples_per_second": 97.959, | |
"eval_steps_per_second": 2.939, | |
"step": 38 | |
}, | |
{ | |
"epoch": 7.8, | |
"grad_norm": 0.0015869140625, | |
"learning_rate": 1.1458333333333333e-05, | |
"loss": 0.0, | |
"step": 39 | |
}, | |
{ | |
"epoch": 7.8, | |
"eval_accuracy": 0.96, | |
"eval_loss": 0.4331529140472412, | |
"eval_runtime": 0.9726, | |
"eval_samples_per_second": 102.82, | |
"eval_steps_per_second": 3.085, | |
"step": 39 | |
}, | |
{ | |
"epoch": 8.0, | |
"grad_norm": 0.000469207763671875, | |
"learning_rate": 1.0416666666666668e-05, | |
"loss": 0.0, | |
"step": 40 | |
}, | |
{ | |
"epoch": 8.0, | |
"eval_accuracy": 0.96, | |
"eval_loss": 0.4309934973716736, | |
"eval_runtime": 1.0232, | |
"eval_samples_per_second": 97.73, | |
"eval_steps_per_second": 2.932, | |
"step": 40 | |
}, | |
{ | |
"epoch": 8.2, | |
"grad_norm": 0.00037384033203125, | |
"learning_rate": 9.375000000000001e-06, | |
"loss": 0.0, | |
"step": 41 | |
}, | |
{ | |
"epoch": 8.2, | |
"eval_accuracy": 0.96, | |
"eval_loss": 0.4338625967502594, | |
"eval_runtime": 1.0229, | |
"eval_samples_per_second": 97.763, | |
"eval_steps_per_second": 2.933, | |
"step": 41 | |
}, | |
{ | |
"epoch": 8.4, | |
"grad_norm": 0.00142669677734375, | |
"learning_rate": 8.333333333333334e-06, | |
"loss": 0.0, | |
"step": 42 | |
}, | |
{ | |
"epoch": 8.4, | |
"eval_accuracy": 0.96, | |
"eval_loss": 0.43012726306915283, | |
"eval_runtime": 1.058, | |
"eval_samples_per_second": 94.52, | |
"eval_steps_per_second": 2.836, | |
"step": 42 | |
}, | |
{ | |
"epoch": 8.6, | |
"grad_norm": 0.00075531005859375, | |
"learning_rate": 7.2916666666666674e-06, | |
"loss": 0.0, | |
"step": 43 | |
}, | |
{ | |
"epoch": 8.6, | |
"eval_accuracy": 0.96, | |
"eval_loss": 0.43235713243484497, | |
"eval_runtime": 1.0267, | |
"eval_samples_per_second": 97.401, | |
"eval_steps_per_second": 2.922, | |
"step": 43 | |
}, | |
{ | |
"epoch": 8.8, | |
"grad_norm": 0.0002593994140625, | |
"learning_rate": 6.25e-06, | |
"loss": 0.0, | |
"step": 44 | |
}, | |
{ | |
"epoch": 8.8, | |
"eval_accuracy": 0.96, | |
"eval_loss": 0.43538880348205566, | |
"eval_runtime": 0.9747, | |
"eval_samples_per_second": 102.591, | |
"eval_steps_per_second": 3.078, | |
"step": 44 | |
}, | |
{ | |
"epoch": 9.0, | |
"grad_norm": 0.0004329681396484375, | |
"learning_rate": 5.208333333333334e-06, | |
"loss": 0.0, | |
"step": 45 | |
}, | |
{ | |
"epoch": 9.0, | |
"eval_accuracy": 0.96, | |
"eval_loss": 0.4357055723667145, | |
"eval_runtime": 1.0226, | |
"eval_samples_per_second": 97.791, | |
"eval_steps_per_second": 2.934, | |
"step": 45 | |
}, | |
{ | |
"epoch": 9.2, | |
"grad_norm": 0.000652313232421875, | |
"learning_rate": 4.166666666666667e-06, | |
"loss": 0.0, | |
"step": 46 | |
}, | |
{ | |
"epoch": 9.2, | |
"eval_accuracy": 0.96, | |
"eval_loss": 0.43566077947616577, | |
"eval_runtime": 1.0229, | |
"eval_samples_per_second": 97.757, | |
"eval_steps_per_second": 2.933, | |
"step": 46 | |
}, | |
{ | |
"epoch": 9.4, | |
"grad_norm": 0.0002994537353515625, | |
"learning_rate": 3.125e-06, | |
"loss": 0.0, | |
"step": 47 | |
}, | |
{ | |
"epoch": 9.4, | |
"eval_accuracy": 0.96, | |
"eval_loss": 0.4365979731082916, | |
"eval_runtime": 1.0208, | |
"eval_samples_per_second": 97.958, | |
"eval_steps_per_second": 2.939, | |
"step": 47 | |
}, | |
{ | |
"epoch": 9.6, | |
"grad_norm": 0.0004177093505859375, | |
"learning_rate": 2.0833333333333334e-06, | |
"loss": 0.0, | |
"step": 48 | |
}, | |
{ | |
"epoch": 9.6, | |
"eval_accuracy": 0.96, | |
"eval_loss": 0.43618661165237427, | |
"eval_runtime": 0.9713, | |
"eval_samples_per_second": 102.953, | |
"eval_steps_per_second": 3.089, | |
"step": 48 | |
}, | |
{ | |
"epoch": 9.8, | |
"grad_norm": 0.000568389892578125, | |
"learning_rate": 1.0416666666666667e-06, | |
"loss": 0.0, | |
"step": 49 | |
}, | |
{ | |
"epoch": 9.8, | |
"eval_accuracy": 0.96, | |
"eval_loss": 0.4351811110973358, | |
"eval_runtime": 1.0214, | |
"eval_samples_per_second": 97.903, | |
"eval_steps_per_second": 2.937, | |
"step": 49 | |
}, | |
{ | |
"epoch": 10.0, | |
"grad_norm": 0.00014495849609375, | |
"learning_rate": 0.0, | |
"loss": 0.0, | |
"step": 50 | |
}, | |
{ | |
"epoch": 10.0, | |
"eval_accuracy": 0.96, | |
"eval_loss": 0.4397030174732208, | |
"eval_runtime": 1.0204, | |
"eval_samples_per_second": 98.0, | |
"eval_steps_per_second": 2.94, | |
"step": 50 | |
}, | |
{ | |
"epoch": 10.0, | |
"step": 50, | |
"total_flos": 9439761349476352.0, | |
"train_loss": 0.126956181311009, | |
"train_runtime": 137.6145, | |
"train_samples_per_second": 29.067, | |
"train_steps_per_second": 0.363 | |
} | |
], | |
"logging_steps": 1, | |
"max_steps": 50, | |
"num_input_tokens_seen": 0, | |
"num_train_epochs": 10, | |
"save_steps": 500, | |
"stateful_callbacks": { | |
"TrainerControl": { | |
"args": { | |
"should_epoch_stop": false, | |
"should_evaluate": false, | |
"should_log": false, | |
"should_save": false, | |
"should_training_stop": false | |
}, | |
"attributes": {} | |
} | |
}, | |
"total_flos": 9439761349476352.0, | |
"train_batch_size": 10, | |
"trial_name": null, | |
"trial_params": null | |
} | |