Safetensors
wav2vec2-bert
wav2vec2bert-jyutping / trainer_state.json
indiejoseph's picture
Upload folder using huggingface_hub
1f89a01 verified
{
"best_metric": 0.042238425940266514,
"best_model_checkpoint": "checkpoints/checkpoint-4000",
"epoch": 10.0,
"eval_steps": 1000,
"global_step": 4370,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.2288329519450801,
"grad_norm": 0.8938984274864197,
"learning_rate": 1e-05,
"loss": 0.0975,
"step": 100
},
{
"epoch": 0.4576659038901602,
"grad_norm": 1.1161267757415771,
"learning_rate": 2e-05,
"loss": 0.0613,
"step": 200
},
{
"epoch": 0.6864988558352403,
"grad_norm": 1.264156460762024,
"learning_rate": 3e-05,
"loss": 0.0533,
"step": 300
},
{
"epoch": 0.9153318077803204,
"grad_norm": 2.014840602874756,
"learning_rate": 4e-05,
"loss": 0.0528,
"step": 400
},
{
"epoch": 1.1441647597254005,
"grad_norm": 0.28405696153640747,
"learning_rate": 5e-05,
"loss": 0.0481,
"step": 500
},
{
"epoch": 1.3729977116704806,
"grad_norm": 0.4474036395549774,
"learning_rate": 6e-05,
"loss": 0.0549,
"step": 600
},
{
"epoch": 1.6018306636155606,
"grad_norm": 0.3621448278427124,
"learning_rate": 7e-05,
"loss": 0.0592,
"step": 700
},
{
"epoch": 1.8306636155606406,
"grad_norm": 1.5040546655654907,
"learning_rate": 8e-05,
"loss": 0.0596,
"step": 800
},
{
"epoch": 2.059496567505721,
"grad_norm": 0.5771723389625549,
"learning_rate": 9e-05,
"loss": 0.0625,
"step": 900
},
{
"epoch": 2.288329519450801,
"grad_norm": 0.7228975296020508,
"learning_rate": 0.0001,
"loss": 0.0507,
"step": 1000
},
{
"epoch": 2.288329519450801,
"eval_loss": 0.05043927580118179,
"eval_per": 0.05231171437952632,
"eval_runtime": 73.3314,
"eval_samples_per_second": 84.752,
"eval_steps_per_second": 1.336,
"eval_ter": 0.09057921991660414,
"step": 1000
},
{
"epoch": 2.517162471395881,
"grad_norm": 0.6161186695098877,
"learning_rate": 9.70326409495549e-05,
"loss": 0.0581,
"step": 1100
},
{
"epoch": 2.745995423340961,
"grad_norm": 0.4418635964393616,
"learning_rate": 9.40652818991098e-05,
"loss": 0.0578,
"step": 1200
},
{
"epoch": 2.974828375286041,
"grad_norm": 1.2202107906341553,
"learning_rate": 9.10979228486647e-05,
"loss": 0.0582,
"step": 1300
},
{
"epoch": 3.203661327231121,
"grad_norm": 0.6182931065559387,
"learning_rate": 8.813056379821959e-05,
"loss": 0.0444,
"step": 1400
},
{
"epoch": 3.4324942791762014,
"grad_norm": 0.36747854948043823,
"learning_rate": 8.516320474777448e-05,
"loss": 0.0431,
"step": 1500
},
{
"epoch": 3.6613272311212812,
"grad_norm": 0.5178420543670654,
"learning_rate": 8.219584569732938e-05,
"loss": 0.0379,
"step": 1600
},
{
"epoch": 3.8901601830663615,
"grad_norm": 0.4050116539001465,
"learning_rate": 7.922848664688428e-05,
"loss": 0.0373,
"step": 1700
},
{
"epoch": 4.118993135011442,
"grad_norm": 0.2309502214193344,
"learning_rate": 7.626112759643917e-05,
"loss": 0.0336,
"step": 1800
},
{
"epoch": 4.3478260869565215,
"grad_norm": 0.7122157216072083,
"learning_rate": 7.329376854599406e-05,
"loss": 0.0321,
"step": 1900
},
{
"epoch": 4.576659038901602,
"grad_norm": 0.20260649919509888,
"learning_rate": 7.032640949554896e-05,
"loss": 0.0327,
"step": 2000
},
{
"epoch": 4.576659038901602,
"eval_loss": 0.05170031264424324,
"eval_per": 0.044530625646617146,
"eval_runtime": 73.8457,
"eval_samples_per_second": 84.162,
"eval_steps_per_second": 1.327,
"eval_ter": 0.09002570757327888,
"step": 2000
},
{
"epoch": 4.805491990846682,
"grad_norm": 0.1853743940591812,
"learning_rate": 6.735905044510387e-05,
"loss": 0.0306,
"step": 2100
},
{
"epoch": 5.034324942791762,
"grad_norm": 0.26290130615234375,
"learning_rate": 6.439169139465876e-05,
"loss": 0.029,
"step": 2200
},
{
"epoch": 5.2631578947368425,
"grad_norm": 0.24352087080478668,
"learning_rate": 6.142433234421366e-05,
"loss": 0.0238,
"step": 2300
},
{
"epoch": 5.491990846681922,
"grad_norm": 0.506681501865387,
"learning_rate": 5.845697329376855e-05,
"loss": 0.023,
"step": 2400
},
{
"epoch": 5.720823798627002,
"grad_norm": 0.2733093202114105,
"learning_rate": 5.548961424332344e-05,
"loss": 0.025,
"step": 2500
},
{
"epoch": 5.949656750572083,
"grad_norm": 0.38764065504074097,
"learning_rate": 5.252225519287835e-05,
"loss": 0.0257,
"step": 2600
},
{
"epoch": 6.178489702517163,
"grad_norm": 0.9044945240020752,
"learning_rate": 4.9554896142433236e-05,
"loss": 0.0206,
"step": 2700
},
{
"epoch": 6.407322654462242,
"grad_norm": 0.2747916579246521,
"learning_rate": 4.658753709198813e-05,
"loss": 0.0175,
"step": 2800
},
{
"epoch": 6.636155606407323,
"grad_norm": 0.22902531921863556,
"learning_rate": 4.362017804154303e-05,
"loss": 0.0164,
"step": 2900
},
{
"epoch": 6.864988558352403,
"grad_norm": 0.28316548466682434,
"learning_rate": 4.0652818991097924e-05,
"loss": 0.0167,
"step": 3000
},
{
"epoch": 6.864988558352403,
"eval_loss": 0.044352661818265915,
"eval_per": 0.04722550908516451,
"eval_runtime": 73.6387,
"eval_samples_per_second": 84.399,
"eval_steps_per_second": 1.331,
"eval_ter": 0.08786085929716232,
"step": 3000
},
{
"epoch": 7.093821510297483,
"grad_norm": 0.220920130610466,
"learning_rate": 3.768545994065282e-05,
"loss": 0.0149,
"step": 3100
},
{
"epoch": 7.322654462242563,
"grad_norm": 2.0292646884918213,
"learning_rate": 3.4718100890207716e-05,
"loss": 0.0137,
"step": 3200
},
{
"epoch": 7.551487414187643,
"grad_norm": 0.3689746558666229,
"learning_rate": 3.175074183976261e-05,
"loss": 0.0131,
"step": 3300
},
{
"epoch": 7.780320366132723,
"grad_norm": 0.17288458347320557,
"learning_rate": 2.878338278931751e-05,
"loss": 0.0129,
"step": 3400
},
{
"epoch": 8.009153318077804,
"grad_norm": 0.13609908521175385,
"learning_rate": 2.58160237388724e-05,
"loss": 0.0117,
"step": 3500
},
{
"epoch": 8.237986270022883,
"grad_norm": 0.14868062734603882,
"learning_rate": 2.28486646884273e-05,
"loss": 0.0099,
"step": 3600
},
{
"epoch": 8.466819221967963,
"grad_norm": 0.4655757248401642,
"learning_rate": 1.9881305637982196e-05,
"loss": 0.0089,
"step": 3700
},
{
"epoch": 8.695652173913043,
"grad_norm": 0.41903799772262573,
"learning_rate": 1.6913946587537096e-05,
"loss": 0.009,
"step": 3800
},
{
"epoch": 8.924485125858123,
"grad_norm": 0.17405986785888672,
"learning_rate": 1.394658753709199e-05,
"loss": 0.0085,
"step": 3900
},
{
"epoch": 9.153318077803204,
"grad_norm": 0.16548456251621246,
"learning_rate": 1.0979228486646884e-05,
"loss": 0.0076,
"step": 4000
},
{
"epoch": 9.153318077803204,
"eval_loss": 0.04819780960679054,
"eval_per": 0.042238425940266514,
"eval_runtime": 74.5094,
"eval_samples_per_second": 83.412,
"eval_steps_per_second": 1.315,
"eval_ter": 0.08691373817636133,
"step": 4000
},
{
"epoch": 9.382151029748284,
"grad_norm": 0.16761469841003418,
"learning_rate": 8.011869436201782e-06,
"loss": 0.0068,
"step": 4100
},
{
"epoch": 9.610983981693364,
"grad_norm": 0.23434874415397644,
"learning_rate": 5.044510385756677e-06,
"loss": 0.0062,
"step": 4200
},
{
"epoch": 9.839816933638444,
"grad_norm": 0.1882612109184265,
"learning_rate": 2.0771513353115726e-06,
"loss": 0.0065,
"step": 4300
}
],
"logging_steps": 100,
"max_steps": 4370,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 6.034289580760073e+19,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}