whisper-small-darija / trainer_state.json
marouaneoa's picture
Upload 12 files
02f1e4e verified
{
"best_metric": 48.88501742160279,
"best_model_checkpoint": "./whisper-small-ar/checkpoint-60",
"epoch": 34.285714285714285,
"eval_steps": 10,
"global_step": 60,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.5714285714285714,
"grad_norm": 58.168914794921875,
"learning_rate": 1e-07,
"loss": 2.3005,
"step": 1
},
{
"epoch": 1.1428571428571428,
"grad_norm": 20.130443572998047,
"learning_rate": 1.0080000000000002e-05,
"loss": 1.829,
"step": 2
},
{
"epoch": 1.7142857142857144,
"grad_norm": 21.1949405670166,
"learning_rate": 2.006e-05,
"loss": 1.8278,
"step": 3
},
{
"epoch": 2.2857142857142856,
"grad_norm": 17.402198791503906,
"learning_rate": 3.0039999999999997e-05,
"loss": 1.6203,
"step": 4
},
{
"epoch": 2.857142857142857,
"grad_norm": 8.070812225341797,
"learning_rate": 4.0020000000000006e-05,
"loss": 1.3838,
"step": 5
},
{
"epoch": 3.4285714285714284,
"grad_norm": 10.189924240112305,
"learning_rate": 5e-05,
"loss": 1.2624,
"step": 6
},
{
"epoch": 4.0,
"grad_norm": 11.481833457946777,
"learning_rate": 4.9744102564102566e-05,
"loss": 1.1801,
"step": 7
},
{
"epoch": 4.571428571428571,
"grad_norm": 8.908258438110352,
"learning_rate": 4.948820512820513e-05,
"loss": 1.036,
"step": 8
},
{
"epoch": 5.142857142857143,
"grad_norm": 6.949155807495117,
"learning_rate": 4.92323076923077e-05,
"loss": 0.9218,
"step": 9
},
{
"epoch": 5.714285714285714,
"grad_norm": 5.213706016540527,
"learning_rate": 4.897641025641026e-05,
"loss": 0.821,
"step": 10
},
{
"epoch": 5.714285714285714,
"eval_loss": 1.0209691524505615,
"eval_runtime": 36.4465,
"eval_samples_per_second": 2.771,
"eval_steps_per_second": 0.192,
"eval_wer": 93.58885017421603,
"step": 10
},
{
"epoch": 6.285714285714286,
"grad_norm": 5.321272850036621,
"learning_rate": 4.872051282051282e-05,
"loss": 0.7761,
"step": 11
},
{
"epoch": 6.857142857142857,
"grad_norm": 4.693880081176758,
"learning_rate": 4.8464615384615386e-05,
"loss": 0.7099,
"step": 12
},
{
"epoch": 7.428571428571429,
"grad_norm": 4.722415924072266,
"learning_rate": 4.820871794871795e-05,
"loss": 0.6289,
"step": 13
},
{
"epoch": 8.0,
"grad_norm": 3.9184811115264893,
"learning_rate": 4.795282051282052e-05,
"loss": 0.5725,
"step": 14
},
{
"epoch": 8.571428571428571,
"grad_norm": 3.7839274406433105,
"learning_rate": 4.7696923076923084e-05,
"loss": 0.5235,
"step": 15
},
{
"epoch": 9.142857142857142,
"grad_norm": 4.117933750152588,
"learning_rate": 4.744102564102564e-05,
"loss": 0.4812,
"step": 16
},
{
"epoch": 9.714285714285714,
"grad_norm": 3.781254529953003,
"learning_rate": 4.7185128205128205e-05,
"loss": 0.4397,
"step": 17
},
{
"epoch": 10.285714285714286,
"grad_norm": 3.4398269653320312,
"learning_rate": 4.692923076923077e-05,
"loss": 0.3749,
"step": 18
},
{
"epoch": 10.857142857142858,
"grad_norm": 3.5433449745178223,
"learning_rate": 4.667333333333334e-05,
"loss": 0.3506,
"step": 19
},
{
"epoch": 11.428571428571429,
"grad_norm": 3.3172426223754883,
"learning_rate": 4.64174358974359e-05,
"loss": 0.3185,
"step": 20
},
{
"epoch": 11.428571428571429,
"eval_loss": 0.83645099401474,
"eval_runtime": 27.2985,
"eval_samples_per_second": 3.7,
"eval_steps_per_second": 0.256,
"eval_wer": 109.26829268292684,
"step": 20
},
{
"epoch": 12.0,
"grad_norm": 3.2160873413085938,
"learning_rate": 4.616153846153847e-05,
"loss": 0.2779,
"step": 21
},
{
"epoch": 12.571428571428571,
"grad_norm": 3.265079975128174,
"learning_rate": 4.5905641025641024e-05,
"loss": 0.2553,
"step": 22
},
{
"epoch": 13.142857142857142,
"grad_norm": 3.1832408905029297,
"learning_rate": 4.564974358974359e-05,
"loss": 0.2157,
"step": 23
},
{
"epoch": 13.714285714285714,
"grad_norm": 2.9767119884490967,
"learning_rate": 4.539384615384616e-05,
"loss": 0.1921,
"step": 24
},
{
"epoch": 14.285714285714286,
"grad_norm": 2.942561388015747,
"learning_rate": 4.513794871794872e-05,
"loss": 0.1741,
"step": 25
},
{
"epoch": 14.857142857142858,
"grad_norm": 2.837989091873169,
"learning_rate": 4.4882051282051286e-05,
"loss": 0.1422,
"step": 26
},
{
"epoch": 15.428571428571429,
"grad_norm": 2.894479513168335,
"learning_rate": 4.462615384615385e-05,
"loss": 0.1257,
"step": 27
},
{
"epoch": 16.0,
"grad_norm": 2.841754198074341,
"learning_rate": 4.437025641025641e-05,
"loss": 0.1039,
"step": 28
},
{
"epoch": 16.571428571428573,
"grad_norm": 2.616755485534668,
"learning_rate": 4.411435897435898e-05,
"loss": 0.0847,
"step": 29
},
{
"epoch": 17.142857142857142,
"grad_norm": 2.05346417427063,
"learning_rate": 4.385846153846154e-05,
"loss": 0.0606,
"step": 30
},
{
"epoch": 17.142857142857142,
"eval_loss": 0.8186278939247131,
"eval_runtime": 26.7925,
"eval_samples_per_second": 3.77,
"eval_steps_per_second": 0.261,
"eval_wer": 54.843205574912886,
"step": 30
},
{
"epoch": 17.714285714285715,
"grad_norm": 1.4241214990615845,
"learning_rate": 4.3602564102564106e-05,
"loss": 0.051,
"step": 31
},
{
"epoch": 18.285714285714285,
"grad_norm": 0.806209921836853,
"learning_rate": 4.334666666666667e-05,
"loss": 0.0336,
"step": 32
},
{
"epoch": 18.857142857142858,
"grad_norm": 0.7847088575363159,
"learning_rate": 4.309076923076923e-05,
"loss": 0.0282,
"step": 33
},
{
"epoch": 19.428571428571427,
"grad_norm": 0.9520515203475952,
"learning_rate": 4.28348717948718e-05,
"loss": 0.0204,
"step": 34
},
{
"epoch": 20.0,
"grad_norm": 0.37412479519844055,
"learning_rate": 4.257897435897436e-05,
"loss": 0.0163,
"step": 35
},
{
"epoch": 20.571428571428573,
"grad_norm": 0.3346174657344818,
"learning_rate": 4.2323076923076925e-05,
"loss": 0.0122,
"step": 36
},
{
"epoch": 21.142857142857142,
"grad_norm": 0.23975922167301178,
"learning_rate": 4.206717948717949e-05,
"loss": 0.0105,
"step": 37
},
{
"epoch": 21.714285714285715,
"grad_norm": 0.2158885896205902,
"learning_rate": 4.181128205128205e-05,
"loss": 0.0078,
"step": 38
},
{
"epoch": 22.285714285714285,
"grad_norm": 0.15689243376255035,
"learning_rate": 4.155538461538462e-05,
"loss": 0.0065,
"step": 39
},
{
"epoch": 22.857142857142858,
"grad_norm": 0.10141926258802414,
"learning_rate": 4.129948717948718e-05,
"loss": 0.0056,
"step": 40
},
{
"epoch": 22.857142857142858,
"eval_loss": 0.9213815927505493,
"eval_runtime": 26.8113,
"eval_samples_per_second": 3.767,
"eval_steps_per_second": 0.261,
"eval_wer": 50.139372822299656,
"step": 40
},
{
"epoch": 23.428571428571427,
"grad_norm": 0.07463113218545914,
"learning_rate": 4.1043589743589744e-05,
"loss": 0.0047,
"step": 41
},
{
"epoch": 24.0,
"grad_norm": 0.09193433821201324,
"learning_rate": 4.078769230769231e-05,
"loss": 0.0041,
"step": 42
},
{
"epoch": 24.571428571428573,
"grad_norm": 0.06787554919719696,
"learning_rate": 4.053179487179487e-05,
"loss": 0.0035,
"step": 43
},
{
"epoch": 25.142857142857142,
"grad_norm": 0.07034426182508469,
"learning_rate": 4.027589743589744e-05,
"loss": 0.0031,
"step": 44
},
{
"epoch": 25.714285714285715,
"grad_norm": 0.06088101118803024,
"learning_rate": 4.0020000000000006e-05,
"loss": 0.0028,
"step": 45
},
{
"epoch": 26.285714285714285,
"grad_norm": 0.03634655103087425,
"learning_rate": 3.9764102564102564e-05,
"loss": 0.0024,
"step": 46
},
{
"epoch": 26.857142857142858,
"grad_norm": 0.13105067610740662,
"learning_rate": 3.950820512820513e-05,
"loss": 0.0022,
"step": 47
},
{
"epoch": 27.428571428571427,
"grad_norm": 0.02541457489132881,
"learning_rate": 3.925230769230769e-05,
"loss": 0.002,
"step": 48
},
{
"epoch": 28.0,
"grad_norm": 0.04503984376788139,
"learning_rate": 3.899641025641026e-05,
"loss": 0.0017,
"step": 49
},
{
"epoch": 28.571428571428573,
"grad_norm": 0.08539522439241409,
"learning_rate": 3.8740512820512826e-05,
"loss": 0.0019,
"step": 50
},
{
"epoch": 28.571428571428573,
"eval_loss": 1.0016592741012573,
"eval_runtime": 26.5804,
"eval_samples_per_second": 3.8,
"eval_steps_per_second": 0.263,
"eval_wer": 50.87108013937283,
"step": 50
},
{
"epoch": 29.142857142857142,
"grad_norm": 0.06402863562107086,
"learning_rate": 3.848461538461539e-05,
"loss": 0.0017,
"step": 51
},
{
"epoch": 29.714285714285715,
"grad_norm": 0.016784947365522385,
"learning_rate": 3.822871794871795e-05,
"loss": 0.0013,
"step": 52
},
{
"epoch": 30.285714285714285,
"grad_norm": 0.030725400894880295,
"learning_rate": 3.797282051282051e-05,
"loss": 0.0013,
"step": 53
},
{
"epoch": 30.857142857142858,
"grad_norm": 0.015844004228711128,
"learning_rate": 3.771692307692308e-05,
"loss": 0.0011,
"step": 54
},
{
"epoch": 31.428571428571427,
"grad_norm": 0.015942782163619995,
"learning_rate": 3.7461025641025645e-05,
"loss": 0.0011,
"step": 55
},
{
"epoch": 32.0,
"grad_norm": 0.18212947249412537,
"learning_rate": 3.720512820512821e-05,
"loss": 0.0012,
"step": 56
},
{
"epoch": 32.57142857142857,
"grad_norm": 0.016346724703907967,
"learning_rate": 3.694923076923077e-05,
"loss": 0.0009,
"step": 57
},
{
"epoch": 33.142857142857146,
"grad_norm": 0.010927367024123669,
"learning_rate": 3.669333333333333e-05,
"loss": 0.0009,
"step": 58
},
{
"epoch": 33.714285714285715,
"grad_norm": 0.013612424023449421,
"learning_rate": 3.64374358974359e-05,
"loss": 0.0008,
"step": 59
},
{
"epoch": 34.285714285714285,
"grad_norm": 0.10505015403032303,
"learning_rate": 3.6181538461538464e-05,
"loss": 0.0009,
"step": 60
},
{
"epoch": 34.285714285714285,
"eval_loss": 1.0416876077651978,
"eval_runtime": 26.3087,
"eval_samples_per_second": 3.839,
"eval_steps_per_second": 0.266,
"eval_wer": 48.88501742160279,
"step": 60
}
],
"logging_steps": 1,
"max_steps": 200,
"num_input_tokens_seen": 0,
"num_train_epochs": 200,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 4.432671678450893e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}