|
{ |
|
"best_metric": 48.88501742160279, |
|
"best_model_checkpoint": "./whisper-small-ar/checkpoint-60", |
|
"epoch": 34.285714285714285, |
|
"eval_steps": 10, |
|
"global_step": 60, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 58.168914794921875, |
|
"learning_rate": 1e-07, |
|
"loss": 2.3005, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 1.1428571428571428, |
|
"grad_norm": 20.130443572998047, |
|
"learning_rate": 1.0080000000000002e-05, |
|
"loss": 1.829, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 1.7142857142857144, |
|
"grad_norm": 21.1949405670166, |
|
"learning_rate": 2.006e-05, |
|
"loss": 1.8278, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 2.2857142857142856, |
|
"grad_norm": 17.402198791503906, |
|
"learning_rate": 3.0039999999999997e-05, |
|
"loss": 1.6203, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 2.857142857142857, |
|
"grad_norm": 8.070812225341797, |
|
"learning_rate": 4.0020000000000006e-05, |
|
"loss": 1.3838, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 3.4285714285714284, |
|
"grad_norm": 10.189924240112305, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2624, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 11.481833457946777, |
|
"learning_rate": 4.9744102564102566e-05, |
|
"loss": 1.1801, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 4.571428571428571, |
|
"grad_norm": 8.908258438110352, |
|
"learning_rate": 4.948820512820513e-05, |
|
"loss": 1.036, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 5.142857142857143, |
|
"grad_norm": 6.949155807495117, |
|
"learning_rate": 4.92323076923077e-05, |
|
"loss": 0.9218, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 5.714285714285714, |
|
"grad_norm": 5.213706016540527, |
|
"learning_rate": 4.897641025641026e-05, |
|
"loss": 0.821, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 5.714285714285714, |
|
"eval_loss": 1.0209691524505615, |
|
"eval_runtime": 36.4465, |
|
"eval_samples_per_second": 2.771, |
|
"eval_steps_per_second": 0.192, |
|
"eval_wer": 93.58885017421603, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 6.285714285714286, |
|
"grad_norm": 5.321272850036621, |
|
"learning_rate": 4.872051282051282e-05, |
|
"loss": 0.7761, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 6.857142857142857, |
|
"grad_norm": 4.693880081176758, |
|
"learning_rate": 4.8464615384615386e-05, |
|
"loss": 0.7099, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 7.428571428571429, |
|
"grad_norm": 4.722415924072266, |
|
"learning_rate": 4.820871794871795e-05, |
|
"loss": 0.6289, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 3.9184811115264893, |
|
"learning_rate": 4.795282051282052e-05, |
|
"loss": 0.5725, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 8.571428571428571, |
|
"grad_norm": 3.7839274406433105, |
|
"learning_rate": 4.7696923076923084e-05, |
|
"loss": 0.5235, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 9.142857142857142, |
|
"grad_norm": 4.117933750152588, |
|
"learning_rate": 4.744102564102564e-05, |
|
"loss": 0.4812, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 9.714285714285714, |
|
"grad_norm": 3.781254529953003, |
|
"learning_rate": 4.7185128205128205e-05, |
|
"loss": 0.4397, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 10.285714285714286, |
|
"grad_norm": 3.4398269653320312, |
|
"learning_rate": 4.692923076923077e-05, |
|
"loss": 0.3749, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 10.857142857142858, |
|
"grad_norm": 3.5433449745178223, |
|
"learning_rate": 4.667333333333334e-05, |
|
"loss": 0.3506, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 11.428571428571429, |
|
"grad_norm": 3.3172426223754883, |
|
"learning_rate": 4.64174358974359e-05, |
|
"loss": 0.3185, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 11.428571428571429, |
|
"eval_loss": 0.83645099401474, |
|
"eval_runtime": 27.2985, |
|
"eval_samples_per_second": 3.7, |
|
"eval_steps_per_second": 0.256, |
|
"eval_wer": 109.26829268292684, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 3.2160873413085938, |
|
"learning_rate": 4.616153846153847e-05, |
|
"loss": 0.2779, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 12.571428571428571, |
|
"grad_norm": 3.265079975128174, |
|
"learning_rate": 4.5905641025641024e-05, |
|
"loss": 0.2553, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 13.142857142857142, |
|
"grad_norm": 3.1832408905029297, |
|
"learning_rate": 4.564974358974359e-05, |
|
"loss": 0.2157, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 13.714285714285714, |
|
"grad_norm": 2.9767119884490967, |
|
"learning_rate": 4.539384615384616e-05, |
|
"loss": 0.1921, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 14.285714285714286, |
|
"grad_norm": 2.942561388015747, |
|
"learning_rate": 4.513794871794872e-05, |
|
"loss": 0.1741, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 14.857142857142858, |
|
"grad_norm": 2.837989091873169, |
|
"learning_rate": 4.4882051282051286e-05, |
|
"loss": 0.1422, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 15.428571428571429, |
|
"grad_norm": 2.894479513168335, |
|
"learning_rate": 4.462615384615385e-05, |
|
"loss": 0.1257, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 2.841754198074341, |
|
"learning_rate": 4.437025641025641e-05, |
|
"loss": 0.1039, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 16.571428571428573, |
|
"grad_norm": 2.616755485534668, |
|
"learning_rate": 4.411435897435898e-05, |
|
"loss": 0.0847, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 17.142857142857142, |
|
"grad_norm": 2.05346417427063, |
|
"learning_rate": 4.385846153846154e-05, |
|
"loss": 0.0606, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 17.142857142857142, |
|
"eval_loss": 0.8186278939247131, |
|
"eval_runtime": 26.7925, |
|
"eval_samples_per_second": 3.77, |
|
"eval_steps_per_second": 0.261, |
|
"eval_wer": 54.843205574912886, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 17.714285714285715, |
|
"grad_norm": 1.4241214990615845, |
|
"learning_rate": 4.3602564102564106e-05, |
|
"loss": 0.051, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 18.285714285714285, |
|
"grad_norm": 0.806209921836853, |
|
"learning_rate": 4.334666666666667e-05, |
|
"loss": 0.0336, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 18.857142857142858, |
|
"grad_norm": 0.7847088575363159, |
|
"learning_rate": 4.309076923076923e-05, |
|
"loss": 0.0282, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 19.428571428571427, |
|
"grad_norm": 0.9520515203475952, |
|
"learning_rate": 4.28348717948718e-05, |
|
"loss": 0.0204, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 0.37412479519844055, |
|
"learning_rate": 4.257897435897436e-05, |
|
"loss": 0.0163, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 20.571428571428573, |
|
"grad_norm": 0.3346174657344818, |
|
"learning_rate": 4.2323076923076925e-05, |
|
"loss": 0.0122, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 21.142857142857142, |
|
"grad_norm": 0.23975922167301178, |
|
"learning_rate": 4.206717948717949e-05, |
|
"loss": 0.0105, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 21.714285714285715, |
|
"grad_norm": 0.2158885896205902, |
|
"learning_rate": 4.181128205128205e-05, |
|
"loss": 0.0078, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 22.285714285714285, |
|
"grad_norm": 0.15689243376255035, |
|
"learning_rate": 4.155538461538462e-05, |
|
"loss": 0.0065, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 22.857142857142858, |
|
"grad_norm": 0.10141926258802414, |
|
"learning_rate": 4.129948717948718e-05, |
|
"loss": 0.0056, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 22.857142857142858, |
|
"eval_loss": 0.9213815927505493, |
|
"eval_runtime": 26.8113, |
|
"eval_samples_per_second": 3.767, |
|
"eval_steps_per_second": 0.261, |
|
"eval_wer": 50.139372822299656, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 23.428571428571427, |
|
"grad_norm": 0.07463113218545914, |
|
"learning_rate": 4.1043589743589744e-05, |
|
"loss": 0.0047, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"grad_norm": 0.09193433821201324, |
|
"learning_rate": 4.078769230769231e-05, |
|
"loss": 0.0041, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 24.571428571428573, |
|
"grad_norm": 0.06787554919719696, |
|
"learning_rate": 4.053179487179487e-05, |
|
"loss": 0.0035, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 25.142857142857142, |
|
"grad_norm": 0.07034426182508469, |
|
"learning_rate": 4.027589743589744e-05, |
|
"loss": 0.0031, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 25.714285714285715, |
|
"grad_norm": 0.06088101118803024, |
|
"learning_rate": 4.0020000000000006e-05, |
|
"loss": 0.0028, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 26.285714285714285, |
|
"grad_norm": 0.03634655103087425, |
|
"learning_rate": 3.9764102564102564e-05, |
|
"loss": 0.0024, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 26.857142857142858, |
|
"grad_norm": 0.13105067610740662, |
|
"learning_rate": 3.950820512820513e-05, |
|
"loss": 0.0022, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 27.428571428571427, |
|
"grad_norm": 0.02541457489132881, |
|
"learning_rate": 3.925230769230769e-05, |
|
"loss": 0.002, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"grad_norm": 0.04503984376788139, |
|
"learning_rate": 3.899641025641026e-05, |
|
"loss": 0.0017, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 28.571428571428573, |
|
"grad_norm": 0.08539522439241409, |
|
"learning_rate": 3.8740512820512826e-05, |
|
"loss": 0.0019, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 28.571428571428573, |
|
"eval_loss": 1.0016592741012573, |
|
"eval_runtime": 26.5804, |
|
"eval_samples_per_second": 3.8, |
|
"eval_steps_per_second": 0.263, |
|
"eval_wer": 50.87108013937283, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 29.142857142857142, |
|
"grad_norm": 0.06402863562107086, |
|
"learning_rate": 3.848461538461539e-05, |
|
"loss": 0.0017, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 29.714285714285715, |
|
"grad_norm": 0.016784947365522385, |
|
"learning_rate": 3.822871794871795e-05, |
|
"loss": 0.0013, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 30.285714285714285, |
|
"grad_norm": 0.030725400894880295, |
|
"learning_rate": 3.797282051282051e-05, |
|
"loss": 0.0013, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 30.857142857142858, |
|
"grad_norm": 0.015844004228711128, |
|
"learning_rate": 3.771692307692308e-05, |
|
"loss": 0.0011, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 31.428571428571427, |
|
"grad_norm": 0.015942782163619995, |
|
"learning_rate": 3.7461025641025645e-05, |
|
"loss": 0.0011, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"grad_norm": 0.18212947249412537, |
|
"learning_rate": 3.720512820512821e-05, |
|
"loss": 0.0012, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 32.57142857142857, |
|
"grad_norm": 0.016346724703907967, |
|
"learning_rate": 3.694923076923077e-05, |
|
"loss": 0.0009, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 33.142857142857146, |
|
"grad_norm": 0.010927367024123669, |
|
"learning_rate": 3.669333333333333e-05, |
|
"loss": 0.0009, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 33.714285714285715, |
|
"grad_norm": 0.013612424023449421, |
|
"learning_rate": 3.64374358974359e-05, |
|
"loss": 0.0008, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 34.285714285714285, |
|
"grad_norm": 0.10505015403032303, |
|
"learning_rate": 3.6181538461538464e-05, |
|
"loss": 0.0009, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 34.285714285714285, |
|
"eval_loss": 1.0416876077651978, |
|
"eval_runtime": 26.3087, |
|
"eval_samples_per_second": 3.839, |
|
"eval_steps_per_second": 0.266, |
|
"eval_wer": 48.88501742160279, |
|
"step": 60 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 200, |
|
"save_steps": 10, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.432671678450893e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|