csikasote's picture
End of training
578a93c verified
{
"best_metric": 0.29688435792922974,
"best_model_checkpoint": "/scratch/skscla001/results/w2v-bert-bem-genbed-combined-model/checkpoint-2400",
"epoch": 8.241758241758241,
"eval_steps": 200,
"global_step": 3000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.27472527472527475,
"grad_norm": 3.8470664024353027,
"learning_rate": 0.00029699999999999996,
"loss": 1.5247,
"step": 100
},
{
"epoch": 0.5494505494505495,
"grad_norm": 4.275773048400879,
"learning_rate": 0.00029725508317929756,
"loss": 0.6407,
"step": 200
},
{
"epoch": 0.5494505494505495,
"eval_loss": 0.6847326159477234,
"eval_runtime": 81.9887,
"eval_samples_per_second": 23.65,
"eval_steps_per_second": 2.964,
"eval_wer": 0.8380540540540541,
"step": 200
},
{
"epoch": 0.8241758241758241,
"grad_norm": 4.533022403717041,
"learning_rate": 0.0002944824399260628,
"loss": 0.5465,
"step": 300
},
{
"epoch": 1.098901098901099,
"grad_norm": 1.5235211849212646,
"learning_rate": 0.00029170979667282804,
"loss": 0.458,
"step": 400
},
{
"epoch": 1.098901098901099,
"eval_loss": 0.4855729043483734,
"eval_runtime": 81.2075,
"eval_samples_per_second": 23.877,
"eval_steps_per_second": 2.992,
"eval_wer": 0.6787027027027027,
"step": 400
},
{
"epoch": 1.3736263736263736,
"grad_norm": 1.3080778121948242,
"learning_rate": 0.00028893715341959334,
"loss": 0.4539,
"step": 500
},
{
"epoch": 1.6483516483516483,
"grad_norm": 1.1141129732131958,
"learning_rate": 0.0002861645101663586,
"loss": 0.4014,
"step": 600
},
{
"epoch": 1.6483516483516483,
"eval_loss": 0.4310147762298584,
"eval_runtime": 81.4044,
"eval_samples_per_second": 23.819,
"eval_steps_per_second": 2.985,
"eval_wer": 0.6258378378378379,
"step": 600
},
{
"epoch": 1.9230769230769231,
"grad_norm": 1.3589787483215332,
"learning_rate": 0.00028339186691312383,
"loss": 0.4043,
"step": 700
},
{
"epoch": 2.197802197802198,
"grad_norm": 1.0690594911575317,
"learning_rate": 0.0002806192236598891,
"loss": 0.3523,
"step": 800
},
{
"epoch": 2.197802197802198,
"eval_loss": 0.36540287733078003,
"eval_runtime": 81.4173,
"eval_samples_per_second": 23.816,
"eval_steps_per_second": 2.985,
"eval_wer": 0.5421621621621622,
"step": 800
},
{
"epoch": 2.4725274725274726,
"grad_norm": 1.0713083744049072,
"learning_rate": 0.0002778465804066543,
"loss": 0.3256,
"step": 900
},
{
"epoch": 2.7472527472527473,
"grad_norm": 1.2681094408035278,
"learning_rate": 0.00027507393715341956,
"loss": 0.3298,
"step": 1000
},
{
"epoch": 2.7472527472527473,
"eval_loss": 0.3534020483493805,
"eval_runtime": 81.532,
"eval_samples_per_second": 23.782,
"eval_steps_per_second": 2.98,
"eval_wer": 0.5373513513513514,
"step": 1000
},
{
"epoch": 3.021978021978022,
"grad_norm": 1.4569008350372314,
"learning_rate": 0.0002723012939001848,
"loss": 0.3323,
"step": 1100
},
{
"epoch": 3.2967032967032965,
"grad_norm": 0.8082828521728516,
"learning_rate": 0.00026952865064695005,
"loss": 0.2749,
"step": 1200
},
{
"epoch": 3.2967032967032965,
"eval_loss": 0.34022244811058044,
"eval_runtime": 81.3749,
"eval_samples_per_second": 23.828,
"eval_steps_per_second": 2.986,
"eval_wer": 0.5196216216216216,
"step": 1200
},
{
"epoch": 3.571428571428571,
"grad_norm": 0.5922394394874573,
"learning_rate": 0.0002667560073937153,
"loss": 0.2681,
"step": 1300
},
{
"epoch": 3.8461538461538463,
"grad_norm": 0.7247134447097778,
"learning_rate": 0.00026398336414048054,
"loss": 0.2705,
"step": 1400
},
{
"epoch": 3.8461538461538463,
"eval_loss": 0.3283730447292328,
"eval_runtime": 86.9096,
"eval_samples_per_second": 22.311,
"eval_steps_per_second": 2.796,
"eval_wer": 0.5250270270270271,
"step": 1400
},
{
"epoch": 4.1208791208791204,
"grad_norm": 0.9202386140823364,
"learning_rate": 0.00026121072088724584,
"loss": 0.2516,
"step": 1500
},
{
"epoch": 4.395604395604396,
"grad_norm": 1.350644588470459,
"learning_rate": 0.0002584380776340111,
"loss": 0.249,
"step": 1600
},
{
"epoch": 4.395604395604396,
"eval_loss": 0.3498856723308563,
"eval_runtime": 81.8027,
"eval_samples_per_second": 23.703,
"eval_steps_per_second": 2.971,
"eval_wer": 0.5299459459459459,
"step": 1600
},
{
"epoch": 4.670329670329671,
"grad_norm": 1.1849825382232666,
"learning_rate": 0.00025566543438077633,
"loss": 0.2415,
"step": 1700
},
{
"epoch": 4.945054945054945,
"grad_norm": 1.1748567819595337,
"learning_rate": 0.0002528927911275416,
"loss": 0.2508,
"step": 1800
},
{
"epoch": 4.945054945054945,
"eval_loss": 0.35116615891456604,
"eval_runtime": 81.4977,
"eval_samples_per_second": 23.792,
"eval_steps_per_second": 2.982,
"eval_wer": 0.5582162162162162,
"step": 1800
},
{
"epoch": 5.21978021978022,
"grad_norm": 0.8483781814575195,
"learning_rate": 0.0002501201478743068,
"loss": 0.2172,
"step": 1900
},
{
"epoch": 5.4945054945054945,
"grad_norm": 0.7349032759666443,
"learning_rate": 0.00024734750462107206,
"loss": 0.2081,
"step": 2000
},
{
"epoch": 5.4945054945054945,
"eval_loss": 0.3216859996318817,
"eval_runtime": 81.8476,
"eval_samples_per_second": 23.69,
"eval_steps_per_second": 2.969,
"eval_wer": 0.4808108108108108,
"step": 2000
},
{
"epoch": 5.769230769230769,
"grad_norm": 0.5661817193031311,
"learning_rate": 0.0002445748613678373,
"loss": 0.2039,
"step": 2100
},
{
"epoch": 6.043956043956044,
"grad_norm": 0.5281690359115601,
"learning_rate": 0.00024180221811460255,
"loss": 0.2176,
"step": 2200
},
{
"epoch": 6.043956043956044,
"eval_loss": 0.3141148090362549,
"eval_runtime": 81.6241,
"eval_samples_per_second": 23.755,
"eval_steps_per_second": 2.977,
"eval_wer": 0.472,
"step": 2200
},
{
"epoch": 6.318681318681318,
"grad_norm": 0.5380920171737671,
"learning_rate": 0.0002390295748613678,
"loss": 0.1719,
"step": 2300
},
{
"epoch": 6.593406593406593,
"grad_norm": 0.6829392910003662,
"learning_rate": 0.0002362569316081331,
"loss": 0.1784,
"step": 2400
},
{
"epoch": 6.593406593406593,
"eval_loss": 0.29688435792922974,
"eval_runtime": 81.2312,
"eval_samples_per_second": 23.87,
"eval_steps_per_second": 2.991,
"eval_wer": 0.4669189189189189,
"step": 2400
},
{
"epoch": 6.868131868131869,
"grad_norm": 0.5835310220718384,
"learning_rate": 0.00023348428835489834,
"loss": 0.1842,
"step": 2500
},
{
"epoch": 7.142857142857143,
"grad_norm": 0.5060920715332031,
"learning_rate": 0.00023071164510166358,
"loss": 0.166,
"step": 2600
},
{
"epoch": 7.142857142857143,
"eval_loss": 0.3367214798927307,
"eval_runtime": 81.6166,
"eval_samples_per_second": 23.757,
"eval_steps_per_second": 2.977,
"eval_wer": 0.49135135135135133,
"step": 2600
},
{
"epoch": 7.417582417582418,
"grad_norm": 0.5145979523658752,
"learning_rate": 0.00022793900184842883,
"loss": 0.1562,
"step": 2700
},
{
"epoch": 7.6923076923076925,
"grad_norm": 0.8669461011886597,
"learning_rate": 0.00022516635859519407,
"loss": 0.157,
"step": 2800
},
{
"epoch": 7.6923076923076925,
"eval_loss": 0.32062390446662903,
"eval_runtime": 81.6437,
"eval_samples_per_second": 23.75,
"eval_steps_per_second": 2.976,
"eval_wer": 0.49027027027027026,
"step": 2800
},
{
"epoch": 7.967032967032967,
"grad_norm": 0.7915688753128052,
"learning_rate": 0.00022239371534195932,
"loss": 0.1602,
"step": 2900
},
{
"epoch": 8.241758241758241,
"grad_norm": 0.7078336477279663,
"learning_rate": 0.00021962107208872456,
"loss": 0.1398,
"step": 3000
},
{
"epoch": 8.241758241758241,
"eval_loss": 0.32599326968193054,
"eval_runtime": 81.2462,
"eval_samples_per_second": 23.866,
"eval_steps_per_second": 2.991,
"eval_wer": 0.4616756756756757,
"step": 3000
},
{
"epoch": 8.241758241758241,
"step": 3000,
"total_flos": 8.892175962497035e+18,
"train_loss": 0.32454829279581704,
"train_runtime": 5336.5113,
"train_samples_per_second": 32.701,
"train_steps_per_second": 2.046
}
],
"logging_steps": 100,
"max_steps": 10920,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 200,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 3
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8.892175962497035e+18,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}