|
{ |
|
"best_metric": 0.29688435792922974, |
|
"best_model_checkpoint": "/scratch/skscla001/results/w2v-bert-bem-genbed-combined-model/checkpoint-2400", |
|
"epoch": 8.241758241758241, |
|
"eval_steps": 200, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.27472527472527475, |
|
"grad_norm": 3.8470664024353027, |
|
"learning_rate": 0.00029699999999999996, |
|
"loss": 1.5247, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5494505494505495, |
|
"grad_norm": 4.275773048400879, |
|
"learning_rate": 0.00029725508317929756, |
|
"loss": 0.6407, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5494505494505495, |
|
"eval_loss": 0.6847326159477234, |
|
"eval_runtime": 81.9887, |
|
"eval_samples_per_second": 23.65, |
|
"eval_steps_per_second": 2.964, |
|
"eval_wer": 0.8380540540540541, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.8241758241758241, |
|
"grad_norm": 4.533022403717041, |
|
"learning_rate": 0.0002944824399260628, |
|
"loss": 0.5465, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.098901098901099, |
|
"grad_norm": 1.5235211849212646, |
|
"learning_rate": 0.00029170979667282804, |
|
"loss": 0.458, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.098901098901099, |
|
"eval_loss": 0.4855729043483734, |
|
"eval_runtime": 81.2075, |
|
"eval_samples_per_second": 23.877, |
|
"eval_steps_per_second": 2.992, |
|
"eval_wer": 0.6787027027027027, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.3736263736263736, |
|
"grad_norm": 1.3080778121948242, |
|
"learning_rate": 0.00028893715341959334, |
|
"loss": 0.4539, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.6483516483516483, |
|
"grad_norm": 1.1141129732131958, |
|
"learning_rate": 0.0002861645101663586, |
|
"loss": 0.4014, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.6483516483516483, |
|
"eval_loss": 0.4310147762298584, |
|
"eval_runtime": 81.4044, |
|
"eval_samples_per_second": 23.819, |
|
"eval_steps_per_second": 2.985, |
|
"eval_wer": 0.6258378378378379, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.9230769230769231, |
|
"grad_norm": 1.3589787483215332, |
|
"learning_rate": 0.00028339186691312383, |
|
"loss": 0.4043, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.197802197802198, |
|
"grad_norm": 1.0690594911575317, |
|
"learning_rate": 0.0002806192236598891, |
|
"loss": 0.3523, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.197802197802198, |
|
"eval_loss": 0.36540287733078003, |
|
"eval_runtime": 81.4173, |
|
"eval_samples_per_second": 23.816, |
|
"eval_steps_per_second": 2.985, |
|
"eval_wer": 0.5421621621621622, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.4725274725274726, |
|
"grad_norm": 1.0713083744049072, |
|
"learning_rate": 0.0002778465804066543, |
|
"loss": 0.3256, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.7472527472527473, |
|
"grad_norm": 1.2681094408035278, |
|
"learning_rate": 0.00027507393715341956, |
|
"loss": 0.3298, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.7472527472527473, |
|
"eval_loss": 0.3534020483493805, |
|
"eval_runtime": 81.532, |
|
"eval_samples_per_second": 23.782, |
|
"eval_steps_per_second": 2.98, |
|
"eval_wer": 0.5373513513513514, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 3.021978021978022, |
|
"grad_norm": 1.4569008350372314, |
|
"learning_rate": 0.0002723012939001848, |
|
"loss": 0.3323, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 3.2967032967032965, |
|
"grad_norm": 0.8082828521728516, |
|
"learning_rate": 0.00026952865064695005, |
|
"loss": 0.2749, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.2967032967032965, |
|
"eval_loss": 0.34022244811058044, |
|
"eval_runtime": 81.3749, |
|
"eval_samples_per_second": 23.828, |
|
"eval_steps_per_second": 2.986, |
|
"eval_wer": 0.5196216216216216, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.571428571428571, |
|
"grad_norm": 0.5922394394874573, |
|
"learning_rate": 0.0002667560073937153, |
|
"loss": 0.2681, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.8461538461538463, |
|
"grad_norm": 0.7247134447097778, |
|
"learning_rate": 0.00026398336414048054, |
|
"loss": 0.2705, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.8461538461538463, |
|
"eval_loss": 0.3283730447292328, |
|
"eval_runtime": 86.9096, |
|
"eval_samples_per_second": 22.311, |
|
"eval_steps_per_second": 2.796, |
|
"eval_wer": 0.5250270270270271, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 4.1208791208791204, |
|
"grad_norm": 0.9202386140823364, |
|
"learning_rate": 0.00026121072088724584, |
|
"loss": 0.2516, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 4.395604395604396, |
|
"grad_norm": 1.350644588470459, |
|
"learning_rate": 0.0002584380776340111, |
|
"loss": 0.249, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.395604395604396, |
|
"eval_loss": 0.3498856723308563, |
|
"eval_runtime": 81.8027, |
|
"eval_samples_per_second": 23.703, |
|
"eval_steps_per_second": 2.971, |
|
"eval_wer": 0.5299459459459459, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.670329670329671, |
|
"grad_norm": 1.1849825382232666, |
|
"learning_rate": 0.00025566543438077633, |
|
"loss": 0.2415, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 4.945054945054945, |
|
"grad_norm": 1.1748567819595337, |
|
"learning_rate": 0.0002528927911275416, |
|
"loss": 0.2508, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 4.945054945054945, |
|
"eval_loss": 0.35116615891456604, |
|
"eval_runtime": 81.4977, |
|
"eval_samples_per_second": 23.792, |
|
"eval_steps_per_second": 2.982, |
|
"eval_wer": 0.5582162162162162, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 5.21978021978022, |
|
"grad_norm": 0.8483781814575195, |
|
"learning_rate": 0.0002501201478743068, |
|
"loss": 0.2172, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 5.4945054945054945, |
|
"grad_norm": 0.7349032759666443, |
|
"learning_rate": 0.00024734750462107206, |
|
"loss": 0.2081, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.4945054945054945, |
|
"eval_loss": 0.3216859996318817, |
|
"eval_runtime": 81.8476, |
|
"eval_samples_per_second": 23.69, |
|
"eval_steps_per_second": 2.969, |
|
"eval_wer": 0.4808108108108108, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 5.769230769230769, |
|
"grad_norm": 0.5661817193031311, |
|
"learning_rate": 0.0002445748613678373, |
|
"loss": 0.2039, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 6.043956043956044, |
|
"grad_norm": 0.5281690359115601, |
|
"learning_rate": 0.00024180221811460255, |
|
"loss": 0.2176, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 6.043956043956044, |
|
"eval_loss": 0.3141148090362549, |
|
"eval_runtime": 81.6241, |
|
"eval_samples_per_second": 23.755, |
|
"eval_steps_per_second": 2.977, |
|
"eval_wer": 0.472, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 6.318681318681318, |
|
"grad_norm": 0.5380920171737671, |
|
"learning_rate": 0.0002390295748613678, |
|
"loss": 0.1719, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 6.593406593406593, |
|
"grad_norm": 0.6829392910003662, |
|
"learning_rate": 0.0002362569316081331, |
|
"loss": 0.1784, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 6.593406593406593, |
|
"eval_loss": 0.29688435792922974, |
|
"eval_runtime": 81.2312, |
|
"eval_samples_per_second": 23.87, |
|
"eval_steps_per_second": 2.991, |
|
"eval_wer": 0.4669189189189189, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 6.868131868131869, |
|
"grad_norm": 0.5835310220718384, |
|
"learning_rate": 0.00023348428835489834, |
|
"loss": 0.1842, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 7.142857142857143, |
|
"grad_norm": 0.5060920715332031, |
|
"learning_rate": 0.00023071164510166358, |
|
"loss": 0.166, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 7.142857142857143, |
|
"eval_loss": 0.3367214798927307, |
|
"eval_runtime": 81.6166, |
|
"eval_samples_per_second": 23.757, |
|
"eval_steps_per_second": 2.977, |
|
"eval_wer": 0.49135135135135133, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 7.417582417582418, |
|
"grad_norm": 0.5145979523658752, |
|
"learning_rate": 0.00022793900184842883, |
|
"loss": 0.1562, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 7.6923076923076925, |
|
"grad_norm": 0.8669461011886597, |
|
"learning_rate": 0.00022516635859519407, |
|
"loss": 0.157, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 7.6923076923076925, |
|
"eval_loss": 0.32062390446662903, |
|
"eval_runtime": 81.6437, |
|
"eval_samples_per_second": 23.75, |
|
"eval_steps_per_second": 2.976, |
|
"eval_wer": 0.49027027027027026, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 7.967032967032967, |
|
"grad_norm": 0.7915688753128052, |
|
"learning_rate": 0.00022239371534195932, |
|
"loss": 0.1602, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 8.241758241758241, |
|
"grad_norm": 0.7078336477279663, |
|
"learning_rate": 0.00021962107208872456, |
|
"loss": 0.1398, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.241758241758241, |
|
"eval_loss": 0.32599326968193054, |
|
"eval_runtime": 81.2462, |
|
"eval_samples_per_second": 23.866, |
|
"eval_steps_per_second": 2.991, |
|
"eval_wer": 0.4616756756756757, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 8.241758241758241, |
|
"step": 3000, |
|
"total_flos": 8.892175962497035e+18, |
|
"train_loss": 0.32454829279581704, |
|
"train_runtime": 5336.5113, |
|
"train_samples_per_second": 32.701, |
|
"train_steps_per_second": 2.046 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 10920, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 200, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 3 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.892175962497035e+18, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|