{ "best_metric": 0.29688435792922974, "best_model_checkpoint": "/scratch/skscla001/results/w2v-bert-bem-genbed-combined-model/checkpoint-2400", "epoch": 8.241758241758241, "eval_steps": 200, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.27472527472527475, "grad_norm": 3.8470664024353027, "learning_rate": 0.00029699999999999996, "loss": 1.5247, "step": 100 }, { "epoch": 0.5494505494505495, "grad_norm": 4.275773048400879, "learning_rate": 0.00029725508317929756, "loss": 0.6407, "step": 200 }, { "epoch": 0.5494505494505495, "eval_loss": 0.6847326159477234, "eval_runtime": 81.9887, "eval_samples_per_second": 23.65, "eval_steps_per_second": 2.964, "eval_wer": 0.8380540540540541, "step": 200 }, { "epoch": 0.8241758241758241, "grad_norm": 4.533022403717041, "learning_rate": 0.0002944824399260628, "loss": 0.5465, "step": 300 }, { "epoch": 1.098901098901099, "grad_norm": 1.5235211849212646, "learning_rate": 0.00029170979667282804, "loss": 0.458, "step": 400 }, { "epoch": 1.098901098901099, "eval_loss": 0.4855729043483734, "eval_runtime": 81.2075, "eval_samples_per_second": 23.877, "eval_steps_per_second": 2.992, "eval_wer": 0.6787027027027027, "step": 400 }, { "epoch": 1.3736263736263736, "grad_norm": 1.3080778121948242, "learning_rate": 0.00028893715341959334, "loss": 0.4539, "step": 500 }, { "epoch": 1.6483516483516483, "grad_norm": 1.1141129732131958, "learning_rate": 0.0002861645101663586, "loss": 0.4014, "step": 600 }, { "epoch": 1.6483516483516483, "eval_loss": 0.4310147762298584, "eval_runtime": 81.4044, "eval_samples_per_second": 23.819, "eval_steps_per_second": 2.985, "eval_wer": 0.6258378378378379, "step": 600 }, { "epoch": 1.9230769230769231, "grad_norm": 1.3589787483215332, "learning_rate": 0.00028339186691312383, "loss": 0.4043, "step": 700 }, { "epoch": 2.197802197802198, "grad_norm": 1.0690594911575317, "learning_rate": 0.0002806192236598891, "loss": 0.3523, "step": 800 }, { "epoch": 2.197802197802198, "eval_loss": 0.36540287733078003, "eval_runtime": 81.4173, "eval_samples_per_second": 23.816, "eval_steps_per_second": 2.985, "eval_wer": 0.5421621621621622, "step": 800 }, { "epoch": 2.4725274725274726, "grad_norm": 1.0713083744049072, "learning_rate": 0.0002778465804066543, "loss": 0.3256, "step": 900 }, { "epoch": 2.7472527472527473, "grad_norm": 1.2681094408035278, "learning_rate": 0.00027507393715341956, "loss": 0.3298, "step": 1000 }, { "epoch": 2.7472527472527473, "eval_loss": 0.3534020483493805, "eval_runtime": 81.532, "eval_samples_per_second": 23.782, "eval_steps_per_second": 2.98, "eval_wer": 0.5373513513513514, "step": 1000 }, { "epoch": 3.021978021978022, "grad_norm": 1.4569008350372314, "learning_rate": 0.0002723012939001848, "loss": 0.3323, "step": 1100 }, { "epoch": 3.2967032967032965, "grad_norm": 0.8082828521728516, "learning_rate": 0.00026952865064695005, "loss": 0.2749, "step": 1200 }, { "epoch": 3.2967032967032965, "eval_loss": 0.34022244811058044, "eval_runtime": 81.3749, "eval_samples_per_second": 23.828, "eval_steps_per_second": 2.986, "eval_wer": 0.5196216216216216, "step": 1200 }, { "epoch": 3.571428571428571, "grad_norm": 0.5922394394874573, "learning_rate": 0.0002667560073937153, "loss": 0.2681, "step": 1300 }, { "epoch": 3.8461538461538463, "grad_norm": 0.7247134447097778, "learning_rate": 0.00026398336414048054, "loss": 0.2705, "step": 1400 }, { "epoch": 3.8461538461538463, "eval_loss": 0.3283730447292328, "eval_runtime": 86.9096, "eval_samples_per_second": 22.311, "eval_steps_per_second": 2.796, "eval_wer": 0.5250270270270271, "step": 1400 }, { "epoch": 4.1208791208791204, "grad_norm": 0.9202386140823364, "learning_rate": 0.00026121072088724584, "loss": 0.2516, "step": 1500 }, { "epoch": 4.395604395604396, "grad_norm": 1.350644588470459, "learning_rate": 0.0002584380776340111, "loss": 0.249, "step": 1600 }, { "epoch": 4.395604395604396, "eval_loss": 0.3498856723308563, "eval_runtime": 81.8027, "eval_samples_per_second": 23.703, "eval_steps_per_second": 2.971, "eval_wer": 0.5299459459459459, "step": 1600 }, { "epoch": 4.670329670329671, "grad_norm": 1.1849825382232666, "learning_rate": 0.00025566543438077633, "loss": 0.2415, "step": 1700 }, { "epoch": 4.945054945054945, "grad_norm": 1.1748567819595337, "learning_rate": 0.0002528927911275416, "loss": 0.2508, "step": 1800 }, { "epoch": 4.945054945054945, "eval_loss": 0.35116615891456604, "eval_runtime": 81.4977, "eval_samples_per_second": 23.792, "eval_steps_per_second": 2.982, "eval_wer": 0.5582162162162162, "step": 1800 }, { "epoch": 5.21978021978022, "grad_norm": 0.8483781814575195, "learning_rate": 0.0002501201478743068, "loss": 0.2172, "step": 1900 }, { "epoch": 5.4945054945054945, "grad_norm": 0.7349032759666443, "learning_rate": 0.00024734750462107206, "loss": 0.2081, "step": 2000 }, { "epoch": 5.4945054945054945, "eval_loss": 0.3216859996318817, "eval_runtime": 81.8476, "eval_samples_per_second": 23.69, "eval_steps_per_second": 2.969, "eval_wer": 0.4808108108108108, "step": 2000 }, { "epoch": 5.769230769230769, "grad_norm": 0.5661817193031311, "learning_rate": 0.0002445748613678373, "loss": 0.2039, "step": 2100 }, { "epoch": 6.043956043956044, "grad_norm": 0.5281690359115601, "learning_rate": 0.00024180221811460255, "loss": 0.2176, "step": 2200 }, { "epoch": 6.043956043956044, "eval_loss": 0.3141148090362549, "eval_runtime": 81.6241, "eval_samples_per_second": 23.755, "eval_steps_per_second": 2.977, "eval_wer": 0.472, "step": 2200 }, { "epoch": 6.318681318681318, "grad_norm": 0.5380920171737671, "learning_rate": 0.0002390295748613678, "loss": 0.1719, "step": 2300 }, { "epoch": 6.593406593406593, "grad_norm": 0.6829392910003662, "learning_rate": 0.0002362569316081331, "loss": 0.1784, "step": 2400 }, { "epoch": 6.593406593406593, "eval_loss": 0.29688435792922974, "eval_runtime": 81.2312, "eval_samples_per_second": 23.87, "eval_steps_per_second": 2.991, "eval_wer": 0.4669189189189189, "step": 2400 }, { "epoch": 6.868131868131869, "grad_norm": 0.5835310220718384, "learning_rate": 0.00023348428835489834, "loss": 0.1842, "step": 2500 }, { "epoch": 7.142857142857143, "grad_norm": 0.5060920715332031, "learning_rate": 0.00023071164510166358, "loss": 0.166, "step": 2600 }, { "epoch": 7.142857142857143, "eval_loss": 0.3367214798927307, "eval_runtime": 81.6166, "eval_samples_per_second": 23.757, "eval_steps_per_second": 2.977, "eval_wer": 0.49135135135135133, "step": 2600 }, { "epoch": 7.417582417582418, "grad_norm": 0.5145979523658752, "learning_rate": 0.00022793900184842883, "loss": 0.1562, "step": 2700 }, { "epoch": 7.6923076923076925, "grad_norm": 0.8669461011886597, "learning_rate": 0.00022516635859519407, "loss": 0.157, "step": 2800 }, { "epoch": 7.6923076923076925, "eval_loss": 0.32062390446662903, "eval_runtime": 81.6437, "eval_samples_per_second": 23.75, "eval_steps_per_second": 2.976, "eval_wer": 0.49027027027027026, "step": 2800 }, { "epoch": 7.967032967032967, "grad_norm": 0.7915688753128052, "learning_rate": 0.00022239371534195932, "loss": 0.1602, "step": 2900 }, { "epoch": 8.241758241758241, "grad_norm": 0.7078336477279663, "learning_rate": 0.00021962107208872456, "loss": 0.1398, "step": 3000 }, { "epoch": 8.241758241758241, "eval_loss": 0.32599326968193054, "eval_runtime": 81.2462, "eval_samples_per_second": 23.866, "eval_steps_per_second": 2.991, "eval_wer": 0.4616756756756757, "step": 3000 }, { "epoch": 8.241758241758241, "step": 3000, "total_flos": 8.892175962497035e+18, "train_loss": 0.32454829279581704, "train_runtime": 5336.5113, "train_samples_per_second": 32.701, "train_steps_per_second": 2.046 } ], "logging_steps": 100, "max_steps": 10920, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 200, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.892175962497035e+18, "train_batch_size": 8, "trial_name": null, "trial_params": null }