|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 60.0, |
|
"global_step": 6660, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 7.275e-06, |
|
"loss": 13.5875, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 1.4775e-05, |
|
"loss": 5.5556, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 2.2274999999999996e-05, |
|
"loss": 4.0171, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 2.9775e-05, |
|
"loss": 3.4219, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"eval_loss": 3.3127081394195557, |
|
"eval_runtime": 73.7584, |
|
"eval_samples_per_second": 22.031, |
|
"eval_steps_per_second": 1.383, |
|
"eval_wer": 1.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.5, |
|
"learning_rate": 3.7275e-05, |
|
"loss": 3.2104, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 4.4775e-05, |
|
"loss": 3.152, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 5.227499999999999e-05, |
|
"loss": 3.0987, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 5.9774999999999996e-05, |
|
"loss": 3.0399, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"eval_loss": 3.0330417156219482, |
|
"eval_runtime": 71.6796, |
|
"eval_samples_per_second": 22.67, |
|
"eval_steps_per_second": 1.423, |
|
"eval_wer": 1.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 8.11, |
|
"learning_rate": 6.7275e-05, |
|
"loss": 3.0035, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 7.477499999999999e-05, |
|
"loss": 2.7063, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 9.91, |
|
"learning_rate": 7.371466431095406e-05, |
|
"loss": 2.0137, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 10.81, |
|
"learning_rate": 7.238957597173144e-05, |
|
"loss": 1.5756, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 10.81, |
|
"eval_loss": 0.6108289361000061, |
|
"eval_runtime": 70.7969, |
|
"eval_samples_per_second": 22.953, |
|
"eval_steps_per_second": 1.441, |
|
"eval_wer": 0.572425678586816, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 11.71, |
|
"learning_rate": 7.106448763250883e-05, |
|
"loss": 1.3527, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 12.61, |
|
"learning_rate": 6.973939929328621e-05, |
|
"loss": 1.2067, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 13.51, |
|
"learning_rate": 6.841431095406359e-05, |
|
"loss": 1.1508, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 14.41, |
|
"learning_rate": 6.708922261484098e-05, |
|
"loss": 1.0995, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 14.41, |
|
"eval_loss": 0.30910709500312805, |
|
"eval_runtime": 70.2841, |
|
"eval_samples_per_second": 23.12, |
|
"eval_steps_per_second": 1.451, |
|
"eval_wer": 0.31538130116329166, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 15.32, |
|
"learning_rate": 6.576413427561838e-05, |
|
"loss": 1.0495, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 16.22, |
|
"learning_rate": 6.443904593639576e-05, |
|
"loss": 1.0183, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 17.12, |
|
"learning_rate": 6.311395759717314e-05, |
|
"loss": 0.9867, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 18.02, |
|
"learning_rate": 6.178886925795053e-05, |
|
"loss": 0.9639, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 18.02, |
|
"eval_loss": 0.2596471905708313, |
|
"eval_runtime": 70.5333, |
|
"eval_samples_per_second": 23.039, |
|
"eval_steps_per_second": 1.446, |
|
"eval_wer": 0.2841016803102111, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 18.92, |
|
"learning_rate": 6.046378091872791e-05, |
|
"loss": 0.9383, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 19.82, |
|
"learning_rate": 5.913869257950529e-05, |
|
"loss": 0.9041, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 20.72, |
|
"learning_rate": 5.781360424028268e-05, |
|
"loss": 0.8936, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 21.62, |
|
"learning_rate": 5.648851590106007e-05, |
|
"loss": 0.9032, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 21.62, |
|
"eval_loss": 0.22702418267726898, |
|
"eval_runtime": 71.1235, |
|
"eval_samples_per_second": 22.848, |
|
"eval_steps_per_second": 1.434, |
|
"eval_wer": 0.2513571736320552, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 22.52, |
|
"learning_rate": 5.516342756183745e-05, |
|
"loss": 0.8739, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 23.42, |
|
"learning_rate": 5.383833922261484e-05, |
|
"loss": 0.8602, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 24.32, |
|
"learning_rate": 5.2513250883392223e-05, |
|
"loss": 0.8483, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 25.23, |
|
"learning_rate": 5.11881625441696e-05, |
|
"loss": 0.8145, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 25.23, |
|
"eval_loss": 0.21715673804283142, |
|
"eval_runtime": 70.4062, |
|
"eval_samples_per_second": 23.08, |
|
"eval_steps_per_second": 1.449, |
|
"eval_wer": 0.24834123222748816, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 26.13, |
|
"learning_rate": 4.986307420494699e-05, |
|
"loss": 0.8245, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 27.03, |
|
"learning_rate": 4.853798586572438e-05, |
|
"loss": 0.8085, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 27.93, |
|
"learning_rate": 4.721289752650177e-05, |
|
"loss": 0.8101, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 28.83, |
|
"learning_rate": 4.588780918727915e-05, |
|
"loss": 0.7845, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 28.83, |
|
"eval_loss": 0.2083793580532074, |
|
"eval_runtime": 70.1439, |
|
"eval_samples_per_second": 23.167, |
|
"eval_steps_per_second": 1.454, |
|
"eval_wer": 0.23326152520465315, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 29.73, |
|
"learning_rate": 4.4562720848056537e-05, |
|
"loss": 0.7816, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 30.63, |
|
"learning_rate": 4.3237632508833916e-05, |
|
"loss": 0.7655, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 31.53, |
|
"learning_rate": 4.19125441696113e-05, |
|
"loss": 0.762, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 32.43, |
|
"learning_rate": 4.058745583038869e-05, |
|
"loss": 0.7694, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 32.43, |
|
"eval_loss": 0.19744575023651123, |
|
"eval_runtime": 70.5594, |
|
"eval_samples_per_second": 23.03, |
|
"eval_steps_per_second": 1.446, |
|
"eval_wer": 0.22343817320120637, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 33.33, |
|
"learning_rate": 3.926236749116607e-05, |
|
"loss": 0.7517, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 34.23, |
|
"learning_rate": 3.7937279151943456e-05, |
|
"loss": 0.7401, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 35.14, |
|
"learning_rate": 3.661219081272084e-05, |
|
"loss": 0.7196, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 36.04, |
|
"learning_rate": 3.528710247349823e-05, |
|
"loss": 0.7333, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 36.04, |
|
"eval_loss": 0.20204411447048187, |
|
"eval_runtime": 69.6934, |
|
"eval_samples_per_second": 23.316, |
|
"eval_steps_per_second": 1.464, |
|
"eval_wer": 0.21852649719948297, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 36.94, |
|
"learning_rate": 3.3962014134275616e-05, |
|
"loss": 0.7251, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 37.84, |
|
"learning_rate": 3.2636925795053e-05, |
|
"loss": 0.7116, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 38.74, |
|
"learning_rate": 3.131183745583038e-05, |
|
"loss": 0.7236, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 39.64, |
|
"learning_rate": 2.998674911660777e-05, |
|
"loss": 0.693, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 39.64, |
|
"eval_loss": 0.194662407040596, |
|
"eval_runtime": 69.8883, |
|
"eval_samples_per_second": 23.251, |
|
"eval_steps_per_second": 1.459, |
|
"eval_wer": 0.21482119775958639, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 40.54, |
|
"learning_rate": 2.866166077738516e-05, |
|
"loss": 0.6943, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 41.44, |
|
"learning_rate": 2.7336572438162543e-05, |
|
"loss": 0.6938, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 42.34, |
|
"learning_rate": 2.6011484098939926e-05, |
|
"loss": 0.6831, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 43.24, |
|
"learning_rate": 2.469964664310954e-05, |
|
"loss": 0.6802, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 43.24, |
|
"eval_loss": 0.19601202011108398, |
|
"eval_runtime": 69.3985, |
|
"eval_samples_per_second": 23.416, |
|
"eval_steps_per_second": 1.47, |
|
"eval_wer": 0.2101680310211116, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 44.14, |
|
"learning_rate": 2.3374558303886924e-05, |
|
"loss": 0.6756, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 45.05, |
|
"learning_rate": 2.2049469964664307e-05, |
|
"loss": 0.67, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 45.95, |
|
"learning_rate": 2.0724381625441694e-05, |
|
"loss": 0.6753, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 46.85, |
|
"learning_rate": 1.939929328621908e-05, |
|
"loss": 0.667, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 46.85, |
|
"eval_loss": 0.19041335582733154, |
|
"eval_runtime": 70.1706, |
|
"eval_samples_per_second": 23.158, |
|
"eval_steps_per_second": 1.454, |
|
"eval_wer": 0.2072382593709608, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 47.75, |
|
"learning_rate": 1.8074204946996464e-05, |
|
"loss": 0.6562, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 48.65, |
|
"learning_rate": 1.674911660777385e-05, |
|
"loss": 0.666, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 49.55, |
|
"learning_rate": 1.5424028268551237e-05, |
|
"loss": 0.6572, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 50.45, |
|
"learning_rate": 1.409893992932862e-05, |
|
"loss": 0.6486, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 50.45, |
|
"eval_loss": 0.18806982040405273, |
|
"eval_runtime": 70.1099, |
|
"eval_samples_per_second": 23.178, |
|
"eval_steps_per_second": 1.455, |
|
"eval_wer": 0.2009478672985782, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 51.35, |
|
"learning_rate": 1.2773851590106007e-05, |
|
"loss": 0.6484, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 52.25, |
|
"learning_rate": 1.1448763250883392e-05, |
|
"loss": 0.6549, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 53.15, |
|
"learning_rate": 1.0123674911660777e-05, |
|
"loss": 0.6322, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 54.05, |
|
"learning_rate": 8.811837455830388e-06, |
|
"loss": 0.6339, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 54.05, |
|
"eval_loss": 0.1877446174621582, |
|
"eval_runtime": 70.1417, |
|
"eval_samples_per_second": 23.167, |
|
"eval_steps_per_second": 1.454, |
|
"eval_wer": 0.1988797931925894, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 54.95, |
|
"learning_rate": 7.486749116607773e-06, |
|
"loss": 0.6413, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 55.86, |
|
"learning_rate": 6.161660777385159e-06, |
|
"loss": 0.6392, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 56.76, |
|
"learning_rate": 4.836572438162544e-06, |
|
"loss": 0.6363, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 57.66, |
|
"learning_rate": 3.511484098939929e-06, |
|
"loss": 0.6254, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 57.66, |
|
"eval_loss": 0.18933533132076263, |
|
"eval_runtime": 69.6111, |
|
"eval_samples_per_second": 23.344, |
|
"eval_steps_per_second": 1.465, |
|
"eval_wer": 0.2002585092632486, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 58.56, |
|
"learning_rate": 2.1863957597173144e-06, |
|
"loss": 0.625, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 59.46, |
|
"learning_rate": 8.613074204946996e-07, |
|
"loss": 0.6379, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"step": 6660, |
|
"total_flos": 3.0269424793140363e+19, |
|
"train_loss": 1.3688410950852585, |
|
"train_runtime": 13291.4016, |
|
"train_samples_per_second": 16.016, |
|
"train_steps_per_second": 0.501 |
|
} |
|
], |
|
"max_steps": 6660, |
|
"num_train_epochs": 60, |
|
"total_flos": 3.0269424793140363e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|