|
{ |
|
"best_metric": 0.030572160546541417, |
|
"best_model_checkpoint": "Checkpoints/smallnew-checkpoints-ZLSPU/checkpoint-3545", |
|
"epoch": 5.0, |
|
"eval_steps": 1000, |
|
"global_step": 3545, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03526093088857546, |
|
"grad_norm": 59.2584342956543, |
|
"learning_rate": 2.820874471086037e-07, |
|
"loss": 6.028, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.07052186177715092, |
|
"grad_norm": 34.53611373901367, |
|
"learning_rate": 6.346967559943583e-07, |
|
"loss": 5.2721, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.10578279266572638, |
|
"grad_norm": 44.954463958740234, |
|
"learning_rate": 9.590973201692525e-07, |
|
"loss": 4.3355, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.14104372355430184, |
|
"grad_norm": 24.87679672241211, |
|
"learning_rate": 1.311706629055007e-06, |
|
"loss": 3.7609, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.1763046544428773, |
|
"grad_norm": 19.462013244628906, |
|
"learning_rate": 1.6643159379407616e-06, |
|
"loss": 3.0261, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.21156558533145275, |
|
"grad_norm": 18.697181701660156, |
|
"learning_rate": 2.016925246826516e-06, |
|
"loss": 2.367, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2468265162200282, |
|
"grad_norm": 20.409456253051758, |
|
"learning_rate": 2.369534555712271e-06, |
|
"loss": 2.0031, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.2820874471086037, |
|
"grad_norm": 16.5858097076416, |
|
"learning_rate": 2.7221438645980256e-06, |
|
"loss": 1.7145, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.31734837799717913, |
|
"grad_norm": 17.581275939941406, |
|
"learning_rate": 3.0747531734837804e-06, |
|
"loss": 1.572, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.3526093088857546, |
|
"grad_norm": 13.090187072753906, |
|
"learning_rate": 3.427362482369535e-06, |
|
"loss": 1.447, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.38787023977433005, |
|
"grad_norm": 20.531681060791016, |
|
"learning_rate": 3.77997179125529e-06, |
|
"loss": 1.5685, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.4231311706629055, |
|
"grad_norm": 14.029434204101562, |
|
"learning_rate": 4.132581100141044e-06, |
|
"loss": 1.3196, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.45839210155148097, |
|
"grad_norm": 14.186537742614746, |
|
"learning_rate": 4.4851904090267985e-06, |
|
"loss": 1.2374, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.4936530324400564, |
|
"grad_norm": 16.276357650756836, |
|
"learning_rate": 4.837799717912554e-06, |
|
"loss": 1.1746, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.5289139633286318, |
|
"grad_norm": 11.198856353759766, |
|
"learning_rate": 5.190409026798307e-06, |
|
"loss": 1.0245, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.5641748942172073, |
|
"grad_norm": 17.945167541503906, |
|
"learning_rate": 5.543018335684062e-06, |
|
"loss": 0.9595, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5994358251057827, |
|
"grad_norm": 16.26451873779297, |
|
"learning_rate": 5.895627644569817e-06, |
|
"loss": 0.8691, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.6346967559943583, |
|
"grad_norm": 13.700907707214355, |
|
"learning_rate": 6.248236953455572e-06, |
|
"loss": 0.8488, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.6699576868829337, |
|
"grad_norm": 18.16777992248535, |
|
"learning_rate": 6.600846262341326e-06, |
|
"loss": 0.6322, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.7052186177715092, |
|
"grad_norm": 11.960015296936035, |
|
"learning_rate": 6.953455571227081e-06, |
|
"loss": 0.4834, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.7404795486600846, |
|
"grad_norm": 10.09420394897461, |
|
"learning_rate": 7.306064880112836e-06, |
|
"loss": 0.477, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.7757404795486601, |
|
"grad_norm": 11.922008514404297, |
|
"learning_rate": 7.658674188998591e-06, |
|
"loss": 0.4654, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.8110014104372355, |
|
"grad_norm": 11.810051918029785, |
|
"learning_rate": 8.011283497884344e-06, |
|
"loss": 0.4597, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.846262341325811, |
|
"grad_norm": 14.17561149597168, |
|
"learning_rate": 8.3638928067701e-06, |
|
"loss": 0.4481, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.8815232722143864, |
|
"grad_norm": 10.79333782196045, |
|
"learning_rate": 8.716502115655853e-06, |
|
"loss": 0.4451, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.9167842031029619, |
|
"grad_norm": 11.658808708190918, |
|
"learning_rate": 9.069111424541608e-06, |
|
"loss": 0.4993, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.9520451339915373, |
|
"grad_norm": 11.166906356811523, |
|
"learning_rate": 9.421720733427363e-06, |
|
"loss": 0.4361, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.9873060648801129, |
|
"grad_norm": 11.330738067626953, |
|
"learning_rate": 9.774330042313119e-06, |
|
"loss": 0.4316, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.09430261701345444, |
|
"eval_runtime": 330.7202, |
|
"eval_samples_per_second": 0.952, |
|
"eval_steps_per_second": 0.478, |
|
"eval_wer": 0.035866780529461996, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 1.0225669957686883, |
|
"grad_norm": 12.845878601074219, |
|
"learning_rate": 9.99995091525116e-06, |
|
"loss": 0.3581, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.0578279266572637, |
|
"grad_norm": 10.885231018066406, |
|
"learning_rate": 9.99929949706731e-06, |
|
"loss": 0.4274, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.0930888575458393, |
|
"grad_norm": 10.285490036010742, |
|
"learning_rate": 9.997890712637366e-06, |
|
"loss": 0.3938, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.1283497884344147, |
|
"grad_norm": 9.70152759552002, |
|
"learning_rate": 9.995724775384384e-06, |
|
"loss": 0.4225, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.16361071932299, |
|
"grad_norm": 16.16654396057129, |
|
"learning_rate": 9.992802013435885e-06, |
|
"loss": 0.4114, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.1988716502115655, |
|
"grad_norm": 11.366144180297852, |
|
"learning_rate": 9.989122869574155e-06, |
|
"loss": 0.3886, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.2341325811001411, |
|
"grad_norm": 8.861817359924316, |
|
"learning_rate": 9.984687901169156e-06, |
|
"loss": 0.3825, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.2693935119887165, |
|
"grad_norm": 8.017960548400879, |
|
"learning_rate": 9.979497780094093e-06, |
|
"loss": 0.3545, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.304654442877292, |
|
"grad_norm": 9.310171127319336, |
|
"learning_rate": 9.973553292623627e-06, |
|
"loss": 0.4492, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.3399153737658676, |
|
"grad_norm": 10.03092098236084, |
|
"learning_rate": 9.966855339314756e-06, |
|
"loss": 0.3924, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.375176304654443, |
|
"grad_norm": 12.273679733276367, |
|
"learning_rate": 9.959404934870393e-06, |
|
"loss": 0.3266, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.4104372355430184, |
|
"grad_norm": 8.787628173828125, |
|
"learning_rate": 9.95120320798564e-06, |
|
"loss": 0.4079, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.4456981664315938, |
|
"grad_norm": 12.66067886352539, |
|
"learning_rate": 9.942251401176792e-06, |
|
"loss": 0.369, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 1.4809590973201692, |
|
"grad_norm": 8.999523162841797, |
|
"learning_rate": 9.932550870593111e-06, |
|
"loss": 0.3972, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.5162200282087448, |
|
"grad_norm": 23.835445404052734, |
|
"learning_rate": 9.92210308581137e-06, |
|
"loss": 0.3846, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 1.5514809590973202, |
|
"grad_norm": 11.772904396057129, |
|
"learning_rate": 9.91090962961323e-06, |
|
"loss": 0.4238, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.5867418899858956, |
|
"grad_norm": 12.955037117004395, |
|
"learning_rate": 9.898972197745443e-06, |
|
"loss": 0.3488, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 1.6220028208744712, |
|
"grad_norm": 6.229507923126221, |
|
"learning_rate": 9.886292598662969e-06, |
|
"loss": 0.384, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.6572637517630464, |
|
"grad_norm": 11.097539901733398, |
|
"learning_rate": 9.872872753254996e-06, |
|
"loss": 0.3802, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 1.692524682651622, |
|
"grad_norm": 12.698493957519531, |
|
"learning_rate": 9.858714694553941e-06, |
|
"loss": 0.3506, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.7277856135401974, |
|
"grad_norm": 9.563314437866211, |
|
"learning_rate": 9.843820567427456e-06, |
|
"loss": 0.4094, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 1.7630465444287728, |
|
"grad_norm": 10.313949584960938, |
|
"learning_rate": 9.828192628253492e-06, |
|
"loss": 0.3782, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.7983074753173485, |
|
"grad_norm": 12.121296882629395, |
|
"learning_rate": 9.811833244578466e-06, |
|
"loss": 0.3521, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 1.8335684062059239, |
|
"grad_norm": 12.784782409667969, |
|
"learning_rate": 9.7947448947586e-06, |
|
"loss": 0.381, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.8688293370944993, |
|
"grad_norm": 10.644037246704102, |
|
"learning_rate": 9.776930167584457e-06, |
|
"loss": 0.3254, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 1.904090267983075, |
|
"grad_norm": 8.965851783752441, |
|
"learning_rate": 9.758391761888753e-06, |
|
"loss": 0.2973, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.93935119887165, |
|
"grad_norm": 10.444358825683594, |
|
"learning_rate": 9.7391324861375e-06, |
|
"loss": 0.3182, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 1.9746121297602257, |
|
"grad_norm": 8.695629119873047, |
|
"learning_rate": 9.719155258004542e-06, |
|
"loss": 0.33, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 0.06297292560338974, |
|
"eval_runtime": 331.9645, |
|
"eval_samples_per_second": 0.949, |
|
"eval_steps_per_second": 0.476, |
|
"eval_wer": 0.03125533731853117, |
|
"step": 1418 |
|
}, |
|
{ |
|
"epoch": 2.0098730606488013, |
|
"grad_norm": 8.408599853515625, |
|
"learning_rate": 9.698463103929542e-06, |
|
"loss": 0.3357, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 2.0451339915373765, |
|
"grad_norm": 8.19851016998291, |
|
"learning_rate": 9.677059158659492e-06, |
|
"loss": 0.3128, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.080394922425952, |
|
"grad_norm": 13.286957740783691, |
|
"learning_rate": 9.654946664773812e-06, |
|
"loss": 0.2853, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 2.1156558533145273, |
|
"grad_norm": 7.759203910827637, |
|
"learning_rate": 9.63212897219313e-06, |
|
"loss": 0.3004, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.150916784203103, |
|
"grad_norm": 9.274896621704102, |
|
"learning_rate": 9.608609537671767e-06, |
|
"loss": 0.3399, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 2.1861777150916786, |
|
"grad_norm": 6.893747806549072, |
|
"learning_rate": 9.584391924274083e-06, |
|
"loss": 0.281, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.2214386459802538, |
|
"grad_norm": 13.223069190979004, |
|
"learning_rate": 9.559479800834672e-06, |
|
"loss": 0.3175, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 2.2566995768688294, |
|
"grad_norm": 9.873945236206055, |
|
"learning_rate": 9.533876941402563e-06, |
|
"loss": 0.3431, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.2919605077574046, |
|
"grad_norm": 8.700020790100098, |
|
"learning_rate": 9.50758722466947e-06, |
|
"loss": 0.3036, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 2.32722143864598, |
|
"grad_norm": 7.030035018920898, |
|
"learning_rate": 9.480614633382192e-06, |
|
"loss": 0.3075, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.362482369534556, |
|
"grad_norm": 10.410675048828125, |
|
"learning_rate": 9.452963253739254e-06, |
|
"loss": 0.3399, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 2.397743300423131, |
|
"grad_norm": 12.658001899719238, |
|
"learning_rate": 9.424637274771854e-06, |
|
"loss": 0.3209, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.4330042313117066, |
|
"grad_norm": 8.876688003540039, |
|
"learning_rate": 9.395640987709275e-06, |
|
"loss": 0.3369, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 2.4682651622002822, |
|
"grad_norm": 9.614097595214844, |
|
"learning_rate": 9.36597878532876e-06, |
|
"loss": 0.3215, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.5035260930888574, |
|
"grad_norm": 10.007596015930176, |
|
"learning_rate": 9.335655161290047e-06, |
|
"loss": 0.2846, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 2.538787023977433, |
|
"grad_norm": 10.346504211425781, |
|
"learning_rate": 9.304674709454601e-06, |
|
"loss": 0.2765, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.5740479548660087, |
|
"grad_norm": 9.90110969543457, |
|
"learning_rate": 9.273042123189669e-06, |
|
"loss": 0.3337, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 2.609308885754584, |
|
"grad_norm": 7.371924877166748, |
|
"learning_rate": 9.240762194657254e-06, |
|
"loss": 0.2827, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.6445698166431595, |
|
"grad_norm": 10.066449165344238, |
|
"learning_rate": 9.207839814088145e-06, |
|
"loss": 0.3301, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 2.679830747531735, |
|
"grad_norm": 11.040672302246094, |
|
"learning_rate": 9.174279969041054e-06, |
|
"loss": 0.2725, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.7150916784203103, |
|
"grad_norm": 12.25363540649414, |
|
"learning_rate": 9.140087743647045e-06, |
|
"loss": 0.3398, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 2.750352609308886, |
|
"grad_norm": 8.830925941467285, |
|
"learning_rate": 9.10526831783931e-06, |
|
"loss": 0.2997, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.785613540197461, |
|
"grad_norm": 9.66303539276123, |
|
"learning_rate": 9.069826966568434e-06, |
|
"loss": 0.3317, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 2.8208744710860367, |
|
"grad_norm": 10.275798797607422, |
|
"learning_rate": 9.033769059003272e-06, |
|
"loss": 0.3228, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.856135401974612, |
|
"grad_norm": 10.289546012878418, |
|
"learning_rate": 8.997100057717558e-06, |
|
"loss": 0.2757, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 2.8913963328631875, |
|
"grad_norm": 9.519564628601074, |
|
"learning_rate": 8.959825517862337e-06, |
|
"loss": 0.2814, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.926657263751763, |
|
"grad_norm": 6.923116207122803, |
|
"learning_rate": 8.92195108632441e-06, |
|
"loss": 0.3032, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 2.9619181946403383, |
|
"grad_norm": 8.474822044372559, |
|
"learning_rate": 8.883482500870847e-06, |
|
"loss": 0.2851, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.997179125528914, |
|
"grad_norm": 10.44246768951416, |
|
"learning_rate": 8.844425589279757e-06, |
|
"loss": 0.2835, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.05503656342625618, |
|
"eval_runtime": 332.8642, |
|
"eval_samples_per_second": 0.946, |
|
"eval_steps_per_second": 0.475, |
|
"eval_wer": 0.038770281810418444, |
|
"step": 2127 |
|
}, |
|
{ |
|
"epoch": 3.0324400564174896, |
|
"grad_norm": 18.03895378112793, |
|
"learning_rate": 8.804786268457402e-06, |
|
"loss": 0.2404, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 3.0677009873060648, |
|
"grad_norm": 10.602093696594238, |
|
"learning_rate": 8.764570543541829e-06, |
|
"loss": 0.2813, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 3.1029619181946404, |
|
"grad_norm": 7.453354835510254, |
|
"learning_rate": 8.723784506993116e-06, |
|
"loss": 0.265, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.138222849083216, |
|
"grad_norm": 13.981342315673828, |
|
"learning_rate": 8.682434337670408e-06, |
|
"loss": 0.259, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 3.173483779971791, |
|
"grad_norm": 9.910590171813965, |
|
"learning_rate": 8.640526299895838e-06, |
|
"loss": 0.2731, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 3.208744710860367, |
|
"grad_norm": 9.800156593322754, |
|
"learning_rate": 8.598066742505533e-06, |
|
"loss": 0.2825, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 3.244005641748942, |
|
"grad_norm": 8.811142921447754, |
|
"learning_rate": 8.555062097887796e-06, |
|
"loss": 0.2707, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 3.2792665726375176, |
|
"grad_norm": 11.076090812683105, |
|
"learning_rate": 8.511518881008625e-06, |
|
"loss": 0.2356, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 3.3145275035260933, |
|
"grad_norm": 10.804423332214355, |
|
"learning_rate": 8.467443688424748e-06, |
|
"loss": 0.3101, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 3.3497884344146684, |
|
"grad_norm": 8.413751602172852, |
|
"learning_rate": 8.422843197284264e-06, |
|
"loss": 0.2527, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 3.385049365303244, |
|
"grad_norm": 9.56535530090332, |
|
"learning_rate": 8.377724164315107e-06, |
|
"loss": 0.3021, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 3.4203102961918193, |
|
"grad_norm": 8.586977005004883, |
|
"learning_rate": 8.332093424801428e-06, |
|
"loss": 0.2837, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 3.455571227080395, |
|
"grad_norm": 13.055554389953613, |
|
"learning_rate": 8.285957891548094e-06, |
|
"loss": 0.2633, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 3.4908321579689705, |
|
"grad_norm": 7.521981716156006, |
|
"learning_rate": 8.239324553833434e-06, |
|
"loss": 0.2264, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 3.5260930888575457, |
|
"grad_norm": 7.147644519805908, |
|
"learning_rate": 8.192200476350402e-06, |
|
"loss": 0.2874, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 3.5613540197461213, |
|
"grad_norm": 10.37299919128418, |
|
"learning_rate": 8.14459279813631e-06, |
|
"loss": 0.2449, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 3.596614950634697, |
|
"grad_norm": 9.178033828735352, |
|
"learning_rate": 8.096508731491317e-06, |
|
"loss": 0.2983, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 3.631875881523272, |
|
"grad_norm": 5.560033321380615, |
|
"learning_rate": 8.04795556088579e-06, |
|
"loss": 0.287, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 3.6671368124118477, |
|
"grad_norm": 8.004313468933105, |
|
"learning_rate": 7.998940641856756e-06, |
|
"loss": 0.2452, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 3.7023977433004234, |
|
"grad_norm": 12.359275817871094, |
|
"learning_rate": 7.949471399893577e-06, |
|
"loss": 0.281, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 3.7376586741889986, |
|
"grad_norm": 7.848395824432373, |
|
"learning_rate": 7.899555329313022e-06, |
|
"loss": 0.2596, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 3.772919605077574, |
|
"grad_norm": 9.536077499389648, |
|
"learning_rate": 7.84919999212394e-06, |
|
"loss": 0.2787, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 3.8081805359661494, |
|
"grad_norm": 11.713583946228027, |
|
"learning_rate": 7.79841301688163e-06, |
|
"loss": 0.296, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 3.843441466854725, |
|
"grad_norm": 11.016488075256348, |
|
"learning_rate": 7.74720209753218e-06, |
|
"loss": 0.2367, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 3.8787023977433, |
|
"grad_norm": 6.952511787414551, |
|
"learning_rate": 7.69557499224686e-06, |
|
"loss": 0.2599, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 3.913963328631876, |
|
"grad_norm": 9.327037811279297, |
|
"learning_rate": 7.643539522246816e-06, |
|
"loss": 0.2862, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 3.9492242595204514, |
|
"grad_norm": 12.434979438781738, |
|
"learning_rate": 7.591103570618188e-06, |
|
"loss": 0.2488, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 3.9844851904090266, |
|
"grad_norm": 9.145892143249512, |
|
"learning_rate": 7.538275081117868e-06, |
|
"loss": 0.3211, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 0.05180150642991066, |
|
"eval_runtime": 330.3815, |
|
"eval_samples_per_second": 0.953, |
|
"eval_steps_per_second": 0.478, |
|
"eval_wer": 0.031938514090520925, |
|
"step": 2836 |
|
}, |
|
{ |
|
"epoch": 4.019746121297603, |
|
"grad_norm": 7.585170269012451, |
|
"learning_rate": 7.485062056970071e-06, |
|
"loss": 0.2148, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 4.055007052186178, |
|
"grad_norm": 7.359046459197998, |
|
"learning_rate": 7.431472559653883e-06, |
|
"loss": 0.2343, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 4.090267983074753, |
|
"grad_norm": 3.967226505279541, |
|
"learning_rate": 7.377514707681993e-06, |
|
"loss": 0.2115, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 4.125528913963328, |
|
"grad_norm": 12.629058837890625, |
|
"learning_rate": 7.323196675370788e-06, |
|
"loss": 0.2376, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 4.160789844851904, |
|
"grad_norm": 11.459778785705566, |
|
"learning_rate": 7.268526691601983e-06, |
|
"loss": 0.2464, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 4.1960507757404795, |
|
"grad_norm": 13.925875663757324, |
|
"learning_rate": 7.213513038575999e-06, |
|
"loss": 0.272, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 4.231311706629055, |
|
"grad_norm": 9.624604225158691, |
|
"learning_rate": 7.158164050557244e-06, |
|
"loss": 0.2604, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 4.266572637517631, |
|
"grad_norm": 9.38764476776123, |
|
"learning_rate": 7.102488112611525e-06, |
|
"loss": 0.2413, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 4.301833568406206, |
|
"grad_norm": 8.488667488098145, |
|
"learning_rate": 7.046493659335762e-06, |
|
"loss": 0.2741, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 4.337094499294781, |
|
"grad_norm": 7.635845184326172, |
|
"learning_rate": 6.9901891735801795e-06, |
|
"loss": 0.2515, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 4.372355430183357, |
|
"grad_norm": 9.501697540283203, |
|
"learning_rate": 6.933583185163215e-06, |
|
"loss": 0.2537, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 4.407616361071932, |
|
"grad_norm": 12.442976951599121, |
|
"learning_rate": 6.876684269579291e-06, |
|
"loss": 0.2435, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 4.4428772919605075, |
|
"grad_norm": 9.078912734985352, |
|
"learning_rate": 6.819501046699677e-06, |
|
"loss": 0.2132, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 4.478138222849084, |
|
"grad_norm": 10.42381763458252, |
|
"learning_rate": 6.762042179466628e-06, |
|
"loss": 0.2596, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 4.513399153737659, |
|
"grad_norm": 9.87898063659668, |
|
"learning_rate": 6.70431637258099e-06, |
|
"loss": 0.2426, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 4.548660084626234, |
|
"grad_norm": 8.841654777526855, |
|
"learning_rate": 6.646332371183498e-06, |
|
"loss": 0.208, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 4.583921015514809, |
|
"grad_norm": 7.4318389892578125, |
|
"learning_rate": 6.588098959529927e-06, |
|
"loss": 0.2186, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 4.619181946403385, |
|
"grad_norm": 8.87193775177002, |
|
"learning_rate": 6.529624959660324e-06, |
|
"loss": 0.2561, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 4.65444287729196, |
|
"grad_norm": 8.252178192138672, |
|
"learning_rate": 6.4709192300625236e-06, |
|
"loss": 0.2173, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 4.689703808180536, |
|
"grad_norm": 10.684964179992676, |
|
"learning_rate": 6.411990664330133e-06, |
|
"loss": 0.2315, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 4.724964739069112, |
|
"grad_norm": 9.784955024719238, |
|
"learning_rate": 6.352848189815204e-06, |
|
"loss": 0.2493, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 4.760225669957687, |
|
"grad_norm": 6.553511142730713, |
|
"learning_rate": 6.293500766275787e-06, |
|
"loss": 0.2416, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 4.795486600846262, |
|
"grad_norm": 6.237346649169922, |
|
"learning_rate": 6.23395738451857e-06, |
|
"loss": 0.199, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 4.830747531734838, |
|
"grad_norm": 8.595192909240723, |
|
"learning_rate": 6.1742270650368395e-06, |
|
"loss": 0.2025, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 4.866008462623413, |
|
"grad_norm": 8.099990844726562, |
|
"learning_rate": 6.1143188566439075e-06, |
|
"loss": 0.2262, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 4.901269393511988, |
|
"grad_norm": 7.406097412109375, |
|
"learning_rate": 6.054241835102271e-06, |
|
"loss": 0.2082, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 4.9365303244005645, |
|
"grad_norm": 11.898475646972656, |
|
"learning_rate": 5.994005101748691e-06, |
|
"loss": 0.2605, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 4.97179125528914, |
|
"grad_norm": 10.15351676940918, |
|
"learning_rate": 5.933617782115382e-06, |
|
"loss": 0.2751, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 0.046728044748306274, |
|
"eval_runtime": 364.0425, |
|
"eval_samples_per_second": 0.865, |
|
"eval_steps_per_second": 0.434, |
|
"eval_wer": 0.030572160546541417, |
|
"step": 3545 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 7090, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 1000, |
|
"total_flos": 1.4467074564096e+19, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|