{ "best_metric": 0.046749928297655986, "best_model_checkpoint": "./whisper-large-v3-turbo-finetuned-lora/checkpoint-1000", "epoch": 6.0, "eval_steps": 250, "global_step": 1500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "grad_norm": 11.320270538330078, "learning_rate": 1.25e-06, "loss": 3.7364, "step": 5 }, { "epoch": 0.04, "grad_norm": 10.51279354095459, "learning_rate": 2.8124999999999998e-06, "loss": 3.669, "step": 10 }, { "epoch": 0.06, "grad_norm": 10.274462699890137, "learning_rate": 4.3750000000000005e-06, "loss": 3.6416, "step": 15 }, { "epoch": 0.08, "grad_norm": 10.032905578613281, "learning_rate": 5.9375e-06, "loss": 3.5981, "step": 20 }, { "epoch": 0.1, "grad_norm": 9.448946952819824, "learning_rate": 7.1875e-06, "loss": 3.4937, "step": 25 }, { "epoch": 0.12, "grad_norm": 8.844466209411621, "learning_rate": 8.750000000000001e-06, "loss": 3.3145, "step": 30 }, { "epoch": 0.14, "grad_norm": 8.622856140136719, "learning_rate": 1.03125e-05, "loss": 3.0978, "step": 35 }, { "epoch": 0.16, "grad_norm": 6.81293249130249, "learning_rate": 1.1875e-05, "loss": 2.9583, "step": 40 }, { "epoch": 0.18, "grad_norm": 5.21970272064209, "learning_rate": 1.34375e-05, "loss": 2.7668, "step": 45 }, { "epoch": 0.2, "grad_norm": 4.440727710723877, "learning_rate": 1.5e-05, "loss": 2.5467, "step": 50 }, { "epoch": 0.22, "grad_norm": 4.219883918762207, "learning_rate": 1.6562500000000003e-05, "loss": 2.3237, "step": 55 }, { "epoch": 0.24, "grad_norm": 3.6006925106048584, "learning_rate": 1.8125e-05, "loss": 2.1307, "step": 60 }, { "epoch": 0.26, "grad_norm": 3.07859206199646, "learning_rate": 1.96875e-05, "loss": 1.9725, "step": 65 }, { "epoch": 0.28, "grad_norm": 3.0515847206115723, "learning_rate": 2.125e-05, "loss": 1.8568, "step": 70 }, { "epoch": 0.3, "grad_norm": 2.7134475708007812, "learning_rate": 2.28125e-05, "loss": 1.6583, "step": 75 }, { "epoch": 0.32, "grad_norm": 2.7643449306488037, "learning_rate": 2.4375000000000003e-05, "loss": 1.4433, "step": 80 }, { "epoch": 0.34, "grad_norm": 2.427220582962036, "learning_rate": 2.59375e-05, "loss": 1.3435, "step": 85 }, { "epoch": 0.36, "grad_norm": 2.0288472175598145, "learning_rate": 2.75e-05, "loss": 1.2446, "step": 90 }, { "epoch": 0.38, "grad_norm": 2.7967100143432617, "learning_rate": 2.90625e-05, "loss": 1.1031, "step": 95 }, { "epoch": 0.4, "grad_norm": 2.407944440841675, "learning_rate": 3.0625e-05, "loss": 0.9777, "step": 100 }, { "epoch": 0.42, "grad_norm": 3.4412190914154053, "learning_rate": 3.21875e-05, "loss": 0.9294, "step": 105 }, { "epoch": 0.44, "grad_norm": 2.0439155101776123, "learning_rate": 3.375e-05, "loss": 0.8108, "step": 110 }, { "epoch": 0.46, "grad_norm": 1.616652011871338, "learning_rate": 3.53125e-05, "loss": 0.6951, "step": 115 }, { "epoch": 0.48, "grad_norm": 1.690824031829834, "learning_rate": 3.6875e-05, "loss": 0.6352, "step": 120 }, { "epoch": 0.5, "grad_norm": 2.7902116775512695, "learning_rate": 3.84375e-05, "loss": 0.5562, "step": 125 }, { "epoch": 0.52, "grad_norm": 2.4872801303863525, "learning_rate": 4e-05, "loss": 0.5111, "step": 130 }, { "epoch": 0.54, "grad_norm": 2.4177122116088867, "learning_rate": 4.15625e-05, "loss": 0.4696, "step": 135 }, { "epoch": 0.56, "grad_norm": 2.0231056213378906, "learning_rate": 4.3125e-05, "loss": 0.4462, "step": 140 }, { "epoch": 0.58, "grad_norm": 2.004688024520874, "learning_rate": 4.46875e-05, "loss": 0.4224, "step": 145 }, { "epoch": 0.6, "grad_norm": 3.170652389526367, "learning_rate": 4.625e-05, "loss": 0.3967, "step": 150 }, { "epoch": 0.62, "grad_norm": 1.8620476722717285, "learning_rate": 4.7812500000000003e-05, "loss": 0.3739, "step": 155 }, { "epoch": 0.64, "grad_norm": 2.4667856693267822, "learning_rate": 4.9375e-05, "loss": 0.3542, "step": 160 }, { "epoch": 0.66, "grad_norm": 1.7935612201690674, "learning_rate": 5.09375e-05, "loss": 0.3409, "step": 165 }, { "epoch": 0.68, "grad_norm": 1.6230987310409546, "learning_rate": 5.25e-05, "loss": 0.3068, "step": 170 }, { "epoch": 0.7, "grad_norm": 2.739957094192505, "learning_rate": 5.40625e-05, "loss": 0.2963, "step": 175 }, { "epoch": 0.72, "grad_norm": 1.7342944145202637, "learning_rate": 5.5625000000000004e-05, "loss": 0.253, "step": 180 }, { "epoch": 0.74, "grad_norm": 2.0191333293914795, "learning_rate": 5.71875e-05, "loss": 0.2175, "step": 185 }, { "epoch": 0.76, "grad_norm": 1.6039254665374756, "learning_rate": 5.875e-05, "loss": 0.2009, "step": 190 }, { "epoch": 0.78, "grad_norm": 2.2860054969787598, "learning_rate": 6.03125e-05, "loss": 0.1774, "step": 195 }, { "epoch": 0.8, "grad_norm": 1.528680443763733, "learning_rate": 6.1875e-05, "loss": 0.1603, "step": 200 }, { "epoch": 0.82, "grad_norm": 1.526693344116211, "learning_rate": 6.25e-05, "loss": 0.1504, "step": 205 }, { "epoch": 0.84, "grad_norm": 2.199506998062134, "learning_rate": 6.25e-05, "loss": 0.1357, "step": 210 }, { "epoch": 0.86, "grad_norm": 2.170020341873169, "learning_rate": 6.25e-05, "loss": 0.1519, "step": 215 }, { "epoch": 0.88, "grad_norm": 1.5418131351470947, "learning_rate": 6.25e-05, "loss": 0.1524, "step": 220 }, { "epoch": 0.9, "grad_norm": 2.1583192348480225, "learning_rate": 6.25e-05, "loss": 0.1264, "step": 225 }, { "epoch": 0.92, "grad_norm": 1.908937692642212, "learning_rate": 6.25e-05, "loss": 0.1221, "step": 230 }, { "epoch": 0.94, "grad_norm": 1.4072145223617554, "learning_rate": 6.25e-05, "loss": 0.1254, "step": 235 }, { "epoch": 0.96, "grad_norm": 1.3102571964263916, "learning_rate": 6.25e-05, "loss": 0.1412, "step": 240 }, { "epoch": 0.98, "grad_norm": 1.4941678047180176, "learning_rate": 6.25e-05, "loss": 0.1203, "step": 245 }, { "epoch": 1.0, "grad_norm": 1.8279727697372437, "learning_rate": 6.25e-05, "loss": 0.1107, "step": 250 }, { "epoch": 1.0, "eval_cer": 0.07099835736448257, "eval_loss": 0.11958163231611252, "eval_runtime": 280.2233, "eval_samples_per_second": 1.784, "eval_steps_per_second": 0.446, "step": 250 }, { "epoch": 1.02, "grad_norm": 1.0286716222763062, "learning_rate": 6.25e-05, "loss": 0.1047, "step": 255 }, { "epoch": 1.04, "grad_norm": 1.7464964389801025, "learning_rate": 6.25e-05, "loss": 0.1108, "step": 260 }, { "epoch": 1.06, "grad_norm": 1.3992992639541626, "learning_rate": 6.25e-05, "loss": 0.1176, "step": 265 }, { "epoch": 1.08, "grad_norm": 1.150010347366333, "learning_rate": 6.25e-05, "loss": 0.1059, "step": 270 }, { "epoch": 1.1, "grad_norm": 1.983775019645691, "learning_rate": 6.25e-05, "loss": 0.1223, "step": 275 }, { "epoch": 1.12, "grad_norm": 1.0895100831985474, "learning_rate": 6.25e-05, "loss": 0.0929, "step": 280 }, { "epoch": 1.1400000000000001, "grad_norm": 1.631362795829773, "learning_rate": 6.25e-05, "loss": 0.1005, "step": 285 }, { "epoch": 1.16, "grad_norm": 1.39686918258667, "learning_rate": 6.25e-05, "loss": 0.1022, "step": 290 }, { "epoch": 1.18, "grad_norm": 1.2856369018554688, "learning_rate": 6.25e-05, "loss": 0.0908, "step": 295 }, { "epoch": 1.2, "grad_norm": 1.2866718769073486, "learning_rate": 6.25e-05, "loss": 0.103, "step": 300 }, { "epoch": 1.22, "grad_norm": 1.1115745306015015, "learning_rate": 6.25e-05, "loss": 0.0934, "step": 305 }, { "epoch": 1.24, "grad_norm": 1.2674397230148315, "learning_rate": 6.25e-05, "loss": 0.0807, "step": 310 }, { "epoch": 1.26, "grad_norm": 3.003493547439575, "learning_rate": 6.25e-05, "loss": 0.1163, "step": 315 }, { "epoch": 1.28, "grad_norm": 1.3472819328308105, "learning_rate": 6.25e-05, "loss": 0.0939, "step": 320 }, { "epoch": 1.3, "grad_norm": 1.576393485069275, "learning_rate": 6.25e-05, "loss": 0.1023, "step": 325 }, { "epoch": 1.32, "grad_norm": 1.2895311117172241, "learning_rate": 6.25e-05, "loss": 0.0908, "step": 330 }, { "epoch": 1.34, "grad_norm": 1.7044769525527954, "learning_rate": 6.25e-05, "loss": 0.0849, "step": 335 }, { "epoch": 1.3599999999999999, "grad_norm": 1.316157341003418, "learning_rate": 6.25e-05, "loss": 0.0865, "step": 340 }, { "epoch": 1.38, "grad_norm": 1.4973046779632568, "learning_rate": 6.25e-05, "loss": 0.0864, "step": 345 }, { "epoch": 1.4, "grad_norm": 0.7230541706085205, "learning_rate": 6.25e-05, "loss": 0.074, "step": 350 }, { "epoch": 1.42, "grad_norm": 1.026584267616272, "learning_rate": 6.25e-05, "loss": 0.0788, "step": 355 }, { "epoch": 1.44, "grad_norm": 1.3976407051086426, "learning_rate": 6.25e-05, "loss": 0.0862, "step": 360 }, { "epoch": 1.46, "grad_norm": 1.3054964542388916, "learning_rate": 6.25e-05, "loss": 0.0757, "step": 365 }, { "epoch": 1.48, "grad_norm": 1.8163508176803589, "learning_rate": 6.25e-05, "loss": 0.0822, "step": 370 }, { "epoch": 1.5, "grad_norm": 1.23239004611969, "learning_rate": 6.25e-05, "loss": 0.0886, "step": 375 }, { "epoch": 1.52, "grad_norm": 1.1320103406906128, "learning_rate": 6.25e-05, "loss": 0.0834, "step": 380 }, { "epoch": 1.54, "grad_norm": 1.8913005590438843, "learning_rate": 6.25e-05, "loss": 0.0917, "step": 385 }, { "epoch": 1.56, "grad_norm": 1.3522365093231201, "learning_rate": 6.25e-05, "loss": 0.0927, "step": 390 }, { "epoch": 1.58, "grad_norm": 1.3687407970428467, "learning_rate": 6.25e-05, "loss": 0.0701, "step": 395 }, { "epoch": 1.6, "grad_norm": 1.6905425786972046, "learning_rate": 6.25e-05, "loss": 0.0914, "step": 400 }, { "epoch": 1.62, "grad_norm": 1.3366554975509644, "learning_rate": 6.25e-05, "loss": 0.0833, "step": 405 }, { "epoch": 1.6400000000000001, "grad_norm": 1.0540807247161865, "learning_rate": 6.25e-05, "loss": 0.088, "step": 410 }, { "epoch": 1.6600000000000001, "grad_norm": 1.0234986543655396, "learning_rate": 6.25e-05, "loss": 0.075, "step": 415 }, { "epoch": 1.6800000000000002, "grad_norm": 1.3205113410949707, "learning_rate": 6.25e-05, "loss": 0.0931, "step": 420 }, { "epoch": 1.7, "grad_norm": 0.8865799307823181, "learning_rate": 6.25e-05, "loss": 0.0831, "step": 425 }, { "epoch": 1.72, "grad_norm": 1.2646653652191162, "learning_rate": 6.25e-05, "loss": 0.0825, "step": 430 }, { "epoch": 1.74, "grad_norm": 0.9991198182106018, "learning_rate": 6.25e-05, "loss": 0.0825, "step": 435 }, { "epoch": 1.76, "grad_norm": 0.8784312605857849, "learning_rate": 6.25e-05, "loss": 0.082, "step": 440 }, { "epoch": 1.78, "grad_norm": 1.304877519607544, "learning_rate": 6.25e-05, "loss": 0.0775, "step": 445 }, { "epoch": 1.8, "grad_norm": 1.2007408142089844, "learning_rate": 6.25e-05, "loss": 0.0796, "step": 450 }, { "epoch": 1.8199999999999998, "grad_norm": 0.9978143572807312, "learning_rate": 6.25e-05, "loss": 0.0794, "step": 455 }, { "epoch": 1.8399999999999999, "grad_norm": 1.2883387804031372, "learning_rate": 6.25e-05, "loss": 0.0744, "step": 460 }, { "epoch": 1.8599999999999999, "grad_norm": 0.8542335629463196, "learning_rate": 6.25e-05, "loss": 0.074, "step": 465 }, { "epoch": 1.88, "grad_norm": 1.0009572505950928, "learning_rate": 6.25e-05, "loss": 0.0823, "step": 470 }, { "epoch": 1.9, "grad_norm": 1.126528263092041, "learning_rate": 6.25e-05, "loss": 0.0805, "step": 475 }, { "epoch": 1.92, "grad_norm": 0.9336584210395813, "learning_rate": 6.25e-05, "loss": 0.0722, "step": 480 }, { "epoch": 1.94, "grad_norm": 1.0387274026870728, "learning_rate": 6.25e-05, "loss": 0.0722, "step": 485 }, { "epoch": 1.96, "grad_norm": 1.4692296981811523, "learning_rate": 6.25e-05, "loss": 0.071, "step": 490 }, { "epoch": 1.98, "grad_norm": 0.9859362244606018, "learning_rate": 6.25e-05, "loss": 0.0795, "step": 495 }, { "epoch": 2.0, "grad_norm": 1.0557219982147217, "learning_rate": 6.25e-05, "loss": 0.0741, "step": 500 }, { "epoch": 2.0, "eval_cer": 0.05483273798659818, "eval_loss": 0.0840950757265091, "eval_runtime": 280.5964, "eval_samples_per_second": 1.782, "eval_steps_per_second": 0.445, "step": 500 }, { "epoch": 2.02, "grad_norm": 1.1070911884307861, "learning_rate": 6.25e-05, "loss": 0.0552, "step": 505 }, { "epoch": 2.04, "grad_norm": 0.7629583477973938, "learning_rate": 6.25e-05, "loss": 0.0613, "step": 510 }, { "epoch": 2.06, "grad_norm": 1.3060976266860962, "learning_rate": 6.25e-05, "loss": 0.0746, "step": 515 }, { "epoch": 2.08, "grad_norm": 1.067309021949768, "learning_rate": 6.25e-05, "loss": 0.0717, "step": 520 }, { "epoch": 2.1, "grad_norm": 1.1334558725357056, "learning_rate": 6.25e-05, "loss": 0.0582, "step": 525 }, { "epoch": 2.12, "grad_norm": 0.8565890192985535, "learning_rate": 6.25e-05, "loss": 0.066, "step": 530 }, { "epoch": 2.14, "grad_norm": 1.042038083076477, "learning_rate": 6.25e-05, "loss": 0.0686, "step": 535 }, { "epoch": 2.16, "grad_norm": 0.8787774443626404, "learning_rate": 6.25e-05, "loss": 0.0714, "step": 540 }, { "epoch": 2.18, "grad_norm": 1.2969956398010254, "learning_rate": 6.25e-05, "loss": 0.069, "step": 545 }, { "epoch": 2.2, "grad_norm": 1.0625072717666626, "learning_rate": 6.25e-05, "loss": 0.0701, "step": 550 }, { "epoch": 2.22, "grad_norm": 1.1786212921142578, "learning_rate": 6.25e-05, "loss": 0.0672, "step": 555 }, { "epoch": 2.24, "grad_norm": 0.7678006887435913, "learning_rate": 6.25e-05, "loss": 0.0637, "step": 560 }, { "epoch": 2.26, "grad_norm": 1.087916612625122, "learning_rate": 6.25e-05, "loss": 0.0592, "step": 565 }, { "epoch": 2.2800000000000002, "grad_norm": 1.0709354877471924, "learning_rate": 6.25e-05, "loss": 0.0641, "step": 570 }, { "epoch": 2.3, "grad_norm": 0.9933990240097046, "learning_rate": 6.25e-05, "loss": 0.0724, "step": 575 }, { "epoch": 2.32, "grad_norm": 0.9537047147750854, "learning_rate": 6.25e-05, "loss": 0.0565, "step": 580 }, { "epoch": 2.34, "grad_norm": 0.8913723230361938, "learning_rate": 6.25e-05, "loss": 0.0601, "step": 585 }, { "epoch": 2.36, "grad_norm": 1.4037823677062988, "learning_rate": 6.25e-05, "loss": 0.0656, "step": 590 }, { "epoch": 2.38, "grad_norm": 0.8686001896858215, "learning_rate": 6.25e-05, "loss": 0.0617, "step": 595 }, { "epoch": 2.4, "grad_norm": 1.1040139198303223, "learning_rate": 6.25e-05, "loss": 0.0612, "step": 600 }, { "epoch": 2.42, "grad_norm": 0.8995397090911865, "learning_rate": 6.25e-05, "loss": 0.0528, "step": 605 }, { "epoch": 2.44, "grad_norm": 1.0924474000930786, "learning_rate": 6.25e-05, "loss": 0.0551, "step": 610 }, { "epoch": 2.46, "grad_norm": 1.0748484134674072, "learning_rate": 6.25e-05, "loss": 0.0573, "step": 615 }, { "epoch": 2.48, "grad_norm": 0.8827953338623047, "learning_rate": 6.25e-05, "loss": 0.0595, "step": 620 }, { "epoch": 2.5, "grad_norm": 0.8614113926887512, "learning_rate": 6.25e-05, "loss": 0.0694, "step": 625 }, { "epoch": 2.52, "grad_norm": 0.6579775810241699, "learning_rate": 6.25e-05, "loss": 0.0608, "step": 630 }, { "epoch": 2.54, "grad_norm": 0.923587441444397, "learning_rate": 6.25e-05, "loss": 0.0684, "step": 635 }, { "epoch": 2.56, "grad_norm": 1.119313359260559, "learning_rate": 6.25e-05, "loss": 0.0651, "step": 640 }, { "epoch": 2.58, "grad_norm": 1.1630853414535522, "learning_rate": 6.25e-05, "loss": 0.0773, "step": 645 }, { "epoch": 2.6, "grad_norm": 0.9517636299133301, "learning_rate": 6.25e-05, "loss": 0.0574, "step": 650 }, { "epoch": 2.62, "grad_norm": 0.767271101474762, "learning_rate": 6.25e-05, "loss": 0.071, "step": 655 }, { "epoch": 2.64, "grad_norm": 1.3324207067489624, "learning_rate": 6.25e-05, "loss": 0.0672, "step": 660 }, { "epoch": 2.66, "grad_norm": 0.8638308048248291, "learning_rate": 6.25e-05, "loss": 0.0602, "step": 665 }, { "epoch": 2.68, "grad_norm": 0.9522351622581482, "learning_rate": 6.25e-05, "loss": 0.0626, "step": 670 }, { "epoch": 2.7, "grad_norm": 0.7264077067375183, "learning_rate": 6.25e-05, "loss": 0.0654, "step": 675 }, { "epoch": 2.7199999999999998, "grad_norm": 1.185275912284851, "learning_rate": 6.25e-05, "loss": 0.0638, "step": 680 }, { "epoch": 2.74, "grad_norm": 1.549625277519226, "learning_rate": 6.25e-05, "loss": 0.0661, "step": 685 }, { "epoch": 2.76, "grad_norm": 1.202415108680725, "learning_rate": 6.25e-05, "loss": 0.0709, "step": 690 }, { "epoch": 2.7800000000000002, "grad_norm": 0.7902194857597351, "learning_rate": 6.25e-05, "loss": 0.0604, "step": 695 }, { "epoch": 2.8, "grad_norm": 1.0128028392791748, "learning_rate": 6.25e-05, "loss": 0.0612, "step": 700 }, { "epoch": 2.82, "grad_norm": 0.8418397903442383, "learning_rate": 6.25e-05, "loss": 0.0616, "step": 705 }, { "epoch": 2.84, "grad_norm": 0.9352026581764221, "learning_rate": 6.25e-05, "loss": 0.0635, "step": 710 }, { "epoch": 2.86, "grad_norm": 0.679918110370636, "learning_rate": 6.25e-05, "loss": 0.0588, "step": 715 }, { "epoch": 2.88, "grad_norm": 0.836438000202179, "learning_rate": 6.25e-05, "loss": 0.0635, "step": 720 }, { "epoch": 2.9, "grad_norm": 0.7643904089927673, "learning_rate": 6.25e-05, "loss": 0.0554, "step": 725 }, { "epoch": 2.92, "grad_norm": 0.9192042946815491, "learning_rate": 6.25e-05, "loss": 0.0541, "step": 730 }, { "epoch": 2.94, "grad_norm": 0.9899188280105591, "learning_rate": 6.25e-05, "loss": 0.0591, "step": 735 }, { "epoch": 2.96, "grad_norm": 1.112701654434204, "learning_rate": 6.25e-05, "loss": 0.0611, "step": 740 }, { "epoch": 2.98, "grad_norm": 0.9096015095710754, "learning_rate": 6.25e-05, "loss": 0.0594, "step": 745 }, { "epoch": 3.0, "grad_norm": 1.158527135848999, "learning_rate": 6.25e-05, "loss": 0.0703, "step": 750 }, { "epoch": 3.0, "eval_cer": 0.05350298542486898, "eval_loss": 0.07945344597101212, "eval_runtime": 281.5513, "eval_samples_per_second": 1.776, "eval_steps_per_second": 0.444, "step": 750 }, { "epoch": 3.02, "grad_norm": 0.8544594049453735, "learning_rate": 6.25e-05, "loss": 0.0461, "step": 755 }, { "epoch": 3.04, "grad_norm": 0.8411735892295837, "learning_rate": 6.25e-05, "loss": 0.0429, "step": 760 }, { "epoch": 3.06, "grad_norm": 0.7515286207199097, "learning_rate": 6.25e-05, "loss": 0.0559, "step": 765 }, { "epoch": 3.08, "grad_norm": 0.8125985264778137, "learning_rate": 6.25e-05, "loss": 0.044, "step": 770 }, { "epoch": 3.1, "grad_norm": 0.8093322515487671, "learning_rate": 6.25e-05, "loss": 0.0529, "step": 775 }, { "epoch": 3.12, "grad_norm": 0.8852378129959106, "learning_rate": 6.25e-05, "loss": 0.0508, "step": 780 }, { "epoch": 3.14, "grad_norm": 0.6388903856277466, "learning_rate": 6.25e-05, "loss": 0.0491, "step": 785 }, { "epoch": 3.16, "grad_norm": 0.9803158640861511, "learning_rate": 6.25e-05, "loss": 0.051, "step": 790 }, { "epoch": 3.18, "grad_norm": 1.163065791130066, "learning_rate": 6.25e-05, "loss": 0.0538, "step": 795 }, { "epoch": 3.2, "grad_norm": 0.942138671875, "learning_rate": 6.25e-05, "loss": 0.0548, "step": 800 }, { "epoch": 3.22, "grad_norm": 0.763847827911377, "learning_rate": 6.25e-05, "loss": 0.0497, "step": 805 }, { "epoch": 3.24, "grad_norm": 1.1041572093963623, "learning_rate": 6.25e-05, "loss": 0.0513, "step": 810 }, { "epoch": 3.26, "grad_norm": 0.8744838237762451, "learning_rate": 6.25e-05, "loss": 0.0574, "step": 815 }, { "epoch": 3.2800000000000002, "grad_norm": 0.8737279176712036, "learning_rate": 6.25e-05, "loss": 0.0485, "step": 820 }, { "epoch": 3.3, "grad_norm": 0.6367043256759644, "learning_rate": 6.25e-05, "loss": 0.0462, "step": 825 }, { "epoch": 3.32, "grad_norm": 0.7195335030555725, "learning_rate": 6.25e-05, "loss": 0.0529, "step": 830 }, { "epoch": 3.34, "grad_norm": 0.7411594986915588, "learning_rate": 6.25e-05, "loss": 0.0558, "step": 835 }, { "epoch": 3.36, "grad_norm": 0.5583875179290771, "learning_rate": 6.25e-05, "loss": 0.0498, "step": 840 }, { "epoch": 3.38, "grad_norm": 0.7013912796974182, "learning_rate": 6.25e-05, "loss": 0.0465, "step": 845 }, { "epoch": 3.4, "grad_norm": 1.1267294883728027, "learning_rate": 6.25e-05, "loss": 0.0505, "step": 850 }, { "epoch": 3.42, "grad_norm": 1.3056484460830688, "learning_rate": 6.25e-05, "loss": 0.0515, "step": 855 }, { "epoch": 3.44, "grad_norm": 1.182433843612671, "learning_rate": 6.25e-05, "loss": 0.0525, "step": 860 }, { "epoch": 3.46, "grad_norm": 0.8969308733940125, "learning_rate": 6.25e-05, "loss": 0.0517, "step": 865 }, { "epoch": 3.48, "grad_norm": 0.7779067158699036, "learning_rate": 6.25e-05, "loss": 0.0539, "step": 870 }, { "epoch": 3.5, "grad_norm": 0.591754674911499, "learning_rate": 6.25e-05, "loss": 0.0546, "step": 875 }, { "epoch": 3.52, "grad_norm": 0.8097557425498962, "learning_rate": 6.25e-05, "loss": 0.0529, "step": 880 }, { "epoch": 3.54, "grad_norm": 0.7054248452186584, "learning_rate": 6.25e-05, "loss": 0.0436, "step": 885 }, { "epoch": 3.56, "grad_norm": 0.5832129716873169, "learning_rate": 6.25e-05, "loss": 0.048, "step": 890 }, { "epoch": 3.58, "grad_norm": 0.8104725480079651, "learning_rate": 6.25e-05, "loss": 0.0503, "step": 895 }, { "epoch": 3.6, "grad_norm": 0.9961804151535034, "learning_rate": 6.25e-05, "loss": 0.0565, "step": 900 }, { "epoch": 3.62, "grad_norm": 0.8466907143592834, "learning_rate": 6.25e-05, "loss": 0.054, "step": 905 }, { "epoch": 3.64, "grad_norm": 0.8867480158805847, "learning_rate": 6.25e-05, "loss": 0.0547, "step": 910 }, { "epoch": 3.66, "grad_norm": 0.9030736684799194, "learning_rate": 6.25e-05, "loss": 0.0481, "step": 915 }, { "epoch": 3.68, "grad_norm": 0.6740151643753052, "learning_rate": 6.25e-05, "loss": 0.0529, "step": 920 }, { "epoch": 3.7, "grad_norm": 0.653508722782135, "learning_rate": 6.25e-05, "loss": 0.0633, "step": 925 }, { "epoch": 3.7199999999999998, "grad_norm": 0.7304302453994751, "learning_rate": 6.25e-05, "loss": 0.0493, "step": 930 }, { "epoch": 3.74, "grad_norm": 0.8343582153320312, "learning_rate": 6.25e-05, "loss": 0.059, "step": 935 }, { "epoch": 3.76, "grad_norm": 0.8459467887878418, "learning_rate": 6.25e-05, "loss": 0.0531, "step": 940 }, { "epoch": 3.7800000000000002, "grad_norm": 0.7470009326934814, "learning_rate": 6.25e-05, "loss": 0.0548, "step": 945 }, { "epoch": 3.8, "grad_norm": 0.8183557987213135, "learning_rate": 6.25e-05, "loss": 0.0471, "step": 950 }, { "epoch": 3.82, "grad_norm": 0.9448140263557434, "learning_rate": 6.25e-05, "loss": 0.045, "step": 955 }, { "epoch": 3.84, "grad_norm": 0.7056401371955872, "learning_rate": 6.25e-05, "loss": 0.045, "step": 960 }, { "epoch": 3.86, "grad_norm": 0.7785059213638306, "learning_rate": 6.25e-05, "loss": 0.0554, "step": 965 }, { "epoch": 3.88, "grad_norm": 0.8976256251335144, "learning_rate": 6.25e-05, "loss": 0.0529, "step": 970 }, { "epoch": 3.9, "grad_norm": 1.0849542617797852, "learning_rate": 6.25e-05, "loss": 0.0457, "step": 975 }, { "epoch": 3.92, "grad_norm": 1.1612681150436401, "learning_rate": 6.25e-05, "loss": 0.0513, "step": 980 }, { "epoch": 3.94, "grad_norm": 0.6912779211997986, "learning_rate": 6.25e-05, "loss": 0.0469, "step": 985 }, { "epoch": 3.96, "grad_norm": 0.7129920125007629, "learning_rate": 6.25e-05, "loss": 0.0509, "step": 990 }, { "epoch": 3.98, "grad_norm": 0.6439591646194458, "learning_rate": 6.25e-05, "loss": 0.0412, "step": 995 }, { "epoch": 4.0, "grad_norm": 0.7044887542724609, "learning_rate": 6.25e-05, "loss": 0.0558, "step": 1000 }, { "epoch": 4.0, "eval_cer": 0.046749928297655986, "eval_loss": 0.07047422975301743, "eval_runtime": 280.6209, "eval_samples_per_second": 1.782, "eval_steps_per_second": 0.445, "step": 1000 }, { "epoch": 4.02, "grad_norm": 0.6291618943214417, "learning_rate": 6.25e-05, "loss": 0.0432, "step": 1005 }, { "epoch": 4.04, "grad_norm": 0.5485780239105225, "learning_rate": 6.25e-05, "loss": 0.0459, "step": 1010 }, { "epoch": 4.06, "grad_norm": 0.5912005305290222, "learning_rate": 6.25e-05, "loss": 0.0416, "step": 1015 }, { "epoch": 4.08, "grad_norm": 0.5929523706436157, "learning_rate": 6.25e-05, "loss": 0.0358, "step": 1020 }, { "epoch": 4.1, "grad_norm": 0.4929662346839905, "learning_rate": 6.25e-05, "loss": 0.0389, "step": 1025 }, { "epoch": 4.12, "grad_norm": 0.6707394123077393, "learning_rate": 6.25e-05, "loss": 0.0388, "step": 1030 }, { "epoch": 4.14, "grad_norm": 0.9774329662322998, "learning_rate": 6.25e-05, "loss": 0.0401, "step": 1035 }, { "epoch": 4.16, "grad_norm": 0.6821659803390503, "learning_rate": 6.25e-05, "loss": 0.0403, "step": 1040 }, { "epoch": 4.18, "grad_norm": 0.796459436416626, "learning_rate": 6.25e-05, "loss": 0.0425, "step": 1045 }, { "epoch": 4.2, "grad_norm": 0.6956031918525696, "learning_rate": 6.25e-05, "loss": 0.0475, "step": 1050 }, { "epoch": 4.22, "grad_norm": 0.7577043175697327, "learning_rate": 6.25e-05, "loss": 0.0483, "step": 1055 }, { "epoch": 4.24, "grad_norm": 0.5384642481803894, "learning_rate": 6.25e-05, "loss": 0.0372, "step": 1060 }, { "epoch": 4.26, "grad_norm": 0.791437566280365, "learning_rate": 6.25e-05, "loss": 0.0485, "step": 1065 }, { "epoch": 4.28, "grad_norm": 0.5820832252502441, "learning_rate": 6.25e-05, "loss": 0.0466, "step": 1070 }, { "epoch": 4.3, "grad_norm": 0.9597232341766357, "learning_rate": 6.25e-05, "loss": 0.0437, "step": 1075 }, { "epoch": 4.32, "grad_norm": 0.9876553416252136, "learning_rate": 6.25e-05, "loss": 0.05, "step": 1080 }, { "epoch": 4.34, "grad_norm": 0.6902226805686951, "learning_rate": 6.25e-05, "loss": 0.0401, "step": 1085 }, { "epoch": 4.36, "grad_norm": 0.5399324893951416, "learning_rate": 6.25e-05, "loss": 0.043, "step": 1090 }, { "epoch": 4.38, "grad_norm": 0.7499954700469971, "learning_rate": 6.25e-05, "loss": 0.0426, "step": 1095 }, { "epoch": 4.4, "grad_norm": 0.7145591378211975, "learning_rate": 6.25e-05, "loss": 0.0503, "step": 1100 }, { "epoch": 4.42, "grad_norm": 0.5746826529502869, "learning_rate": 6.25e-05, "loss": 0.0383, "step": 1105 }, { "epoch": 4.44, "grad_norm": 0.7018007040023804, "learning_rate": 6.25e-05, "loss": 0.0466, "step": 1110 }, { "epoch": 4.46, "grad_norm": 0.6607512831687927, "learning_rate": 6.25e-05, "loss": 0.038, "step": 1115 }, { "epoch": 4.48, "grad_norm": 0.5863096714019775, "learning_rate": 6.25e-05, "loss": 0.0462, "step": 1120 }, { "epoch": 4.5, "grad_norm": 0.674934983253479, "learning_rate": 6.25e-05, "loss": 0.0523, "step": 1125 }, { "epoch": 4.52, "grad_norm": 0.7824676036834717, "learning_rate": 6.25e-05, "loss": 0.0467, "step": 1130 }, { "epoch": 4.54, "grad_norm": 1.4591455459594727, "learning_rate": 6.25e-05, "loss": 0.0485, "step": 1135 }, { "epoch": 4.5600000000000005, "grad_norm": 0.6413418650627136, "learning_rate": 6.25e-05, "loss": 0.0435, "step": 1140 }, { "epoch": 4.58, "grad_norm": 0.5044887065887451, "learning_rate": 6.25e-05, "loss": 0.0432, "step": 1145 }, { "epoch": 4.6, "grad_norm": 0.4768076539039612, "learning_rate": 6.25e-05, "loss": 0.0422, "step": 1150 }, { "epoch": 4.62, "grad_norm": 0.7008136510848999, "learning_rate": 6.25e-05, "loss": 0.045, "step": 1155 }, { "epoch": 4.64, "grad_norm": 1.1213037967681885, "learning_rate": 6.25e-05, "loss": 0.0469, "step": 1160 }, { "epoch": 4.66, "grad_norm": 0.6898444890975952, "learning_rate": 6.25e-05, "loss": 0.0398, "step": 1165 }, { "epoch": 4.68, "grad_norm": 0.6885802149772644, "learning_rate": 6.25e-05, "loss": 0.0475, "step": 1170 }, { "epoch": 4.7, "grad_norm": 0.644440770149231, "learning_rate": 6.25e-05, "loss": 0.0403, "step": 1175 }, { "epoch": 4.72, "grad_norm": 0.6610418558120728, "learning_rate": 6.25e-05, "loss": 0.0415, "step": 1180 }, { "epoch": 4.74, "grad_norm": 0.7127951979637146, "learning_rate": 6.25e-05, "loss": 0.0466, "step": 1185 }, { "epoch": 4.76, "grad_norm": 0.7608262300491333, "learning_rate": 6.25e-05, "loss": 0.0398, "step": 1190 }, { "epoch": 4.78, "grad_norm": 0.6554054021835327, "learning_rate": 6.25e-05, "loss": 0.0395, "step": 1195 }, { "epoch": 4.8, "grad_norm": 0.7710177302360535, "learning_rate": 6.25e-05, "loss": 0.0412, "step": 1200 }, { "epoch": 4.82, "grad_norm": 0.5044788718223572, "learning_rate": 6.25e-05, "loss": 0.0378, "step": 1205 }, { "epoch": 4.84, "grad_norm": 0.4640452265739441, "learning_rate": 6.25e-05, "loss": 0.0394, "step": 1210 }, { "epoch": 4.86, "grad_norm": 0.6121119260787964, "learning_rate": 6.25e-05, "loss": 0.0373, "step": 1215 }, { "epoch": 4.88, "grad_norm": 0.7307333946228027, "learning_rate": 6.25e-05, "loss": 0.0462, "step": 1220 }, { "epoch": 4.9, "grad_norm": 0.841369092464447, "learning_rate": 6.25e-05, "loss": 0.0433, "step": 1225 }, { "epoch": 4.92, "grad_norm": 0.48274680972099304, "learning_rate": 6.25e-05, "loss": 0.0481, "step": 1230 }, { "epoch": 4.9399999999999995, "grad_norm": 0.6552777290344238, "learning_rate": 6.25e-05, "loss": 0.0449, "step": 1235 }, { "epoch": 4.96, "grad_norm": 1.0837739706039429, "learning_rate": 6.25e-05, "loss": 0.0465, "step": 1240 }, { "epoch": 4.98, "grad_norm": 0.7444823384284973, "learning_rate": 6.25e-05, "loss": 0.0513, "step": 1245 }, { "epoch": 5.0, "grad_norm": 0.561403214931488, "learning_rate": 6.25e-05, "loss": 0.0458, "step": 1250 }, { "epoch": 5.0, "eval_cer": 0.07253669856334576, "eval_loss": 0.06918226927518845, "eval_runtime": 282.8276, "eval_samples_per_second": 1.768, "eval_steps_per_second": 0.442, "step": 1250 }, { "epoch": 5.02, "grad_norm": 0.67482990026474, "learning_rate": 6.25e-05, "loss": 0.037, "step": 1255 }, { "epoch": 5.04, "grad_norm": 0.6839190721511841, "learning_rate": 6.25e-05, "loss": 0.0445, "step": 1260 }, { "epoch": 5.06, "grad_norm": 0.8001631498336792, "learning_rate": 6.25e-05, "loss": 0.0386, "step": 1265 }, { "epoch": 5.08, "grad_norm": 0.8353962898254395, "learning_rate": 6.25e-05, "loss": 0.0407, "step": 1270 }, { "epoch": 5.1, "grad_norm": 0.556709885597229, "learning_rate": 6.25e-05, "loss": 0.0355, "step": 1275 }, { "epoch": 5.12, "grad_norm": 0.5634174942970276, "learning_rate": 6.25e-05, "loss": 0.0322, "step": 1280 }, { "epoch": 5.14, "grad_norm": 0.6530662775039673, "learning_rate": 6.25e-05, "loss": 0.041, "step": 1285 }, { "epoch": 5.16, "grad_norm": 0.5771991610527039, "learning_rate": 6.25e-05, "loss": 0.0375, "step": 1290 }, { "epoch": 5.18, "grad_norm": 0.5936269164085388, "learning_rate": 6.25e-05, "loss": 0.0362, "step": 1295 }, { "epoch": 5.2, "grad_norm": 0.6964532136917114, "learning_rate": 6.25e-05, "loss": 0.0361, "step": 1300 }, { "epoch": 5.22, "grad_norm": 1.0432935953140259, "learning_rate": 6.25e-05, "loss": 0.0346, "step": 1305 }, { "epoch": 5.24, "grad_norm": 0.6481297016143799, "learning_rate": 6.25e-05, "loss": 0.0351, "step": 1310 }, { "epoch": 5.26, "grad_norm": 0.9188110828399658, "learning_rate": 6.25e-05, "loss": 0.0378, "step": 1315 }, { "epoch": 5.28, "grad_norm": 0.4248051345348358, "learning_rate": 6.25e-05, "loss": 0.0296, "step": 1320 }, { "epoch": 5.3, "grad_norm": 0.5334679484367371, "learning_rate": 6.25e-05, "loss": 0.0397, "step": 1325 }, { "epoch": 5.32, "grad_norm": 0.7321200370788574, "learning_rate": 6.25e-05, "loss": 0.0414, "step": 1330 }, { "epoch": 5.34, "grad_norm": 0.5322144627571106, "learning_rate": 6.25e-05, "loss": 0.0381, "step": 1335 }, { "epoch": 5.36, "grad_norm": 0.8044850826263428, "learning_rate": 6.25e-05, "loss": 0.0348, "step": 1340 }, { "epoch": 5.38, "grad_norm": 0.6011214256286621, "learning_rate": 6.25e-05, "loss": 0.0379, "step": 1345 }, { "epoch": 5.4, "grad_norm": 0.7421667575836182, "learning_rate": 6.25e-05, "loss": 0.0379, "step": 1350 }, { "epoch": 5.42, "grad_norm": 0.4418427348136902, "learning_rate": 6.25e-05, "loss": 0.0328, "step": 1355 }, { "epoch": 5.44, "grad_norm": 0.6037031412124634, "learning_rate": 6.25e-05, "loss": 0.0351, "step": 1360 }, { "epoch": 5.46, "grad_norm": 0.7416286468505859, "learning_rate": 6.25e-05, "loss": 0.0344, "step": 1365 }, { "epoch": 5.48, "grad_norm": 0.9417647123336792, "learning_rate": 6.25e-05, "loss": 0.0504, "step": 1370 }, { "epoch": 5.5, "grad_norm": 0.5485287308692932, "learning_rate": 6.25e-05, "loss": 0.0383, "step": 1375 }, { "epoch": 5.52, "grad_norm": 0.647965133190155, "learning_rate": 6.25e-05, "loss": 0.0388, "step": 1380 }, { "epoch": 5.54, "grad_norm": 0.7375500202178955, "learning_rate": 6.25e-05, "loss": 0.0368, "step": 1385 }, { "epoch": 5.5600000000000005, "grad_norm": 0.7219087481498718, "learning_rate": 6.25e-05, "loss": 0.0497, "step": 1390 }, { "epoch": 5.58, "grad_norm": 0.49959471821784973, "learning_rate": 6.25e-05, "loss": 0.038, "step": 1395 }, { "epoch": 5.6, "grad_norm": 0.5299109816551208, "learning_rate": 6.25e-05, "loss": 0.0303, "step": 1400 }, { "epoch": 5.62, "grad_norm": 0.48730289936065674, "learning_rate": 6.25e-05, "loss": 0.0337, "step": 1405 }, { "epoch": 5.64, "grad_norm": 0.3811701834201813, "learning_rate": 6.25e-05, "loss": 0.0367, "step": 1410 }, { "epoch": 5.66, "grad_norm": 0.4611757695674896, "learning_rate": 6.25e-05, "loss": 0.0396, "step": 1415 }, { "epoch": 5.68, "grad_norm": 0.5509118437767029, "learning_rate": 6.25e-05, "loss": 0.0397, "step": 1420 }, { "epoch": 5.7, "grad_norm": 0.8130658268928528, "learning_rate": 6.25e-05, "loss": 0.0346, "step": 1425 }, { "epoch": 5.72, "grad_norm": 0.4248274266719818, "learning_rate": 6.25e-05, "loss": 0.0334, "step": 1430 }, { "epoch": 5.74, "grad_norm": 1.1918326616287231, "learning_rate": 6.25e-05, "loss": 0.041, "step": 1435 }, { "epoch": 5.76, "grad_norm": 0.6501240730285645, "learning_rate": 6.25e-05, "loss": 0.0423, "step": 1440 }, { "epoch": 5.78, "grad_norm": 1.216350793838501, "learning_rate": 6.25e-05, "loss": 0.0366, "step": 1445 }, { "epoch": 5.8, "grad_norm": 0.48442235589027405, "learning_rate": 6.25e-05, "loss": 0.0335, "step": 1450 }, { "epoch": 5.82, "grad_norm": 0.5834723711013794, "learning_rate": 6.25e-05, "loss": 0.0414, "step": 1455 }, { "epoch": 5.84, "grad_norm": 0.7862647771835327, "learning_rate": 6.25e-05, "loss": 0.0438, "step": 1460 }, { "epoch": 5.86, "grad_norm": 0.8282245397567749, "learning_rate": 6.25e-05, "loss": 0.0384, "step": 1465 }, { "epoch": 5.88, "grad_norm": 0.8185272812843323, "learning_rate": 6.25e-05, "loss": 0.0386, "step": 1470 }, { "epoch": 5.9, "grad_norm": 0.6197579503059387, "learning_rate": 6.25e-05, "loss": 0.036, "step": 1475 }, { "epoch": 5.92, "grad_norm": 0.5256204009056091, "learning_rate": 6.25e-05, "loss": 0.0331, "step": 1480 }, { "epoch": 5.9399999999999995, "grad_norm": 0.5693526864051819, "learning_rate": 6.25e-05, "loss": 0.0404, "step": 1485 }, { "epoch": 5.96, "grad_norm": 0.505524754524231, "learning_rate": 6.25e-05, "loss": 0.0345, "step": 1490 }, { "epoch": 5.98, "grad_norm": 0.7480014562606812, "learning_rate": 6.25e-05, "loss": 0.0421, "step": 1495 }, { "epoch": 6.0, "grad_norm": 0.6769825220108032, "learning_rate": 6.25e-05, "loss": 0.0364, "step": 1500 }, { "epoch": 6.0, "eval_cer": 0.04693244335514823, "eval_loss": 0.07296038419008255, "eval_runtime": 281.2143, "eval_samples_per_second": 1.778, "eval_steps_per_second": 0.445, "step": 1500 } ], "logging_steps": 5, "max_steps": 5000, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 250, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.128121552896e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }