{ "best_metric": 0.7191163976210705, "best_model_checkpoint": "BengaliRegionalASR_finetune/checkpoint-450", "epoch": 22.228571428571428, "eval_steps": 50, "global_step": 600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.38095238095238093, "grad_norm": 17.112415313720703, "learning_rate": 1.8000000000000001e-06, "loss": 2.3043, "step": 10 }, { "epoch": 0.7619047619047619, "grad_norm": 10.634852409362793, "learning_rate": 3.8000000000000005e-06, "loss": 1.9102, "step": 20 }, { "epoch": 1.1142857142857143, "grad_norm": 7.627511501312256, "learning_rate": 5.8e-06, "loss": 1.5181, "step": 30 }, { "epoch": 1.4952380952380953, "grad_norm": 6.949349880218506, "learning_rate": 7.800000000000002e-06, "loss": 1.5209, "step": 40 }, { "epoch": 1.8761904761904762, "grad_norm": 6.8650946617126465, "learning_rate": 9.800000000000001e-06, "loss": 1.4268, "step": 50 }, { "epoch": 1.8761904761904762, "eval_loss": 0.33518293499946594, "eval_runtime": 171.0124, "eval_samples_per_second": 2.035, "eval_steps_per_second": 0.064, "eval_wer": 0.7963528413910094, "step": 50 }, { "epoch": 2.2285714285714286, "grad_norm": 6.442819118499756, "learning_rate": 9.876712328767123e-06, "loss": 1.1942, "step": 60 }, { "epoch": 2.6095238095238096, "grad_norm": 5.460833549499512, "learning_rate": 9.73972602739726e-06, "loss": 1.2687, "step": 70 }, { "epoch": 2.9904761904761905, "grad_norm": 5.580706596374512, "learning_rate": 9.602739726027398e-06, "loss": 1.2302, "step": 80 }, { "epoch": 3.342857142857143, "grad_norm": 6.585939884185791, "learning_rate": 9.465753424657535e-06, "loss": 1.0556, "step": 90 }, { "epoch": 3.723809523809524, "grad_norm": 5.867043972015381, "learning_rate": 9.328767123287673e-06, "loss": 1.0962, "step": 100 }, { "epoch": 3.723809523809524, "eval_loss": 0.3013468086719513, "eval_runtime": 173.407, "eval_samples_per_second": 2.007, "eval_steps_per_second": 0.063, "eval_wer": 0.7469838572642311, "step": 100 }, { "epoch": 4.076190476190476, "grad_norm": 5.975539207458496, "learning_rate": 9.19178082191781e-06, "loss": 1.0079, "step": 110 }, { "epoch": 4.457142857142857, "grad_norm": 4.361755847930908, "learning_rate": 9.054794520547945e-06, "loss": 1.0166, "step": 120 }, { "epoch": 4.838095238095238, "grad_norm": 5.646440029144287, "learning_rate": 8.917808219178083e-06, "loss": 1.0029, "step": 130 }, { "epoch": 5.190476190476191, "grad_norm": 4.995810031890869, "learning_rate": 8.78082191780822e-06, "loss": 0.8789, "step": 140 }, { "epoch": 5.571428571428571, "grad_norm": 5.576609134674072, "learning_rate": 8.643835616438357e-06, "loss": 0.9212, "step": 150 }, { "epoch": 5.571428571428571, "eval_loss": 0.2936047613620758, "eval_runtime": 169.8763, "eval_samples_per_second": 2.049, "eval_steps_per_second": 0.065, "eval_wer": 0.7334293948126801, "step": 150 }, { "epoch": 5.9523809523809526, "grad_norm": 5.173150539398193, "learning_rate": 8.506849315068494e-06, "loss": 0.9167, "step": 160 }, { "epoch": 6.304761904761905, "grad_norm": 5.523489475250244, "learning_rate": 8.36986301369863e-06, "loss": 0.8009, "step": 170 }, { "epoch": 6.685714285714286, "grad_norm": 4.563538551330566, "learning_rate": 8.232876712328767e-06, "loss": 0.8372, "step": 180 }, { "epoch": 7.038095238095238, "grad_norm": 5.588221073150635, "learning_rate": 8.095890410958904e-06, "loss": 0.7638, "step": 190 }, { "epoch": 7.419047619047619, "grad_norm": 6.018068313598633, "learning_rate": 7.958904109589042e-06, "loss": 0.7809, "step": 200 }, { "epoch": 7.419047619047619, "eval_loss": 0.2970726490020752, "eval_runtime": 170.5208, "eval_samples_per_second": 2.041, "eval_steps_per_second": 0.065, "eval_wer": 0.7514037774374681, "step": 200 }, { "epoch": 7.8, "grad_norm": 7.281743049621582, "learning_rate": 7.821917808219179e-06, "loss": 0.7772, "step": 210 }, { "epoch": 8.152380952380952, "grad_norm": 4.583686828613281, "learning_rate": 7.684931506849316e-06, "loss": 0.6768, "step": 220 }, { "epoch": 8.533333333333333, "grad_norm": 5.136018753051758, "learning_rate": 7.5479452054794526e-06, "loss": 0.7121, "step": 230 }, { "epoch": 8.914285714285715, "grad_norm": 5.522968769073486, "learning_rate": 7.41095890410959e-06, "loss": 0.7137, "step": 240 }, { "epoch": 9.266666666666667, "grad_norm": 7.7931623458862305, "learning_rate": 7.273972602739726e-06, "loss": 0.6274, "step": 250 }, { "epoch": 9.266666666666667, "eval_loss": 0.3050971031188965, "eval_runtime": 185.0062, "eval_samples_per_second": 1.881, "eval_steps_per_second": 0.059, "eval_wer": 0.7347907134384003, "step": 250 }, { "epoch": 9.647619047619047, "grad_norm": 6.836337089538574, "learning_rate": 7.1369863013698635e-06, "loss": 0.6651, "step": 260 }, { "epoch": 10.0, "grad_norm": 1.960423231124878, "learning_rate": 7e-06, "loss": 0.6095, "step": 270 }, { "epoch": 10.380952380952381, "grad_norm": 6.1859049797058105, "learning_rate": 6.863013698630138e-06, "loss": 0.6111, "step": 280 }, { "epoch": 10.761904761904763, "grad_norm": 5.500946998596191, "learning_rate": 6.726027397260274e-06, "loss": 0.6252, "step": 290 }, { "epoch": 11.114285714285714, "grad_norm": 7.6666741371154785, "learning_rate": 6.589041095890412e-06, "loss": 0.5611, "step": 300 }, { "epoch": 11.114285714285714, "eval_loss": 0.31078749895095825, "eval_runtime": 178.31, "eval_samples_per_second": 1.952, "eval_steps_per_second": 0.062, "eval_wer": 0.7256418976364564, "step": 300 }, { "epoch": 11.495238095238095, "grad_norm": 4.770754814147949, "learning_rate": 6.452054794520548e-06, "loss": 0.5764, "step": 310 }, { "epoch": 11.876190476190477, "grad_norm": 4.720264911651611, "learning_rate": 6.315068493150685e-06, "loss": 0.5792, "step": 320 }, { "epoch": 12.228571428571428, "grad_norm": 5.056512355804443, "learning_rate": 6.178082191780822e-06, "loss": 0.5049, "step": 330 }, { "epoch": 12.60952380952381, "grad_norm": 5.898259162902832, "learning_rate": 6.04109589041096e-06, "loss": 0.5427, "step": 340 }, { "epoch": 12.99047619047619, "grad_norm": 5.173780918121338, "learning_rate": 5.904109589041096e-06, "loss": 0.5373, "step": 350 }, { "epoch": 12.99047619047619, "eval_loss": 0.3216899335384369, "eval_runtime": 173.4786, "eval_samples_per_second": 2.006, "eval_steps_per_second": 0.063, "eval_wer": 0.7399762147468569, "step": 350 }, { "epoch": 13.342857142857143, "grad_norm": 5.095024585723877, "learning_rate": 5.7671232876712335e-06, "loss": 0.4668, "step": 360 }, { "epoch": 13.723809523809523, "grad_norm": 5.228797912597656, "learning_rate": 5.63013698630137e-06, "loss": 0.4953, "step": 370 }, { "epoch": 14.076190476190476, "grad_norm": 4.96743106842041, "learning_rate": 5.493150684931507e-06, "loss": 0.463, "step": 380 }, { "epoch": 14.457142857142857, "grad_norm": 4.670540809631348, "learning_rate": 5.3561643835616435e-06, "loss": 0.4639, "step": 390 }, { "epoch": 14.838095238095239, "grad_norm": 5.489909648895264, "learning_rate": 5.219178082191782e-06, "loss": 0.4621, "step": 400 }, { "epoch": 14.838095238095239, "eval_loss": 0.33466461300849915, "eval_runtime": 174.1549, "eval_samples_per_second": 1.998, "eval_steps_per_second": 0.063, "eval_wer": 0.7304318201175368, "step": 400 }, { "epoch": 15.19047619047619, "grad_norm": 4.55846643447876, "learning_rate": 5.082191780821918e-06, "loss": 0.4221, "step": 410 }, { "epoch": 15.571428571428571, "grad_norm": 4.863076686859131, "learning_rate": 4.945205479452055e-06, "loss": 0.4294, "step": 420 }, { "epoch": 15.952380952380953, "grad_norm": 4.473245143890381, "learning_rate": 4.8082191780821926e-06, "loss": 0.4465, "step": 430 }, { "epoch": 16.304761904761904, "grad_norm": 4.792644023895264, "learning_rate": 4.671232876712329e-06, "loss": 0.3884, "step": 440 }, { "epoch": 16.685714285714287, "grad_norm": 4.901365756988525, "learning_rate": 4.534246575342466e-06, "loss": 0.4084, "step": 450 }, { "epoch": 16.685714285714287, "eval_loss": 0.34350478649139404, "eval_runtime": 168.7236, "eval_samples_per_second": 2.063, "eval_steps_per_second": 0.065, "eval_wer": 0.7191163976210705, "step": 450 }, { "epoch": 17.038095238095238, "grad_norm": 4.849997520446777, "learning_rate": 4.3972602739726035e-06, "loss": 0.3827, "step": 460 }, { "epoch": 17.419047619047618, "grad_norm": 5.026614189147949, "learning_rate": 4.260273972602741e-06, "loss": 0.3994, "step": 470 }, { "epoch": 17.8, "grad_norm": 4.951253890991211, "learning_rate": 4.123287671232877e-06, "loss": 0.3751, "step": 480 }, { "epoch": 18.152380952380952, "grad_norm": 4.608191967010498, "learning_rate": 3.9863013698630135e-06, "loss": 0.3591, "step": 490 }, { "epoch": 18.533333333333335, "grad_norm": 4.329087257385254, "learning_rate": 3.849315068493151e-06, "loss": 0.3685, "step": 500 }, { "epoch": 18.533333333333335, "eval_loss": 0.3573543429374695, "eval_runtime": 167.7585, "eval_samples_per_second": 2.074, "eval_steps_per_second": 0.066, "eval_wer": 0.7201967101916229, "step": 500 }, { "epoch": 18.914285714285715, "grad_norm": 4.682867527008057, "learning_rate": 3.7123287671232876e-06, "loss": 0.3759, "step": 510 }, { "epoch": 19.266666666666666, "grad_norm": 4.454867839813232, "learning_rate": 3.575342465753425e-06, "loss": 0.3331, "step": 520 }, { "epoch": 19.64761904761905, "grad_norm": 5.05587100982666, "learning_rate": 3.4383561643835617e-06, "loss": 0.3595, "step": 530 }, { "epoch": 20.0, "grad_norm": 2.0231711864471436, "learning_rate": 3.3013698630136985e-06, "loss": 0.3198, "step": 540 }, { "epoch": 20.38095238095238, "grad_norm": 4.62886905670166, "learning_rate": 3.164383561643836e-06, "loss": 0.3446, "step": 550 }, { "epoch": 20.38095238095238, "eval_loss": 0.36290082335472107, "eval_runtime": 169.1887, "eval_samples_per_second": 2.057, "eval_steps_per_second": 0.065, "eval_wer": 0.7294386976428693, "step": 550 }, { "epoch": 20.761904761904763, "grad_norm": 4.633991241455078, "learning_rate": 3.0273972602739726e-06, "loss": 0.3269, "step": 560 }, { "epoch": 21.114285714285714, "grad_norm": 4.232445240020752, "learning_rate": 2.8904109589041095e-06, "loss": 0.3164, "step": 570 }, { "epoch": 21.495238095238093, "grad_norm": 4.396146297454834, "learning_rate": 2.7534246575342467e-06, "loss": 0.3248, "step": 580 }, { "epoch": 21.876190476190477, "grad_norm": 4.671775817871094, "learning_rate": 2.6164383561643835e-06, "loss": 0.3297, "step": 590 }, { "epoch": 22.228571428571428, "grad_norm": 4.537514686584473, "learning_rate": 2.479452054794521e-06, "loss": 0.2998, "step": 600 }, { "epoch": 22.228571428571428, "eval_loss": 0.3707055151462555, "eval_runtime": 171.8197, "eval_samples_per_second": 2.025, "eval_steps_per_second": 0.064, "eval_wer": 0.7299481160159905, "step": 600 } ], "logging_steps": 10, "max_steps": 780, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.1553866764464423e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }