{ "best_metric": 55.83411121482864, "best_model_checkpoint": "./linshoufanfork-whisper-small-nan-tw/checkpoint-500", "epoch": 0.1607200257152041, "eval_steps": 500, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "grad_norm": 48.035465240478516, "learning_rate": 2.5e-06, "loss": 6.8285, "step": 25 }, { "epoch": 0.02, "grad_norm": 142.98660278320312, "learning_rate": 5e-06, "loss": 3.2493, "step": 50 }, { "epoch": 0.02, "grad_norm": 27.529985427856445, "learning_rate": 7.500000000000001e-06, "loss": 2.2763, "step": 75 }, { "epoch": 0.03, "grad_norm": 21.324289321899414, "learning_rate": 1e-05, "loss": 1.7193, "step": 100 }, { "epoch": 0.04, "grad_norm": 13.650047302246094, "learning_rate": 9.91697110594487e-06, "loss": 1.3177, "step": 125 }, { "epoch": 0.05, "grad_norm": 14.939111709594727, "learning_rate": 9.83394221188974e-06, "loss": 1.2486, "step": 150 }, { "epoch": 0.06, "grad_norm": 14.85805606842041, "learning_rate": 9.750913317834608e-06, "loss": 1.1991, "step": 175 }, { "epoch": 0.06, "grad_norm": 15.52128791809082, "learning_rate": 9.667884423779476e-06, "loss": 1.1126, "step": 200 }, { "epoch": 0.07, "grad_norm": 22.642026901245117, "learning_rate": 9.584855529724345e-06, "loss": 1.0437, "step": 225 }, { "epoch": 0.08, "grad_norm": 16.712217330932617, "learning_rate": 9.501826635669213e-06, "loss": 0.9986, "step": 250 }, { "epoch": 0.09, "grad_norm": 13.973222732543945, "learning_rate": 9.418797741614083e-06, "loss": 0.9188, "step": 275 }, { "epoch": 0.1, "grad_norm": 13.404074668884277, "learning_rate": 9.335768847558952e-06, "loss": 0.9244, "step": 300 }, { "epoch": 0.1, "grad_norm": 12.21960163116455, "learning_rate": 9.25273995350382e-06, "loss": 0.9018, "step": 325 }, { "epoch": 0.11, "grad_norm": 14.460400581359863, "learning_rate": 9.169711059448689e-06, "loss": 0.8398, "step": 350 }, { "epoch": 0.12, "grad_norm": 14.076154708862305, "learning_rate": 9.086682165393557e-06, "loss": 0.8828, "step": 375 }, { "epoch": 0.13, "grad_norm": 13.204269409179688, "learning_rate": 9.003653271338426e-06, "loss": 0.8503, "step": 400 }, { "epoch": 0.14, "grad_norm": 10.978958129882812, "learning_rate": 8.920624377283296e-06, "loss": 0.8198, "step": 425 }, { "epoch": 0.14, "grad_norm": 13.963995933532715, "learning_rate": 8.837595483228164e-06, "loss": 0.8202, "step": 450 }, { "epoch": 0.15, "grad_norm": 13.337563514709473, "learning_rate": 8.754566589173033e-06, "loss": 0.7536, "step": 475 }, { "epoch": 0.16, "grad_norm": 12.711252212524414, "learning_rate": 8.671537695117903e-06, "loss": 0.7938, "step": 500 }, { "epoch": 0.16, "eval_cer": 55.83411121482864, "eval_loss": 0.7767874002456665, "eval_runtime": 1802.202, "eval_samples_per_second": 2.461, "eval_steps_per_second": 0.308, "step": 500 } ], "logging_steps": 25, "max_steps": 3111, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "total_flos": 2.30868320256e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }