{ "best_metric": 0.16783047, "best_model_checkpoint": "/home/patrickbarker/output/qwen2-vl-7b-instruct/v5-20241227-205641/checkpoint-75", "epoch": 5.0, "eval_steps": 200, "global_step": 75, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "acc": 0.79618824, "epoch": 0.06666666666666667, "grad_norm": 60.2325912978456, "learning_rate": 0.0, "loss": 1.4069984, "memory(GiB)": 54.58, "step": 1, "train_speed(iter/s)": 0.02356 }, { "acc": 0.82087338, "epoch": 0.3333333333333333, "grad_norm": 24.5612524344975, "learning_rate": 9.995106621986608e-06, "loss": 1.17183995, "memory(GiB)": 73.76, "step": 5, "train_speed(iter/s)": 0.030848 }, { "acc": 0.83782043, "epoch": 0.6666666666666666, "grad_norm": 20.128635487004736, "learning_rate": 9.82484209015679e-06, "loss": 0.9988678, "memory(GiB)": 73.76, "step": 10, "train_speed(iter/s)": 0.032078 }, { "acc": 0.86735916, "epoch": 1.0, "grad_norm": 8.494494640812043, "learning_rate": 9.419403729161433e-06, "loss": 0.72015114, "memory(GiB)": 73.76, "step": 15, "train_speed(iter/s)": 0.032504 }, { "acc": 0.89613667, "epoch": 1.3333333333333333, "grad_norm": 4.0574812705218255, "learning_rate": 8.798555579374133e-06, "loss": 0.58566475, "memory(GiB)": 73.76, "step": 20, "train_speed(iter/s)": 0.032659 }, { "acc": 0.88684921, "epoch": 1.6666666666666665, "grad_norm": 6.742631309792843, "learning_rate": 7.9925623347132e-06, "loss": 0.58472939, "memory(GiB)": 73.76, "step": 25, "train_speed(iter/s)": 0.032798 }, { "acc": 0.92209816, "epoch": 2.0, "grad_norm": 5.05572579013588, "learning_rate": 7.040714019411203e-06, "loss": 0.45826292, "memory(GiB)": 73.76, "step": 30, "train_speed(iter/s)": 0.032892 }, { "acc": 0.9140625, "epoch": 2.3333333333333335, "grad_norm": 1.9502052102657774, "learning_rate": 5.989410703967095e-06, "loss": 0.45564661, "memory(GiB)": 73.76, "step": 35, "train_speed(iter/s)": 0.032925 }, { "acc": 0.91242561, "epoch": 2.6666666666666665, "grad_norm": 2.7058645210729177, "learning_rate": 4.8899006252776434e-06, "loss": 0.45886307, "memory(GiB)": 73.76, "step": 40, "train_speed(iter/s)": 0.032984 }, { "acc": 0.92076473, "epoch": 3.0, "grad_norm": 3.446087600085013, "learning_rate": 3.7957819715681576e-06, "loss": 0.45811167, "memory(GiB)": 73.76, "step": 45, "train_speed(iter/s)": 0.033025 }, { "acc": 0.92433033, "epoch": 3.3333333333333335, "grad_norm": 2.5558432936438447, "learning_rate": 2.760390113454102e-06, "loss": 0.39850535, "memory(GiB)": 73.76, "step": 50, "train_speed(iter/s)": 0.033034 }, { "acc": 0.93854742, "epoch": 3.6666666666666665, "grad_norm": 3.6644266665911105, "learning_rate": 1.8341976466612901e-06, "loss": 0.33202767, "memory(GiB)": 73.76, "step": 55, "train_speed(iter/s)": 0.033064 }, { "acc": 0.91703873, "epoch": 4.0, "grad_norm": 3.2494285333084614, "learning_rate": 1.0623539873980678e-06, "loss": 0.43731127, "memory(GiB)": 73.76, "step": 60, "train_speed(iter/s)": 0.033091 }, { "acc": 0.92857151, "epoch": 4.333333333333333, "grad_norm": 2.1823786608490225, "learning_rate": 4.824844585528e-07, "loss": 0.35523658, "memory(GiB)": 73.76, "step": 65, "train_speed(iter/s)": 0.033095 }, { "acc": 0.93809528, "epoch": 4.666666666666667, "grad_norm": 2.2497604778446716, "learning_rate": 1.2285615540324237e-07, "loss": 0.34353948, "memory(GiB)": 73.76, "step": 70, "train_speed(iter/s)": 0.033115 }, { "acc": 0.93839283, "epoch": 5.0, "grad_norm": 2.1104371354292435, "learning_rate": 1e-09, "loss": 0.31291947, "memory(GiB)": 73.76, "step": 75, "train_speed(iter/s)": 0.033132 }, { "epoch": 5.0, "eval_acc": 0.9376854599406528, "eval_loss": 0.1678304672241211, "eval_runtime": 9.9956, "eval_samples_per_second": 1.201, "eval_steps_per_second": 0.2, "step": 75 } ], "logging_steps": 5, "max_steps": 75, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 181464165515264.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }