{ "best_metric": 0.8715686274509804, "best_model_checkpoint": "resnet-50-finetuned-oxfordflowers/checkpoint-384", "epoch": 15.0, "eval_steps": 500, "global_step": 480, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15625, "grad_norm": 2.278627395629883, "learning_rate": 0.0009921875, "loss": 4.6544, "step": 5 }, { "epoch": 0.3125, "grad_norm": 2.4123668670654297, "learning_rate": 0.000984375, "loss": 4.6374, "step": 10 }, { "epoch": 0.46875, "grad_norm": 1.4760686159133911, "learning_rate": 0.0009765625, "loss": 4.6478, "step": 15 }, { "epoch": 0.625, "grad_norm": 1.425757646560669, "learning_rate": 0.00096875, "loss": 4.6103, "step": 20 }, { "epoch": 0.78125, "grad_norm": 0.9740359783172607, "learning_rate": 0.0009609375, "loss": 4.5578, "step": 25 }, { "epoch": 0.9375, "grad_norm": 1.2600048780441284, "learning_rate": 0.000953125, "loss": 4.4813, "step": 30 }, { "epoch": 1.0, "eval_accuracy": 0.3176470588235294, "eval_f1": 0.2599013976774049, "eval_loss": 4.193436622619629, "eval_precision": 0.35219345124204177, "eval_recall": 0.3176470588235294, "eval_runtime": 14.8413, "eval_samples_per_second": 68.727, "eval_steps_per_second": 2.156, "step": 32 }, { "epoch": 1.09375, "grad_norm": 1.5796399116516113, "learning_rate": 0.0009453125, "loss": 4.2318, "step": 35 }, { "epoch": 1.25, "grad_norm": 1.804262638092041, "learning_rate": 0.0009375, "loss": 3.8701, "step": 40 }, { "epoch": 1.40625, "grad_norm": 2.242269277572632, "learning_rate": 0.0009296875000000001, "loss": 3.4891, "step": 45 }, { "epoch": 1.5625, "grad_norm": 2.3672704696655273, "learning_rate": 0.0009218750000000001, "loss": 3.0951, "step": 50 }, { "epoch": 1.71875, "grad_norm": 2.732805013656616, "learning_rate": 0.0009140625, "loss": 2.8821, "step": 55 }, { "epoch": 1.875, "grad_norm": 3.0011582374572754, "learning_rate": 0.00090625, "loss": 2.6507, "step": 60 }, { "epoch": 2.0, "eval_accuracy": 0.538235294117647, "eval_f1": 0.49295429074645647, "eval_loss": 1.8715856075286865, "eval_precision": 0.5792423763013405, "eval_recall": 0.538235294117647, "eval_runtime": 15.933, "eval_samples_per_second": 64.018, "eval_steps_per_second": 2.008, "step": 64 }, { "epoch": 2.03125, "grad_norm": 2.653271436691284, "learning_rate": 0.0008984375, "loss": 2.4669, "step": 65 }, { "epoch": 2.1875, "grad_norm": 2.3712799549102783, "learning_rate": 0.000890625, "loss": 1.8497, "step": 70 }, { "epoch": 2.34375, "grad_norm": 2.974799156188965, "learning_rate": 0.0008828125, "loss": 1.5857, "step": 75 }, { "epoch": 2.5, "grad_norm": 2.836200714111328, "learning_rate": 0.000875, "loss": 1.4951, "step": 80 }, { "epoch": 2.65625, "grad_norm": 2.4715311527252197, "learning_rate": 0.0008671875, "loss": 1.281, "step": 85 }, { "epoch": 2.8125, "grad_norm": 2.7568747997283936, "learning_rate": 0.000859375, "loss": 1.2142, "step": 90 }, { "epoch": 2.96875, "grad_norm": 3.03707218170166, "learning_rate": 0.0008515625, "loss": 1.257, "step": 95 }, { "epoch": 3.0, "eval_accuracy": 0.7215686274509804, "eval_f1": 0.708531826075014, "eval_loss": 1.0998024940490723, "eval_precision": 0.7663447807478948, "eval_recall": 0.7215686274509804, "eval_runtime": 14.8914, "eval_samples_per_second": 68.496, "eval_steps_per_second": 2.149, "step": 96 }, { "epoch": 3.125, "grad_norm": 1.8445321321487427, "learning_rate": 0.00084375, "loss": 0.6848, "step": 100 }, { "epoch": 3.28125, "grad_norm": 2.4257466793060303, "learning_rate": 0.0008359375, "loss": 0.6886, "step": 105 }, { "epoch": 3.4375, "grad_norm": 2.526425361633301, "learning_rate": 0.000828125, "loss": 0.5957, "step": 110 }, { "epoch": 3.59375, "grad_norm": 2.6436715126037598, "learning_rate": 0.0008203125, "loss": 0.4805, "step": 115 }, { "epoch": 3.75, "grad_norm": 2.039067506790161, "learning_rate": 0.0008125000000000001, "loss": 0.4231, "step": 120 }, { "epoch": 3.90625, "grad_norm": 2.0841586589813232, "learning_rate": 0.0008046875000000001, "loss": 0.5333, "step": 125 }, { "epoch": 4.0, "eval_accuracy": 0.7421568627450981, "eval_f1": 0.72961394279528, "eval_loss": 0.9723543524742126, "eval_precision": 0.7875164989046498, "eval_recall": 0.7421568627450981, "eval_runtime": 15.0411, "eval_samples_per_second": 67.814, "eval_steps_per_second": 2.127, "step": 128 }, { "epoch": 4.0625, "grad_norm": 1.630177617073059, "learning_rate": 0.0007968750000000001, "loss": 0.4732, "step": 130 }, { "epoch": 4.21875, "grad_norm": 1.7786303758621216, "learning_rate": 0.0007890625, "loss": 0.2683, "step": 135 }, { "epoch": 4.375, "grad_norm": 1.009931206703186, "learning_rate": 0.00078125, "loss": 0.2352, "step": 140 }, { "epoch": 4.53125, "grad_norm": 1.3342106342315674, "learning_rate": 0.0007734375, "loss": 0.2391, "step": 145 }, { "epoch": 4.6875, "grad_norm": 1.673535704612732, "learning_rate": 0.000765625, "loss": 0.2227, "step": 150 }, { "epoch": 4.84375, "grad_norm": 0.9774993062019348, "learning_rate": 0.0007578125, "loss": 0.2422, "step": 155 }, { "epoch": 5.0, "grad_norm": 1.4892919063568115, "learning_rate": 0.00075, "loss": 0.2506, "step": 160 }, { "epoch": 5.0, "eval_accuracy": 0.7627450980392156, "eval_f1": 0.7565831926906549, "eval_loss": 0.8243059515953064, "eval_precision": 0.7974773644642521, "eval_recall": 0.7627450980392156, "eval_runtime": 15.4543, "eval_samples_per_second": 66.001, "eval_steps_per_second": 2.071, "step": 160 }, { "epoch": 5.15625, "grad_norm": 1.0996050834655762, "learning_rate": 0.0007421875, "loss": 0.1073, "step": 165 }, { "epoch": 5.3125, "grad_norm": 0.7374542951583862, "learning_rate": 0.000734375, "loss": 0.0825, "step": 170 }, { "epoch": 5.46875, "grad_norm": 1.6792224645614624, "learning_rate": 0.0007265625, "loss": 0.1156, "step": 175 }, { "epoch": 5.625, "grad_norm": 0.5832417607307434, "learning_rate": 0.00071875, "loss": 0.1354, "step": 180 }, { "epoch": 5.78125, "grad_norm": 1.3767541646957397, "learning_rate": 0.0007109375, "loss": 0.1285, "step": 185 }, { "epoch": 5.9375, "grad_norm": 0.7182935476303101, "learning_rate": 0.000703125, "loss": 0.0689, "step": 190 }, { "epoch": 6.0, "eval_accuracy": 0.8147058823529412, "eval_f1": 0.811140224680009, "eval_loss": 0.7067364454269409, "eval_precision": 0.8482235528219867, "eval_recall": 0.8147058823529412, "eval_runtime": 14.9102, "eval_samples_per_second": 68.409, "eval_steps_per_second": 2.146, "step": 192 }, { "epoch": 6.09375, "grad_norm": 1.419190764427185, "learning_rate": 0.0006953125, "loss": 0.0833, "step": 195 }, { "epoch": 6.25, "grad_norm": 0.5318430662155151, "learning_rate": 0.0006875, "loss": 0.0637, "step": 200 }, { "epoch": 6.40625, "grad_norm": 0.5169427990913391, "learning_rate": 0.0006796875000000001, "loss": 0.0549, "step": 205 }, { "epoch": 6.5625, "grad_norm": 1.499566912651062, "learning_rate": 0.0006718750000000001, "loss": 0.0618, "step": 210 }, { "epoch": 6.71875, "grad_norm": 0.38381507992744446, "learning_rate": 0.0006640625, "loss": 0.0498, "step": 215 }, { "epoch": 6.875, "grad_norm": 0.6742156744003296, "learning_rate": 0.00065625, "loss": 0.0325, "step": 220 }, { "epoch": 7.0, "eval_accuracy": 0.8205882352941176, "eval_f1": 0.8174916081835706, "eval_loss": 0.636988639831543, "eval_precision": 0.8427513390748685, "eval_recall": 0.8205882352941176, "eval_runtime": 14.9409, "eval_samples_per_second": 68.269, "eval_steps_per_second": 2.142, "step": 224 }, { "epoch": 7.03125, "grad_norm": 0.17029204964637756, "learning_rate": 0.0006484375, "loss": 0.0245, "step": 225 }, { "epoch": 7.1875, "grad_norm": 1.324432134628296, "learning_rate": 0.000640625, "loss": 0.0342, "step": 230 }, { "epoch": 7.34375, "grad_norm": 0.4813753366470337, "learning_rate": 0.0006328125, "loss": 0.027, "step": 235 }, { "epoch": 7.5, "grad_norm": 0.2641347348690033, "learning_rate": 0.000625, "loss": 0.0188, "step": 240 }, { "epoch": 7.65625, "grad_norm": 0.21439166367053986, "learning_rate": 0.0006171875, "loss": 0.0197, "step": 245 }, { "epoch": 7.8125, "grad_norm": 0.1329621970653534, "learning_rate": 0.000609375, "loss": 0.011, "step": 250 }, { "epoch": 7.96875, "grad_norm": 0.10818523168563843, "learning_rate": 0.0006015625, "loss": 0.0132, "step": 255 }, { "epoch": 8.0, "eval_accuracy": 0.8411764705882353, "eval_f1": 0.8389291224616064, "eval_loss": 0.5773842334747314, "eval_precision": 0.8617484911602559, "eval_recall": 0.8411764705882353, "eval_runtime": 14.9945, "eval_samples_per_second": 68.025, "eval_steps_per_second": 2.134, "step": 256 }, { "epoch": 8.125, "grad_norm": 0.10684143751859665, "learning_rate": 0.00059375, "loss": 0.0112, "step": 260 }, { "epoch": 8.28125, "grad_norm": 0.12075643986463547, "learning_rate": 0.0005859375, "loss": 0.0126, "step": 265 }, { "epoch": 8.4375, "grad_norm": 0.04288178309798241, "learning_rate": 0.000578125, "loss": 0.0102, "step": 270 }, { "epoch": 8.59375, "grad_norm": 0.18693001568317413, "learning_rate": 0.0005703125, "loss": 0.0083, "step": 275 }, { "epoch": 8.75, "grad_norm": 0.5971263647079468, "learning_rate": 0.0005625000000000001, "loss": 0.0129, "step": 280 }, { "epoch": 8.90625, "grad_norm": 0.3724566698074341, "learning_rate": 0.0005546875000000001, "loss": 0.0117, "step": 285 }, { "epoch": 9.0, "eval_accuracy": 0.8558823529411764, "eval_f1": 0.8542249227576717, "eval_loss": 0.5469252467155457, "eval_precision": 0.8726222622120454, "eval_recall": 0.8558823529411764, "eval_runtime": 15.713, "eval_samples_per_second": 64.915, "eval_steps_per_second": 2.037, "step": 288 }, { "epoch": 9.0625, "grad_norm": 0.3750954270362854, "learning_rate": 0.000546875, "loss": 0.0111, "step": 290 }, { "epoch": 9.21875, "grad_norm": 0.054302603006362915, "learning_rate": 0.0005390625, "loss": 0.0109, "step": 295 }, { "epoch": 9.375, "grad_norm": 0.037645090371370316, "learning_rate": 0.00053125, "loss": 0.0066, "step": 300 }, { "epoch": 9.53125, "grad_norm": 0.14295075833797455, "learning_rate": 0.0005234375, "loss": 0.0074, "step": 305 }, { "epoch": 9.6875, "grad_norm": 0.21661585569381714, "learning_rate": 0.000515625, "loss": 0.0444, "step": 310 }, { "epoch": 9.84375, "grad_norm": 0.043486110866069794, "learning_rate": 0.0005078125, "loss": 0.0064, "step": 315 }, { "epoch": 10.0, "grad_norm": 0.08744034916162491, "learning_rate": 0.0005, "loss": 0.0066, "step": 320 }, { "epoch": 10.0, "eval_accuracy": 0.8607843137254902, "eval_f1": 0.8574980705022552, "eval_loss": 0.5384429097175598, "eval_precision": 0.8722321747840778, "eval_recall": 0.8607843137254902, "eval_runtime": 15.0735, "eval_samples_per_second": 67.668, "eval_steps_per_second": 2.123, "step": 320 }, { "epoch": 10.15625, "grad_norm": 0.03366677463054657, "learning_rate": 0.0004921875, "loss": 0.0065, "step": 325 }, { "epoch": 10.3125, "grad_norm": 0.02908381260931492, "learning_rate": 0.000484375, "loss": 0.0046, "step": 330 }, { "epoch": 10.46875, "grad_norm": 0.05160678178071976, "learning_rate": 0.0004765625, "loss": 0.0104, "step": 335 }, { "epoch": 10.625, "grad_norm": 0.6550126671791077, "learning_rate": 0.00046875, "loss": 0.0081, "step": 340 }, { "epoch": 10.78125, "grad_norm": 0.060364119708538055, "learning_rate": 0.00046093750000000003, "loss": 0.0042, "step": 345 }, { "epoch": 10.9375, "grad_norm": 0.05566185340285301, "learning_rate": 0.000453125, "loss": 0.0072, "step": 350 }, { "epoch": 11.0, "eval_accuracy": 0.8686274509803922, "eval_f1": 0.864966910867614, "eval_loss": 0.5246109366416931, "eval_precision": 0.8783028790381732, "eval_recall": 0.8686274509803922, "eval_runtime": 15.3132, "eval_samples_per_second": 66.609, "eval_steps_per_second": 2.09, "step": 352 }, { "epoch": 11.09375, "grad_norm": 0.04832284152507782, "learning_rate": 0.0004453125, "loss": 0.0038, "step": 355 }, { "epoch": 11.25, "grad_norm": 0.0449366569519043, "learning_rate": 0.0004375, "loss": 0.0032, "step": 360 }, { "epoch": 11.40625, "grad_norm": 0.09296231716871262, "learning_rate": 0.0004296875, "loss": 0.0054, "step": 365 }, { "epoch": 11.5625, "grad_norm": 0.025081997737288475, "learning_rate": 0.000421875, "loss": 0.007, "step": 370 }, { "epoch": 11.71875, "grad_norm": 0.10262420773506165, "learning_rate": 0.0004140625, "loss": 0.0043, "step": 375 }, { "epoch": 11.875, "grad_norm": 0.23732630908489227, "learning_rate": 0.00040625000000000004, "loss": 0.0068, "step": 380 }, { "epoch": 12.0, "eval_accuracy": 0.8715686274509804, "eval_f1": 0.8679217967545507, "eval_loss": 0.5129648447036743, "eval_precision": 0.8790339018280195, "eval_recall": 0.8715686274509804, "eval_runtime": 14.8883, "eval_samples_per_second": 68.51, "eval_steps_per_second": 2.149, "step": 384 }, { "epoch": 12.03125, "grad_norm": 0.02126588113605976, "learning_rate": 0.00039843750000000003, "loss": 0.0043, "step": 385 }, { "epoch": 12.1875, "grad_norm": 0.01741660013794899, "learning_rate": 0.000390625, "loss": 0.0046, "step": 390 }, { "epoch": 12.34375, "grad_norm": 0.030482813715934753, "learning_rate": 0.0003828125, "loss": 0.0025, "step": 395 }, { "epoch": 12.5, "grad_norm": 0.02115827612578869, "learning_rate": 0.000375, "loss": 0.0022, "step": 400 }, { "epoch": 12.65625, "grad_norm": 0.022862544283270836, "learning_rate": 0.0003671875, "loss": 0.0027, "step": 405 }, { "epoch": 12.8125, "grad_norm": 0.027194203808903694, "learning_rate": 0.000359375, "loss": 0.0037, "step": 410 }, { "epoch": 12.96875, "grad_norm": 0.10136008262634277, "learning_rate": 0.0003515625, "loss": 0.0045, "step": 415 }, { "epoch": 13.0, "eval_accuracy": 0.8715686274509804, "eval_f1": 0.8690991954788165, "eval_loss": 0.5037897825241089, "eval_precision": 0.8814188033615278, "eval_recall": 0.8715686274509804, "eval_runtime": 14.8682, "eval_samples_per_second": 68.603, "eval_steps_per_second": 2.152, "step": 416 }, { "epoch": 13.125, "grad_norm": 0.012970151379704475, "learning_rate": 0.00034375, "loss": 0.002, "step": 420 }, { "epoch": 13.28125, "grad_norm": 0.0718131735920906, "learning_rate": 0.00033593750000000003, "loss": 0.0109, "step": 425 }, { "epoch": 13.4375, "grad_norm": 0.03494563698768616, "learning_rate": 0.000328125, "loss": 0.0019, "step": 430 }, { "epoch": 13.59375, "grad_norm": 0.011698140762746334, "learning_rate": 0.0003203125, "loss": 0.0029, "step": 435 }, { "epoch": 13.75, "grad_norm": 0.03332947567105293, "learning_rate": 0.0003125, "loss": 0.0031, "step": 440 }, { "epoch": 13.90625, "grad_norm": 0.01361924409866333, "learning_rate": 0.0003046875, "loss": 0.0025, "step": 445 }, { "epoch": 14.0, "eval_accuracy": 0.85, "eval_f1": 0.8447940576411276, "eval_loss": 0.5485906004905701, "eval_precision": 0.8627436785308759, "eval_recall": 0.85, "eval_runtime": 14.9785, "eval_samples_per_second": 68.098, "eval_steps_per_second": 2.136, "step": 448 }, { "epoch": 14.0625, "grad_norm": 0.07173674553632736, "learning_rate": 0.000296875, "loss": 0.0032, "step": 450 }, { "epoch": 14.21875, "grad_norm": 0.01797347702085972, "learning_rate": 0.0002890625, "loss": 0.0023, "step": 455 }, { "epoch": 14.375, "grad_norm": 0.028536120429635048, "learning_rate": 0.00028125000000000003, "loss": 0.0093, "step": 460 }, { "epoch": 14.53125, "grad_norm": 0.01648605801165104, "learning_rate": 0.0002734375, "loss": 0.0037, "step": 465 }, { "epoch": 14.6875, "grad_norm": 0.029014134779572487, "learning_rate": 0.000265625, "loss": 0.0024, "step": 470 }, { "epoch": 14.84375, "grad_norm": 0.018230870366096497, "learning_rate": 0.0002578125, "loss": 0.0044, "step": 475 }, { "epoch": 15.0, "grad_norm": 0.017591355368494987, "learning_rate": 0.00025, "loss": 0.0029, "step": 480 }, { "epoch": 15.0, "eval_accuracy": 0.8637254901960785, "eval_f1": 0.8618813910792286, "eval_loss": 0.49917495250701904, "eval_precision": 0.8735624397389105, "eval_recall": 0.8637254901960785, "eval_runtime": 15.3781, "eval_samples_per_second": 66.328, "eval_steps_per_second": 2.081, "step": 480 }, { "epoch": 15.0, "step": 480, "total_flos": 3.27733077030912e+17, "train_loss": 0.6847447718784678, "train_runtime": 581.9505, "train_samples_per_second": 35.055, "train_steps_per_second": 1.1 } ], "logging_steps": 5, "max_steps": 640, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 3, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 3 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.27733077030912e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }