diff --git "a/checkpoint-7656/trainer_state.json" "b/checkpoint-7656/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-7656/trainer_state.json" @@ -0,0 +1,13863 @@ +{ + "best_metric": 0.96282727, + "best_model_checkpoint": "/data1/wjx/model/swift/output/v1_prompt/output/internvl2-26b/v0-20240806-203157/checkpoint-7656", + "epoch": 6.0, + "eval_steps": 1, + "global_step": 7656, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "acc": 0.51014894, + "epoch": 0.0, + "learning_rate": 1.3054830287206266e-07, + "loss": 1.93593538, + "memory(GiB)": 59.04, + "step": 1, + "train_speed(iter/s)": 0.032837 + }, + { + "acc": 0.53850144, + "epoch": 0.0, + "learning_rate": 6.527415143603134e-07, + "loss": 1.74545956, + "memory(GiB)": 64.54, + "step": 5, + "train_speed(iter/s)": 0.035227 + }, + { + "acc": 0.5365521, + "epoch": 0.01, + "learning_rate": 1.3054830287206267e-06, + "loss": 1.77564487, + "memory(GiB)": 75.45, + "step": 10, + "train_speed(iter/s)": 0.034818 + }, + { + "acc": 0.53638253, + "epoch": 0.01, + "learning_rate": 1.9582245430809403e-06, + "loss": 1.75824242, + "memory(GiB)": 75.45, + "step": 15, + "train_speed(iter/s)": 0.035152 + }, + { + "acc": 0.5359807, + "epoch": 0.02, + "learning_rate": 2.6109660574412534e-06, + "loss": 1.74080467, + "memory(GiB)": 78.2, + "step": 20, + "train_speed(iter/s)": 0.034968 + }, + { + "acc": 0.52935181, + "epoch": 0.02, + "learning_rate": 3.263707571801567e-06, + "loss": 1.78961926, + "memory(GiB)": 71.11, + "step": 25, + "train_speed(iter/s)": 0.035135 + }, + { + "acc": 0.52923927, + "epoch": 0.02, + "learning_rate": 3.9164490861618806e-06, + "loss": 1.77055035, + "memory(GiB)": 71.11, + "step": 30, + "train_speed(iter/s)": 0.035279 + }, + { + "acc": 0.51313214, + "epoch": 0.03, + "learning_rate": 4.569190600522193e-06, + "loss": 1.85138378, + "memory(GiB)": 71.11, + "step": 35, + "train_speed(iter/s)": 0.035382 + }, + { + "acc": 0.53307762, + "epoch": 0.03, + "learning_rate": 5.221932114882507e-06, + "loss": 1.75060291, + "memory(GiB)": 71.11, + "step": 40, + "train_speed(iter/s)": 0.035438 + }, + { + "acc": 0.54460502, + "epoch": 0.04, + "learning_rate": 5.87467362924282e-06, + "loss": 1.69541492, + "memory(GiB)": 71.11, + "step": 45, + "train_speed(iter/s)": 0.035472 + }, + { + "acc": 0.53159156, + "epoch": 0.04, + "learning_rate": 6.527415143603134e-06, + "loss": 1.75854664, + "memory(GiB)": 71.11, + "step": 50, + "train_speed(iter/s)": 0.035355 + }, + { + "acc": 0.54251604, + "epoch": 0.04, + "learning_rate": 7.180156657963447e-06, + "loss": 1.72001286, + "memory(GiB)": 71.11, + "step": 55, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.54704895, + "epoch": 0.05, + "learning_rate": 7.832898172323761e-06, + "loss": 1.69055214, + "memory(GiB)": 71.11, + "step": 60, + "train_speed(iter/s)": 0.035313 + }, + { + "acc": 0.54154701, + "epoch": 0.05, + "learning_rate": 8.485639686684073e-06, + "loss": 1.73966904, + "memory(GiB)": 71.11, + "step": 65, + "train_speed(iter/s)": 0.035332 + }, + { + "acc": 0.54317932, + "epoch": 0.05, + "learning_rate": 9.138381201044387e-06, + "loss": 1.72243404, + "memory(GiB)": 71.11, + "step": 70, + "train_speed(iter/s)": 0.035356 + }, + { + "acc": 0.54935493, + "epoch": 0.06, + "learning_rate": 9.7911227154047e-06, + "loss": 1.67216759, + "memory(GiB)": 71.11, + "step": 75, + "train_speed(iter/s)": 0.03538 + }, + { + "acc": 0.5512825, + "epoch": 0.06, + "learning_rate": 1.0443864229765014e-05, + "loss": 1.63593922, + "memory(GiB)": 71.11, + "step": 80, + "train_speed(iter/s)": 0.035391 + }, + { + "acc": 0.55266647, + "epoch": 0.07, + "learning_rate": 1.1096605744125327e-05, + "loss": 1.64257793, + "memory(GiB)": 71.11, + "step": 85, + "train_speed(iter/s)": 0.035413 + }, + { + "acc": 0.5626039, + "epoch": 0.07, + "learning_rate": 1.174934725848564e-05, + "loss": 1.62862835, + "memory(GiB)": 71.11, + "step": 90, + "train_speed(iter/s)": 0.035346 + }, + { + "acc": 0.55852866, + "epoch": 0.07, + "learning_rate": 1.2402088772845953e-05, + "loss": 1.66630707, + "memory(GiB)": 71.11, + "step": 95, + "train_speed(iter/s)": 0.035372 + }, + { + "acc": 0.56864738, + "epoch": 0.08, + "learning_rate": 1.3054830287206268e-05, + "loss": 1.5820467, + "memory(GiB)": 71.11, + "step": 100, + "train_speed(iter/s)": 0.035404 + }, + { + "acc": 0.57975974, + "epoch": 0.08, + "learning_rate": 1.370757180156658e-05, + "loss": 1.50294018, + "memory(GiB)": 71.11, + "step": 105, + "train_speed(iter/s)": 0.035429 + }, + { + "acc": 0.56583891, + "epoch": 0.09, + "learning_rate": 1.4360313315926893e-05, + "loss": 1.5526598, + "memory(GiB)": 71.11, + "step": 110, + "train_speed(iter/s)": 0.035444 + }, + { + "acc": 0.57876883, + "epoch": 0.09, + "learning_rate": 1.5013054830287207e-05, + "loss": 1.52297754, + "memory(GiB)": 71.11, + "step": 115, + "train_speed(iter/s)": 0.03546 + }, + { + "acc": 0.56726651, + "epoch": 0.09, + "learning_rate": 1.5665796344647522e-05, + "loss": 1.5246254, + "memory(GiB)": 71.11, + "step": 120, + "train_speed(iter/s)": 0.035468 + }, + { + "acc": 0.57492142, + "epoch": 0.1, + "learning_rate": 1.6318537859007836e-05, + "loss": 1.51940918, + "memory(GiB)": 71.11, + "step": 125, + "train_speed(iter/s)": 0.035473 + }, + { + "acc": 0.57668262, + "epoch": 0.1, + "learning_rate": 1.6971279373368146e-05, + "loss": 1.49961472, + "memory(GiB)": 71.11, + "step": 130, + "train_speed(iter/s)": 0.03548 + }, + { + "acc": 0.58218517, + "epoch": 0.11, + "learning_rate": 1.762402088772846e-05, + "loss": 1.46871567, + "memory(GiB)": 71.11, + "step": 135, + "train_speed(iter/s)": 0.035434 + }, + { + "acc": 0.5942565, + "epoch": 0.11, + "learning_rate": 1.8276762402088773e-05, + "loss": 1.41751032, + "memory(GiB)": 71.11, + "step": 140, + "train_speed(iter/s)": 0.035445 + }, + { + "acc": 0.58242044, + "epoch": 0.11, + "learning_rate": 1.8929503916449087e-05, + "loss": 1.454846, + "memory(GiB)": 71.11, + "step": 145, + "train_speed(iter/s)": 0.035456 + }, + { + "acc": 0.59025207, + "epoch": 0.12, + "learning_rate": 1.95822454308094e-05, + "loss": 1.44744148, + "memory(GiB)": 71.11, + "step": 150, + "train_speed(iter/s)": 0.035462 + }, + { + "acc": 0.60451708, + "epoch": 0.12, + "learning_rate": 2.0234986945169714e-05, + "loss": 1.38559418, + "memory(GiB)": 71.11, + "step": 155, + "train_speed(iter/s)": 0.035424 + }, + { + "acc": 0.5968575, + "epoch": 0.13, + "learning_rate": 2.0887728459530027e-05, + "loss": 1.38553591, + "memory(GiB)": 71.11, + "step": 160, + "train_speed(iter/s)": 0.035433 + }, + { + "acc": 0.60566149, + "epoch": 0.13, + "learning_rate": 2.154046997389034e-05, + "loss": 1.3881608, + "memory(GiB)": 71.11, + "step": 165, + "train_speed(iter/s)": 0.035438 + }, + { + "acc": 0.59593248, + "epoch": 0.13, + "learning_rate": 2.2193211488250655e-05, + "loss": 1.41851988, + "memory(GiB)": 71.11, + "step": 170, + "train_speed(iter/s)": 0.035445 + }, + { + "acc": 0.60208702, + "epoch": 0.14, + "learning_rate": 2.2845953002610968e-05, + "loss": 1.37305822, + "memory(GiB)": 71.11, + "step": 175, + "train_speed(iter/s)": 0.035454 + }, + { + "acc": 0.60938406, + "epoch": 0.14, + "learning_rate": 2.349869451697128e-05, + "loss": 1.37933292, + "memory(GiB)": 71.11, + "step": 180, + "train_speed(iter/s)": 0.035385 + }, + { + "acc": 0.61727815, + "epoch": 0.14, + "learning_rate": 2.4151436031331595e-05, + "loss": 1.3533494, + "memory(GiB)": 71.11, + "step": 185, + "train_speed(iter/s)": 0.035394 + }, + { + "acc": 0.60119219, + "epoch": 0.15, + "learning_rate": 2.4804177545691905e-05, + "loss": 1.36805573, + "memory(GiB)": 71.11, + "step": 190, + "train_speed(iter/s)": 0.035403 + }, + { + "acc": 0.59402022, + "epoch": 0.15, + "learning_rate": 2.545691906005222e-05, + "loss": 1.43324385, + "memory(GiB)": 71.11, + "step": 195, + "train_speed(iter/s)": 0.03533 + }, + { + "acc": 0.59447851, + "epoch": 0.16, + "learning_rate": 2.6109660574412536e-05, + "loss": 1.40030098, + "memory(GiB)": 71.11, + "step": 200, + "train_speed(iter/s)": 0.035306 + }, + { + "acc": 0.60737519, + "epoch": 0.16, + "learning_rate": 2.6762402088772846e-05, + "loss": 1.3658123, + "memory(GiB)": 71.11, + "step": 205, + "train_speed(iter/s)": 0.035312 + }, + { + "acc": 0.60835319, + "epoch": 0.16, + "learning_rate": 2.741514360313316e-05, + "loss": 1.36259966, + "memory(GiB)": 71.11, + "step": 210, + "train_speed(iter/s)": 0.03532 + }, + { + "acc": 0.60979681, + "epoch": 0.17, + "learning_rate": 2.8067885117493477e-05, + "loss": 1.33051977, + "memory(GiB)": 71.11, + "step": 215, + "train_speed(iter/s)": 0.035329 + }, + { + "acc": 0.62091312, + "epoch": 0.17, + "learning_rate": 2.8720626631853787e-05, + "loss": 1.3052928, + "memory(GiB)": 71.11, + "step": 220, + "train_speed(iter/s)": 0.035339 + }, + { + "acc": 0.62655573, + "epoch": 0.18, + "learning_rate": 2.9373368146214104e-05, + "loss": 1.29806547, + "memory(GiB)": 71.11, + "step": 225, + "train_speed(iter/s)": 0.035353 + }, + { + "acc": 0.60786376, + "epoch": 0.18, + "learning_rate": 3.0026109660574414e-05, + "loss": 1.3772687, + "memory(GiB)": 71.11, + "step": 230, + "train_speed(iter/s)": 0.03536 + }, + { + "acc": 0.60273538, + "epoch": 0.18, + "learning_rate": 3.067885117493473e-05, + "loss": 1.36951752, + "memory(GiB)": 71.11, + "step": 235, + "train_speed(iter/s)": 0.035368 + }, + { + "acc": 0.61111588, + "epoch": 0.19, + "learning_rate": 3.1331592689295045e-05, + "loss": 1.35591669, + "memory(GiB)": 71.11, + "step": 240, + "train_speed(iter/s)": 0.035378 + }, + { + "acc": 0.613943, + "epoch": 0.19, + "learning_rate": 3.1984334203655355e-05, + "loss": 1.36014338, + "memory(GiB)": 71.11, + "step": 245, + "train_speed(iter/s)": 0.035382 + }, + { + "acc": 0.61394243, + "epoch": 0.2, + "learning_rate": 3.263707571801567e-05, + "loss": 1.36864185, + "memory(GiB)": 71.11, + "step": 250, + "train_speed(iter/s)": 0.03539 + }, + { + "acc": 0.61200385, + "epoch": 0.2, + "learning_rate": 3.328981723237598e-05, + "loss": 1.3423625, + "memory(GiB)": 71.11, + "step": 255, + "train_speed(iter/s)": 0.035401 + }, + { + "acc": 0.61476417, + "epoch": 0.2, + "learning_rate": 3.394255874673629e-05, + "loss": 1.36127625, + "memory(GiB)": 71.11, + "step": 260, + "train_speed(iter/s)": 0.035413 + }, + { + "acc": 0.61477809, + "epoch": 0.21, + "learning_rate": 3.459530026109661e-05, + "loss": 1.35783281, + "memory(GiB)": 71.11, + "step": 265, + "train_speed(iter/s)": 0.035422 + }, + { + "acc": 0.62444715, + "epoch": 0.21, + "learning_rate": 3.524804177545692e-05, + "loss": 1.31687889, + "memory(GiB)": 71.11, + "step": 270, + "train_speed(iter/s)": 0.035432 + }, + { + "acc": 0.61132975, + "epoch": 0.22, + "learning_rate": 3.5900783289817236e-05, + "loss": 1.3453701, + "memory(GiB)": 71.11, + "step": 275, + "train_speed(iter/s)": 0.035387 + }, + { + "acc": 0.61926041, + "epoch": 0.22, + "learning_rate": 3.6553524804177546e-05, + "loss": 1.30053482, + "memory(GiB)": 71.11, + "step": 280, + "train_speed(iter/s)": 0.035397 + }, + { + "acc": 0.61626806, + "epoch": 0.22, + "learning_rate": 3.720626631853786e-05, + "loss": 1.33418636, + "memory(GiB)": 71.11, + "step": 285, + "train_speed(iter/s)": 0.035406 + }, + { + "acc": 0.61756167, + "epoch": 0.23, + "learning_rate": 3.7859007832898173e-05, + "loss": 1.32053947, + "memory(GiB)": 79.4, + "step": 290, + "train_speed(iter/s)": 0.035387 + }, + { + "acc": 0.61689606, + "epoch": 0.23, + "learning_rate": 3.8511749347258484e-05, + "loss": 1.30887852, + "memory(GiB)": 82.21, + "step": 295, + "train_speed(iter/s)": 0.035371 + }, + { + "acc": 0.62750869, + "epoch": 0.24, + "learning_rate": 3.91644908616188e-05, + "loss": 1.34463081, + "memory(GiB)": 76.69, + "step": 300, + "train_speed(iter/s)": 0.035378 + }, + { + "acc": 0.60788245, + "epoch": 0.24, + "learning_rate": 3.981723237597911e-05, + "loss": 1.38475618, + "memory(GiB)": 76.69, + "step": 305, + "train_speed(iter/s)": 0.035362 + }, + { + "acc": 0.62858248, + "epoch": 0.24, + "learning_rate": 4.046997389033943e-05, + "loss": 1.28304482, + "memory(GiB)": 76.69, + "step": 310, + "train_speed(iter/s)": 0.03537 + }, + { + "acc": 0.63606496, + "epoch": 0.25, + "learning_rate": 4.112271540469974e-05, + "loss": 1.28804827, + "memory(GiB)": 76.69, + "step": 315, + "train_speed(iter/s)": 0.035378 + }, + { + "acc": 0.62939467, + "epoch": 0.25, + "learning_rate": 4.1775456919060055e-05, + "loss": 1.29029045, + "memory(GiB)": 76.69, + "step": 320, + "train_speed(iter/s)": 0.035385 + }, + { + "acc": 0.618082, + "epoch": 0.25, + "learning_rate": 4.242819843342037e-05, + "loss": 1.29355927, + "memory(GiB)": 76.69, + "step": 325, + "train_speed(iter/s)": 0.035395 + }, + { + "acc": 0.61671734, + "epoch": 0.26, + "learning_rate": 4.308093994778068e-05, + "loss": 1.32377872, + "memory(GiB)": 76.69, + "step": 330, + "train_speed(iter/s)": 0.035381 + }, + { + "acc": 0.62930007, + "epoch": 0.26, + "learning_rate": 4.3733681462141e-05, + "loss": 1.26064873, + "memory(GiB)": 76.69, + "step": 335, + "train_speed(iter/s)": 0.035391 + }, + { + "acc": 0.62707295, + "epoch": 0.27, + "learning_rate": 4.438642297650131e-05, + "loss": 1.25469856, + "memory(GiB)": 76.69, + "step": 340, + "train_speed(iter/s)": 0.035377 + }, + { + "acc": 0.63075371, + "epoch": 0.27, + "learning_rate": 4.503916449086162e-05, + "loss": 1.25198593, + "memory(GiB)": 76.69, + "step": 345, + "train_speed(iter/s)": 0.035383 + }, + { + "acc": 0.63224497, + "epoch": 0.27, + "learning_rate": 4.5691906005221936e-05, + "loss": 1.27103462, + "memory(GiB)": 76.69, + "step": 350, + "train_speed(iter/s)": 0.03539 + }, + { + "acc": 0.62364006, + "epoch": 0.28, + "learning_rate": 4.6344647519582246e-05, + "loss": 1.30759945, + "memory(GiB)": 76.69, + "step": 355, + "train_speed(iter/s)": 0.035395 + }, + { + "acc": 0.63504333, + "epoch": 0.28, + "learning_rate": 4.699738903394256e-05, + "loss": 1.27207603, + "memory(GiB)": 76.69, + "step": 360, + "train_speed(iter/s)": 0.0354 + }, + { + "acc": 0.61879263, + "epoch": 0.29, + "learning_rate": 4.7650130548302874e-05, + "loss": 1.32293415, + "memory(GiB)": 76.69, + "step": 365, + "train_speed(iter/s)": 0.035405 + }, + { + "acc": 0.62347574, + "epoch": 0.29, + "learning_rate": 4.830287206266319e-05, + "loss": 1.30256233, + "memory(GiB)": 76.69, + "step": 370, + "train_speed(iter/s)": 0.035409 + }, + { + "acc": 0.64439292, + "epoch": 0.29, + "learning_rate": 4.89556135770235e-05, + "loss": 1.2355484, + "memory(GiB)": 85.02, + "step": 375, + "train_speed(iter/s)": 0.035394 + }, + { + "acc": 0.62272491, + "epoch": 0.3, + "learning_rate": 4.960835509138381e-05, + "loss": 1.30343094, + "memory(GiB)": 79.49, + "step": 380, + "train_speed(iter/s)": 0.035398 + }, + { + "acc": 0.62577806, + "epoch": 0.3, + "learning_rate": 5.026109660574413e-05, + "loss": 1.26718674, + "memory(GiB)": 79.49, + "step": 385, + "train_speed(iter/s)": 0.035387 + }, + { + "acc": 0.62948937, + "epoch": 0.31, + "learning_rate": 5.091383812010444e-05, + "loss": 1.2764926, + "memory(GiB)": 79.49, + "step": 390, + "train_speed(iter/s)": 0.035394 + }, + { + "acc": 0.62468576, + "epoch": 0.31, + "learning_rate": 5.156657963446475e-05, + "loss": 1.27845716, + "memory(GiB)": 79.49, + "step": 395, + "train_speed(iter/s)": 0.035401 + }, + { + "acc": 0.63630462, + "epoch": 0.31, + "learning_rate": 5.221932114882507e-05, + "loss": 1.23259668, + "memory(GiB)": 79.49, + "step": 400, + "train_speed(iter/s)": 0.035391 + }, + { + "acc": 0.62778835, + "epoch": 0.32, + "learning_rate": 5.287206266318538e-05, + "loss": 1.2565299, + "memory(GiB)": 79.49, + "step": 405, + "train_speed(iter/s)": 0.035396 + }, + { + "acc": 0.62839155, + "epoch": 0.32, + "learning_rate": 5.352480417754569e-05, + "loss": 1.29196539, + "memory(GiB)": 79.49, + "step": 410, + "train_speed(iter/s)": 0.035403 + }, + { + "acc": 0.62438302, + "epoch": 0.33, + "learning_rate": 5.417754569190601e-05, + "loss": 1.31461248, + "memory(GiB)": 79.49, + "step": 415, + "train_speed(iter/s)": 0.035408 + }, + { + "acc": 0.63546472, + "epoch": 0.33, + "learning_rate": 5.483028720626632e-05, + "loss": 1.25184908, + "memory(GiB)": 79.49, + "step": 420, + "train_speed(iter/s)": 0.035379 + }, + { + "acc": 0.6268621, + "epoch": 0.33, + "learning_rate": 5.5483028720626636e-05, + "loss": 1.28653288, + "memory(GiB)": 79.49, + "step": 425, + "train_speed(iter/s)": 0.035384 + }, + { + "acc": 0.62927337, + "epoch": 0.34, + "learning_rate": 5.613577023498695e-05, + "loss": 1.3057992, + "memory(GiB)": 79.49, + "step": 430, + "train_speed(iter/s)": 0.035372 + }, + { + "acc": 0.63009191, + "epoch": 0.34, + "learning_rate": 5.6788511749347264e-05, + "loss": 1.27884521, + "memory(GiB)": 79.49, + "step": 435, + "train_speed(iter/s)": 0.035361 + }, + { + "acc": 0.62101679, + "epoch": 0.34, + "learning_rate": 5.7441253263707574e-05, + "loss": 1.32610731, + "memory(GiB)": 79.49, + "step": 440, + "train_speed(iter/s)": 0.035365 + }, + { + "acc": 0.62638683, + "epoch": 0.35, + "learning_rate": 5.8093994778067884e-05, + "loss": 1.26304455, + "memory(GiB)": 79.49, + "step": 445, + "train_speed(iter/s)": 0.03537 + }, + { + "acc": 0.62493896, + "epoch": 0.35, + "learning_rate": 5.874673629242821e-05, + "loss": 1.29808645, + "memory(GiB)": 79.49, + "step": 450, + "train_speed(iter/s)": 0.035357 + }, + { + "acc": 0.63067155, + "epoch": 0.36, + "learning_rate": 5.939947780678852e-05, + "loss": 1.25492554, + "memory(GiB)": 79.49, + "step": 455, + "train_speed(iter/s)": 0.035345 + }, + { + "acc": 0.62847533, + "epoch": 0.36, + "learning_rate": 6.005221932114883e-05, + "loss": 1.29804668, + "memory(GiB)": 79.49, + "step": 460, + "train_speed(iter/s)": 0.035335 + }, + { + "acc": 0.61040778, + "epoch": 0.36, + "learning_rate": 6.070496083550914e-05, + "loss": 1.3139143, + "memory(GiB)": 79.49, + "step": 465, + "train_speed(iter/s)": 0.035312 + }, + { + "acc": 0.62770967, + "epoch": 0.37, + "learning_rate": 6.135770234986946e-05, + "loss": 1.26855469, + "memory(GiB)": 79.49, + "step": 470, + "train_speed(iter/s)": 0.035319 + }, + { + "acc": 0.62000332, + "epoch": 0.37, + "learning_rate": 6.201044386422978e-05, + "loss": 1.30712109, + "memory(GiB)": 79.49, + "step": 475, + "train_speed(iter/s)": 0.035309 + }, + { + "acc": 0.62734308, + "epoch": 0.38, + "learning_rate": 6.266318537859009e-05, + "loss": 1.24606352, + "memory(GiB)": 79.49, + "step": 480, + "train_speed(iter/s)": 0.035316 + }, + { + "acc": 0.63649292, + "epoch": 0.38, + "learning_rate": 6.33159268929504e-05, + "loss": 1.22983913, + "memory(GiB)": 79.49, + "step": 485, + "train_speed(iter/s)": 0.035319 + }, + { + "acc": 0.64232011, + "epoch": 0.38, + "learning_rate": 6.396866840731071e-05, + "loss": 1.23846798, + "memory(GiB)": 79.49, + "step": 490, + "train_speed(iter/s)": 0.035309 + }, + { + "acc": 0.63173985, + "epoch": 0.39, + "learning_rate": 6.462140992167102e-05, + "loss": 1.26958008, + "memory(GiB)": 79.49, + "step": 495, + "train_speed(iter/s)": 0.035315 + }, + { + "acc": 0.6342205, + "epoch": 0.39, + "learning_rate": 6.527415143603134e-05, + "loss": 1.23609667, + "memory(GiB)": 79.49, + "step": 500, + "train_speed(iter/s)": 0.035321 + }, + { + "acc": 0.64169135, + "epoch": 0.4, + "learning_rate": 6.592689295039165e-05, + "loss": 1.2388341, + "memory(GiB)": 79.49, + "step": 505, + "train_speed(iter/s)": 0.035324 + }, + { + "acc": 0.6370995, + "epoch": 0.4, + "learning_rate": 6.657963446475196e-05, + "loss": 1.24629793, + "memory(GiB)": 79.49, + "step": 510, + "train_speed(iter/s)": 0.03533 + }, + { + "acc": 0.64395504, + "epoch": 0.4, + "learning_rate": 6.723237597911227e-05, + "loss": 1.25374393, + "memory(GiB)": 79.49, + "step": 515, + "train_speed(iter/s)": 0.035319 + }, + { + "acc": 0.62348013, + "epoch": 0.41, + "learning_rate": 6.788511749347258e-05, + "loss": 1.30115499, + "memory(GiB)": 79.49, + "step": 520, + "train_speed(iter/s)": 0.035324 + }, + { + "acc": 0.6296721, + "epoch": 0.41, + "learning_rate": 6.853785900783291e-05, + "loss": 1.24414005, + "memory(GiB)": 79.49, + "step": 525, + "train_speed(iter/s)": 0.035316 + }, + { + "acc": 0.64563322, + "epoch": 0.42, + "learning_rate": 6.919060052219322e-05, + "loss": 1.20363417, + "memory(GiB)": 87.82, + "step": 530, + "train_speed(iter/s)": 0.035306 + }, + { + "acc": 0.63340597, + "epoch": 0.42, + "learning_rate": 6.984334203655353e-05, + "loss": 1.26357803, + "memory(GiB)": 87.82, + "step": 535, + "train_speed(iter/s)": 0.035298 + }, + { + "acc": 0.63216286, + "epoch": 0.42, + "learning_rate": 7.049608355091384e-05, + "loss": 1.22720327, + "memory(GiB)": 87.82, + "step": 540, + "train_speed(iter/s)": 0.035302 + }, + { + "acc": 0.63558154, + "epoch": 0.43, + "learning_rate": 7.114882506527415e-05, + "loss": 1.23628178, + "memory(GiB)": 87.82, + "step": 545, + "train_speed(iter/s)": 0.035307 + }, + { + "acc": 0.63481274, + "epoch": 0.43, + "learning_rate": 7.180156657963447e-05, + "loss": 1.27170897, + "memory(GiB)": 87.82, + "step": 550, + "train_speed(iter/s)": 0.035311 + }, + { + "acc": 0.62329593, + "epoch": 0.43, + "learning_rate": 7.245430809399478e-05, + "loss": 1.29271698, + "memory(GiB)": 87.82, + "step": 555, + "train_speed(iter/s)": 0.035315 + }, + { + "acc": 0.62661791, + "epoch": 0.44, + "learning_rate": 7.310704960835509e-05, + "loss": 1.25905991, + "memory(GiB)": 87.82, + "step": 560, + "train_speed(iter/s)": 0.035318 + }, + { + "acc": 0.63888206, + "epoch": 0.44, + "learning_rate": 7.37597911227154e-05, + "loss": 1.23162041, + "memory(GiB)": 87.82, + "step": 565, + "train_speed(iter/s)": 0.035322 + }, + { + "acc": 0.6273633, + "epoch": 0.45, + "learning_rate": 7.441253263707573e-05, + "loss": 1.27724876, + "memory(GiB)": 87.82, + "step": 570, + "train_speed(iter/s)": 0.035314 + }, + { + "acc": 0.65672364, + "epoch": 0.45, + "learning_rate": 7.506527415143604e-05, + "loss": 1.18381844, + "memory(GiB)": 87.82, + "step": 575, + "train_speed(iter/s)": 0.035319 + }, + { + "acc": 0.63787546, + "epoch": 0.45, + "learning_rate": 7.571801566579635e-05, + "loss": 1.24759865, + "memory(GiB)": 87.82, + "step": 580, + "train_speed(iter/s)": 0.035322 + }, + { + "acc": 0.62682576, + "epoch": 0.46, + "learning_rate": 7.637075718015666e-05, + "loss": 1.26602411, + "memory(GiB)": 87.82, + "step": 585, + "train_speed(iter/s)": 0.035314 + }, + { + "acc": 0.64525466, + "epoch": 0.46, + "learning_rate": 7.702349869451697e-05, + "loss": 1.20333176, + "memory(GiB)": 87.82, + "step": 590, + "train_speed(iter/s)": 0.035318 + }, + { + "acc": 0.63623095, + "epoch": 0.47, + "learning_rate": 7.767624020887729e-05, + "loss": 1.23436947, + "memory(GiB)": 87.82, + "step": 595, + "train_speed(iter/s)": 0.035323 + }, + { + "acc": 0.63377829, + "epoch": 0.47, + "learning_rate": 7.83289817232376e-05, + "loss": 1.25122204, + "memory(GiB)": 87.82, + "step": 600, + "train_speed(iter/s)": 0.035326 + }, + { + "acc": 0.64111838, + "epoch": 0.47, + "learning_rate": 7.898172323759791e-05, + "loss": 1.23531017, + "memory(GiB)": 87.82, + "step": 605, + "train_speed(iter/s)": 0.03533 + }, + { + "acc": 0.631183, + "epoch": 0.48, + "learning_rate": 7.963446475195822e-05, + "loss": 1.25364704, + "memory(GiB)": 87.82, + "step": 610, + "train_speed(iter/s)": 0.035335 + }, + { + "acc": 0.63098602, + "epoch": 0.48, + "learning_rate": 8.028720626631853e-05, + "loss": 1.25691967, + "memory(GiB)": 87.82, + "step": 615, + "train_speed(iter/s)": 0.035328 + }, + { + "acc": 0.63813372, + "epoch": 0.49, + "learning_rate": 8.093994778067886e-05, + "loss": 1.22148628, + "memory(GiB)": 87.82, + "step": 620, + "train_speed(iter/s)": 0.035321 + }, + { + "acc": 0.63645906, + "epoch": 0.49, + "learning_rate": 8.159268929503917e-05, + "loss": 1.2717802, + "memory(GiB)": 87.82, + "step": 625, + "train_speed(iter/s)": 0.035324 + }, + { + "acc": 0.63675809, + "epoch": 0.49, + "learning_rate": 8.224543080939948e-05, + "loss": 1.22741051, + "memory(GiB)": 87.82, + "step": 630, + "train_speed(iter/s)": 0.035328 + }, + { + "acc": 0.63550248, + "epoch": 0.5, + "learning_rate": 8.28981723237598e-05, + "loss": 1.25706348, + "memory(GiB)": 87.82, + "step": 635, + "train_speed(iter/s)": 0.035332 + }, + { + "acc": 0.63328991, + "epoch": 0.5, + "learning_rate": 8.355091383812011e-05, + "loss": 1.24116373, + "memory(GiB)": 87.82, + "step": 640, + "train_speed(iter/s)": 0.035337 + }, + { + "acc": 0.64723411, + "epoch": 0.51, + "learning_rate": 8.420365535248042e-05, + "loss": 1.21720247, + "memory(GiB)": 87.82, + "step": 645, + "train_speed(iter/s)": 0.03534 + }, + { + "acc": 0.63475337, + "epoch": 0.51, + "learning_rate": 8.485639686684074e-05, + "loss": 1.25098181, + "memory(GiB)": 87.82, + "step": 650, + "train_speed(iter/s)": 0.035343 + }, + { + "acc": 0.65048108, + "epoch": 0.51, + "learning_rate": 8.550913838120105e-05, + "loss": 1.19720526, + "memory(GiB)": 87.82, + "step": 655, + "train_speed(iter/s)": 0.035348 + }, + { + "acc": 0.63787851, + "epoch": 0.52, + "learning_rate": 8.616187989556136e-05, + "loss": 1.24119263, + "memory(GiB)": 87.82, + "step": 660, + "train_speed(iter/s)": 0.035342 + }, + { + "acc": 0.63413329, + "epoch": 0.52, + "learning_rate": 8.681462140992167e-05, + "loss": 1.27156887, + "memory(GiB)": 87.82, + "step": 665, + "train_speed(iter/s)": 0.035346 + }, + { + "acc": 0.62740493, + "epoch": 0.53, + "learning_rate": 8.7467362924282e-05, + "loss": 1.27027969, + "memory(GiB)": 87.82, + "step": 670, + "train_speed(iter/s)": 0.035337 + }, + { + "acc": 0.64307756, + "epoch": 0.53, + "learning_rate": 8.812010443864231e-05, + "loss": 1.2422493, + "memory(GiB)": 87.82, + "step": 675, + "train_speed(iter/s)": 0.035341 + }, + { + "acc": 0.63943014, + "epoch": 0.53, + "learning_rate": 8.877284595300262e-05, + "loss": 1.2298399, + "memory(GiB)": 87.82, + "step": 680, + "train_speed(iter/s)": 0.035333 + }, + { + "acc": 0.6344151, + "epoch": 0.54, + "learning_rate": 8.942558746736293e-05, + "loss": 1.24074373, + "memory(GiB)": 87.82, + "step": 685, + "train_speed(iter/s)": 0.035317 + }, + { + "acc": 0.62509422, + "epoch": 0.54, + "learning_rate": 9.007832898172324e-05, + "loss": 1.29264908, + "memory(GiB)": 90.61, + "step": 690, + "train_speed(iter/s)": 0.035299 + }, + { + "acc": 0.64127073, + "epoch": 0.54, + "learning_rate": 9.073107049608356e-05, + "loss": 1.21613617, + "memory(GiB)": 85.12, + "step": 695, + "train_speed(iter/s)": 0.035303 + }, + { + "acc": 0.63301926, + "epoch": 0.55, + "learning_rate": 9.138381201044387e-05, + "loss": 1.25347605, + "memory(GiB)": 85.12, + "step": 700, + "train_speed(iter/s)": 0.035307 + }, + { + "acc": 0.63968863, + "epoch": 0.55, + "learning_rate": 9.203655352480418e-05, + "loss": 1.20407486, + "memory(GiB)": 85.12, + "step": 705, + "train_speed(iter/s)": 0.035311 + }, + { + "acc": 0.64357448, + "epoch": 0.56, + "learning_rate": 9.268929503916449e-05, + "loss": 1.19348354, + "memory(GiB)": 85.12, + "step": 710, + "train_speed(iter/s)": 0.035315 + }, + { + "acc": 0.6440393, + "epoch": 0.56, + "learning_rate": 9.33420365535248e-05, + "loss": 1.21024275, + "memory(GiB)": 85.12, + "step": 715, + "train_speed(iter/s)": 0.035319 + }, + { + "acc": 0.64507108, + "epoch": 0.56, + "learning_rate": 9.399477806788513e-05, + "loss": 1.23527927, + "memory(GiB)": 85.12, + "step": 720, + "train_speed(iter/s)": 0.035313 + }, + { + "acc": 0.64306412, + "epoch": 0.57, + "learning_rate": 9.464751958224544e-05, + "loss": 1.24440823, + "memory(GiB)": 85.12, + "step": 725, + "train_speed(iter/s)": 0.035318 + }, + { + "acc": 0.64716368, + "epoch": 0.57, + "learning_rate": 9.530026109660575e-05, + "loss": 1.23672924, + "memory(GiB)": 85.12, + "step": 730, + "train_speed(iter/s)": 0.035321 + }, + { + "acc": 0.6508863, + "epoch": 0.58, + "learning_rate": 9.595300261096606e-05, + "loss": 1.16483746, + "memory(GiB)": 85.12, + "step": 735, + "train_speed(iter/s)": 0.035324 + }, + { + "acc": 0.64414196, + "epoch": 0.58, + "learning_rate": 9.660574412532638e-05, + "loss": 1.24241323, + "memory(GiB)": 85.12, + "step": 740, + "train_speed(iter/s)": 0.035326 + }, + { + "acc": 0.64069014, + "epoch": 0.58, + "learning_rate": 9.725848563968669e-05, + "loss": 1.21243725, + "memory(GiB)": 85.12, + "step": 745, + "train_speed(iter/s)": 0.035329 + }, + { + "acc": 0.6319931, + "epoch": 0.59, + "learning_rate": 9.7911227154047e-05, + "loss": 1.23099251, + "memory(GiB)": 85.12, + "step": 750, + "train_speed(iter/s)": 0.035332 + }, + { + "acc": 0.63715777, + "epoch": 0.59, + "learning_rate": 9.856396866840731e-05, + "loss": 1.23596754, + "memory(GiB)": 85.12, + "step": 755, + "train_speed(iter/s)": 0.035336 + }, + { + "acc": 0.63574848, + "epoch": 0.6, + "learning_rate": 9.921671018276762e-05, + "loss": 1.23194542, + "memory(GiB)": 85.12, + "step": 760, + "train_speed(iter/s)": 0.035339 + }, + { + "acc": 0.63391104, + "epoch": 0.6, + "learning_rate": 9.986945169712795e-05, + "loss": 1.25005703, + "memory(GiB)": 85.12, + "step": 765, + "train_speed(iter/s)": 0.035343 + }, + { + "acc": 0.64336019, + "epoch": 0.6, + "learning_rate": 9.999998134167974e-05, + "loss": 1.19814224, + "memory(GiB)": 85.12, + "step": 770, + "train_speed(iter/s)": 0.035346 + }, + { + "acc": 0.64235554, + "epoch": 0.61, + "learning_rate": 9.999990554227756e-05, + "loss": 1.20386868, + "memory(GiB)": 85.12, + "step": 775, + "train_speed(iter/s)": 0.035348 + }, + { + "acc": 0.63095374, + "epoch": 0.61, + "learning_rate": 9.999977143573674e-05, + "loss": 1.24922619, + "memory(GiB)": 85.12, + "step": 780, + "train_speed(iter/s)": 0.03535 + }, + { + "acc": 0.63008108, + "epoch": 0.62, + "learning_rate": 9.99995790222137e-05, + "loss": 1.26964073, + "memory(GiB)": 85.12, + "step": 785, + "train_speed(iter/s)": 0.035353 + }, + { + "acc": 0.63752484, + "epoch": 0.62, + "learning_rate": 9.999932830193279e-05, + "loss": 1.22619057, + "memory(GiB)": 85.12, + "step": 790, + "train_speed(iter/s)": 0.035348 + }, + { + "acc": 0.63318844, + "epoch": 0.62, + "learning_rate": 9.999901927518642e-05, + "loss": 1.26003723, + "memory(GiB)": 85.12, + "step": 795, + "train_speed(iter/s)": 0.035351 + }, + { + "acc": 0.63203177, + "epoch": 0.63, + "learning_rate": 9.999865194233496e-05, + "loss": 1.24707184, + "memory(GiB)": 85.12, + "step": 800, + "train_speed(iter/s)": 0.035344 + }, + { + "acc": 0.62407198, + "epoch": 0.63, + "learning_rate": 9.999822630380674e-05, + "loss": 1.28778019, + "memory(GiB)": 85.12, + "step": 805, + "train_speed(iter/s)": 0.035345 + }, + { + "acc": 0.62447062, + "epoch": 0.63, + "learning_rate": 9.999774236009813e-05, + "loss": 1.30319834, + "memory(GiB)": 85.12, + "step": 810, + "train_speed(iter/s)": 0.035348 + }, + { + "acc": 0.63752298, + "epoch": 0.64, + "learning_rate": 9.999720011177348e-05, + "loss": 1.24396782, + "memory(GiB)": 85.12, + "step": 815, + "train_speed(iter/s)": 0.035342 + }, + { + "acc": 0.6344676, + "epoch": 0.64, + "learning_rate": 9.999659955946514e-05, + "loss": 1.24418392, + "memory(GiB)": 85.12, + "step": 820, + "train_speed(iter/s)": 0.035327 + }, + { + "acc": 0.63493099, + "epoch": 0.65, + "learning_rate": 9.999594070387343e-05, + "loss": 1.25098581, + "memory(GiB)": 85.12, + "step": 825, + "train_speed(iter/s)": 0.035329 + }, + { + "acc": 0.64141641, + "epoch": 0.65, + "learning_rate": 9.999522354576669e-05, + "loss": 1.21629629, + "memory(GiB)": 85.12, + "step": 830, + "train_speed(iter/s)": 0.035323 + }, + { + "acc": 0.65251746, + "epoch": 0.65, + "learning_rate": 9.99944480859812e-05, + "loss": 1.16716757, + "memory(GiB)": 85.12, + "step": 835, + "train_speed(iter/s)": 0.035309 + }, + { + "acc": 0.63880744, + "epoch": 0.66, + "learning_rate": 9.999361432542128e-05, + "loss": 1.21747561, + "memory(GiB)": 85.12, + "step": 840, + "train_speed(iter/s)": 0.035304 + }, + { + "acc": 0.6420105, + "epoch": 0.66, + "learning_rate": 9.99927222650592e-05, + "loss": 1.2190115, + "memory(GiB)": 85.12, + "step": 845, + "train_speed(iter/s)": 0.035308 + }, + { + "acc": 0.63001771, + "epoch": 0.67, + "learning_rate": 9.999177190593525e-05, + "loss": 1.25841103, + "memory(GiB)": 85.12, + "step": 850, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.62701368, + "epoch": 0.67, + "learning_rate": 9.999076324915768e-05, + "loss": 1.28360729, + "memory(GiB)": 85.12, + "step": 855, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.6483603, + "epoch": 0.67, + "learning_rate": 9.998969629590274e-05, + "loss": 1.19540863, + "memory(GiB)": 85.12, + "step": 860, + "train_speed(iter/s)": 0.035266 + }, + { + "acc": 0.64731088, + "epoch": 0.68, + "learning_rate": 9.998857104741461e-05, + "loss": 1.19496069, + "memory(GiB)": 85.12, + "step": 865, + "train_speed(iter/s)": 0.035269 + }, + { + "acc": 0.64258432, + "epoch": 0.68, + "learning_rate": 9.998738750500553e-05, + "loss": 1.2038908, + "memory(GiB)": 85.12, + "step": 870, + "train_speed(iter/s)": 0.035272 + }, + { + "acc": 0.63443327, + "epoch": 0.69, + "learning_rate": 9.998614567005569e-05, + "loss": 1.24369755, + "memory(GiB)": 85.12, + "step": 875, + "train_speed(iter/s)": 0.035267 + }, + { + "acc": 0.63675046, + "epoch": 0.69, + "learning_rate": 9.998484554401323e-05, + "loss": 1.25242014, + "memory(GiB)": 85.12, + "step": 880, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.63459558, + "epoch": 0.69, + "learning_rate": 9.99834871283943e-05, + "loss": 1.20982609, + "memory(GiB)": 85.12, + "step": 885, + "train_speed(iter/s)": 0.035274 + }, + { + "acc": 0.62059612, + "epoch": 0.7, + "learning_rate": 9.998207042478298e-05, + "loss": 1.29975319, + "memory(GiB)": 85.12, + "step": 890, + "train_speed(iter/s)": 0.035277 + }, + { + "acc": 0.64450974, + "epoch": 0.7, + "learning_rate": 9.998059543483138e-05, + "loss": 1.21123343, + "memory(GiB)": 85.12, + "step": 895, + "train_speed(iter/s)": 0.035281 + }, + { + "acc": 0.64039574, + "epoch": 0.71, + "learning_rate": 9.997906216025954e-05, + "loss": 1.22988548, + "memory(GiB)": 85.12, + "step": 900, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.64195466, + "epoch": 0.71, + "learning_rate": 9.997747060285548e-05, + "loss": 1.23618727, + "memory(GiB)": 85.12, + "step": 905, + "train_speed(iter/s)": 0.035279 + }, + { + "acc": 0.64903636, + "epoch": 0.71, + "learning_rate": 9.99758207644752e-05, + "loss": 1.1861618, + "memory(GiB)": 85.12, + "step": 910, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.64557695, + "epoch": 0.72, + "learning_rate": 9.997411264704264e-05, + "loss": 1.16902866, + "memory(GiB)": 85.12, + "step": 915, + "train_speed(iter/s)": 0.035278 + }, + { + "acc": 0.6429708, + "epoch": 0.72, + "learning_rate": 9.99723462525497e-05, + "loss": 1.20082893, + "memory(GiB)": 85.12, + "step": 920, + "train_speed(iter/s)": 0.035281 + }, + { + "acc": 0.64234171, + "epoch": 0.72, + "learning_rate": 9.99705215830563e-05, + "loss": 1.2100441, + "memory(GiB)": 85.12, + "step": 925, + "train_speed(iter/s)": 0.035283 + }, + { + "acc": 0.64274926, + "epoch": 0.73, + "learning_rate": 9.996863864069019e-05, + "loss": 1.15481606, + "memory(GiB)": 85.12, + "step": 930, + "train_speed(iter/s)": 0.035285 + }, + { + "acc": 0.63187203, + "epoch": 0.73, + "learning_rate": 9.996669742764722e-05, + "loss": 1.26747904, + "memory(GiB)": 85.12, + "step": 935, + "train_speed(iter/s)": 0.035288 + }, + { + "acc": 0.65685897, + "epoch": 0.74, + "learning_rate": 9.996469794619111e-05, + "loss": 1.1622716, + "memory(GiB)": 85.12, + "step": 940, + "train_speed(iter/s)": 0.035283 + }, + { + "acc": 0.64430394, + "epoch": 0.74, + "learning_rate": 9.996264019865353e-05, + "loss": 1.21984491, + "memory(GiB)": 85.12, + "step": 945, + "train_speed(iter/s)": 0.035287 + }, + { + "acc": 0.64375916, + "epoch": 0.74, + "learning_rate": 9.996052418743414e-05, + "loss": 1.23850603, + "memory(GiB)": 85.12, + "step": 950, + "train_speed(iter/s)": 0.035289 + }, + { + "acc": 0.63599143, + "epoch": 0.75, + "learning_rate": 9.995834991500048e-05, + "loss": 1.2474575, + "memory(GiB)": 85.12, + "step": 955, + "train_speed(iter/s)": 0.035292 + }, + { + "acc": 0.63893137, + "epoch": 0.75, + "learning_rate": 9.99561173838881e-05, + "loss": 1.23318357, + "memory(GiB)": 85.12, + "step": 960, + "train_speed(iter/s)": 0.035294 + }, + { + "acc": 0.64323502, + "epoch": 0.76, + "learning_rate": 9.995382659670043e-05, + "loss": 1.20357561, + "memory(GiB)": 85.12, + "step": 965, + "train_speed(iter/s)": 0.035295 + }, + { + "acc": 0.64130268, + "epoch": 0.76, + "learning_rate": 9.995147755610885e-05, + "loss": 1.21935377, + "memory(GiB)": 85.12, + "step": 970, + "train_speed(iter/s)": 0.035299 + }, + { + "acc": 0.64334393, + "epoch": 0.76, + "learning_rate": 9.994907026485273e-05, + "loss": 1.18746109, + "memory(GiB)": 85.12, + "step": 975, + "train_speed(iter/s)": 0.035301 + }, + { + "acc": 0.64836397, + "epoch": 0.77, + "learning_rate": 9.994660472573929e-05, + "loss": 1.18723745, + "memory(GiB)": 85.12, + "step": 980, + "train_speed(iter/s)": 0.035305 + }, + { + "acc": 0.64067945, + "epoch": 0.77, + "learning_rate": 9.994408094164369e-05, + "loss": 1.21092262, + "memory(GiB)": 85.12, + "step": 985, + "train_speed(iter/s)": 0.035308 + }, + { + "acc": 0.65696807, + "epoch": 0.78, + "learning_rate": 9.994149891550906e-05, + "loss": 1.15461454, + "memory(GiB)": 85.12, + "step": 990, + "train_speed(iter/s)": 0.035311 + }, + { + "acc": 0.64268813, + "epoch": 0.78, + "learning_rate": 9.99388586503464e-05, + "loss": 1.2276782, + "memory(GiB)": 85.12, + "step": 995, + "train_speed(iter/s)": 0.0353 + }, + { + "acc": 0.64682531, + "epoch": 0.78, + "learning_rate": 9.993616014923464e-05, + "loss": 1.19210787, + "memory(GiB)": 85.12, + "step": 1000, + "train_speed(iter/s)": 0.035295 + }, + { + "acc": 0.63967905, + "epoch": 0.79, + "learning_rate": 9.993340341532063e-05, + "loss": 1.21900482, + "memory(GiB)": 85.12, + "step": 1005, + "train_speed(iter/s)": 0.035298 + }, + { + "acc": 0.64601598, + "epoch": 0.79, + "learning_rate": 9.993058845181913e-05, + "loss": 1.18572149, + "memory(GiB)": 85.12, + "step": 1010, + "train_speed(iter/s)": 0.0353 + }, + { + "acc": 0.6581532, + "epoch": 0.8, + "learning_rate": 9.992771526201278e-05, + "loss": 1.17555571, + "memory(GiB)": 85.12, + "step": 1015, + "train_speed(iter/s)": 0.035303 + }, + { + "acc": 0.63905029, + "epoch": 0.8, + "learning_rate": 9.992478384925215e-05, + "loss": 1.22506828, + "memory(GiB)": 85.12, + "step": 1020, + "train_speed(iter/s)": 0.035306 + }, + { + "acc": 0.63737507, + "epoch": 0.8, + "learning_rate": 9.992179421695566e-05, + "loss": 1.24342728, + "memory(GiB)": 85.12, + "step": 1025, + "train_speed(iter/s)": 0.035309 + }, + { + "acc": 0.63883562, + "epoch": 0.81, + "learning_rate": 9.99187463686097e-05, + "loss": 1.22715645, + "memory(GiB)": 85.12, + "step": 1030, + "train_speed(iter/s)": 0.035304 + }, + { + "acc": 0.64026632, + "epoch": 0.81, + "learning_rate": 9.991564030776847e-05, + "loss": 1.2529954, + "memory(GiB)": 85.12, + "step": 1035, + "train_speed(iter/s)": 0.035307 + }, + { + "acc": 0.65661197, + "epoch": 0.82, + "learning_rate": 9.99124760380541e-05, + "loss": 1.17418413, + "memory(GiB)": 85.12, + "step": 1040, + "train_speed(iter/s)": 0.035295 + }, + { + "acc": 0.64462228, + "epoch": 0.82, + "learning_rate": 9.990925356315659e-05, + "loss": 1.21107912, + "memory(GiB)": 85.12, + "step": 1045, + "train_speed(iter/s)": 0.035297 + }, + { + "acc": 0.65130072, + "epoch": 0.82, + "learning_rate": 9.99059728868338e-05, + "loss": 1.18552179, + "memory(GiB)": 85.12, + "step": 1050, + "train_speed(iter/s)": 0.0353 + }, + { + "acc": 0.64942465, + "epoch": 0.83, + "learning_rate": 9.990263401291149e-05, + "loss": 1.19978065, + "memory(GiB)": 85.12, + "step": 1055, + "train_speed(iter/s)": 0.035302 + }, + { + "acc": 0.64416938, + "epoch": 0.83, + "learning_rate": 9.989923694528327e-05, + "loss": 1.21087933, + "memory(GiB)": 85.12, + "step": 1060, + "train_speed(iter/s)": 0.035298 + }, + { + "acc": 0.64448967, + "epoch": 0.83, + "learning_rate": 9.989578168791059e-05, + "loss": 1.23659315, + "memory(GiB)": 85.12, + "step": 1065, + "train_speed(iter/s)": 0.0353 + }, + { + "acc": 0.6494998, + "epoch": 0.84, + "learning_rate": 9.989226824482281e-05, + "loss": 1.1761158, + "memory(GiB)": 85.12, + "step": 1070, + "train_speed(iter/s)": 0.035302 + }, + { + "acc": 0.64944773, + "epoch": 0.84, + "learning_rate": 9.98886966201171e-05, + "loss": 1.17971296, + "memory(GiB)": 85.12, + "step": 1075, + "train_speed(iter/s)": 0.035305 + }, + { + "acc": 0.64949713, + "epoch": 0.85, + "learning_rate": 9.98850668179585e-05, + "loss": 1.19467411, + "memory(GiB)": 85.12, + "step": 1080, + "train_speed(iter/s)": 0.035301 + }, + { + "acc": 0.6409584, + "epoch": 0.85, + "learning_rate": 9.988137884257987e-05, + "loss": 1.22220039, + "memory(GiB)": 85.12, + "step": 1085, + "train_speed(iter/s)": 0.035289 + }, + { + "acc": 0.65775137, + "epoch": 0.85, + "learning_rate": 9.987763269828194e-05, + "loss": 1.15307426, + "memory(GiB)": 85.12, + "step": 1090, + "train_speed(iter/s)": 0.035292 + }, + { + "acc": 0.65808535, + "epoch": 0.86, + "learning_rate": 9.987382838943325e-05, + "loss": 1.16586542, + "memory(GiB)": 85.12, + "step": 1095, + "train_speed(iter/s)": 0.035294 + }, + { + "acc": 0.63382239, + "epoch": 0.86, + "learning_rate": 9.986996592047017e-05, + "loss": 1.22561712, + "memory(GiB)": 85.12, + "step": 1100, + "train_speed(iter/s)": 0.035297 + }, + { + "acc": 0.64164915, + "epoch": 0.87, + "learning_rate": 9.986604529589691e-05, + "loss": 1.2104146, + "memory(GiB)": 85.12, + "step": 1105, + "train_speed(iter/s)": 0.035299 + }, + { + "acc": 0.65060873, + "epoch": 0.87, + "learning_rate": 9.98620665202855e-05, + "loss": 1.18811502, + "memory(GiB)": 85.12, + "step": 1110, + "train_speed(iter/s)": 0.035301 + }, + { + "acc": 0.64695697, + "epoch": 0.87, + "learning_rate": 9.985802959827573e-05, + "loss": 1.20489264, + "memory(GiB)": 85.12, + "step": 1115, + "train_speed(iter/s)": 0.035303 + }, + { + "acc": 0.63937025, + "epoch": 0.88, + "learning_rate": 9.985393453457526e-05, + "loss": 1.20603237, + "memory(GiB)": 85.12, + "step": 1120, + "train_speed(iter/s)": 0.035306 + }, + { + "acc": 0.65824099, + "epoch": 0.88, + "learning_rate": 9.984978133395954e-05, + "loss": 1.16597528, + "memory(GiB)": 85.12, + "step": 1125, + "train_speed(iter/s)": 0.035309 + }, + { + "acc": 0.63783302, + "epoch": 0.89, + "learning_rate": 9.984557000127177e-05, + "loss": 1.2381628, + "memory(GiB)": 85.12, + "step": 1130, + "train_speed(iter/s)": 0.035311 + }, + { + "acc": 0.64608231, + "epoch": 0.89, + "learning_rate": 9.984130054142302e-05, + "loss": 1.20785971, + "memory(GiB)": 85.12, + "step": 1135, + "train_speed(iter/s)": 0.035313 + }, + { + "acc": 0.64283195, + "epoch": 0.89, + "learning_rate": 9.983697295939205e-05, + "loss": 1.19586048, + "memory(GiB)": 85.12, + "step": 1140, + "train_speed(iter/s)": 0.035315 + }, + { + "acc": 0.64510169, + "epoch": 0.9, + "learning_rate": 9.983258726022549e-05, + "loss": 1.17720518, + "memory(GiB)": 85.12, + "step": 1145, + "train_speed(iter/s)": 0.035316 + }, + { + "acc": 0.64794512, + "epoch": 0.9, + "learning_rate": 9.982814344903766e-05, + "loss": 1.20341921, + "memory(GiB)": 85.12, + "step": 1150, + "train_speed(iter/s)": 0.035319 + }, + { + "acc": 0.64761252, + "epoch": 0.91, + "learning_rate": 9.982364153101072e-05, + "loss": 1.20330772, + "memory(GiB)": 85.12, + "step": 1155, + "train_speed(iter/s)": 0.035321 + }, + { + "acc": 0.64469285, + "epoch": 0.91, + "learning_rate": 9.981908151139456e-05, + "loss": 1.22739487, + "memory(GiB)": 85.12, + "step": 1160, + "train_speed(iter/s)": 0.035323 + }, + { + "acc": 0.65169878, + "epoch": 0.91, + "learning_rate": 9.98144633955068e-05, + "loss": 1.17159405, + "memory(GiB)": 85.12, + "step": 1165, + "train_speed(iter/s)": 0.035325 + }, + { + "acc": 0.64189563, + "epoch": 0.92, + "learning_rate": 9.980978718873286e-05, + "loss": 1.21570683, + "memory(GiB)": 85.12, + "step": 1170, + "train_speed(iter/s)": 0.035327 + }, + { + "acc": 0.65856137, + "epoch": 0.92, + "learning_rate": 9.980505289652585e-05, + "loss": 1.14105463, + "memory(GiB)": 85.12, + "step": 1175, + "train_speed(iter/s)": 0.035328 + }, + { + "acc": 0.63436284, + "epoch": 0.92, + "learning_rate": 9.980026052440665e-05, + "loss": 1.2412138, + "memory(GiB)": 85.12, + "step": 1180, + "train_speed(iter/s)": 0.035325 + }, + { + "acc": 0.65267048, + "epoch": 0.93, + "learning_rate": 9.979541007796388e-05, + "loss": 1.17890749, + "memory(GiB)": 85.12, + "step": 1185, + "train_speed(iter/s)": 0.035327 + }, + { + "acc": 0.64592175, + "epoch": 0.93, + "learning_rate": 9.979050156285384e-05, + "loss": 1.19027033, + "memory(GiB)": 85.12, + "step": 1190, + "train_speed(iter/s)": 0.03533 + }, + { + "acc": 0.66446619, + "epoch": 0.94, + "learning_rate": 9.978553498480057e-05, + "loss": 1.15592375, + "memory(GiB)": 85.12, + "step": 1195, + "train_speed(iter/s)": 0.035325 + }, + { + "acc": 0.64879594, + "epoch": 0.94, + "learning_rate": 9.978051034959583e-05, + "loss": 1.2092351, + "memory(GiB)": 85.12, + "step": 1200, + "train_speed(iter/s)": 0.035327 + }, + { + "acc": 0.64852567, + "epoch": 0.94, + "learning_rate": 9.977542766309907e-05, + "loss": 1.19442778, + "memory(GiB)": 85.12, + "step": 1205, + "train_speed(iter/s)": 0.035323 + }, + { + "acc": 0.640028, + "epoch": 0.95, + "learning_rate": 9.977028693123744e-05, + "loss": 1.21321182, + "memory(GiB)": 85.12, + "step": 1210, + "train_speed(iter/s)": 0.035325 + }, + { + "acc": 0.64608712, + "epoch": 0.95, + "learning_rate": 9.976508816000578e-05, + "loss": 1.21685104, + "memory(GiB)": 85.12, + "step": 1215, + "train_speed(iter/s)": 0.035317 + }, + { + "acc": 0.65058255, + "epoch": 0.96, + "learning_rate": 9.975983135546661e-05, + "loss": 1.20579329, + "memory(GiB)": 85.12, + "step": 1220, + "train_speed(iter/s)": 0.035308 + }, + { + "acc": 0.64077854, + "epoch": 0.96, + "learning_rate": 9.975451652375012e-05, + "loss": 1.22381687, + "memory(GiB)": 85.12, + "step": 1225, + "train_speed(iter/s)": 0.035304 + }, + { + "acc": 0.64167862, + "epoch": 0.96, + "learning_rate": 9.974914367105419e-05, + "loss": 1.20327978, + "memory(GiB)": 85.12, + "step": 1230, + "train_speed(iter/s)": 0.035306 + }, + { + "acc": 0.64583015, + "epoch": 0.97, + "learning_rate": 9.974371280364431e-05, + "loss": 1.19592552, + "memory(GiB)": 85.12, + "step": 1235, + "train_speed(iter/s)": 0.035308 + }, + { + "acc": 0.6488265, + "epoch": 0.97, + "learning_rate": 9.973822392785373e-05, + "loss": 1.17611341, + "memory(GiB)": 85.12, + "step": 1240, + "train_speed(iter/s)": 0.035304 + }, + { + "acc": 0.65858684, + "epoch": 0.98, + "learning_rate": 9.973267705008318e-05, + "loss": 1.11910753, + "memory(GiB)": 85.12, + "step": 1245, + "train_speed(iter/s)": 0.035306 + }, + { + "acc": 0.63276486, + "epoch": 0.98, + "learning_rate": 9.97270721768012e-05, + "loss": 1.24935932, + "memory(GiB)": 85.12, + "step": 1250, + "train_speed(iter/s)": 0.035298 + }, + { + "acc": 0.63920984, + "epoch": 0.98, + "learning_rate": 9.972140931454385e-05, + "loss": 1.24413643, + "memory(GiB)": 85.12, + "step": 1255, + "train_speed(iter/s)": 0.035299 + }, + { + "acc": 0.64515247, + "epoch": 0.99, + "learning_rate": 9.971568846991486e-05, + "loss": 1.18529148, + "memory(GiB)": 85.12, + "step": 1260, + "train_speed(iter/s)": 0.035301 + }, + { + "acc": 0.64360232, + "epoch": 0.99, + "learning_rate": 9.970990964958556e-05, + "loss": 1.21833725, + "memory(GiB)": 85.12, + "step": 1265, + "train_speed(iter/s)": 0.035303 + }, + { + "acc": 0.64962535, + "epoch": 1.0, + "learning_rate": 9.970407286029487e-05, + "loss": 1.16980963, + "memory(GiB)": 85.12, + "step": 1270, + "train_speed(iter/s)": 0.035306 + }, + { + "acc": 0.64517279, + "epoch": 1.0, + "learning_rate": 9.969817810884937e-05, + "loss": 1.19798498, + "memory(GiB)": 85.12, + "step": 1275, + "train_speed(iter/s)": 0.035307 + }, + { + "epoch": 1.0, + "eval_acc": 0.66190833959429, + "eval_loss": 1.1170213222503662, + "eval_runtime": 85.7668, + "eval_samples_per_second": 1.084, + "eval_steps_per_second": 1.084, + "step": 1276 + }, + { + "acc": 0.66531973, + "epoch": 1.0, + "learning_rate": 9.969222540212319e-05, + "loss": 1.12897282, + "memory(GiB)": 85.12, + "step": 1280, + "train_speed(iter/s)": 0.035228 + }, + { + "acc": 0.64908504, + "epoch": 1.01, + "learning_rate": 9.968621474705802e-05, + "loss": 1.19679098, + "memory(GiB)": 85.12, + "step": 1285, + "train_speed(iter/s)": 0.035225 + }, + { + "acc": 0.65595608, + "epoch": 1.01, + "learning_rate": 9.96801461506632e-05, + "loss": 1.12893848, + "memory(GiB)": 85.12, + "step": 1290, + "train_speed(iter/s)": 0.035228 + }, + { + "acc": 0.66325932, + "epoch": 1.01, + "learning_rate": 9.967401962001553e-05, + "loss": 1.12414293, + "memory(GiB)": 85.12, + "step": 1295, + "train_speed(iter/s)": 0.035225 + }, + { + "acc": 0.63181615, + "epoch": 1.02, + "learning_rate": 9.966783516225948e-05, + "loss": 1.23086386, + "memory(GiB)": 85.12, + "step": 1300, + "train_speed(iter/s)": 0.035227 + }, + { + "acc": 0.64660926, + "epoch": 1.02, + "learning_rate": 9.966159278460703e-05, + "loss": 1.1694212, + "memory(GiB)": 85.12, + "step": 1305, + "train_speed(iter/s)": 0.035223 + }, + { + "acc": 0.65136437, + "epoch": 1.03, + "learning_rate": 9.965529249433768e-05, + "loss": 1.14605751, + "memory(GiB)": 85.12, + "step": 1310, + "train_speed(iter/s)": 0.035226 + }, + { + "acc": 0.64723616, + "epoch": 1.03, + "learning_rate": 9.964893429879846e-05, + "loss": 1.17278271, + "memory(GiB)": 85.12, + "step": 1315, + "train_speed(iter/s)": 0.035228 + }, + { + "acc": 0.63427768, + "epoch": 1.03, + "learning_rate": 9.9642518205404e-05, + "loss": 1.19455042, + "memory(GiB)": 85.12, + "step": 1320, + "train_speed(iter/s)": 0.03523 + }, + { + "acc": 0.65323257, + "epoch": 1.04, + "learning_rate": 9.963604422163636e-05, + "loss": 1.15521383, + "memory(GiB)": 85.12, + "step": 1325, + "train_speed(iter/s)": 0.035233 + }, + { + "acc": 0.65213785, + "epoch": 1.04, + "learning_rate": 9.962951235504511e-05, + "loss": 1.17218103, + "memory(GiB)": 85.12, + "step": 1330, + "train_speed(iter/s)": 0.035225 + }, + { + "acc": 0.63362112, + "epoch": 1.05, + "learning_rate": 9.962292261324744e-05, + "loss": 1.21349621, + "memory(GiB)": 85.12, + "step": 1335, + "train_speed(iter/s)": 0.035222 + }, + { + "acc": 0.64905601, + "epoch": 1.05, + "learning_rate": 9.961627500392788e-05, + "loss": 1.19248028, + "memory(GiB)": 85.12, + "step": 1340, + "train_speed(iter/s)": 0.035224 + }, + { + "acc": 0.63903928, + "epoch": 1.05, + "learning_rate": 9.960956953483854e-05, + "loss": 1.21704388, + "memory(GiB)": 85.12, + "step": 1345, + "train_speed(iter/s)": 0.035226 + }, + { + "acc": 0.64893613, + "epoch": 1.06, + "learning_rate": 9.960280621379891e-05, + "loss": 1.18590031, + "memory(GiB)": 85.12, + "step": 1350, + "train_speed(iter/s)": 0.035228 + }, + { + "acc": 0.66188636, + "epoch": 1.06, + "learning_rate": 9.959598504869608e-05, + "loss": 1.1234787, + "memory(GiB)": 85.12, + "step": 1355, + "train_speed(iter/s)": 0.035225 + }, + { + "acc": 0.65642624, + "epoch": 1.07, + "learning_rate": 9.958910604748449e-05, + "loss": 1.17125835, + "memory(GiB)": 85.12, + "step": 1360, + "train_speed(iter/s)": 0.035227 + }, + { + "acc": 0.64671488, + "epoch": 1.07, + "learning_rate": 9.958216921818602e-05, + "loss": 1.17184534, + "memory(GiB)": 85.12, + "step": 1365, + "train_speed(iter/s)": 0.035224 + }, + { + "acc": 0.65104051, + "epoch": 1.07, + "learning_rate": 9.957517456889005e-05, + "loss": 1.15897675, + "memory(GiB)": 85.12, + "step": 1370, + "train_speed(iter/s)": 0.035227 + }, + { + "acc": 0.6516345, + "epoch": 1.08, + "learning_rate": 9.956812210775336e-05, + "loss": 1.16180744, + "memory(GiB)": 85.12, + "step": 1375, + "train_speed(iter/s)": 0.035229 + }, + { + "acc": 0.64146729, + "epoch": 1.08, + "learning_rate": 9.956101184300012e-05, + "loss": 1.1801156, + "memory(GiB)": 85.12, + "step": 1380, + "train_speed(iter/s)": 0.035231 + }, + { + "acc": 0.64465218, + "epoch": 1.09, + "learning_rate": 9.955384378292195e-05, + "loss": 1.16287785, + "memory(GiB)": 85.12, + "step": 1385, + "train_speed(iter/s)": 0.035233 + }, + { + "acc": 0.65234551, + "epoch": 1.09, + "learning_rate": 9.954661793587783e-05, + "loss": 1.16832972, + "memory(GiB)": 85.12, + "step": 1390, + "train_speed(iter/s)": 0.03523 + }, + { + "acc": 0.64926839, + "epoch": 1.09, + "learning_rate": 9.953933431029417e-05, + "loss": 1.16717663, + "memory(GiB)": 85.12, + "step": 1395, + "train_speed(iter/s)": 0.035232 + }, + { + "acc": 0.6558826, + "epoch": 1.1, + "learning_rate": 9.953199291466469e-05, + "loss": 1.14773283, + "memory(GiB)": 85.12, + "step": 1400, + "train_speed(iter/s)": 0.035234 + }, + { + "acc": 0.64793453, + "epoch": 1.1, + "learning_rate": 9.952459375755056e-05, + "loss": 1.19053068, + "memory(GiB)": 85.12, + "step": 1405, + "train_speed(iter/s)": 0.035237 + }, + { + "acc": 0.64704976, + "epoch": 1.11, + "learning_rate": 9.951713684758027e-05, + "loss": 1.18572483, + "memory(GiB)": 85.12, + "step": 1410, + "train_speed(iter/s)": 0.035238 + }, + { + "acc": 0.64501376, + "epoch": 1.11, + "learning_rate": 9.950962219344963e-05, + "loss": 1.17802401, + "memory(GiB)": 85.12, + "step": 1415, + "train_speed(iter/s)": 0.03524 + }, + { + "acc": 0.64344835, + "epoch": 1.11, + "learning_rate": 9.950204980392185e-05, + "loss": 1.21547565, + "memory(GiB)": 85.12, + "step": 1420, + "train_speed(iter/s)": 0.035242 + }, + { + "acc": 0.65633788, + "epoch": 1.12, + "learning_rate": 9.94944196878274e-05, + "loss": 1.15950899, + "memory(GiB)": 85.12, + "step": 1425, + "train_speed(iter/s)": 0.035244 + }, + { + "acc": 0.64611464, + "epoch": 1.12, + "learning_rate": 9.948673185406412e-05, + "loss": 1.21565819, + "memory(GiB)": 85.12, + "step": 1430, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.64696469, + "epoch": 1.12, + "learning_rate": 9.947898631159716e-05, + "loss": 1.18466921, + "memory(GiB)": 85.12, + "step": 1435, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.65379381, + "epoch": 1.13, + "learning_rate": 9.947118306945888e-05, + "loss": 1.16033335, + "memory(GiB)": 85.12, + "step": 1440, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.64872618, + "epoch": 1.13, + "learning_rate": 9.946332213674907e-05, + "loss": 1.17809114, + "memory(GiB)": 85.12, + "step": 1445, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.64592113, + "epoch": 1.14, + "learning_rate": 9.945540352263467e-05, + "loss": 1.19367371, + "memory(GiB)": 85.12, + "step": 1450, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.65893774, + "epoch": 1.14, + "learning_rate": 9.944742723634995e-05, + "loss": 1.15910034, + "memory(GiB)": 85.12, + "step": 1455, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.63832693, + "epoch": 1.14, + "learning_rate": 9.943939328719638e-05, + "loss": 1.21491098, + "memory(GiB)": 85.12, + "step": 1460, + "train_speed(iter/s)": 0.035244 + }, + { + "acc": 0.65693431, + "epoch": 1.15, + "learning_rate": 9.943130168454276e-05, + "loss": 1.17500277, + "memory(GiB)": 85.12, + "step": 1465, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.66517062, + "epoch": 1.15, + "learning_rate": 9.942315243782504e-05, + "loss": 1.13439531, + "memory(GiB)": 85.12, + "step": 1470, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.65051446, + "epoch": 1.16, + "learning_rate": 9.941494555654645e-05, + "loss": 1.19706593, + "memory(GiB)": 85.12, + "step": 1475, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.64909263, + "epoch": 1.16, + "learning_rate": 9.940668105027739e-05, + "loss": 1.19326334, + "memory(GiB)": 85.12, + "step": 1480, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.65501137, + "epoch": 1.16, + "learning_rate": 9.939835892865546e-05, + "loss": 1.17640152, + "memory(GiB)": 85.12, + "step": 1485, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.64871545, + "epoch": 1.17, + "learning_rate": 9.938997920138547e-05, + "loss": 1.18505135, + "memory(GiB)": 85.12, + "step": 1490, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.65266371, + "epoch": 1.17, + "learning_rate": 9.938154187823939e-05, + "loss": 1.15669746, + "memory(GiB)": 85.12, + "step": 1495, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.66334338, + "epoch": 1.18, + "learning_rate": 9.937304696905636e-05, + "loss": 1.12421255, + "memory(GiB)": 85.12, + "step": 1500, + "train_speed(iter/s)": 0.035244 + }, + { + "acc": 0.65040469, + "epoch": 1.18, + "learning_rate": 9.93644944837427e-05, + "loss": 1.15235605, + "memory(GiB)": 85.12, + "step": 1505, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.64324026, + "epoch": 1.18, + "learning_rate": 9.935588443227184e-05, + "loss": 1.18840065, + "memory(GiB)": 85.12, + "step": 1510, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.64157495, + "epoch": 1.19, + "learning_rate": 9.934721682468433e-05, + "loss": 1.19425764, + "memory(GiB)": 85.12, + "step": 1515, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.65195494, + "epoch": 1.19, + "learning_rate": 9.933849167108787e-05, + "loss": 1.17684612, + "memory(GiB)": 85.12, + "step": 1520, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.64985819, + "epoch": 1.2, + "learning_rate": 9.932970898165723e-05, + "loss": 1.18239994, + "memory(GiB)": 85.12, + "step": 1525, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.65231233, + "epoch": 1.2, + "learning_rate": 9.932086876663435e-05, + "loss": 1.16985979, + "memory(GiB)": 85.12, + "step": 1530, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.65894971, + "epoch": 1.2, + "learning_rate": 9.931197103632817e-05, + "loss": 1.11519146, + "memory(GiB)": 85.12, + "step": 1535, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.64390192, + "epoch": 1.21, + "learning_rate": 9.930301580111472e-05, + "loss": 1.23124371, + "memory(GiB)": 85.12, + "step": 1540, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.66011586, + "epoch": 1.21, + "learning_rate": 9.929400307143712e-05, + "loss": 1.13707018, + "memory(GiB)": 85.12, + "step": 1545, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.65588398, + "epoch": 1.21, + "learning_rate": 9.928493285780552e-05, + "loss": 1.15754347, + "memory(GiB)": 85.12, + "step": 1550, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.64823947, + "epoch": 1.22, + "learning_rate": 9.927580517079712e-05, + "loss": 1.2073925, + "memory(GiB)": 85.12, + "step": 1555, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.65572858, + "epoch": 1.22, + "learning_rate": 9.926662002105608e-05, + "loss": 1.16998863, + "memory(GiB)": 85.12, + "step": 1560, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.66158614, + "epoch": 1.23, + "learning_rate": 9.925737741929367e-05, + "loss": 1.1435194, + "memory(GiB)": 85.12, + "step": 1565, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.64395499, + "epoch": 1.23, + "learning_rate": 9.924807737628807e-05, + "loss": 1.21585579, + "memory(GiB)": 85.12, + "step": 1570, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.64430709, + "epoch": 1.23, + "learning_rate": 9.923871990288448e-05, + "loss": 1.17890778, + "memory(GiB)": 85.12, + "step": 1575, + "train_speed(iter/s)": 0.035256 + }, + { + "acc": 0.64855452, + "epoch": 1.24, + "learning_rate": 9.922930500999508e-05, + "loss": 1.1541831, + "memory(GiB)": 85.12, + "step": 1580, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.65893106, + "epoch": 1.24, + "learning_rate": 9.9219832708599e-05, + "loss": 1.13851299, + "memory(GiB)": 85.12, + "step": 1585, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.65967436, + "epoch": 1.25, + "learning_rate": 9.921030300974232e-05, + "loss": 1.12484913, + "memory(GiB)": 85.12, + "step": 1590, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.64779596, + "epoch": 1.25, + "learning_rate": 9.920071592453804e-05, + "loss": 1.19642706, + "memory(GiB)": 85.12, + "step": 1595, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.64805603, + "epoch": 1.25, + "learning_rate": 9.919107146416608e-05, + "loss": 1.18528366, + "memory(GiB)": 85.12, + "step": 1600, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.6561811, + "epoch": 1.26, + "learning_rate": 9.918136963987333e-05, + "loss": 1.16669703, + "memory(GiB)": 85.12, + "step": 1605, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.66756039, + "epoch": 1.26, + "learning_rate": 9.917161046297346e-05, + "loss": 1.11620274, + "memory(GiB)": 85.12, + "step": 1610, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.65261698, + "epoch": 1.27, + "learning_rate": 9.916179394484713e-05, + "loss": 1.15845966, + "memory(GiB)": 85.12, + "step": 1615, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.66402683, + "epoch": 1.27, + "learning_rate": 9.915192009694179e-05, + "loss": 1.11538677, + "memory(GiB)": 85.12, + "step": 1620, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.65500402, + "epoch": 1.27, + "learning_rate": 9.91419889307718e-05, + "loss": 1.17441206, + "memory(GiB)": 85.12, + "step": 1625, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.65412102, + "epoch": 1.28, + "learning_rate": 9.913200045791834e-05, + "loss": 1.16534052, + "memory(GiB)": 85.12, + "step": 1630, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.64114447, + "epoch": 1.28, + "learning_rate": 9.912195469002941e-05, + "loss": 1.18363466, + "memory(GiB)": 85.12, + "step": 1635, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.65664382, + "epoch": 1.29, + "learning_rate": 9.911185163881984e-05, + "loss": 1.17111397, + "memory(GiB)": 85.12, + "step": 1640, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.65938292, + "epoch": 1.29, + "learning_rate": 9.910169131607123e-05, + "loss": 1.12585945, + "memory(GiB)": 85.12, + "step": 1645, + "train_speed(iter/s)": 0.03526 + }, + { + "acc": 0.64360905, + "epoch": 1.29, + "learning_rate": 9.909147373363202e-05, + "loss": 1.1944355, + "memory(GiB)": 85.12, + "step": 1650, + "train_speed(iter/s)": 0.035262 + }, + { + "acc": 0.6680274, + "epoch": 1.3, + "learning_rate": 9.908119890341737e-05, + "loss": 1.10261869, + "memory(GiB)": 85.12, + "step": 1655, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.6460361, + "epoch": 1.3, + "learning_rate": 9.907086683740924e-05, + "loss": 1.17447681, + "memory(GiB)": 85.12, + "step": 1660, + "train_speed(iter/s)": 0.035266 + }, + { + "acc": 0.65601449, + "epoch": 1.3, + "learning_rate": 9.906047754765629e-05, + "loss": 1.14073763, + "memory(GiB)": 85.12, + "step": 1665, + "train_speed(iter/s)": 0.035268 + }, + { + "acc": 0.65036592, + "epoch": 1.31, + "learning_rate": 9.905003104627397e-05, + "loss": 1.17392483, + "memory(GiB)": 85.12, + "step": 1670, + "train_speed(iter/s)": 0.03527 + }, + { + "acc": 0.65944376, + "epoch": 1.31, + "learning_rate": 9.90395273454444e-05, + "loss": 1.13640366, + "memory(GiB)": 85.12, + "step": 1675, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.65229316, + "epoch": 1.32, + "learning_rate": 9.902896645741639e-05, + "loss": 1.17808762, + "memory(GiB)": 85.12, + "step": 1680, + "train_speed(iter/s)": 0.035261 + }, + { + "acc": 0.6533186, + "epoch": 1.32, + "learning_rate": 9.901834839450553e-05, + "loss": 1.1593545, + "memory(GiB)": 85.12, + "step": 1685, + "train_speed(iter/s)": 0.035258 + }, + { + "acc": 0.64995356, + "epoch": 1.32, + "learning_rate": 9.900767316909396e-05, + "loss": 1.18070507, + "memory(GiB)": 85.12, + "step": 1690, + "train_speed(iter/s)": 0.03526 + }, + { + "acc": 0.65651155, + "epoch": 1.33, + "learning_rate": 9.899694079363058e-05, + "loss": 1.12338991, + "memory(GiB)": 85.12, + "step": 1695, + "train_speed(iter/s)": 0.035262 + }, + { + "acc": 0.65021605, + "epoch": 1.33, + "learning_rate": 9.898615128063086e-05, + "loss": 1.19300032, + "memory(GiB)": 85.12, + "step": 1700, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.63957796, + "epoch": 1.34, + "learning_rate": 9.897530464267699e-05, + "loss": 1.21851835, + "memory(GiB)": 85.12, + "step": 1705, + "train_speed(iter/s)": 0.035261 + }, + { + "acc": 0.66472945, + "epoch": 1.34, + "learning_rate": 9.896440089241767e-05, + "loss": 1.13589916, + "memory(GiB)": 85.12, + "step": 1710, + "train_speed(iter/s)": 0.035258 + }, + { + "acc": 0.65348268, + "epoch": 1.34, + "learning_rate": 9.895344004256827e-05, + "loss": 1.15424995, + "memory(GiB)": 85.12, + "step": 1715, + "train_speed(iter/s)": 0.03526 + }, + { + "acc": 0.6559926, + "epoch": 1.35, + "learning_rate": 9.894242210591073e-05, + "loss": 1.15576687, + "memory(GiB)": 85.12, + "step": 1720, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.65292115, + "epoch": 1.35, + "learning_rate": 9.893134709529359e-05, + "loss": 1.18022537, + "memory(GiB)": 85.12, + "step": 1725, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.66411386, + "epoch": 1.36, + "learning_rate": 9.892021502363187e-05, + "loss": 1.11516771, + "memory(GiB)": 85.12, + "step": 1730, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.63985095, + "epoch": 1.36, + "learning_rate": 9.89090259039072e-05, + "loss": 1.22506847, + "memory(GiB)": 85.12, + "step": 1735, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.64176164, + "epoch": 1.36, + "learning_rate": 9.889777974916774e-05, + "loss": 1.20334921, + "memory(GiB)": 85.12, + "step": 1740, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.63934112, + "epoch": 1.37, + "learning_rate": 9.888647657252809e-05, + "loss": 1.23192434, + "memory(GiB)": 85.12, + "step": 1745, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.65798678, + "epoch": 1.37, + "learning_rate": 9.887511638716942e-05, + "loss": 1.15708418, + "memory(GiB)": 85.12, + "step": 1750, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.6396348, + "epoch": 1.38, + "learning_rate": 9.886369920633937e-05, + "loss": 1.22064037, + "memory(GiB)": 85.12, + "step": 1755, + "train_speed(iter/s)": 0.035243 + }, + { + "acc": 0.64947276, + "epoch": 1.38, + "learning_rate": 9.885222504335199e-05, + "loss": 1.18840179, + "memory(GiB)": 85.12, + "step": 1760, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.65698085, + "epoch": 1.38, + "learning_rate": 9.884069391158784e-05, + "loss": 1.16323624, + "memory(GiB)": 85.12, + "step": 1765, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.65098138, + "epoch": 1.39, + "learning_rate": 9.88291058244939e-05, + "loss": 1.16516037, + "memory(GiB)": 85.12, + "step": 1770, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.65393229, + "epoch": 1.39, + "learning_rate": 9.881746079558353e-05, + "loss": 1.16837893, + "memory(GiB)": 85.12, + "step": 1775, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.64121981, + "epoch": 1.39, + "learning_rate": 9.880575883843655e-05, + "loss": 1.21210432, + "memory(GiB)": 85.12, + "step": 1780, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.66884899, + "epoch": 1.4, + "learning_rate": 9.879399996669911e-05, + "loss": 1.12667084, + "memory(GiB)": 85.12, + "step": 1785, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.65376649, + "epoch": 1.4, + "learning_rate": 9.878218419408379e-05, + "loss": 1.1607131, + "memory(GiB)": 85.12, + "step": 1790, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.64392834, + "epoch": 1.41, + "learning_rate": 9.877031153436949e-05, + "loss": 1.20786915, + "memory(GiB)": 85.12, + "step": 1795, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.65759964, + "epoch": 1.41, + "learning_rate": 9.875838200140142e-05, + "loss": 1.15941496, + "memory(GiB)": 85.12, + "step": 1800, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.65850377, + "epoch": 1.41, + "learning_rate": 9.874639560909117e-05, + "loss": 1.15575294, + "memory(GiB)": 85.12, + "step": 1805, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.65355692, + "epoch": 1.42, + "learning_rate": 9.873435237141664e-05, + "loss": 1.14666672, + "memory(GiB)": 85.12, + "step": 1810, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.64999933, + "epoch": 1.42, + "learning_rate": 9.872225230242194e-05, + "loss": 1.16884727, + "memory(GiB)": 85.12, + "step": 1815, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.65628324, + "epoch": 1.43, + "learning_rate": 9.871009541621752e-05, + "loss": 1.12916546, + "memory(GiB)": 85.12, + "step": 1820, + "train_speed(iter/s)": 0.035243 + }, + { + "acc": 0.65538239, + "epoch": 1.43, + "learning_rate": 9.869788172698006e-05, + "loss": 1.1587122, + "memory(GiB)": 85.12, + "step": 1825, + "train_speed(iter/s)": 0.035241 + }, + { + "acc": 0.65519543, + "epoch": 1.43, + "learning_rate": 9.868561124895254e-05, + "loss": 1.1943471, + "memory(GiB)": 85.12, + "step": 1830, + "train_speed(iter/s)": 0.035235 + }, + { + "acc": 0.65793271, + "epoch": 1.44, + "learning_rate": 9.867328399644407e-05, + "loss": 1.15140657, + "memory(GiB)": 85.12, + "step": 1835, + "train_speed(iter/s)": 0.035236 + }, + { + "acc": 0.64654632, + "epoch": 1.44, + "learning_rate": 9.866089998383004e-05, + "loss": 1.19984751, + "memory(GiB)": 85.12, + "step": 1840, + "train_speed(iter/s)": 0.035238 + }, + { + "acc": 0.6539813, + "epoch": 1.45, + "learning_rate": 9.864845922555198e-05, + "loss": 1.16101418, + "memory(GiB)": 85.12, + "step": 1845, + "train_speed(iter/s)": 0.035239 + }, + { + "acc": 0.65071869, + "epoch": 1.45, + "learning_rate": 9.863596173611764e-05, + "loss": 1.1871336, + "memory(GiB)": 85.12, + "step": 1850, + "train_speed(iter/s)": 0.035241 + }, + { + "acc": 0.64802847, + "epoch": 1.45, + "learning_rate": 9.862340753010089e-05, + "loss": 1.17077522, + "memory(GiB)": 85.12, + "step": 1855, + "train_speed(iter/s)": 0.035238 + }, + { + "acc": 0.6609251, + "epoch": 1.46, + "learning_rate": 9.861079662214177e-05, + "loss": 1.12675228, + "memory(GiB)": 85.12, + "step": 1860, + "train_speed(iter/s)": 0.035236 + }, + { + "acc": 0.652352, + "epoch": 1.46, + "learning_rate": 9.85981290269464e-05, + "loss": 1.17576647, + "memory(GiB)": 85.12, + "step": 1865, + "train_speed(iter/s)": 0.035233 + }, + { + "acc": 0.65460477, + "epoch": 1.47, + "learning_rate": 9.858540475928706e-05, + "loss": 1.17708349, + "memory(GiB)": 85.12, + "step": 1870, + "train_speed(iter/s)": 0.035235 + }, + { + "acc": 0.6584599, + "epoch": 1.47, + "learning_rate": 9.857262383400207e-05, + "loss": 1.13749962, + "memory(GiB)": 85.12, + "step": 1875, + "train_speed(iter/s)": 0.035233 + }, + { + "acc": 0.6569325, + "epoch": 1.47, + "learning_rate": 9.855978626599585e-05, + "loss": 1.14678946, + "memory(GiB)": 85.12, + "step": 1880, + "train_speed(iter/s)": 0.035235 + }, + { + "acc": 0.64635262, + "epoch": 1.48, + "learning_rate": 9.854689207023887e-05, + "loss": 1.17655993, + "memory(GiB)": 85.12, + "step": 1885, + "train_speed(iter/s)": 0.035236 + }, + { + "acc": 0.65549922, + "epoch": 1.48, + "learning_rate": 9.853394126176763e-05, + "loss": 1.14425611, + "memory(GiB)": 85.12, + "step": 1890, + "train_speed(iter/s)": 0.035237 + }, + { + "acc": 0.66941037, + "epoch": 1.49, + "learning_rate": 9.852093385568466e-05, + "loss": 1.08940992, + "memory(GiB)": 85.12, + "step": 1895, + "train_speed(iter/s)": 0.035238 + }, + { + "acc": 0.6526649, + "epoch": 1.49, + "learning_rate": 9.850786986715846e-05, + "loss": 1.15163832, + "memory(GiB)": 85.12, + "step": 1900, + "train_speed(iter/s)": 0.03524 + }, + { + "acc": 0.65831594, + "epoch": 1.49, + "learning_rate": 9.849474931142353e-05, + "loss": 1.12980242, + "memory(GiB)": 85.12, + "step": 1905, + "train_speed(iter/s)": 0.035242 + }, + { + "acc": 0.66181054, + "epoch": 1.5, + "learning_rate": 9.848157220378038e-05, + "loss": 1.14682779, + "memory(GiB)": 85.12, + "step": 1910, + "train_speed(iter/s)": 0.035243 + }, + { + "acc": 0.64066448, + "epoch": 1.5, + "learning_rate": 9.846833855959539e-05, + "loss": 1.22032328, + "memory(GiB)": 85.12, + "step": 1915, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.6627368, + "epoch": 1.5, + "learning_rate": 9.845504839430091e-05, + "loss": 1.11947041, + "memory(GiB)": 85.12, + "step": 1920, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.65865917, + "epoch": 1.51, + "learning_rate": 9.844170172339521e-05, + "loss": 1.16217585, + "memory(GiB)": 85.12, + "step": 1925, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.65347538, + "epoch": 1.51, + "learning_rate": 9.842829856244247e-05, + "loss": 1.16731787, + "memory(GiB)": 85.12, + "step": 1930, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.64839239, + "epoch": 1.52, + "learning_rate": 9.841483892707268e-05, + "loss": 1.18971329, + "memory(GiB)": 85.12, + "step": 1935, + "train_speed(iter/s)": 0.035243 + }, + { + "acc": 0.66025143, + "epoch": 1.52, + "learning_rate": 9.840132283298172e-05, + "loss": 1.12929058, + "memory(GiB)": 85.12, + "step": 1940, + "train_speed(iter/s)": 0.035244 + }, + { + "acc": 0.65671482, + "epoch": 1.52, + "learning_rate": 9.838775029593135e-05, + "loss": 1.12444181, + "memory(GiB)": 85.12, + "step": 1945, + "train_speed(iter/s)": 0.035239 + }, + { + "acc": 0.65074253, + "epoch": 1.53, + "learning_rate": 9.837412133174911e-05, + "loss": 1.14755917, + "memory(GiB)": 85.12, + "step": 1950, + "train_speed(iter/s)": 0.035241 + }, + { + "acc": 0.64375038, + "epoch": 1.53, + "learning_rate": 9.836043595632832e-05, + "loss": 1.17292156, + "memory(GiB)": 85.12, + "step": 1955, + "train_speed(iter/s)": 0.035239 + }, + { + "acc": 0.65182729, + "epoch": 1.54, + "learning_rate": 9.834669418562811e-05, + "loss": 1.17811108, + "memory(GiB)": 85.12, + "step": 1960, + "train_speed(iter/s)": 0.035237 + }, + { + "acc": 0.65077796, + "epoch": 1.54, + "learning_rate": 9.833289603567341e-05, + "loss": 1.1634614, + "memory(GiB)": 85.12, + "step": 1965, + "train_speed(iter/s)": 0.035238 + }, + { + "acc": 0.64500337, + "epoch": 1.54, + "learning_rate": 9.831904152255486e-05, + "loss": 1.18109503, + "memory(GiB)": 85.12, + "step": 1970, + "train_speed(iter/s)": 0.035239 + }, + { + "acc": 0.65929651, + "epoch": 1.55, + "learning_rate": 9.830513066242882e-05, + "loss": 1.14037209, + "memory(GiB)": 85.12, + "step": 1975, + "train_speed(iter/s)": 0.035236 + }, + { + "acc": 0.66190724, + "epoch": 1.55, + "learning_rate": 9.829116347151737e-05, + "loss": 1.16235342, + "memory(GiB)": 85.12, + "step": 1980, + "train_speed(iter/s)": 0.035237 + }, + { + "acc": 0.65755038, + "epoch": 1.56, + "learning_rate": 9.827713996610826e-05, + "loss": 1.16937008, + "memory(GiB)": 85.12, + "step": 1985, + "train_speed(iter/s)": 0.035239 + }, + { + "acc": 0.64079676, + "epoch": 1.56, + "learning_rate": 9.826306016255498e-05, + "loss": 1.19097614, + "memory(GiB)": 85.12, + "step": 1990, + "train_speed(iter/s)": 0.035241 + }, + { + "acc": 0.66023755, + "epoch": 1.56, + "learning_rate": 9.824892407727656e-05, + "loss": 1.12927694, + "memory(GiB)": 85.12, + "step": 1995, + "train_speed(iter/s)": 0.035242 + }, + { + "acc": 0.65292602, + "epoch": 1.57, + "learning_rate": 9.823473172675777e-05, + "loss": 1.16442251, + "memory(GiB)": 85.12, + "step": 2000, + "train_speed(iter/s)": 0.035243 + }, + { + "acc": 0.65421362, + "epoch": 1.57, + "learning_rate": 9.822048312754893e-05, + "loss": 1.16522408, + "memory(GiB)": 85.12, + "step": 2005, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.65284295, + "epoch": 1.58, + "learning_rate": 9.820617829626598e-05, + "loss": 1.17013979, + "memory(GiB)": 85.12, + "step": 2010, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.64222498, + "epoch": 1.58, + "learning_rate": 9.819181724959044e-05, + "loss": 1.23573723, + "memory(GiB)": 85.12, + "step": 2015, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.66108804, + "epoch": 1.58, + "learning_rate": 9.817740000426932e-05, + "loss": 1.13777189, + "memory(GiB)": 85.12, + "step": 2020, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.66587753, + "epoch": 1.59, + "learning_rate": 9.816292657711527e-05, + "loss": 1.11172771, + "memory(GiB)": 85.12, + "step": 2025, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.65847149, + "epoch": 1.59, + "learning_rate": 9.814839698500641e-05, + "loss": 1.14090157, + "memory(GiB)": 85.12, + "step": 2030, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.65127797, + "epoch": 1.59, + "learning_rate": 9.813381124488631e-05, + "loss": 1.16807508, + "memory(GiB)": 85.12, + "step": 2035, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.66447649, + "epoch": 1.6, + "learning_rate": 9.811916937376409e-05, + "loss": 1.16490545, + "memory(GiB)": 85.12, + "step": 2040, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.65319815, + "epoch": 1.6, + "learning_rate": 9.810447138871426e-05, + "loss": 1.15913305, + "memory(GiB)": 85.12, + "step": 2045, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.65084143, + "epoch": 1.61, + "learning_rate": 9.808971730687684e-05, + "loss": 1.15471087, + "memory(GiB)": 85.12, + "step": 2050, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.65464187, + "epoch": 1.61, + "learning_rate": 9.80749071454572e-05, + "loss": 1.14399872, + "memory(GiB)": 85.12, + "step": 2055, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.65305209, + "epoch": 1.61, + "learning_rate": 9.806004092172616e-05, + "loss": 1.15933371, + "memory(GiB)": 85.12, + "step": 2060, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.64206853, + "epoch": 1.62, + "learning_rate": 9.804511865301989e-05, + "loss": 1.18681612, + "memory(GiB)": 85.12, + "step": 2065, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.65472612, + "epoch": 1.62, + "learning_rate": 9.803014035673987e-05, + "loss": 1.17128534, + "memory(GiB)": 85.12, + "step": 2070, + "train_speed(iter/s)": 0.035256 + }, + { + "acc": 0.66416421, + "epoch": 1.63, + "learning_rate": 9.801510605035303e-05, + "loss": 1.12266273, + "memory(GiB)": 85.12, + "step": 2075, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.65653076, + "epoch": 1.63, + "learning_rate": 9.800001575139152e-05, + "loss": 1.11308479, + "memory(GiB)": 85.12, + "step": 2080, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.65959449, + "epoch": 1.63, + "learning_rate": 9.798486947745282e-05, + "loss": 1.12792482, + "memory(GiB)": 85.12, + "step": 2085, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.65187888, + "epoch": 1.64, + "learning_rate": 9.796966724619967e-05, + "loss": 1.16060781, + "memory(GiB)": 85.12, + "step": 2090, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.65552635, + "epoch": 1.64, + "learning_rate": 9.79544090753601e-05, + "loss": 1.13989534, + "memory(GiB)": 85.12, + "step": 2095, + "train_speed(iter/s)": 0.035256 + }, + { + "acc": 0.65974436, + "epoch": 1.65, + "learning_rate": 9.793909498272733e-05, + "loss": 1.1274127, + "memory(GiB)": 85.12, + "step": 2100, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.66153345, + "epoch": 1.65, + "learning_rate": 9.792372498615981e-05, + "loss": 1.13705215, + "memory(GiB)": 85.12, + "step": 2105, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.64935384, + "epoch": 1.65, + "learning_rate": 9.790829910358122e-05, + "loss": 1.17313042, + "memory(GiB)": 85.12, + "step": 2110, + "train_speed(iter/s)": 0.03526 + }, + { + "acc": 0.66233768, + "epoch": 1.66, + "learning_rate": 9.789281735298032e-05, + "loss": 1.09848804, + "memory(GiB)": 85.12, + "step": 2115, + "train_speed(iter/s)": 0.035262 + }, + { + "acc": 0.67054353, + "epoch": 1.66, + "learning_rate": 9.787727975241111e-05, + "loss": 1.11139088, + "memory(GiB)": 85.12, + "step": 2120, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.66447287, + "epoch": 1.67, + "learning_rate": 9.786168631999269e-05, + "loss": 1.13588085, + "memory(GiB)": 85.12, + "step": 2125, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.65222578, + "epoch": 1.67, + "learning_rate": 9.784603707390922e-05, + "loss": 1.17907152, + "memory(GiB)": 85.12, + "step": 2130, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.65970678, + "epoch": 1.67, + "learning_rate": 9.783033203241006e-05, + "loss": 1.14419537, + "memory(GiB)": 85.12, + "step": 2135, + "train_speed(iter/s)": 0.035262 + }, + { + "acc": 0.65925775, + "epoch": 1.68, + "learning_rate": 9.78145712138095e-05, + "loss": 1.1633584, + "memory(GiB)": 85.12, + "step": 2140, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.66307669, + "epoch": 1.68, + "learning_rate": 9.779875463648698e-05, + "loss": 1.12448187, + "memory(GiB)": 85.12, + "step": 2145, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.65571504, + "epoch": 1.68, + "learning_rate": 9.77828823188869e-05, + "loss": 1.20079041, + "memory(GiB)": 85.12, + "step": 2150, + "train_speed(iter/s)": 0.035266 + }, + { + "acc": 0.66194067, + "epoch": 1.69, + "learning_rate": 9.77669542795187e-05, + "loss": 1.10752869, + "memory(GiB)": 85.12, + "step": 2155, + "train_speed(iter/s)": 0.035267 + }, + { + "acc": 0.6523849, + "epoch": 1.69, + "learning_rate": 9.775097053695677e-05, + "loss": 1.15065937, + "memory(GiB)": 85.12, + "step": 2160, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.64300294, + "epoch": 1.7, + "learning_rate": 9.773493110984047e-05, + "loss": 1.17376556, + "memory(GiB)": 85.12, + "step": 2165, + "train_speed(iter/s)": 0.035267 + }, + { + "acc": 0.65471711, + "epoch": 1.7, + "learning_rate": 9.77188360168741e-05, + "loss": 1.16260157, + "memory(GiB)": 85.12, + "step": 2170, + "train_speed(iter/s)": 0.035268 + }, + { + "acc": 0.66860032, + "epoch": 1.7, + "learning_rate": 9.770268527682687e-05, + "loss": 1.09885559, + "memory(GiB)": 85.12, + "step": 2175, + "train_speed(iter/s)": 0.035266 + }, + { + "acc": 0.6501049, + "epoch": 1.71, + "learning_rate": 9.76864789085329e-05, + "loss": 1.17552853, + "memory(GiB)": 85.12, + "step": 2180, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.64576588, + "epoch": 1.71, + "learning_rate": 9.767021693089116e-05, + "loss": 1.19620943, + "memory(GiB)": 85.12, + "step": 2185, + "train_speed(iter/s)": 0.035266 + }, + { + "acc": 0.66087532, + "epoch": 1.72, + "learning_rate": 9.765389936286545e-05, + "loss": 1.1152669, + "memory(GiB)": 85.12, + "step": 2190, + "train_speed(iter/s)": 0.035267 + }, + { + "acc": 0.66009383, + "epoch": 1.72, + "learning_rate": 9.763752622348445e-05, + "loss": 1.13289509, + "memory(GiB)": 85.12, + "step": 2195, + "train_speed(iter/s)": 0.035269 + }, + { + "acc": 0.66180921, + "epoch": 1.72, + "learning_rate": 9.762109753184159e-05, + "loss": 1.1201334, + "memory(GiB)": 85.12, + "step": 2200, + "train_speed(iter/s)": 0.03527 + }, + { + "acc": 0.65290222, + "epoch": 1.73, + "learning_rate": 9.760461330709513e-05, + "loss": 1.1867281, + "memory(GiB)": 85.12, + "step": 2205, + "train_speed(iter/s)": 0.035272 + }, + { + "acc": 0.65245934, + "epoch": 1.73, + "learning_rate": 9.758807356846804e-05, + "loss": 1.1851923, + "memory(GiB)": 85.12, + "step": 2210, + "train_speed(iter/s)": 0.035274 + }, + { + "acc": 0.6554101, + "epoch": 1.74, + "learning_rate": 9.757147833524808e-05, + "loss": 1.12435026, + "memory(GiB)": 85.12, + "step": 2215, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.66130071, + "epoch": 1.74, + "learning_rate": 9.755482762678768e-05, + "loss": 1.11828518, + "memory(GiB)": 85.12, + "step": 2220, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.65428829, + "epoch": 1.74, + "learning_rate": 9.753812146250398e-05, + "loss": 1.17555447, + "memory(GiB)": 85.12, + "step": 2225, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.65815506, + "epoch": 1.75, + "learning_rate": 9.75213598618788e-05, + "loss": 1.14015465, + "memory(GiB)": 85.12, + "step": 2230, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.6582922, + "epoch": 1.75, + "learning_rate": 9.750454284445859e-05, + "loss": 1.13366365, + "memory(GiB)": 85.12, + "step": 2235, + "train_speed(iter/s)": 0.035274 + }, + { + "acc": 0.64991212, + "epoch": 1.76, + "learning_rate": 9.748767042985442e-05, + "loss": 1.17165375, + "memory(GiB)": 85.12, + "step": 2240, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.65585694, + "epoch": 1.76, + "learning_rate": 9.7470742637742e-05, + "loss": 1.15650015, + "memory(GiB)": 85.12, + "step": 2245, + "train_speed(iter/s)": 0.035277 + }, + { + "acc": 0.65717545, + "epoch": 1.76, + "learning_rate": 9.745375948786158e-05, + "loss": 1.15424328, + "memory(GiB)": 85.12, + "step": 2250, + "train_speed(iter/s)": 0.035278 + }, + { + "acc": 0.65874028, + "epoch": 1.77, + "learning_rate": 9.743672100001793e-05, + "loss": 1.14350729, + "memory(GiB)": 85.12, + "step": 2255, + "train_speed(iter/s)": 0.035279 + }, + { + "acc": 0.64995842, + "epoch": 1.77, + "learning_rate": 9.741962719408047e-05, + "loss": 1.16558609, + "memory(GiB)": 85.12, + "step": 2260, + "train_speed(iter/s)": 0.035281 + }, + { + "acc": 0.6412828, + "epoch": 1.78, + "learning_rate": 9.7402478089983e-05, + "loss": 1.24160509, + "memory(GiB)": 85.12, + "step": 2265, + "train_speed(iter/s)": 0.035279 + }, + { + "acc": 0.66066208, + "epoch": 1.78, + "learning_rate": 9.738527370772387e-05, + "loss": 1.13935509, + "memory(GiB)": 85.12, + "step": 2270, + "train_speed(iter/s)": 0.03528 + }, + { + "acc": 0.65170732, + "epoch": 1.78, + "learning_rate": 9.73680140673659e-05, + "loss": 1.15450306, + "memory(GiB)": 85.12, + "step": 2275, + "train_speed(iter/s)": 0.035282 + }, + { + "acc": 0.65960178, + "epoch": 1.79, + "learning_rate": 9.735069918903635e-05, + "loss": 1.13573933, + "memory(GiB)": 85.12, + "step": 2280, + "train_speed(iter/s)": 0.035283 + }, + { + "acc": 0.66337166, + "epoch": 1.79, + "learning_rate": 9.733332909292684e-05, + "loss": 1.15319395, + "memory(GiB)": 85.12, + "step": 2285, + "train_speed(iter/s)": 0.035284 + }, + { + "acc": 0.66128883, + "epoch": 1.79, + "learning_rate": 9.731590379929345e-05, + "loss": 1.158424, + "memory(GiB)": 85.12, + "step": 2290, + "train_speed(iter/s)": 0.035285 + }, + { + "acc": 0.65605984, + "epoch": 1.8, + "learning_rate": 9.729842332845657e-05, + "loss": 1.15069437, + "memory(GiB)": 85.12, + "step": 2295, + "train_speed(iter/s)": 0.035282 + }, + { + "acc": 0.63679175, + "epoch": 1.8, + "learning_rate": 9.7280887700801e-05, + "loss": 1.2136096, + "memory(GiB)": 85.12, + "step": 2300, + "train_speed(iter/s)": 0.035283 + }, + { + "acc": 0.64580112, + "epoch": 1.81, + "learning_rate": 9.726329693677578e-05, + "loss": 1.19345636, + "memory(GiB)": 85.12, + "step": 2305, + "train_speed(iter/s)": 0.035284 + }, + { + "acc": 0.65686107, + "epoch": 1.81, + "learning_rate": 9.724565105689432e-05, + "loss": 1.13980618, + "memory(GiB)": 85.12, + "step": 2310, + "train_speed(iter/s)": 0.03528 + }, + { + "acc": 0.66553755, + "epoch": 1.81, + "learning_rate": 9.722795008173427e-05, + "loss": 1.1280262, + "memory(GiB)": 85.12, + "step": 2315, + "train_speed(iter/s)": 0.035281 + }, + { + "acc": 0.65331993, + "epoch": 1.82, + "learning_rate": 9.721019403193753e-05, + "loss": 1.16992741, + "memory(GiB)": 85.12, + "step": 2320, + "train_speed(iter/s)": 0.035281 + }, + { + "acc": 0.65903072, + "epoch": 1.82, + "learning_rate": 9.719238292821022e-05, + "loss": 1.15253115, + "memory(GiB)": 85.12, + "step": 2325, + "train_speed(iter/s)": 0.035282 + }, + { + "acc": 0.65842004, + "epoch": 1.83, + "learning_rate": 9.71745167913227e-05, + "loss": 1.15765343, + "memory(GiB)": 85.12, + "step": 2330, + "train_speed(iter/s)": 0.035282 + }, + { + "acc": 0.64939175, + "epoch": 1.83, + "learning_rate": 9.715659564210944e-05, + "loss": 1.1643466, + "memory(GiB)": 85.12, + "step": 2335, + "train_speed(iter/s)": 0.035284 + }, + { + "acc": 0.66922626, + "epoch": 1.83, + "learning_rate": 9.713861950146912e-05, + "loss": 1.1116375, + "memory(GiB)": 85.12, + "step": 2340, + "train_speed(iter/s)": 0.035279 + }, + { + "acc": 0.64719138, + "epoch": 1.84, + "learning_rate": 9.712058839036451e-05, + "loss": 1.20366507, + "memory(GiB)": 85.12, + "step": 2345, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.65068803, + "epoch": 1.84, + "learning_rate": 9.71025023298225e-05, + "loss": 1.14096384, + "memory(GiB)": 85.12, + "step": 2350, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.6618053, + "epoch": 1.85, + "learning_rate": 9.708436134093408e-05, + "loss": 1.13345575, + "memory(GiB)": 85.12, + "step": 2355, + "train_speed(iter/s)": 0.035273 + }, + { + "acc": 0.65803671, + "epoch": 1.85, + "learning_rate": 9.706616544485428e-05, + "loss": 1.14154787, + "memory(GiB)": 85.12, + "step": 2360, + "train_speed(iter/s)": 0.035274 + }, + { + "acc": 0.65964813, + "epoch": 1.85, + "learning_rate": 9.70479146628021e-05, + "loss": 1.11802235, + "memory(GiB)": 85.12, + "step": 2365, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.6604876, + "epoch": 1.86, + "learning_rate": 9.702960901606064e-05, + "loss": 1.1343956, + "memory(GiB)": 85.12, + "step": 2370, + "train_speed(iter/s)": 0.035273 + }, + { + "acc": 0.66209579, + "epoch": 1.86, + "learning_rate": 9.701124852597692e-05, + "loss": 1.11040306, + "memory(GiB)": 85.12, + "step": 2375, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.66260943, + "epoch": 1.87, + "learning_rate": 9.699283321396195e-05, + "loss": 1.12860765, + "memory(GiB)": 85.12, + "step": 2380, + "train_speed(iter/s)": 0.035273 + }, + { + "acc": 0.66986876, + "epoch": 1.87, + "learning_rate": 9.697436310149066e-05, + "loss": 1.09946795, + "memory(GiB)": 85.12, + "step": 2385, + "train_speed(iter/s)": 0.035274 + }, + { + "acc": 0.67240357, + "epoch": 1.87, + "learning_rate": 9.695583821010184e-05, + "loss": 1.12718344, + "memory(GiB)": 85.12, + "step": 2390, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.65421391, + "epoch": 1.88, + "learning_rate": 9.693725856139824e-05, + "loss": 1.14639235, + "memory(GiB)": 85.12, + "step": 2395, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.64448304, + "epoch": 1.88, + "learning_rate": 9.69186241770464e-05, + "loss": 1.21207218, + "memory(GiB)": 85.12, + "step": 2400, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.66120443, + "epoch": 1.88, + "learning_rate": 9.689993507877673e-05, + "loss": 1.12814407, + "memory(GiB)": 85.12, + "step": 2405, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.65368838, + "epoch": 1.89, + "learning_rate": 9.68811912883834e-05, + "loss": 1.15181837, + "memory(GiB)": 85.12, + "step": 2410, + "train_speed(iter/s)": 0.035278 + }, + { + "acc": 0.65113912, + "epoch": 1.89, + "learning_rate": 9.686239282772442e-05, + "loss": 1.16465178, + "memory(GiB)": 85.12, + "step": 2415, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.65074034, + "epoch": 1.9, + "learning_rate": 9.68435397187215e-05, + "loss": 1.16231499, + "memory(GiB)": 85.12, + "step": 2420, + "train_speed(iter/s)": 0.035277 + }, + { + "acc": 0.65878544, + "epoch": 1.9, + "learning_rate": 9.68246319833601e-05, + "loss": 1.14520617, + "memory(GiB)": 85.12, + "step": 2425, + "train_speed(iter/s)": 0.035278 + }, + { + "acc": 0.64476914, + "epoch": 1.9, + "learning_rate": 9.68056696436894e-05, + "loss": 1.20143547, + "memory(GiB)": 85.12, + "step": 2430, + "train_speed(iter/s)": 0.035278 + }, + { + "acc": 0.64212346, + "epoch": 1.91, + "learning_rate": 9.678665272182221e-05, + "loss": 1.22368813, + "memory(GiB)": 85.12, + "step": 2435, + "train_speed(iter/s)": 0.035278 + }, + { + "acc": 0.65965805, + "epoch": 1.91, + "learning_rate": 9.676758123993504e-05, + "loss": 1.12663536, + "memory(GiB)": 85.12, + "step": 2440, + "train_speed(iter/s)": 0.035279 + }, + { + "acc": 0.66423464, + "epoch": 1.92, + "learning_rate": 9.674845522026799e-05, + "loss": 1.12610149, + "memory(GiB)": 85.12, + "step": 2445, + "train_speed(iter/s)": 0.03528 + }, + { + "acc": 0.66499667, + "epoch": 1.92, + "learning_rate": 9.672927468512476e-05, + "loss": 1.1370595, + "memory(GiB)": 85.12, + "step": 2450, + "train_speed(iter/s)": 0.035281 + }, + { + "acc": 0.65996475, + "epoch": 1.92, + "learning_rate": 9.671003965687266e-05, + "loss": 1.14243317, + "memory(GiB)": 85.12, + "step": 2455, + "train_speed(iter/s)": 0.035282 + }, + { + "acc": 0.66339087, + "epoch": 1.93, + "learning_rate": 9.669075015794252e-05, + "loss": 1.11444197, + "memory(GiB)": 85.12, + "step": 2460, + "train_speed(iter/s)": 0.035283 + }, + { + "acc": 0.65870614, + "epoch": 1.93, + "learning_rate": 9.667140621082867e-05, + "loss": 1.1119628, + "memory(GiB)": 85.12, + "step": 2465, + "train_speed(iter/s)": 0.035281 + }, + { + "acc": 0.66413293, + "epoch": 1.94, + "learning_rate": 9.665200783808897e-05, + "loss": 1.12356319, + "memory(GiB)": 85.12, + "step": 2470, + "train_speed(iter/s)": 0.035279 + }, + { + "acc": 0.65245867, + "epoch": 1.94, + "learning_rate": 9.663255506234474e-05, + "loss": 1.15396376, + "memory(GiB)": 85.12, + "step": 2475, + "train_speed(iter/s)": 0.035281 + }, + { + "acc": 0.65678835, + "epoch": 1.94, + "learning_rate": 9.661304790628073e-05, + "loss": 1.16068563, + "memory(GiB)": 85.12, + "step": 2480, + "train_speed(iter/s)": 0.035279 + }, + { + "acc": 0.65423017, + "epoch": 1.95, + "learning_rate": 9.659348639264512e-05, + "loss": 1.16609097, + "memory(GiB)": 85.12, + "step": 2485, + "train_speed(iter/s)": 0.03528 + }, + { + "acc": 0.67275624, + "epoch": 1.95, + "learning_rate": 9.657387054424945e-05, + "loss": 1.09347582, + "memory(GiB)": 85.12, + "step": 2490, + "train_speed(iter/s)": 0.035282 + }, + { + "acc": 0.67117119, + "epoch": 1.96, + "learning_rate": 9.655420038396868e-05, + "loss": 1.11645508, + "memory(GiB)": 85.12, + "step": 2495, + "train_speed(iter/s)": 0.035283 + }, + { + "acc": 0.66794109, + "epoch": 1.96, + "learning_rate": 9.653447593474102e-05, + "loss": 1.10899277, + "memory(GiB)": 85.12, + "step": 2500, + "train_speed(iter/s)": 0.035284 + }, + { + "acc": 0.64761634, + "epoch": 1.96, + "learning_rate": 9.651469721956807e-05, + "loss": 1.18057451, + "memory(GiB)": 85.12, + "step": 2505, + "train_speed(iter/s)": 0.035282 + }, + { + "acc": 0.65636969, + "epoch": 1.97, + "learning_rate": 9.649486426151468e-05, + "loss": 1.15329361, + "memory(GiB)": 85.12, + "step": 2510, + "train_speed(iter/s)": 0.035283 + }, + { + "acc": 0.66316671, + "epoch": 1.97, + "learning_rate": 9.647497708370894e-05, + "loss": 1.13145294, + "memory(GiB)": 85.12, + "step": 2515, + "train_speed(iter/s)": 0.035284 + }, + { + "acc": 0.66438212, + "epoch": 1.97, + "learning_rate": 9.64550357093422e-05, + "loss": 1.10747566, + "memory(GiB)": 85.12, + "step": 2520, + "train_speed(iter/s)": 0.035285 + }, + { + "acc": 0.66867604, + "epoch": 1.98, + "learning_rate": 9.643504016166897e-05, + "loss": 1.10191135, + "memory(GiB)": 85.12, + "step": 2525, + "train_speed(iter/s)": 0.035284 + }, + { + "acc": 0.65627751, + "epoch": 1.98, + "learning_rate": 9.6414990464007e-05, + "loss": 1.14236097, + "memory(GiB)": 85.12, + "step": 2530, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.6637876, + "epoch": 1.99, + "learning_rate": 9.639488663973708e-05, + "loss": 1.13195019, + "memory(GiB)": 85.12, + "step": 2535, + "train_speed(iter/s)": 0.035278 + }, + { + "acc": 0.66524153, + "epoch": 1.99, + "learning_rate": 9.637472871230322e-05, + "loss": 1.11181889, + "memory(GiB)": 85.12, + "step": 2540, + "train_speed(iter/s)": 0.035279 + }, + { + "acc": 0.66480532, + "epoch": 1.99, + "learning_rate": 9.635451670521249e-05, + "loss": 1.11331224, + "memory(GiB)": 85.12, + "step": 2545, + "train_speed(iter/s)": 0.035277 + }, + { + "acc": 0.66729088, + "epoch": 2.0, + "learning_rate": 9.633425064203503e-05, + "loss": 1.09206867, + "memory(GiB)": 85.12, + "step": 2550, + "train_speed(iter/s)": 0.035276 + }, + { + "epoch": 2.0, + "eval_acc": 0.6775607312797396, + "eval_loss": 1.057321310043335, + "eval_runtime": 85.2273, + "eval_samples_per_second": 1.091, + "eval_steps_per_second": 1.091, + "step": 2552 + }, + { + "acc": 0.67183886, + "epoch": 2.0, + "learning_rate": 9.631393054640398e-05, + "loss": 1.07645693, + "memory(GiB)": 85.12, + "step": 2555, + "train_speed(iter/s)": 0.035237 + }, + { + "acc": 0.66320515, + "epoch": 2.01, + "learning_rate": 9.629355644201553e-05, + "loss": 1.10909252, + "memory(GiB)": 85.12, + "step": 2560, + "train_speed(iter/s)": 0.035233 + }, + { + "acc": 0.66873536, + "epoch": 2.01, + "learning_rate": 9.627312835262885e-05, + "loss": 1.09901686, + "memory(GiB)": 85.12, + "step": 2565, + "train_speed(iter/s)": 0.035231 + }, + { + "acc": 0.66333971, + "epoch": 2.01, + "learning_rate": 9.625264630206602e-05, + "loss": 1.11735725, + "memory(GiB)": 85.12, + "step": 2570, + "train_speed(iter/s)": 0.035232 + }, + { + "acc": 0.65929585, + "epoch": 2.02, + "learning_rate": 9.623211031421212e-05, + "loss": 1.12093697, + "memory(GiB)": 85.12, + "step": 2575, + "train_speed(iter/s)": 0.035233 + }, + { + "acc": 0.66388683, + "epoch": 2.02, + "learning_rate": 9.621152041301507e-05, + "loss": 1.11663198, + "memory(GiB)": 85.12, + "step": 2580, + "train_speed(iter/s)": 0.035231 + }, + { + "acc": 0.65557284, + "epoch": 2.03, + "learning_rate": 9.619087662248569e-05, + "loss": 1.13853168, + "memory(GiB)": 85.12, + "step": 2585, + "train_speed(iter/s)": 0.035232 + }, + { + "acc": 0.6557622, + "epoch": 2.03, + "learning_rate": 9.61701789666976e-05, + "loss": 1.14238157, + "memory(GiB)": 85.12, + "step": 2590, + "train_speed(iter/s)": 0.035234 + }, + { + "acc": 0.67138915, + "epoch": 2.03, + "learning_rate": 9.614942746978733e-05, + "loss": 1.0764698, + "memory(GiB)": 85.12, + "step": 2595, + "train_speed(iter/s)": 0.035232 + }, + { + "acc": 0.65261388, + "epoch": 2.04, + "learning_rate": 9.612862215595406e-05, + "loss": 1.1417222, + "memory(GiB)": 85.12, + "step": 2600, + "train_speed(iter/s)": 0.035233 + }, + { + "acc": 0.66247792, + "epoch": 2.04, + "learning_rate": 9.610776304945986e-05, + "loss": 1.13462439, + "memory(GiB)": 85.12, + "step": 2605, + "train_speed(iter/s)": 0.035235 + }, + { + "acc": 0.67750583, + "epoch": 2.05, + "learning_rate": 9.608685017462944e-05, + "loss": 1.0703146, + "memory(GiB)": 85.12, + "step": 2610, + "train_speed(iter/s)": 0.035236 + }, + { + "acc": 0.66992674, + "epoch": 2.05, + "learning_rate": 9.606588355585025e-05, + "loss": 1.10587053, + "memory(GiB)": 85.12, + "step": 2615, + "train_speed(iter/s)": 0.035237 + }, + { + "acc": 0.66086774, + "epoch": 2.05, + "learning_rate": 9.604486321757242e-05, + "loss": 1.13685856, + "memory(GiB)": 85.12, + "step": 2620, + "train_speed(iter/s)": 0.035238 + }, + { + "acc": 0.67727141, + "epoch": 2.06, + "learning_rate": 9.60237891843087e-05, + "loss": 1.06712856, + "memory(GiB)": 85.12, + "step": 2625, + "train_speed(iter/s)": 0.035239 + }, + { + "acc": 0.66415348, + "epoch": 2.06, + "learning_rate": 9.600266148063448e-05, + "loss": 1.16380205, + "memory(GiB)": 85.12, + "step": 2630, + "train_speed(iter/s)": 0.03524 + }, + { + "acc": 0.65921197, + "epoch": 2.07, + "learning_rate": 9.598148013118771e-05, + "loss": 1.11800652, + "memory(GiB)": 85.12, + "step": 2635, + "train_speed(iter/s)": 0.035242 + }, + { + "acc": 0.6564095, + "epoch": 2.07, + "learning_rate": 9.596024516066893e-05, + "loss": 1.14403868, + "memory(GiB)": 85.12, + "step": 2640, + "train_speed(iter/s)": 0.03524 + }, + { + "acc": 0.66070809, + "epoch": 2.07, + "learning_rate": 9.593895659384117e-05, + "loss": 1.1139576, + "memory(GiB)": 85.12, + "step": 2645, + "train_speed(iter/s)": 0.035241 + }, + { + "acc": 0.6555994, + "epoch": 2.08, + "learning_rate": 9.591761445553e-05, + "loss": 1.12918062, + "memory(GiB)": 85.12, + "step": 2650, + "train_speed(iter/s)": 0.035239 + }, + { + "acc": 0.66035252, + "epoch": 2.08, + "learning_rate": 9.589621877062346e-05, + "loss": 1.11460495, + "memory(GiB)": 85.12, + "step": 2655, + "train_speed(iter/s)": 0.03524 + }, + { + "acc": 0.66998568, + "epoch": 2.08, + "learning_rate": 9.5874769564072e-05, + "loss": 1.07866659, + "memory(GiB)": 85.12, + "step": 2660, + "train_speed(iter/s)": 0.035239 + }, + { + "acc": 0.65154204, + "epoch": 2.09, + "learning_rate": 9.585326686088851e-05, + "loss": 1.14091015, + "memory(GiB)": 85.12, + "step": 2665, + "train_speed(iter/s)": 0.035238 + }, + { + "acc": 0.6603961, + "epoch": 2.09, + "learning_rate": 9.583171068614827e-05, + "loss": 1.12223263, + "memory(GiB)": 85.12, + "step": 2670, + "train_speed(iter/s)": 0.035237 + }, + { + "acc": 0.66768351, + "epoch": 2.1, + "learning_rate": 9.58101010649889e-05, + "loss": 1.11046982, + "memory(GiB)": 85.12, + "step": 2675, + "train_speed(iter/s)": 0.035238 + }, + { + "acc": 0.66048141, + "epoch": 2.1, + "learning_rate": 9.578843802261036e-05, + "loss": 1.1147171, + "memory(GiB)": 85.12, + "step": 2680, + "train_speed(iter/s)": 0.035239 + }, + { + "acc": 0.66307225, + "epoch": 2.1, + "learning_rate": 9.576672158427485e-05, + "loss": 1.14018593, + "memory(GiB)": 85.12, + "step": 2685, + "train_speed(iter/s)": 0.03524 + }, + { + "acc": 0.66853991, + "epoch": 2.11, + "learning_rate": 9.574495177530693e-05, + "loss": 1.12644806, + "memory(GiB)": 85.12, + "step": 2690, + "train_speed(iter/s)": 0.035242 + }, + { + "acc": 0.67141595, + "epoch": 2.11, + "learning_rate": 9.572312862109335e-05, + "loss": 1.10125408, + "memory(GiB)": 85.12, + "step": 2695, + "train_speed(iter/s)": 0.035243 + }, + { + "acc": 0.66562076, + "epoch": 2.12, + "learning_rate": 9.570125214708309e-05, + "loss": 1.11102467, + "memory(GiB)": 85.12, + "step": 2700, + "train_speed(iter/s)": 0.035241 + }, + { + "acc": 0.6675756, + "epoch": 2.12, + "learning_rate": 9.567932237878726e-05, + "loss": 1.10656528, + "memory(GiB)": 85.12, + "step": 2705, + "train_speed(iter/s)": 0.035242 + }, + { + "acc": 0.67014441, + "epoch": 2.12, + "learning_rate": 9.565733934177915e-05, + "loss": 1.07535477, + "memory(GiB)": 85.12, + "step": 2710, + "train_speed(iter/s)": 0.035241 + }, + { + "acc": 0.67194867, + "epoch": 2.13, + "learning_rate": 9.563530306169415e-05, + "loss": 1.06938372, + "memory(GiB)": 85.12, + "step": 2715, + "train_speed(iter/s)": 0.035242 + }, + { + "acc": 0.65955586, + "epoch": 2.13, + "learning_rate": 9.56132135642298e-05, + "loss": 1.14082947, + "memory(GiB)": 85.12, + "step": 2720, + "train_speed(iter/s)": 0.035244 + }, + { + "acc": 0.65607781, + "epoch": 2.14, + "learning_rate": 9.559107087514562e-05, + "loss": 1.12005463, + "memory(GiB)": 85.12, + "step": 2725, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.66828775, + "epoch": 2.14, + "learning_rate": 9.556887502026324e-05, + "loss": 1.10912933, + "memory(GiB)": 85.12, + "step": 2730, + "train_speed(iter/s)": 0.035243 + }, + { + "acc": 0.65105276, + "epoch": 2.14, + "learning_rate": 9.554662602546622e-05, + "loss": 1.1439889, + "memory(GiB)": 85.12, + "step": 2735, + "train_speed(iter/s)": 0.035242 + }, + { + "acc": 0.66353316, + "epoch": 2.15, + "learning_rate": 9.552432391670009e-05, + "loss": 1.11523571, + "memory(GiB)": 85.12, + "step": 2740, + "train_speed(iter/s)": 0.035243 + }, + { + "acc": 0.65910487, + "epoch": 2.15, + "learning_rate": 9.550196871997237e-05, + "loss": 1.10913839, + "memory(GiB)": 85.12, + "step": 2745, + "train_speed(iter/s)": 0.035244 + }, + { + "acc": 0.66159306, + "epoch": 2.16, + "learning_rate": 9.547956046135247e-05, + "loss": 1.15435734, + "memory(GiB)": 85.12, + "step": 2750, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.65181875, + "epoch": 2.16, + "learning_rate": 9.545709916697164e-05, + "loss": 1.16566019, + "memory(GiB)": 85.12, + "step": 2755, + "train_speed(iter/s)": 0.035244 + }, + { + "acc": 0.66318617, + "epoch": 2.16, + "learning_rate": 9.543458486302301e-05, + "loss": 1.10774937, + "memory(GiB)": 85.12, + "step": 2760, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.6740345, + "epoch": 2.17, + "learning_rate": 9.541201757576154e-05, + "loss": 1.1062582, + "memory(GiB)": 85.12, + "step": 2765, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.64558797, + "epoch": 2.17, + "learning_rate": 9.538939733150394e-05, + "loss": 1.18151665, + "memory(GiB)": 85.12, + "step": 2770, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.64788785, + "epoch": 2.17, + "learning_rate": 9.53667241566287e-05, + "loss": 1.15782328, + "memory(GiB)": 85.12, + "step": 2775, + "train_speed(iter/s)": 0.035243 + }, + { + "acc": 0.65736341, + "epoch": 2.18, + "learning_rate": 9.534399807757606e-05, + "loss": 1.14570007, + "memory(GiB)": 85.12, + "step": 2780, + "train_speed(iter/s)": 0.035244 + }, + { + "acc": 0.66911092, + "epoch": 2.18, + "learning_rate": 9.532121912084787e-05, + "loss": 1.09435015, + "memory(GiB)": 85.12, + "step": 2785, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.6581212, + "epoch": 2.19, + "learning_rate": 9.529838731300774e-05, + "loss": 1.15303545, + "memory(GiB)": 85.12, + "step": 2790, + "train_speed(iter/s)": 0.035244 + }, + { + "acc": 0.67710958, + "epoch": 2.19, + "learning_rate": 9.527550268068081e-05, + "loss": 1.0725668, + "memory(GiB)": 85.12, + "step": 2795, + "train_speed(iter/s)": 0.035243 + }, + { + "acc": 0.65953379, + "epoch": 2.19, + "learning_rate": 9.525256525055395e-05, + "loss": 1.10951128, + "memory(GiB)": 85.12, + "step": 2800, + "train_speed(iter/s)": 0.035244 + }, + { + "acc": 0.65992632, + "epoch": 2.2, + "learning_rate": 9.522957504937549e-05, + "loss": 1.12102213, + "memory(GiB)": 85.12, + "step": 2805, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.66131835, + "epoch": 2.2, + "learning_rate": 9.520653210395534e-05, + "loss": 1.08475084, + "memory(GiB)": 85.12, + "step": 2810, + "train_speed(iter/s)": 0.035243 + }, + { + "acc": 0.66965961, + "epoch": 2.21, + "learning_rate": 9.518343644116493e-05, + "loss": 1.08618603, + "memory(GiB)": 85.12, + "step": 2815, + "train_speed(iter/s)": 0.035244 + }, + { + "acc": 0.65925741, + "epoch": 2.21, + "learning_rate": 9.516028808793714e-05, + "loss": 1.1315218, + "memory(GiB)": 85.12, + "step": 2820, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.66214266, + "epoch": 2.21, + "learning_rate": 9.51370870712663e-05, + "loss": 1.1035429, + "memory(GiB)": 85.12, + "step": 2825, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.66194062, + "epoch": 2.22, + "learning_rate": 9.511383341820815e-05, + "loss": 1.1275753, + "memory(GiB)": 85.12, + "step": 2830, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.66372776, + "epoch": 2.22, + "learning_rate": 9.509052715587985e-05, + "loss": 1.10870571, + "memory(GiB)": 85.12, + "step": 2835, + "train_speed(iter/s)": 0.035244 + }, + { + "acc": 0.66669927, + "epoch": 2.23, + "learning_rate": 9.506716831145988e-05, + "loss": 1.14299908, + "memory(GiB)": 85.12, + "step": 2840, + "train_speed(iter/s)": 0.035242 + }, + { + "acc": 0.66881795, + "epoch": 2.23, + "learning_rate": 9.504375691218802e-05, + "loss": 1.07950726, + "memory(GiB)": 85.12, + "step": 2845, + "train_speed(iter/s)": 0.035243 + }, + { + "acc": 0.67085314, + "epoch": 2.23, + "learning_rate": 9.502029298536535e-05, + "loss": 1.07588711, + "memory(GiB)": 85.12, + "step": 2850, + "train_speed(iter/s)": 0.035243 + }, + { + "acc": 0.65394874, + "epoch": 2.24, + "learning_rate": 9.499677655835421e-05, + "loss": 1.15423851, + "memory(GiB)": 85.12, + "step": 2855, + "train_speed(iter/s)": 0.035242 + }, + { + "acc": 0.64597754, + "epoch": 2.24, + "learning_rate": 9.49732076585782e-05, + "loss": 1.15842009, + "memory(GiB)": 85.12, + "step": 2860, + "train_speed(iter/s)": 0.035243 + }, + { + "acc": 0.67262759, + "epoch": 2.25, + "learning_rate": 9.494958631352204e-05, + "loss": 1.11735392, + "memory(GiB)": 85.12, + "step": 2865, + "train_speed(iter/s)": 0.035244 + }, + { + "acc": 0.66508193, + "epoch": 2.25, + "learning_rate": 9.492591255073164e-05, + "loss": 1.09670143, + "memory(GiB)": 85.12, + "step": 2870, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.67350874, + "epoch": 2.25, + "learning_rate": 9.490218639781407e-05, + "loss": 1.0795311, + "memory(GiB)": 85.12, + "step": 2875, + "train_speed(iter/s)": 0.035243 + }, + { + "acc": 0.66628647, + "epoch": 2.26, + "learning_rate": 9.487840788243744e-05, + "loss": 1.12455454, + "memory(GiB)": 85.12, + "step": 2880, + "train_speed(iter/s)": 0.035243 + }, + { + "acc": 0.6715724, + "epoch": 2.26, + "learning_rate": 9.485457703233094e-05, + "loss": 1.11749763, + "memory(GiB)": 85.12, + "step": 2885, + "train_speed(iter/s)": 0.035244 + }, + { + "acc": 0.6443604, + "epoch": 2.26, + "learning_rate": 9.483069387528482e-05, + "loss": 1.18035517, + "memory(GiB)": 85.12, + "step": 2890, + "train_speed(iter/s)": 0.035244 + }, + { + "acc": 0.6723536, + "epoch": 2.27, + "learning_rate": 9.480675843915028e-05, + "loss": 1.08084173, + "memory(GiB)": 85.12, + "step": 2895, + "train_speed(iter/s)": 0.035243 + }, + { + "acc": 0.66256552, + "epoch": 2.27, + "learning_rate": 9.478277075183955e-05, + "loss": 1.08778219, + "memory(GiB)": 85.12, + "step": 2900, + "train_speed(iter/s)": 0.035241 + }, + { + "acc": 0.65755138, + "epoch": 2.28, + "learning_rate": 9.47587308413257e-05, + "loss": 1.14270163, + "memory(GiB)": 85.12, + "step": 2905, + "train_speed(iter/s)": 0.03524 + }, + { + "acc": 0.64939547, + "epoch": 2.28, + "learning_rate": 9.473463873564275e-05, + "loss": 1.1697154, + "memory(GiB)": 85.12, + "step": 2910, + "train_speed(iter/s)": 0.035239 + }, + { + "acc": 0.67200222, + "epoch": 2.28, + "learning_rate": 9.471049446288564e-05, + "loss": 1.12756548, + "memory(GiB)": 85.12, + "step": 2915, + "train_speed(iter/s)": 0.03524 + }, + { + "acc": 0.66369948, + "epoch": 2.29, + "learning_rate": 9.468629805121005e-05, + "loss": 1.134799, + "memory(GiB)": 85.12, + "step": 2920, + "train_speed(iter/s)": 0.03524 + }, + { + "acc": 0.65741105, + "epoch": 2.29, + "learning_rate": 9.466204952883252e-05, + "loss": 1.12181864, + "memory(GiB)": 85.12, + "step": 2925, + "train_speed(iter/s)": 0.035237 + }, + { + "acc": 0.67293525, + "epoch": 2.3, + "learning_rate": 9.463774892403033e-05, + "loss": 1.08120308, + "memory(GiB)": 85.12, + "step": 2930, + "train_speed(iter/s)": 0.035235 + }, + { + "acc": 0.65945473, + "epoch": 2.3, + "learning_rate": 9.461339626514153e-05, + "loss": 1.12144451, + "memory(GiB)": 85.12, + "step": 2935, + "train_speed(iter/s)": 0.035236 + }, + { + "acc": 0.65807548, + "epoch": 2.3, + "learning_rate": 9.458899158056482e-05, + "loss": 1.1355731, + "memory(GiB)": 85.12, + "step": 2940, + "train_speed(iter/s)": 0.035237 + }, + { + "acc": 0.66247325, + "epoch": 2.31, + "learning_rate": 9.456453489875963e-05, + "loss": 1.11652193, + "memory(GiB)": 85.12, + "step": 2945, + "train_speed(iter/s)": 0.035238 + }, + { + "acc": 0.67103438, + "epoch": 2.31, + "learning_rate": 9.454002624824598e-05, + "loss": 1.10103321, + "memory(GiB)": 85.12, + "step": 2950, + "train_speed(iter/s)": 0.035239 + }, + { + "acc": 0.6699152, + "epoch": 2.32, + "learning_rate": 9.451546565760452e-05, + "loss": 1.07716627, + "memory(GiB)": 85.12, + "step": 2955, + "train_speed(iter/s)": 0.03524 + }, + { + "acc": 0.66332569, + "epoch": 2.32, + "learning_rate": 9.449085315547645e-05, + "loss": 1.1355279, + "memory(GiB)": 85.12, + "step": 2960, + "train_speed(iter/s)": 0.035242 + }, + { + "acc": 0.6688993, + "epoch": 2.32, + "learning_rate": 9.446618877056353e-05, + "loss": 1.08996553, + "memory(GiB)": 85.12, + "step": 2965, + "train_speed(iter/s)": 0.035243 + }, + { + "acc": 0.64563627, + "epoch": 2.33, + "learning_rate": 9.444147253162799e-05, + "loss": 1.18632555, + "memory(GiB)": 85.12, + "step": 2970, + "train_speed(iter/s)": 0.035244 + }, + { + "acc": 0.66807208, + "epoch": 2.33, + "learning_rate": 9.441670446749253e-05, + "loss": 1.09000006, + "memory(GiB)": 85.12, + "step": 2975, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.67623868, + "epoch": 2.34, + "learning_rate": 9.439188460704035e-05, + "loss": 1.06963615, + "memory(GiB)": 85.12, + "step": 2980, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.67707462, + "epoch": 2.34, + "learning_rate": 9.436701297921499e-05, + "loss": 1.06638432, + "memory(GiB)": 85.12, + "step": 2985, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.64844503, + "epoch": 2.34, + "learning_rate": 9.434208961302037e-05, + "loss": 1.15902214, + "memory(GiB)": 85.12, + "step": 2990, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.66300759, + "epoch": 2.35, + "learning_rate": 9.431711453752074e-05, + "loss": 1.11802444, + "memory(GiB)": 85.12, + "step": 2995, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.66209593, + "epoch": 2.35, + "learning_rate": 9.429208778184066e-05, + "loss": 1.12048893, + "memory(GiB)": 85.12, + "step": 3000, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.65673513, + "epoch": 2.36, + "learning_rate": 9.426700937516498e-05, + "loss": 1.12821989, + "memory(GiB)": 85.12, + "step": 3005, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.67200632, + "epoch": 2.36, + "learning_rate": 9.424187934673872e-05, + "loss": 1.08947983, + "memory(GiB)": 85.12, + "step": 3010, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.66114106, + "epoch": 2.36, + "learning_rate": 9.421669772586716e-05, + "loss": 1.1081459, + "memory(GiB)": 85.12, + "step": 3015, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.6648818, + "epoch": 2.37, + "learning_rate": 9.419146454191572e-05, + "loss": 1.09442472, + "memory(GiB)": 85.12, + "step": 3020, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.65600429, + "epoch": 2.37, + "learning_rate": 9.416617982430994e-05, + "loss": 1.11577091, + "memory(GiB)": 85.12, + "step": 3025, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.66453032, + "epoch": 2.37, + "learning_rate": 9.414084360253547e-05, + "loss": 1.11425781, + "memory(GiB)": 85.12, + "step": 3030, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.65060616, + "epoch": 2.38, + "learning_rate": 9.411545590613803e-05, + "loss": 1.13694382, + "memory(GiB)": 85.12, + "step": 3035, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.65530295, + "epoch": 2.38, + "learning_rate": 9.409001676472335e-05, + "loss": 1.16810818, + "memory(GiB)": 85.12, + "step": 3040, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.66546168, + "epoch": 2.39, + "learning_rate": 9.406452620795714e-05, + "loss": 1.11694679, + "memory(GiB)": 85.12, + "step": 3045, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.673035, + "epoch": 2.39, + "learning_rate": 9.40389842655651e-05, + "loss": 1.067171, + "memory(GiB)": 85.12, + "step": 3050, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.6619174, + "epoch": 2.39, + "learning_rate": 9.401339096733283e-05, + "loss": 1.14351206, + "memory(GiB)": 85.12, + "step": 3055, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.66858106, + "epoch": 2.4, + "learning_rate": 9.398774634310583e-05, + "loss": 1.10405941, + "memory(GiB)": 85.12, + "step": 3060, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.66761208, + "epoch": 2.4, + "learning_rate": 9.396205042278946e-05, + "loss": 1.09991446, + "memory(GiB)": 85.12, + "step": 3065, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.66788816, + "epoch": 2.41, + "learning_rate": 9.393630323634888e-05, + "loss": 1.10259113, + "memory(GiB)": 85.12, + "step": 3070, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.66071186, + "epoch": 2.41, + "learning_rate": 9.391050481380903e-05, + "loss": 1.13652515, + "memory(GiB)": 85.12, + "step": 3075, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.66306629, + "epoch": 2.41, + "learning_rate": 9.388465518525464e-05, + "loss": 1.1402585, + "memory(GiB)": 85.12, + "step": 3080, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.66560416, + "epoch": 2.42, + "learning_rate": 9.385875438083008e-05, + "loss": 1.12412586, + "memory(GiB)": 85.12, + "step": 3085, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.65981436, + "epoch": 2.42, + "learning_rate": 9.383280243073948e-05, + "loss": 1.12645855, + "memory(GiB)": 85.12, + "step": 3090, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.66078367, + "epoch": 2.43, + "learning_rate": 9.380679936524656e-05, + "loss": 1.13504505, + "memory(GiB)": 85.12, + "step": 3095, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.66190977, + "epoch": 2.43, + "learning_rate": 9.378074521467469e-05, + "loss": 1.1244626, + "memory(GiB)": 85.12, + "step": 3100, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.65778189, + "epoch": 2.43, + "learning_rate": 9.375464000940676e-05, + "loss": 1.16129856, + "memory(GiB)": 85.12, + "step": 3105, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.66200366, + "epoch": 2.44, + "learning_rate": 9.37284837798852e-05, + "loss": 1.12543259, + "memory(GiB)": 85.12, + "step": 3110, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.66628103, + "epoch": 2.44, + "learning_rate": 9.370227655661203e-05, + "loss": 1.11405783, + "memory(GiB)": 85.12, + "step": 3115, + "train_speed(iter/s)": 0.035256 + }, + { + "acc": 0.66346598, + "epoch": 2.45, + "learning_rate": 9.367601837014864e-05, + "loss": 1.11039734, + "memory(GiB)": 85.12, + "step": 3120, + "train_speed(iter/s)": 0.035256 + }, + { + "acc": 0.66284456, + "epoch": 2.45, + "learning_rate": 9.364970925111587e-05, + "loss": 1.14555759, + "memory(GiB)": 85.12, + "step": 3125, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.65998135, + "epoch": 2.45, + "learning_rate": 9.362334923019397e-05, + "loss": 1.13087349, + "memory(GiB)": 85.12, + "step": 3130, + "train_speed(iter/s)": 0.035256 + }, + { + "acc": 0.66171045, + "epoch": 2.46, + "learning_rate": 9.359693833812255e-05, + "loss": 1.13765631, + "memory(GiB)": 85.12, + "step": 3135, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.66532288, + "epoch": 2.46, + "learning_rate": 9.357047660570056e-05, + "loss": 1.11209698, + "memory(GiB)": 85.12, + "step": 3140, + "train_speed(iter/s)": 0.035258 + }, + { + "acc": 0.67781734, + "epoch": 2.46, + "learning_rate": 9.354396406378618e-05, + "loss": 1.05338039, + "memory(GiB)": 85.12, + "step": 3145, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.66491776, + "epoch": 2.47, + "learning_rate": 9.35174007432969e-05, + "loss": 1.1117816, + "memory(GiB)": 85.12, + "step": 3150, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.65880842, + "epoch": 2.47, + "learning_rate": 9.34907866752094e-05, + "loss": 1.11286173, + "memory(GiB)": 85.12, + "step": 3155, + "train_speed(iter/s)": 0.03526 + }, + { + "acc": 0.66765223, + "epoch": 2.48, + "learning_rate": 9.346412189055955e-05, + "loss": 1.10158033, + "memory(GiB)": 85.12, + "step": 3160, + "train_speed(iter/s)": 0.035261 + }, + { + "acc": 0.65851316, + "epoch": 2.48, + "learning_rate": 9.343740642044232e-05, + "loss": 1.1240366, + "memory(GiB)": 85.12, + "step": 3165, + "train_speed(iter/s)": 0.035262 + }, + { + "acc": 0.66850886, + "epoch": 2.48, + "learning_rate": 9.341064029601188e-05, + "loss": 1.09285021, + "memory(GiB)": 85.12, + "step": 3170, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.67183051, + "epoch": 2.49, + "learning_rate": 9.338382354848135e-05, + "loss": 1.08079424, + "memory(GiB)": 85.12, + "step": 3175, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.67254267, + "epoch": 2.49, + "learning_rate": 9.335695620912298e-05, + "loss": 1.10049, + "memory(GiB)": 85.12, + "step": 3180, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.67040033, + "epoch": 2.5, + "learning_rate": 9.333003830926799e-05, + "loss": 1.09397783, + "memory(GiB)": 85.12, + "step": 3185, + "train_speed(iter/s)": 0.035262 + }, + { + "acc": 0.67311201, + "epoch": 2.5, + "learning_rate": 9.330306988030651e-05, + "loss": 1.07896299, + "memory(GiB)": 85.12, + "step": 3190, + "train_speed(iter/s)": 0.03526 + }, + { + "acc": 0.67690806, + "epoch": 2.5, + "learning_rate": 9.327605095368769e-05, + "loss": 1.10143909, + "memory(GiB)": 85.12, + "step": 3195, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.66498342, + "epoch": 2.51, + "learning_rate": 9.324898156091948e-05, + "loss": 1.11493244, + "memory(GiB)": 85.12, + "step": 3200, + "train_speed(iter/s)": 0.03526 + }, + { + "acc": 0.67564311, + "epoch": 2.51, + "learning_rate": 9.322186173356873e-05, + "loss": 1.08120804, + "memory(GiB)": 85.12, + "step": 3205, + "train_speed(iter/s)": 0.035261 + }, + { + "acc": 0.66903639, + "epoch": 2.52, + "learning_rate": 9.31946915032611e-05, + "loss": 1.08918238, + "memory(GiB)": 85.12, + "step": 3210, + "train_speed(iter/s)": 0.035262 + }, + { + "acc": 0.67794003, + "epoch": 2.52, + "learning_rate": 9.316747090168101e-05, + "loss": 1.07291193, + "memory(GiB)": 85.12, + "step": 3215, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.67113781, + "epoch": 2.52, + "learning_rate": 9.314019996057161e-05, + "loss": 1.08330698, + "memory(GiB)": 85.12, + "step": 3220, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.66346536, + "epoch": 2.53, + "learning_rate": 9.31128787117348e-05, + "loss": 1.13062449, + "memory(GiB)": 85.12, + "step": 3225, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.66941795, + "epoch": 2.53, + "learning_rate": 9.308550718703111e-05, + "loss": 1.11006641, + "memory(GiB)": 85.12, + "step": 3230, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.65443735, + "epoch": 2.54, + "learning_rate": 9.305808541837969e-05, + "loss": 1.12953062, + "memory(GiB)": 85.12, + "step": 3235, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.66207342, + "epoch": 2.54, + "learning_rate": 9.30306134377583e-05, + "loss": 1.10449133, + "memory(GiB)": 85.12, + "step": 3240, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.67379289, + "epoch": 2.54, + "learning_rate": 9.300309127720326e-05, + "loss": 1.07863102, + "memory(GiB)": 85.12, + "step": 3245, + "train_speed(iter/s)": 0.035266 + }, + { + "acc": 0.66185699, + "epoch": 2.55, + "learning_rate": 9.297551896880938e-05, + "loss": 1.10122509, + "memory(GiB)": 85.12, + "step": 3250, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.66234708, + "epoch": 2.55, + "learning_rate": 9.294789654473002e-05, + "loss": 1.14223385, + "memory(GiB)": 85.12, + "step": 3255, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.65402446, + "epoch": 2.55, + "learning_rate": 9.292022403717688e-05, + "loss": 1.13783741, + "memory(GiB)": 85.12, + "step": 3260, + "train_speed(iter/s)": 0.035262 + }, + { + "acc": 0.65600824, + "epoch": 2.56, + "learning_rate": 9.289250147842014e-05, + "loss": 1.15849085, + "memory(GiB)": 85.12, + "step": 3265, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.66205072, + "epoch": 2.56, + "learning_rate": 9.286472890078832e-05, + "loss": 1.1514534, + "memory(GiB)": 85.12, + "step": 3270, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.6654716, + "epoch": 2.57, + "learning_rate": 9.283690633666826e-05, + "loss": 1.11500664, + "memory(GiB)": 85.12, + "step": 3275, + "train_speed(iter/s)": 0.035262 + }, + { + "acc": 0.65892515, + "epoch": 2.57, + "learning_rate": 9.280903381850511e-05, + "loss": 1.13781528, + "memory(GiB)": 85.12, + "step": 3280, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.66229644, + "epoch": 2.57, + "learning_rate": 9.278111137880228e-05, + "loss": 1.11094999, + "memory(GiB)": 85.12, + "step": 3285, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.67452283, + "epoch": 2.58, + "learning_rate": 9.275313905012135e-05, + "loss": 1.0776885, + "memory(GiB)": 85.12, + "step": 3290, + "train_speed(iter/s)": 0.035262 + }, + { + "acc": 0.66590748, + "epoch": 2.58, + "learning_rate": 9.272511686508215e-05, + "loss": 1.08773432, + "memory(GiB)": 85.12, + "step": 3295, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.67609005, + "epoch": 2.59, + "learning_rate": 9.269704485636259e-05, + "loss": 1.06893425, + "memory(GiB)": 85.12, + "step": 3300, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.66203904, + "epoch": 2.59, + "learning_rate": 9.26689230566987e-05, + "loss": 1.13459358, + "memory(GiB)": 85.12, + "step": 3305, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.64897304, + "epoch": 2.59, + "learning_rate": 9.264075149888459e-05, + "loss": 1.15744686, + "memory(GiB)": 85.12, + "step": 3310, + "train_speed(iter/s)": 0.035266 + }, + { + "acc": 0.66094275, + "epoch": 2.6, + "learning_rate": 9.261253021577236e-05, + "loss": 1.15568581, + "memory(GiB)": 85.12, + "step": 3315, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.66136718, + "epoch": 2.6, + "learning_rate": 9.258425924027212e-05, + "loss": 1.10611639, + "memory(GiB)": 85.12, + "step": 3320, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.66478972, + "epoch": 2.61, + "learning_rate": 9.255593860535194e-05, + "loss": 1.10232067, + "memory(GiB)": 85.12, + "step": 3325, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.65793505, + "epoch": 2.61, + "learning_rate": 9.252756834403778e-05, + "loss": 1.14366322, + "memory(GiB)": 85.12, + "step": 3330, + "train_speed(iter/s)": 0.035261 + }, + { + "acc": 0.66237917, + "epoch": 2.61, + "learning_rate": 9.249914848941348e-05, + "loss": 1.12491646, + "memory(GiB)": 85.12, + "step": 3335, + "train_speed(iter/s)": 0.035262 + }, + { + "acc": 0.66611323, + "epoch": 2.62, + "learning_rate": 9.24706790746207e-05, + "loss": 1.09040012, + "memory(GiB)": 85.12, + "step": 3340, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.66260924, + "epoch": 2.62, + "learning_rate": 9.244216013285894e-05, + "loss": 1.10959892, + "memory(GiB)": 85.12, + "step": 3345, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.65978012, + "epoch": 2.63, + "learning_rate": 9.241359169738537e-05, + "loss": 1.11563673, + "memory(GiB)": 85.12, + "step": 3350, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.66533618, + "epoch": 2.63, + "learning_rate": 9.238497380151495e-05, + "loss": 1.10536203, + "memory(GiB)": 85.12, + "step": 3355, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.65439553, + "epoch": 2.63, + "learning_rate": 9.235630647862031e-05, + "loss": 1.1373312, + "memory(GiB)": 85.12, + "step": 3360, + "train_speed(iter/s)": 0.035262 + }, + { + "acc": 0.66412206, + "epoch": 2.64, + "learning_rate": 9.232758976213167e-05, + "loss": 1.09817734, + "memory(GiB)": 85.12, + "step": 3365, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.66531549, + "epoch": 2.64, + "learning_rate": 9.229882368553692e-05, + "loss": 1.10946426, + "memory(GiB)": 85.12, + "step": 3370, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.66333899, + "epoch": 2.64, + "learning_rate": 9.227000828238146e-05, + "loss": 1.0850071, + "memory(GiB)": 85.12, + "step": 3375, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.66884055, + "epoch": 2.65, + "learning_rate": 9.224114358626823e-05, + "loss": 1.09241247, + "memory(GiB)": 85.12, + "step": 3380, + "train_speed(iter/s)": 0.035266 + }, + { + "acc": 0.6862587, + "epoch": 2.65, + "learning_rate": 9.221222963085765e-05, + "loss": 1.06248035, + "memory(GiB)": 85.12, + "step": 3385, + "train_speed(iter/s)": 0.035267 + }, + { + "acc": 0.66075082, + "epoch": 2.66, + "learning_rate": 9.218326644986758e-05, + "loss": 1.1324152, + "memory(GiB)": 85.12, + "step": 3390, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.67370081, + "epoch": 2.66, + "learning_rate": 9.215425407707329e-05, + "loss": 1.08881779, + "memory(GiB)": 85.12, + "step": 3395, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.65856786, + "epoch": 2.66, + "learning_rate": 9.212519254630742e-05, + "loss": 1.12874718, + "memory(GiB)": 85.12, + "step": 3400, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.65734329, + "epoch": 2.67, + "learning_rate": 9.20960818914599e-05, + "loss": 1.15164032, + "memory(GiB)": 85.12, + "step": 3405, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.67054696, + "epoch": 2.67, + "learning_rate": 9.206692214647803e-05, + "loss": 1.10470772, + "memory(GiB)": 85.12, + "step": 3410, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.67578359, + "epoch": 2.68, + "learning_rate": 9.203771334536626e-05, + "loss": 1.08748617, + "memory(GiB)": 85.12, + "step": 3415, + "train_speed(iter/s)": 0.035266 + }, + { + "acc": 0.66613045, + "epoch": 2.68, + "learning_rate": 9.200845552218626e-05, + "loss": 1.12501793, + "memory(GiB)": 85.12, + "step": 3420, + "train_speed(iter/s)": 0.035266 + }, + { + "acc": 0.66724539, + "epoch": 2.68, + "learning_rate": 9.197914871105696e-05, + "loss": 1.11535177, + "memory(GiB)": 85.12, + "step": 3425, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.66614714, + "epoch": 2.69, + "learning_rate": 9.194979294615432e-05, + "loss": 1.11667767, + "memory(GiB)": 85.12, + "step": 3430, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.67240911, + "epoch": 2.69, + "learning_rate": 9.192038826171138e-05, + "loss": 1.10492306, + "memory(GiB)": 85.12, + "step": 3435, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.67085233, + "epoch": 2.7, + "learning_rate": 9.189093469201833e-05, + "loss": 1.11214399, + "memory(GiB)": 85.12, + "step": 3440, + "train_speed(iter/s)": 0.035266 + }, + { + "acc": 0.66316462, + "epoch": 2.7, + "learning_rate": 9.186143227142225e-05, + "loss": 1.11524057, + "memory(GiB)": 85.12, + "step": 3445, + "train_speed(iter/s)": 0.035267 + }, + { + "acc": 0.67732773, + "epoch": 2.7, + "learning_rate": 9.183188103432729e-05, + "loss": 1.06655407, + "memory(GiB)": 85.12, + "step": 3450, + "train_speed(iter/s)": 0.035268 + }, + { + "acc": 0.65240765, + "epoch": 2.71, + "learning_rate": 9.180228101519443e-05, + "loss": 1.15858974, + "memory(GiB)": 85.12, + "step": 3455, + "train_speed(iter/s)": 0.035266 + }, + { + "acc": 0.66337948, + "epoch": 2.71, + "learning_rate": 9.17726322485416e-05, + "loss": 1.12186775, + "memory(GiB)": 85.12, + "step": 3460, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.67122927, + "epoch": 2.72, + "learning_rate": 9.174293476894356e-05, + "loss": 1.08822432, + "memory(GiB)": 85.12, + "step": 3465, + "train_speed(iter/s)": 0.035266 + }, + { + "acc": 0.65668936, + "epoch": 2.72, + "learning_rate": 9.171318861103188e-05, + "loss": 1.14420862, + "memory(GiB)": 85.12, + "step": 3470, + "train_speed(iter/s)": 0.035267 + }, + { + "acc": 0.65638585, + "epoch": 2.72, + "learning_rate": 9.16833938094949e-05, + "loss": 1.13455896, + "memory(GiB)": 85.12, + "step": 3475, + "train_speed(iter/s)": 0.035268 + }, + { + "acc": 0.68326602, + "epoch": 2.73, + "learning_rate": 9.165355039907766e-05, + "loss": 1.05959072, + "memory(GiB)": 85.12, + "step": 3480, + "train_speed(iter/s)": 0.035269 + }, + { + "acc": 0.64972272, + "epoch": 2.73, + "learning_rate": 9.162365841458192e-05, + "loss": 1.13724003, + "memory(GiB)": 85.12, + "step": 3485, + "train_speed(iter/s)": 0.035268 + }, + { + "acc": 0.66329112, + "epoch": 2.74, + "learning_rate": 9.159371789086606e-05, + "loss": 1.10761976, + "memory(GiB)": 85.12, + "step": 3490, + "train_speed(iter/s)": 0.035269 + }, + { + "acc": 0.65560665, + "epoch": 2.74, + "learning_rate": 9.156372886284507e-05, + "loss": 1.14408922, + "memory(GiB)": 85.12, + "step": 3495, + "train_speed(iter/s)": 0.03527 + }, + { + "acc": 0.66591907, + "epoch": 2.74, + "learning_rate": 9.15336913654905e-05, + "loss": 1.1079567, + "memory(GiB)": 85.12, + "step": 3500, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.65924816, + "epoch": 2.75, + "learning_rate": 9.150360543383042e-05, + "loss": 1.12748995, + "memory(GiB)": 85.12, + "step": 3505, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.66098423, + "epoch": 2.75, + "learning_rate": 9.147347110294941e-05, + "loss": 1.13659554, + "memory(GiB)": 85.12, + "step": 3510, + "train_speed(iter/s)": 0.035272 + }, + { + "acc": 0.66335301, + "epoch": 2.75, + "learning_rate": 9.144328840798848e-05, + "loss": 1.09939146, + "memory(GiB)": 85.12, + "step": 3515, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.65486746, + "epoch": 2.76, + "learning_rate": 9.141305738414499e-05, + "loss": 1.14847898, + "memory(GiB)": 85.12, + "step": 3520, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.65917635, + "epoch": 2.76, + "learning_rate": 9.138277806667271e-05, + "loss": 1.14824829, + "memory(GiB)": 85.12, + "step": 3525, + "train_speed(iter/s)": 0.03527 + }, + { + "acc": 0.67288303, + "epoch": 2.77, + "learning_rate": 9.135245049088173e-05, + "loss": 1.05631142, + "memory(GiB)": 85.12, + "step": 3530, + "train_speed(iter/s)": 0.035268 + }, + { + "acc": 0.67359824, + "epoch": 2.77, + "learning_rate": 9.132207469213836e-05, + "loss": 1.06311998, + "memory(GiB)": 85.12, + "step": 3535, + "train_speed(iter/s)": 0.035269 + }, + { + "acc": 0.6569242, + "epoch": 2.77, + "learning_rate": 9.129165070586523e-05, + "loss": 1.12648764, + "memory(GiB)": 85.12, + "step": 3540, + "train_speed(iter/s)": 0.03527 + }, + { + "acc": 0.67112989, + "epoch": 2.78, + "learning_rate": 9.12611785675411e-05, + "loss": 1.10996456, + "memory(GiB)": 85.12, + "step": 3545, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.66401305, + "epoch": 2.78, + "learning_rate": 9.123065831270091e-05, + "loss": 1.11186533, + "memory(GiB)": 85.12, + "step": 3550, + "train_speed(iter/s)": 0.035267 + }, + { + "acc": 0.6686008, + "epoch": 2.79, + "learning_rate": 9.120008997693569e-05, + "loss": 1.08463211, + "memory(GiB)": 85.12, + "step": 3555, + "train_speed(iter/s)": 0.035268 + }, + { + "acc": 0.66068683, + "epoch": 2.79, + "learning_rate": 9.116947359589255e-05, + "loss": 1.12286921, + "memory(GiB)": 85.12, + "step": 3560, + "train_speed(iter/s)": 0.035269 + }, + { + "acc": 0.67136216, + "epoch": 2.79, + "learning_rate": 9.113880920527463e-05, + "loss": 1.10150156, + "memory(GiB)": 85.12, + "step": 3565, + "train_speed(iter/s)": 0.03527 + }, + { + "acc": 0.6669219, + "epoch": 2.8, + "learning_rate": 9.110809684084107e-05, + "loss": 1.11419725, + "memory(GiB)": 85.12, + "step": 3570, + "train_speed(iter/s)": 0.03527 + }, + { + "acc": 0.65082202, + "epoch": 2.8, + "learning_rate": 9.107733653840692e-05, + "loss": 1.17740231, + "memory(GiB)": 85.12, + "step": 3575, + "train_speed(iter/s)": 0.035269 + }, + { + "acc": 0.66411786, + "epoch": 2.81, + "learning_rate": 9.104652833384317e-05, + "loss": 1.10019693, + "memory(GiB)": 85.12, + "step": 3580, + "train_speed(iter/s)": 0.035268 + }, + { + "acc": 0.67525086, + "epoch": 2.81, + "learning_rate": 9.101567226307664e-05, + "loss": 1.06568289, + "memory(GiB)": 85.12, + "step": 3585, + "train_speed(iter/s)": 0.035269 + }, + { + "acc": 0.66854186, + "epoch": 2.81, + "learning_rate": 9.098476836208997e-05, + "loss": 1.10631943, + "memory(GiB)": 85.12, + "step": 3590, + "train_speed(iter/s)": 0.03527 + }, + { + "acc": 0.66509161, + "epoch": 2.82, + "learning_rate": 9.095381666692164e-05, + "loss": 1.10477247, + "memory(GiB)": 85.12, + "step": 3595, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.66377773, + "epoch": 2.82, + "learning_rate": 9.092281721366575e-05, + "loss": 1.09822521, + "memory(GiB)": 85.12, + "step": 3600, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.66034365, + "epoch": 2.83, + "learning_rate": 9.089177003847218e-05, + "loss": 1.15647116, + "memory(GiB)": 85.12, + "step": 3605, + "train_speed(iter/s)": 0.035272 + }, + { + "acc": 0.67322073, + "epoch": 2.83, + "learning_rate": 9.086067517754646e-05, + "loss": 1.09445295, + "memory(GiB)": 85.12, + "step": 3610, + "train_speed(iter/s)": 0.035272 + }, + { + "acc": 0.6580318, + "epoch": 2.83, + "learning_rate": 9.082953266714968e-05, + "loss": 1.14048214, + "memory(GiB)": 85.12, + "step": 3615, + "train_speed(iter/s)": 0.035273 + }, + { + "acc": 0.66556892, + "epoch": 2.84, + "learning_rate": 9.079834254359854e-05, + "loss": 1.10402184, + "memory(GiB)": 85.12, + "step": 3620, + "train_speed(iter/s)": 0.035274 + }, + { + "acc": 0.67437925, + "epoch": 2.84, + "learning_rate": 9.076710484326522e-05, + "loss": 1.09448032, + "memory(GiB)": 85.12, + "step": 3625, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.66442852, + "epoch": 2.84, + "learning_rate": 9.073581960257742e-05, + "loss": 1.14604816, + "memory(GiB)": 85.12, + "step": 3630, + "train_speed(iter/s)": 0.035277 + }, + { + "acc": 0.66115713, + "epoch": 2.85, + "learning_rate": 9.070448685801829e-05, + "loss": 1.07533693, + "memory(GiB)": 85.12, + "step": 3635, + "train_speed(iter/s)": 0.035277 + }, + { + "acc": 0.67220783, + "epoch": 2.85, + "learning_rate": 9.067310664612631e-05, + "loss": 1.11974783, + "memory(GiB)": 85.12, + "step": 3640, + "train_speed(iter/s)": 0.035278 + }, + { + "acc": 0.67338705, + "epoch": 2.86, + "learning_rate": 9.06416790034954e-05, + "loss": 1.11144781, + "memory(GiB)": 85.12, + "step": 3645, + "train_speed(iter/s)": 0.035277 + }, + { + "acc": 0.66914973, + "epoch": 2.86, + "learning_rate": 9.06102039667747e-05, + "loss": 1.08288326, + "memory(GiB)": 85.12, + "step": 3650, + "train_speed(iter/s)": 0.035277 + }, + { + "acc": 0.66724291, + "epoch": 2.86, + "learning_rate": 9.057868157266873e-05, + "loss": 1.09231892, + "memory(GiB)": 85.12, + "step": 3655, + "train_speed(iter/s)": 0.035278 + }, + { + "acc": 0.66573162, + "epoch": 2.87, + "learning_rate": 9.054711185793712e-05, + "loss": 1.13242044, + "memory(GiB)": 85.12, + "step": 3660, + "train_speed(iter/s)": 0.035279 + }, + { + "acc": 0.66399899, + "epoch": 2.87, + "learning_rate": 9.051549485939472e-05, + "loss": 1.08897943, + "memory(GiB)": 85.12, + "step": 3665, + "train_speed(iter/s)": 0.03528 + }, + { + "acc": 0.65977526, + "epoch": 2.88, + "learning_rate": 9.048383061391159e-05, + "loss": 1.12547369, + "memory(GiB)": 85.12, + "step": 3670, + "train_speed(iter/s)": 0.035281 + }, + { + "acc": 0.65447931, + "epoch": 2.88, + "learning_rate": 9.045211915841279e-05, + "loss": 1.16677542, + "memory(GiB)": 85.12, + "step": 3675, + "train_speed(iter/s)": 0.035281 + }, + { + "acc": 0.67030063, + "epoch": 2.88, + "learning_rate": 9.04203605298785e-05, + "loss": 1.12552681, + "memory(GiB)": 85.12, + "step": 3680, + "train_speed(iter/s)": 0.035282 + }, + { + "acc": 0.6612977, + "epoch": 2.89, + "learning_rate": 9.038855476534385e-05, + "loss": 1.1319479, + "memory(GiB)": 85.12, + "step": 3685, + "train_speed(iter/s)": 0.035281 + }, + { + "acc": 0.6624711, + "epoch": 2.89, + "learning_rate": 9.035670190189902e-05, + "loss": 1.09792414, + "memory(GiB)": 85.12, + "step": 3690, + "train_speed(iter/s)": 0.03528 + }, + { + "acc": 0.67158294, + "epoch": 2.9, + "learning_rate": 9.032480197668902e-05, + "loss": 1.10716963, + "memory(GiB)": 85.12, + "step": 3695, + "train_speed(iter/s)": 0.035281 + }, + { + "acc": 0.66656246, + "epoch": 2.9, + "learning_rate": 9.029285502691382e-05, + "loss": 1.1186574, + "memory(GiB)": 85.12, + "step": 3700, + "train_speed(iter/s)": 0.035282 + }, + { + "acc": 0.66599183, + "epoch": 2.9, + "learning_rate": 9.026086108982819e-05, + "loss": 1.09721441, + "memory(GiB)": 85.12, + "step": 3705, + "train_speed(iter/s)": 0.035283 + }, + { + "acc": 0.67279172, + "epoch": 2.91, + "learning_rate": 9.022882020274167e-05, + "loss": 1.09024601, + "memory(GiB)": 85.12, + "step": 3710, + "train_speed(iter/s)": 0.035283 + }, + { + "acc": 0.65694847, + "epoch": 2.91, + "learning_rate": 9.019673240301862e-05, + "loss": 1.12552786, + "memory(GiB)": 85.12, + "step": 3715, + "train_speed(iter/s)": 0.035282 + }, + { + "acc": 0.67931943, + "epoch": 2.92, + "learning_rate": 9.016459772807804e-05, + "loss": 1.08509073, + "memory(GiB)": 85.12, + "step": 3720, + "train_speed(iter/s)": 0.035283 + }, + { + "acc": 0.67091622, + "epoch": 2.92, + "learning_rate": 9.013241621539364e-05, + "loss": 1.08755741, + "memory(GiB)": 85.12, + "step": 3725, + "train_speed(iter/s)": 0.035283 + }, + { + "acc": 0.66849947, + "epoch": 2.92, + "learning_rate": 9.01001879024937e-05, + "loss": 1.11163702, + "memory(GiB)": 85.12, + "step": 3730, + "train_speed(iter/s)": 0.035284 + }, + { + "acc": 0.66142335, + "epoch": 2.93, + "learning_rate": 9.006791282696113e-05, + "loss": 1.11982279, + "memory(GiB)": 85.12, + "step": 3735, + "train_speed(iter/s)": 0.035283 + }, + { + "acc": 0.66708808, + "epoch": 2.93, + "learning_rate": 9.003559102643335e-05, + "loss": 1.12939367, + "memory(GiB)": 85.12, + "step": 3740, + "train_speed(iter/s)": 0.035284 + }, + { + "acc": 0.65388312, + "epoch": 2.93, + "learning_rate": 9.000322253860225e-05, + "loss": 1.13458776, + "memory(GiB)": 85.12, + "step": 3745, + "train_speed(iter/s)": 0.035282 + }, + { + "acc": 0.66235805, + "epoch": 2.94, + "learning_rate": 8.997080740121417e-05, + "loss": 1.12270813, + "memory(GiB)": 85.12, + "step": 3750, + "train_speed(iter/s)": 0.035283 + }, + { + "acc": 0.67565556, + "epoch": 2.94, + "learning_rate": 8.993834565206989e-05, + "loss": 1.08064222, + "memory(GiB)": 85.12, + "step": 3755, + "train_speed(iter/s)": 0.035284 + }, + { + "acc": 0.67491579, + "epoch": 2.95, + "learning_rate": 8.99058373290245e-05, + "loss": 1.07194624, + "memory(GiB)": 85.12, + "step": 3760, + "train_speed(iter/s)": 0.035281 + }, + { + "acc": 0.66467142, + "epoch": 2.95, + "learning_rate": 8.987328246998742e-05, + "loss": 1.1253994, + "memory(GiB)": 85.12, + "step": 3765, + "train_speed(iter/s)": 0.035281 + }, + { + "acc": 0.65193849, + "epoch": 2.95, + "learning_rate": 8.984068111292232e-05, + "loss": 1.16464453, + "memory(GiB)": 85.12, + "step": 3770, + "train_speed(iter/s)": 0.035281 + }, + { + "acc": 0.66975975, + "epoch": 2.96, + "learning_rate": 8.980803329584712e-05, + "loss": 1.07693071, + "memory(GiB)": 85.12, + "step": 3775, + "train_speed(iter/s)": 0.035278 + }, + { + "acc": 0.65947042, + "epoch": 2.96, + "learning_rate": 8.977533905683393e-05, + "loss": 1.12480698, + "memory(GiB)": 85.12, + "step": 3780, + "train_speed(iter/s)": 0.035278 + }, + { + "acc": 0.65800185, + "epoch": 2.97, + "learning_rate": 8.974259843400894e-05, + "loss": 1.1366251, + "memory(GiB)": 85.12, + "step": 3785, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.67165961, + "epoch": 2.97, + "learning_rate": 8.970981146555247e-05, + "loss": 1.10998592, + "memory(GiB)": 85.12, + "step": 3790, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.67054834, + "epoch": 2.97, + "learning_rate": 8.967697818969889e-05, + "loss": 1.08357964, + "memory(GiB)": 85.12, + "step": 3795, + "train_speed(iter/s)": 0.035273 + }, + { + "acc": 0.65625038, + "epoch": 2.98, + "learning_rate": 8.964409864473656e-05, + "loss": 1.13358316, + "memory(GiB)": 85.12, + "step": 3800, + "train_speed(iter/s)": 0.035272 + }, + { + "acc": 0.66279435, + "epoch": 2.98, + "learning_rate": 8.961117286900777e-05, + "loss": 1.10367622, + "memory(GiB)": 85.12, + "step": 3805, + "train_speed(iter/s)": 0.035272 + }, + { + "acc": 0.65385418, + "epoch": 2.99, + "learning_rate": 8.957820090090877e-05, + "loss": 1.14131985, + "memory(GiB)": 85.12, + "step": 3810, + "train_speed(iter/s)": 0.035273 + }, + { + "acc": 0.6783905, + "epoch": 2.99, + "learning_rate": 8.954518277888966e-05, + "loss": 1.06005888, + "memory(GiB)": 85.12, + "step": 3815, + "train_speed(iter/s)": 0.035274 + }, + { + "acc": 0.66349859, + "epoch": 2.99, + "learning_rate": 8.951211854145434e-05, + "loss": 1.11471567, + "memory(GiB)": 85.12, + "step": 3820, + "train_speed(iter/s)": 0.035273 + }, + { + "acc": 0.65545945, + "epoch": 3.0, + "learning_rate": 8.947900822716053e-05, + "loss": 1.18849239, + "memory(GiB)": 85.12, + "step": 3825, + "train_speed(iter/s)": 0.035274 + }, + { + "epoch": 3.0, + "eval_acc": 0.6859504132231405, + "eval_loss": 1.0273702144622803, + "eval_runtime": 85.0825, + "eval_samples_per_second": 1.093, + "eval_steps_per_second": 1.093, + "step": 3828 + }, + { + "acc": 0.66745429, + "epoch": 3.0, + "learning_rate": 8.944585187461963e-05, + "loss": 1.09518471, + "memory(GiB)": 85.12, + "step": 3830, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.6809526, + "epoch": 3.01, + "learning_rate": 8.941264952249674e-05, + "loss": 1.05991888, + "memory(GiB)": 85.12, + "step": 3835, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.67283616, + "epoch": 3.01, + "learning_rate": 8.937940120951068e-05, + "loss": 1.10407562, + "memory(GiB)": 85.12, + "step": 3840, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.66963844, + "epoch": 3.01, + "learning_rate": 8.934610697443375e-05, + "loss": 1.08318148, + "memory(GiB)": 85.12, + "step": 3845, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.67286458, + "epoch": 3.02, + "learning_rate": 8.931276685609187e-05, + "loss": 1.09335747, + "memory(GiB)": 85.12, + "step": 3850, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.68292265, + "epoch": 3.02, + "learning_rate": 8.927938089336444e-05, + "loss": 1.03646545, + "memory(GiB)": 85.12, + "step": 3855, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.67601447, + "epoch": 3.03, + "learning_rate": 8.924594912518435e-05, + "loss": 1.03905144, + "memory(GiB)": 85.12, + "step": 3860, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.68262358, + "epoch": 3.03, + "learning_rate": 8.921247159053787e-05, + "loss": 1.04506426, + "memory(GiB)": 85.12, + "step": 3865, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.67279882, + "epoch": 3.03, + "learning_rate": 8.917894832846467e-05, + "loss": 1.07862301, + "memory(GiB)": 85.12, + "step": 3870, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.65542626, + "epoch": 3.04, + "learning_rate": 8.914537937805776e-05, + "loss": 1.15798473, + "memory(GiB)": 85.12, + "step": 3875, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.68080411, + "epoch": 3.04, + "learning_rate": 8.911176477846334e-05, + "loss": 1.03082008, + "memory(GiB)": 85.12, + "step": 3880, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.66806016, + "epoch": 3.04, + "learning_rate": 8.907810456888097e-05, + "loss": 1.07673035, + "memory(GiB)": 85.12, + "step": 3885, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.67421083, + "epoch": 3.05, + "learning_rate": 8.90443987885633e-05, + "loss": 1.11088667, + "memory(GiB)": 85.12, + "step": 3890, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.65671282, + "epoch": 3.05, + "learning_rate": 8.901064747681616e-05, + "loss": 1.14146271, + "memory(GiB)": 85.12, + "step": 3895, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.67231884, + "epoch": 3.06, + "learning_rate": 8.897685067299846e-05, + "loss": 1.08252373, + "memory(GiB)": 85.12, + "step": 3900, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.677108, + "epoch": 3.06, + "learning_rate": 8.89430084165222e-05, + "loss": 1.05940466, + "memory(GiB)": 85.12, + "step": 3905, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.68133845, + "epoch": 3.06, + "learning_rate": 8.890912074685236e-05, + "loss": 1.02218704, + "memory(GiB)": 85.12, + "step": 3910, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.67558608, + "epoch": 3.07, + "learning_rate": 8.887518770350685e-05, + "loss": 1.06314049, + "memory(GiB)": 85.12, + "step": 3915, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.66001134, + "epoch": 3.07, + "learning_rate": 8.884120932605653e-05, + "loss": 1.12467155, + "memory(GiB)": 85.12, + "step": 3920, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.67021894, + "epoch": 3.08, + "learning_rate": 8.880718565412511e-05, + "loss": 1.07008371, + "memory(GiB)": 85.12, + "step": 3925, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.6668273, + "epoch": 3.08, + "learning_rate": 8.877311672738913e-05, + "loss": 1.09233027, + "memory(GiB)": 85.12, + "step": 3930, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.66963663, + "epoch": 3.08, + "learning_rate": 8.87390025855779e-05, + "loss": 1.07303791, + "memory(GiB)": 85.12, + "step": 3935, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.67969995, + "epoch": 3.09, + "learning_rate": 8.870484326847345e-05, + "loss": 1.06793242, + "memory(GiB)": 85.12, + "step": 3940, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.66840816, + "epoch": 3.09, + "learning_rate": 8.867063881591049e-05, + "loss": 1.08742456, + "memory(GiB)": 85.12, + "step": 3945, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.66912541, + "epoch": 3.1, + "learning_rate": 8.863638926777639e-05, + "loss": 1.09435921, + "memory(GiB)": 85.12, + "step": 3950, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.66795225, + "epoch": 3.1, + "learning_rate": 8.860209466401106e-05, + "loss": 1.1207943, + "memory(GiB)": 85.12, + "step": 3955, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.66590056, + "epoch": 3.1, + "learning_rate": 8.856775504460702e-05, + "loss": 1.07821617, + "memory(GiB)": 85.12, + "step": 3960, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.67165837, + "epoch": 3.11, + "learning_rate": 8.853337044960918e-05, + "loss": 1.08614855, + "memory(GiB)": 85.12, + "step": 3965, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.66955404, + "epoch": 3.11, + "learning_rate": 8.849894091911503e-05, + "loss": 1.09462404, + "memory(GiB)": 85.12, + "step": 3970, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.68378253, + "epoch": 3.12, + "learning_rate": 8.846446649327437e-05, + "loss": 1.03468351, + "memory(GiB)": 85.12, + "step": 3975, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.66864328, + "epoch": 3.12, + "learning_rate": 8.842994721228936e-05, + "loss": 1.07628345, + "memory(GiB)": 85.12, + "step": 3980, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.65653524, + "epoch": 3.12, + "learning_rate": 8.839538311641451e-05, + "loss": 1.13923292, + "memory(GiB)": 85.12, + "step": 3985, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.67333913, + "epoch": 3.13, + "learning_rate": 8.836077424595657e-05, + "loss": 1.07015495, + "memory(GiB)": 85.12, + "step": 3990, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.67381926, + "epoch": 3.13, + "learning_rate": 8.832612064127448e-05, + "loss": 1.06587915, + "memory(GiB)": 85.12, + "step": 3995, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.66329141, + "epoch": 3.13, + "learning_rate": 8.829142234277936e-05, + "loss": 1.10319233, + "memory(GiB)": 85.12, + "step": 4000, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.68063631, + "epoch": 3.14, + "learning_rate": 8.825667939093449e-05, + "loss": 1.07182493, + "memory(GiB)": 85.12, + "step": 4005, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.66322513, + "epoch": 3.14, + "learning_rate": 8.822189182625516e-05, + "loss": 1.12689009, + "memory(GiB)": 85.12, + "step": 4010, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.67340002, + "epoch": 3.15, + "learning_rate": 8.818705968930874e-05, + "loss": 1.07936611, + "memory(GiB)": 85.12, + "step": 4015, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.66903429, + "epoch": 3.15, + "learning_rate": 8.815218302071452e-05, + "loss": 1.10216112, + "memory(GiB)": 85.12, + "step": 4020, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.66168714, + "epoch": 3.15, + "learning_rate": 8.811726186114377e-05, + "loss": 1.117346, + "memory(GiB)": 85.12, + "step": 4025, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.66286302, + "epoch": 3.16, + "learning_rate": 8.808229625131963e-05, + "loss": 1.10122309, + "memory(GiB)": 85.12, + "step": 4030, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.67067194, + "epoch": 3.16, + "learning_rate": 8.804728623201706e-05, + "loss": 1.09093103, + "memory(GiB)": 85.12, + "step": 4035, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.66602073, + "epoch": 3.17, + "learning_rate": 8.801223184406283e-05, + "loss": 1.0800581, + "memory(GiB)": 85.12, + "step": 4040, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.67198768, + "epoch": 3.17, + "learning_rate": 8.797713312833541e-05, + "loss": 1.05843801, + "memory(GiB)": 85.12, + "step": 4045, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.67939711, + "epoch": 3.17, + "learning_rate": 8.794199012576502e-05, + "loss": 1.04836597, + "memory(GiB)": 85.12, + "step": 4050, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.65837584, + "epoch": 3.18, + "learning_rate": 8.790680287733349e-05, + "loss": 1.11400471, + "memory(GiB)": 85.12, + "step": 4055, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.68302851, + "epoch": 3.18, + "learning_rate": 8.787157142407422e-05, + "loss": 1.04046106, + "memory(GiB)": 85.12, + "step": 4060, + "train_speed(iter/s)": 0.035256 + }, + { + "acc": 0.66324239, + "epoch": 3.19, + "learning_rate": 8.783629580707225e-05, + "loss": 1.13401537, + "memory(GiB)": 85.12, + "step": 4065, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.66952362, + "epoch": 3.19, + "learning_rate": 8.780097606746404e-05, + "loss": 1.10449305, + "memory(GiB)": 85.12, + "step": 4070, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.6671958, + "epoch": 3.19, + "learning_rate": 8.77656122464375e-05, + "loss": 1.09806108, + "memory(GiB)": 85.12, + "step": 4075, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.66547656, + "epoch": 3.2, + "learning_rate": 8.773020438523202e-05, + "loss": 1.10695295, + "memory(GiB)": 85.12, + "step": 4080, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.66510615, + "epoch": 3.2, + "learning_rate": 8.769475252513826e-05, + "loss": 1.13047438, + "memory(GiB)": 85.12, + "step": 4085, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.66734986, + "epoch": 3.21, + "learning_rate": 8.765925670749824e-05, + "loss": 1.09393444, + "memory(GiB)": 85.12, + "step": 4090, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.67996044, + "epoch": 3.21, + "learning_rate": 8.762371697370523e-05, + "loss": 1.06481838, + "memory(GiB)": 85.12, + "step": 4095, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.6714335, + "epoch": 3.21, + "learning_rate": 8.75881333652037e-05, + "loss": 1.10486279, + "memory(GiB)": 85.12, + "step": 4100, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.67522779, + "epoch": 3.22, + "learning_rate": 8.755250592348933e-05, + "loss": 1.07262745, + "memory(GiB)": 85.12, + "step": 4105, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.67835989, + "epoch": 3.22, + "learning_rate": 8.751683469010887e-05, + "loss": 1.05402975, + "memory(GiB)": 85.12, + "step": 4110, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.6806859, + "epoch": 3.22, + "learning_rate": 8.748111970666011e-05, + "loss": 1.01472445, + "memory(GiB)": 85.12, + "step": 4115, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.67549682, + "epoch": 3.23, + "learning_rate": 8.744536101479195e-05, + "loss": 1.10141077, + "memory(GiB)": 85.12, + "step": 4120, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.67967439, + "epoch": 3.23, + "learning_rate": 8.74095586562042e-05, + "loss": 1.07214756, + "memory(GiB)": 85.12, + "step": 4125, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.66929421, + "epoch": 3.24, + "learning_rate": 8.737371267264757e-05, + "loss": 1.09711647, + "memory(GiB)": 85.12, + "step": 4130, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.66597781, + "epoch": 3.24, + "learning_rate": 8.733782310592369e-05, + "loss": 1.09240694, + "memory(GiB)": 85.12, + "step": 4135, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.68246903, + "epoch": 3.24, + "learning_rate": 8.7301889997885e-05, + "loss": 1.04920025, + "memory(GiB)": 85.12, + "step": 4140, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.66437941, + "epoch": 3.25, + "learning_rate": 8.726591339043472e-05, + "loss": 1.12496767, + "memory(GiB)": 85.12, + "step": 4145, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.66526365, + "epoch": 3.25, + "learning_rate": 8.722989332552681e-05, + "loss": 1.12980747, + "memory(GiB)": 85.12, + "step": 4150, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.65992541, + "epoch": 3.26, + "learning_rate": 8.719382984516581e-05, + "loss": 1.1206811, + "memory(GiB)": 85.12, + "step": 4155, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.66868792, + "epoch": 3.26, + "learning_rate": 8.715772299140705e-05, + "loss": 1.09060631, + "memory(GiB)": 85.12, + "step": 4160, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.66484022, + "epoch": 3.26, + "learning_rate": 8.712157280635634e-05, + "loss": 1.09277716, + "memory(GiB)": 85.12, + "step": 4165, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.67286205, + "epoch": 3.27, + "learning_rate": 8.708537933216999e-05, + "loss": 1.093993, + "memory(GiB)": 85.12, + "step": 4170, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.66262193, + "epoch": 3.27, + "learning_rate": 8.704914261105488e-05, + "loss": 1.10075159, + "memory(GiB)": 85.12, + "step": 4175, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.6764492, + "epoch": 3.28, + "learning_rate": 8.701286268526827e-05, + "loss": 1.05436106, + "memory(GiB)": 85.12, + "step": 4180, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.66421719, + "epoch": 3.28, + "learning_rate": 8.69765395971178e-05, + "loss": 1.12668571, + "memory(GiB)": 85.12, + "step": 4185, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.66706834, + "epoch": 3.28, + "learning_rate": 8.694017338896149e-05, + "loss": 1.0926302, + "memory(GiB)": 85.12, + "step": 4190, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.67663679, + "epoch": 3.29, + "learning_rate": 8.690376410320758e-05, + "loss": 1.04948025, + "memory(GiB)": 85.12, + "step": 4195, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.66628132, + "epoch": 3.29, + "learning_rate": 8.686731178231459e-05, + "loss": 1.08100224, + "memory(GiB)": 85.12, + "step": 4200, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.67406206, + "epoch": 3.3, + "learning_rate": 8.68308164687912e-05, + "loss": 1.07763453, + "memory(GiB)": 85.12, + "step": 4205, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.66126747, + "epoch": 3.3, + "learning_rate": 8.679427820519625e-05, + "loss": 1.11055937, + "memory(GiB)": 85.12, + "step": 4210, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.65678339, + "epoch": 3.3, + "learning_rate": 8.675769703413863e-05, + "loss": 1.12440147, + "memory(GiB)": 85.12, + "step": 4215, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.66874876, + "epoch": 3.31, + "learning_rate": 8.672107299827732e-05, + "loss": 1.10668583, + "memory(GiB)": 85.12, + "step": 4220, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.6592135, + "epoch": 3.31, + "learning_rate": 8.668440614032124e-05, + "loss": 1.11936607, + "memory(GiB)": 85.12, + "step": 4225, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.67665238, + "epoch": 3.32, + "learning_rate": 8.664769650302926e-05, + "loss": 1.07830534, + "memory(GiB)": 85.12, + "step": 4230, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.66774406, + "epoch": 3.32, + "learning_rate": 8.661094412921014e-05, + "loss": 1.10304251, + "memory(GiB)": 85.12, + "step": 4235, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.68347654, + "epoch": 3.32, + "learning_rate": 8.657414906172247e-05, + "loss": 1.0130826, + "memory(GiB)": 85.12, + "step": 4240, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.65933442, + "epoch": 3.33, + "learning_rate": 8.653731134347464e-05, + "loss": 1.09179993, + "memory(GiB)": 85.12, + "step": 4245, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.66771193, + "epoch": 3.33, + "learning_rate": 8.650043101742478e-05, + "loss": 1.09305897, + "memory(GiB)": 85.12, + "step": 4250, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.65708385, + "epoch": 3.33, + "learning_rate": 8.646350812658069e-05, + "loss": 1.12786036, + "memory(GiB)": 85.12, + "step": 4255, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.67746892, + "epoch": 3.34, + "learning_rate": 8.642654271399979e-05, + "loss": 1.06408577, + "memory(GiB)": 85.12, + "step": 4260, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.66043816, + "epoch": 3.34, + "learning_rate": 8.638953482278915e-05, + "loss": 1.12865877, + "memory(GiB)": 85.12, + "step": 4265, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.67736588, + "epoch": 3.35, + "learning_rate": 8.63524844961053e-05, + "loss": 1.05247889, + "memory(GiB)": 85.12, + "step": 4270, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.67974887, + "epoch": 3.35, + "learning_rate": 8.631539177715433e-05, + "loss": 1.06267481, + "memory(GiB)": 85.12, + "step": 4275, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.67399712, + "epoch": 3.35, + "learning_rate": 8.62782567091917e-05, + "loss": 1.07866135, + "memory(GiB)": 85.12, + "step": 4280, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.68216677, + "epoch": 3.36, + "learning_rate": 8.624107933552229e-05, + "loss": 1.04396935, + "memory(GiB)": 85.12, + "step": 4285, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.65968661, + "epoch": 3.36, + "learning_rate": 8.620385969950031e-05, + "loss": 1.09188328, + "memory(GiB)": 85.12, + "step": 4290, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.67161303, + "epoch": 3.37, + "learning_rate": 8.616659784452925e-05, + "loss": 1.08730984, + "memory(GiB)": 85.12, + "step": 4295, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.67344618, + "epoch": 3.37, + "learning_rate": 8.612929381406183e-05, + "loss": 1.06549606, + "memory(GiB)": 85.12, + "step": 4300, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.67476172, + "epoch": 3.37, + "learning_rate": 8.609194765159996e-05, + "loss": 1.08071413, + "memory(GiB)": 85.12, + "step": 4305, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.67202482, + "epoch": 3.38, + "learning_rate": 8.605455940069468e-05, + "loss": 1.09838877, + "memory(GiB)": 85.12, + "step": 4310, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.67025285, + "epoch": 3.38, + "learning_rate": 8.601712910494612e-05, + "loss": 1.07920656, + "memory(GiB)": 85.12, + "step": 4315, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.67839556, + "epoch": 3.39, + "learning_rate": 8.597965680800342e-05, + "loss": 1.06685658, + "memory(GiB)": 85.12, + "step": 4320, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.67582765, + "epoch": 3.39, + "learning_rate": 8.594214255356472e-05, + "loss": 1.07480011, + "memory(GiB)": 85.12, + "step": 4325, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.6812016, + "epoch": 3.39, + "learning_rate": 8.590458638537706e-05, + "loss": 1.0496768, + "memory(GiB)": 85.12, + "step": 4330, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.68081627, + "epoch": 3.4, + "learning_rate": 8.58669883472364e-05, + "loss": 1.03980932, + "memory(GiB)": 85.12, + "step": 4335, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.66322517, + "epoch": 3.4, + "learning_rate": 8.58293484829875e-05, + "loss": 1.10416336, + "memory(GiB)": 85.12, + "step": 4340, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.67386465, + "epoch": 3.41, + "learning_rate": 8.579166683652388e-05, + "loss": 1.06861038, + "memory(GiB)": 85.12, + "step": 4345, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.67121987, + "epoch": 3.41, + "learning_rate": 8.575394345178782e-05, + "loss": 1.09512386, + "memory(GiB)": 85.12, + "step": 4350, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.66761322, + "epoch": 3.41, + "learning_rate": 8.571617837277027e-05, + "loss": 1.085077, + "memory(GiB)": 85.12, + "step": 4355, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.68047543, + "epoch": 3.42, + "learning_rate": 8.567837164351075e-05, + "loss": 1.06838808, + "memory(GiB)": 85.12, + "step": 4360, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.68031921, + "epoch": 3.42, + "learning_rate": 8.56405233080974e-05, + "loss": 1.06294222, + "memory(GiB)": 85.12, + "step": 4365, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.66957574, + "epoch": 3.42, + "learning_rate": 8.560263341066689e-05, + "loss": 1.09513245, + "memory(GiB)": 85.12, + "step": 4370, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.66336145, + "epoch": 3.43, + "learning_rate": 8.55647019954043e-05, + "loss": 1.09110317, + "memory(GiB)": 85.12, + "step": 4375, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.67718058, + "epoch": 3.43, + "learning_rate": 8.552672910654317e-05, + "loss": 1.07627859, + "memory(GiB)": 85.12, + "step": 4380, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.66453261, + "epoch": 3.44, + "learning_rate": 8.548871478836542e-05, + "loss": 1.11319456, + "memory(GiB)": 85.12, + "step": 4385, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.68078775, + "epoch": 3.44, + "learning_rate": 8.545065908520123e-05, + "loss": 1.04445333, + "memory(GiB)": 85.12, + "step": 4390, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.67710371, + "epoch": 3.44, + "learning_rate": 8.541256204142905e-05, + "loss": 1.04607086, + "memory(GiB)": 85.12, + "step": 4395, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.67265029, + "epoch": 3.45, + "learning_rate": 8.53744237014756e-05, + "loss": 1.07834921, + "memory(GiB)": 85.12, + "step": 4400, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.67727437, + "epoch": 3.45, + "learning_rate": 8.533624410981567e-05, + "loss": 1.06278925, + "memory(GiB)": 85.12, + "step": 4405, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.66954894, + "epoch": 3.46, + "learning_rate": 8.529802331097223e-05, + "loss": 1.08836241, + "memory(GiB)": 85.12, + "step": 4410, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.67296772, + "epoch": 3.46, + "learning_rate": 8.525976134951626e-05, + "loss": 1.06974878, + "memory(GiB)": 85.12, + "step": 4415, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.66709204, + "epoch": 3.46, + "learning_rate": 8.522145827006675e-05, + "loss": 1.06892281, + "memory(GiB)": 85.12, + "step": 4420, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.68249197, + "epoch": 3.47, + "learning_rate": 8.518311411729068e-05, + "loss": 1.05143995, + "memory(GiB)": 85.12, + "step": 4425, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.65883646, + "epoch": 3.47, + "learning_rate": 8.514472893590285e-05, + "loss": 1.10914154, + "memory(GiB)": 85.12, + "step": 4430, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.67034783, + "epoch": 3.48, + "learning_rate": 8.510630277066594e-05, + "loss": 1.06353884, + "memory(GiB)": 85.12, + "step": 4435, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.67994227, + "epoch": 3.48, + "learning_rate": 8.506783566639045e-05, + "loss": 1.07216578, + "memory(GiB)": 85.12, + "step": 4440, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.67223606, + "epoch": 3.48, + "learning_rate": 8.502932766793462e-05, + "loss": 1.06527033, + "memory(GiB)": 85.12, + "step": 4445, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.67538528, + "epoch": 3.49, + "learning_rate": 8.49907788202043e-05, + "loss": 1.04301472, + "memory(GiB)": 85.12, + "step": 4450, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.68265018, + "epoch": 3.49, + "learning_rate": 8.49521891681531e-05, + "loss": 1.05498791, + "memory(GiB)": 85.12, + "step": 4455, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.67961888, + "epoch": 3.5, + "learning_rate": 8.491355875678211e-05, + "loss": 1.06846848, + "memory(GiB)": 85.12, + "step": 4460, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.66131968, + "epoch": 3.5, + "learning_rate": 8.487488763114e-05, + "loss": 1.09861307, + "memory(GiB)": 85.12, + "step": 4465, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.66888275, + "epoch": 3.5, + "learning_rate": 8.483617583632292e-05, + "loss": 1.08226824, + "memory(GiB)": 85.12, + "step": 4470, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.67894645, + "epoch": 3.51, + "learning_rate": 8.47974234174744e-05, + "loss": 1.07799463, + "memory(GiB)": 85.12, + "step": 4475, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.66169238, + "epoch": 3.51, + "learning_rate": 8.475863041978543e-05, + "loss": 1.10147877, + "memory(GiB)": 85.12, + "step": 4480, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.66542506, + "epoch": 3.51, + "learning_rate": 8.471979688849424e-05, + "loss": 1.09339066, + "memory(GiB)": 85.12, + "step": 4485, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.66197405, + "epoch": 3.52, + "learning_rate": 8.468092286888634e-05, + "loss": 1.11503725, + "memory(GiB)": 85.12, + "step": 4490, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.66685591, + "epoch": 3.52, + "learning_rate": 8.464200840629451e-05, + "loss": 1.07581072, + "memory(GiB)": 85.12, + "step": 4495, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.67047796, + "epoch": 3.53, + "learning_rate": 8.460305354609863e-05, + "loss": 1.07585983, + "memory(GiB)": 85.12, + "step": 4500, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.66793823, + "epoch": 3.53, + "learning_rate": 8.456405833372572e-05, + "loss": 1.10174761, + "memory(GiB)": 85.12, + "step": 4505, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.67698441, + "epoch": 3.53, + "learning_rate": 8.452502281464986e-05, + "loss": 1.0740612, + "memory(GiB)": 85.12, + "step": 4510, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.68095536, + "epoch": 3.54, + "learning_rate": 8.448594703439213e-05, + "loss": 1.05643806, + "memory(GiB)": 85.12, + "step": 4515, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.67278633, + "epoch": 3.54, + "learning_rate": 8.444683103852051e-05, + "loss": 1.06161213, + "memory(GiB)": 85.12, + "step": 4520, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.65890441, + "epoch": 3.55, + "learning_rate": 8.440767487264997e-05, + "loss": 1.12471752, + "memory(GiB)": 85.12, + "step": 4525, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.65354443, + "epoch": 3.55, + "learning_rate": 8.436847858244226e-05, + "loss": 1.13845463, + "memory(GiB)": 85.12, + "step": 4530, + "train_speed(iter/s)": 0.035256 + }, + { + "acc": 0.67117491, + "epoch": 3.55, + "learning_rate": 8.432924221360594e-05, + "loss": 1.06492109, + "memory(GiB)": 85.12, + "step": 4535, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.67677555, + "epoch": 3.56, + "learning_rate": 8.42899658118963e-05, + "loss": 1.06711979, + "memory(GiB)": 85.12, + "step": 4540, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.67237253, + "epoch": 3.56, + "learning_rate": 8.425064942311532e-05, + "loss": 1.064182, + "memory(GiB)": 85.12, + "step": 4545, + "train_speed(iter/s)": 0.035256 + }, + { + "acc": 0.67695851, + "epoch": 3.57, + "learning_rate": 8.42112930931116e-05, + "loss": 1.04225302, + "memory(GiB)": 85.12, + "step": 4550, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.67655463, + "epoch": 3.57, + "learning_rate": 8.417189686778034e-05, + "loss": 1.09111786, + "memory(GiB)": 85.12, + "step": 4555, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.67512894, + "epoch": 3.57, + "learning_rate": 8.413246079306325e-05, + "loss": 1.06245384, + "memory(GiB)": 85.12, + "step": 4560, + "train_speed(iter/s)": 0.035258 + }, + { + "acc": 0.6641572, + "epoch": 3.58, + "learning_rate": 8.40929849149485e-05, + "loss": 1.089466, + "memory(GiB)": 85.12, + "step": 4565, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.67081413, + "epoch": 3.58, + "learning_rate": 8.405346927947071e-05, + "loss": 1.06199369, + "memory(GiB)": 85.12, + "step": 4570, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.67029018, + "epoch": 3.59, + "learning_rate": 8.401391393271081e-05, + "loss": 1.10045147, + "memory(GiB)": 85.12, + "step": 4575, + "train_speed(iter/s)": 0.03526 + }, + { + "acc": 0.66921582, + "epoch": 3.59, + "learning_rate": 8.397431892079608e-05, + "loss": 1.09112959, + "memory(GiB)": 85.12, + "step": 4580, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.66224871, + "epoch": 3.59, + "learning_rate": 8.393468428990005e-05, + "loss": 1.09612188, + "memory(GiB)": 85.12, + "step": 4585, + "train_speed(iter/s)": 0.035258 + }, + { + "acc": 0.68403311, + "epoch": 3.6, + "learning_rate": 8.389501008624244e-05, + "loss": 1.06564083, + "memory(GiB)": 85.12, + "step": 4590, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.67375054, + "epoch": 3.6, + "learning_rate": 8.385529635608913e-05, + "loss": 1.08890104, + "memory(GiB)": 85.12, + "step": 4595, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.66678824, + "epoch": 3.61, + "learning_rate": 8.381554314575206e-05, + "loss": 1.07385893, + "memory(GiB)": 85.12, + "step": 4600, + "train_speed(iter/s)": 0.03526 + }, + { + "acc": 0.67612591, + "epoch": 3.61, + "learning_rate": 8.377575050158929e-05, + "loss": 1.07160501, + "memory(GiB)": 85.12, + "step": 4605, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.66233315, + "epoch": 3.61, + "learning_rate": 8.373591847000476e-05, + "loss": 1.12124157, + "memory(GiB)": 85.12, + "step": 4610, + "train_speed(iter/s)": 0.035258 + }, + { + "acc": 0.66368275, + "epoch": 3.62, + "learning_rate": 8.369604709744842e-05, + "loss": 1.12185488, + "memory(GiB)": 85.12, + "step": 4615, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.67620149, + "epoch": 3.62, + "learning_rate": 8.365613643041606e-05, + "loss": 1.0558217, + "memory(GiB)": 85.12, + "step": 4620, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.67760687, + "epoch": 3.62, + "learning_rate": 8.361618651544932e-05, + "loss": 1.06278458, + "memory(GiB)": 85.12, + "step": 4625, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.67772574, + "epoch": 3.63, + "learning_rate": 8.357619739913557e-05, + "loss": 1.05741844, + "memory(GiB)": 85.12, + "step": 4630, + "train_speed(iter/s)": 0.035258 + }, + { + "acc": 0.68779845, + "epoch": 3.63, + "learning_rate": 8.353616912810793e-05, + "loss": 1.03406744, + "memory(GiB)": 85.12, + "step": 4635, + "train_speed(iter/s)": 0.035258 + }, + { + "acc": 0.67806859, + "epoch": 3.64, + "learning_rate": 8.349610174904517e-05, + "loss": 1.07612123, + "memory(GiB)": 85.12, + "step": 4640, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.65736609, + "epoch": 3.64, + "learning_rate": 8.345599530867166e-05, + "loss": 1.11663847, + "memory(GiB)": 85.12, + "step": 4645, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.66991482, + "epoch": 3.64, + "learning_rate": 8.341584985375733e-05, + "loss": 1.08915453, + "memory(GiB)": 85.12, + "step": 4650, + "train_speed(iter/s)": 0.035256 + }, + { + "acc": 0.6729054, + "epoch": 3.65, + "learning_rate": 8.337566543111756e-05, + "loss": 1.06626625, + "memory(GiB)": 85.12, + "step": 4655, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.6768261, + "epoch": 3.65, + "learning_rate": 8.333544208761326e-05, + "loss": 1.04373264, + "memory(GiB)": 85.12, + "step": 4660, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.66950965, + "epoch": 3.66, + "learning_rate": 8.329517987015067e-05, + "loss": 1.11504803, + "memory(GiB)": 85.12, + "step": 4665, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.67875061, + "epoch": 3.66, + "learning_rate": 8.325487882568138e-05, + "loss": 1.05688515, + "memory(GiB)": 85.12, + "step": 4670, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.66837807, + "epoch": 3.66, + "learning_rate": 8.321453900120223e-05, + "loss": 1.07563877, + "memory(GiB)": 85.12, + "step": 4675, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.68004751, + "epoch": 3.67, + "learning_rate": 8.31741604437553e-05, + "loss": 1.06593189, + "memory(GiB)": 85.12, + "step": 4680, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.68714409, + "epoch": 3.67, + "learning_rate": 8.313374320042785e-05, + "loss": 1.03218222, + "memory(GiB)": 85.12, + "step": 4685, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.67838907, + "epoch": 3.68, + "learning_rate": 8.309328731835228e-05, + "loss": 1.05175476, + "memory(GiB)": 85.12, + "step": 4690, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.66823692, + "epoch": 3.68, + "learning_rate": 8.305279284470595e-05, + "loss": 1.08211584, + "memory(GiB)": 85.12, + "step": 4695, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.65161371, + "epoch": 3.68, + "learning_rate": 8.301225982671133e-05, + "loss": 1.13491449, + "memory(GiB)": 85.12, + "step": 4700, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.67203436, + "epoch": 3.69, + "learning_rate": 8.29716883116358e-05, + "loss": 1.08132124, + "memory(GiB)": 85.12, + "step": 4705, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.66406636, + "epoch": 3.69, + "learning_rate": 8.293107834679159e-05, + "loss": 1.11655197, + "memory(GiB)": 85.12, + "step": 4710, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.67401237, + "epoch": 3.7, + "learning_rate": 8.289042997953585e-05, + "loss": 1.09698009, + "memory(GiB)": 85.12, + "step": 4715, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.66169372, + "epoch": 3.7, + "learning_rate": 8.284974325727043e-05, + "loss": 1.13821402, + "memory(GiB)": 85.12, + "step": 4720, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.66252351, + "epoch": 3.7, + "learning_rate": 8.280901822744198e-05, + "loss": 1.12066956, + "memory(GiB)": 85.12, + "step": 4725, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.66670227, + "epoch": 3.71, + "learning_rate": 8.276825493754176e-05, + "loss": 1.07495327, + "memory(GiB)": 85.12, + "step": 4730, + "train_speed(iter/s)": 0.035256 + }, + { + "acc": 0.67759528, + "epoch": 3.71, + "learning_rate": 8.27274534351057e-05, + "loss": 1.06112547, + "memory(GiB)": 85.12, + "step": 4735, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.66779099, + "epoch": 3.71, + "learning_rate": 8.268661376771425e-05, + "loss": 1.07864561, + "memory(GiB)": 85.12, + "step": 4740, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.69414735, + "epoch": 3.72, + "learning_rate": 8.264573598299238e-05, + "loss": 1.01207485, + "memory(GiB)": 85.12, + "step": 4745, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.67531772, + "epoch": 3.72, + "learning_rate": 8.260482012860957e-05, + "loss": 1.05929461, + "memory(GiB)": 85.12, + "step": 4750, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.67257948, + "epoch": 3.73, + "learning_rate": 8.256386625227955e-05, + "loss": 1.09518213, + "memory(GiB)": 85.12, + "step": 4755, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.67445426, + "epoch": 3.73, + "learning_rate": 8.252287440176053e-05, + "loss": 1.09971008, + "memory(GiB)": 85.12, + "step": 4760, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.66840658, + "epoch": 3.73, + "learning_rate": 8.248184462485493e-05, + "loss": 1.10356541, + "memory(GiB)": 85.12, + "step": 4765, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.66937456, + "epoch": 3.74, + "learning_rate": 8.244077696940944e-05, + "loss": 1.09562979, + "memory(GiB)": 85.12, + "step": 4770, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.66603575, + "epoch": 3.74, + "learning_rate": 8.239967148331488e-05, + "loss": 1.08866987, + "memory(GiB)": 85.12, + "step": 4775, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.66549649, + "epoch": 3.75, + "learning_rate": 8.235852821450622e-05, + "loss": 1.11980772, + "memory(GiB)": 85.12, + "step": 4780, + "train_speed(iter/s)": 0.035256 + }, + { + "acc": 0.66984949, + "epoch": 3.75, + "learning_rate": 8.231734721096246e-05, + "loss": 1.10109053, + "memory(GiB)": 85.12, + "step": 4785, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.65589933, + "epoch": 3.75, + "learning_rate": 8.227612852070665e-05, + "loss": 1.13999405, + "memory(GiB)": 85.12, + "step": 4790, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.65890169, + "epoch": 3.76, + "learning_rate": 8.223487219180573e-05, + "loss": 1.12389488, + "memory(GiB)": 85.12, + "step": 4795, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.68056979, + "epoch": 3.76, + "learning_rate": 8.219357827237056e-05, + "loss": 1.06554289, + "memory(GiB)": 85.12, + "step": 4800, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.67042527, + "epoch": 3.77, + "learning_rate": 8.215224681055585e-05, + "loss": 1.11027889, + "memory(GiB)": 85.12, + "step": 4805, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.67441998, + "epoch": 3.77, + "learning_rate": 8.211087785456005e-05, + "loss": 1.06829157, + "memory(GiB)": 85.12, + "step": 4810, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.67253809, + "epoch": 3.77, + "learning_rate": 8.206947145262541e-05, + "loss": 1.08531771, + "memory(GiB)": 85.12, + "step": 4815, + "train_speed(iter/s)": 0.035256 + }, + { + "acc": 0.66685085, + "epoch": 3.78, + "learning_rate": 8.202802765303776e-05, + "loss": 1.10216599, + "memory(GiB)": 85.12, + "step": 4820, + "train_speed(iter/s)": 0.035256 + }, + { + "acc": 0.66690621, + "epoch": 3.78, + "learning_rate": 8.198654650412659e-05, + "loss": 1.10417929, + "memory(GiB)": 85.12, + "step": 4825, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.67201991, + "epoch": 3.79, + "learning_rate": 8.194502805426494e-05, + "loss": 1.0637537, + "memory(GiB)": 85.12, + "step": 4830, + "train_speed(iter/s)": 0.035256 + }, + { + "acc": 0.66858087, + "epoch": 3.79, + "learning_rate": 8.190347235186932e-05, + "loss": 1.07713585, + "memory(GiB)": 85.12, + "step": 4835, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.69056597, + "epoch": 3.79, + "learning_rate": 8.186187944539973e-05, + "loss": 1.01855545, + "memory(GiB)": 85.12, + "step": 4840, + "train_speed(iter/s)": 0.035258 + }, + { + "acc": 0.67178264, + "epoch": 3.8, + "learning_rate": 8.18202493833595e-05, + "loss": 1.08217745, + "memory(GiB)": 85.12, + "step": 4845, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.67628303, + "epoch": 3.8, + "learning_rate": 8.177858221429536e-05, + "loss": 1.05854607, + "memory(GiB)": 85.12, + "step": 4850, + "train_speed(iter/s)": 0.035258 + }, + { + "acc": 0.67847629, + "epoch": 3.8, + "learning_rate": 8.173687798679723e-05, + "loss": 1.06340704, + "memory(GiB)": 85.12, + "step": 4855, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.65820971, + "epoch": 3.81, + "learning_rate": 8.169513674949829e-05, + "loss": 1.13936968, + "memory(GiB)": 85.12, + "step": 4860, + "train_speed(iter/s)": 0.035256 + }, + { + "acc": 0.68040419, + "epoch": 3.81, + "learning_rate": 8.16533585510749e-05, + "loss": 1.04977999, + "memory(GiB)": 85.12, + "step": 4865, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.66686277, + "epoch": 3.82, + "learning_rate": 8.161154344024646e-05, + "loss": 1.10527754, + "memory(GiB)": 85.12, + "step": 4870, + "train_speed(iter/s)": 0.035256 + }, + { + "acc": 0.6596796, + "epoch": 3.82, + "learning_rate": 8.156969146577548e-05, + "loss": 1.11985264, + "memory(GiB)": 85.12, + "step": 4875, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.66763144, + "epoch": 3.82, + "learning_rate": 8.152780267646743e-05, + "loss": 1.08364487, + "memory(GiB)": 85.12, + "step": 4880, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.67398977, + "epoch": 3.83, + "learning_rate": 8.148587712117068e-05, + "loss": 1.07214155, + "memory(GiB)": 85.12, + "step": 4885, + "train_speed(iter/s)": 0.035256 + }, + { + "acc": 0.68034143, + "epoch": 3.83, + "learning_rate": 8.144391484877655e-05, + "loss": 1.0558445, + "memory(GiB)": 85.12, + "step": 4890, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.67083588, + "epoch": 3.84, + "learning_rate": 8.14019159082191e-05, + "loss": 1.0750145, + "memory(GiB)": 85.12, + "step": 4895, + "train_speed(iter/s)": 0.035256 + }, + { + "acc": 0.66514192, + "epoch": 3.84, + "learning_rate": 8.135988034847521e-05, + "loss": 1.10588713, + "memory(GiB)": 85.12, + "step": 4900, + "train_speed(iter/s)": 0.035256 + }, + { + "acc": 0.67397079, + "epoch": 3.84, + "learning_rate": 8.13178082185644e-05, + "loss": 1.05777845, + "memory(GiB)": 85.12, + "step": 4905, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.66549215, + "epoch": 3.85, + "learning_rate": 8.12756995675489e-05, + "loss": 1.09327412, + "memory(GiB)": 85.12, + "step": 4910, + "train_speed(iter/s)": 0.035256 + }, + { + "acc": 0.6661788, + "epoch": 3.85, + "learning_rate": 8.12335544445335e-05, + "loss": 1.09228296, + "memory(GiB)": 85.12, + "step": 4915, + "train_speed(iter/s)": 0.035256 + }, + { + "acc": 0.68010106, + "epoch": 3.86, + "learning_rate": 8.119137289866551e-05, + "loss": 1.06913862, + "memory(GiB)": 85.12, + "step": 4920, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.66819158, + "epoch": 3.86, + "learning_rate": 8.114915497913473e-05, + "loss": 1.09536047, + "memory(GiB)": 85.12, + "step": 4925, + "train_speed(iter/s)": 0.035258 + }, + { + "acc": 0.67470613, + "epoch": 3.86, + "learning_rate": 8.11069007351734e-05, + "loss": 1.06130228, + "memory(GiB)": 85.12, + "step": 4930, + "train_speed(iter/s)": 0.035258 + }, + { + "acc": 0.67194877, + "epoch": 3.87, + "learning_rate": 8.106461021605607e-05, + "loss": 1.06740732, + "memory(GiB)": 85.12, + "step": 4935, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.67518115, + "epoch": 3.87, + "learning_rate": 8.102228347109962e-05, + "loss": 1.09367285, + "memory(GiB)": 85.12, + "step": 4940, + "train_speed(iter/s)": 0.035258 + }, + { + "acc": 0.68004875, + "epoch": 3.88, + "learning_rate": 8.097992054966317e-05, + "loss": 1.05609674, + "memory(GiB)": 85.12, + "step": 4945, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.65617743, + "epoch": 3.88, + "learning_rate": 8.093752150114804e-05, + "loss": 1.11393881, + "memory(GiB)": 85.12, + "step": 4950, + "train_speed(iter/s)": 0.035258 + }, + { + "acc": 0.66117711, + "epoch": 3.88, + "learning_rate": 8.089508637499765e-05, + "loss": 1.12456036, + "memory(GiB)": 85.12, + "step": 4955, + "train_speed(iter/s)": 0.035258 + }, + { + "acc": 0.66858802, + "epoch": 3.89, + "learning_rate": 8.085261522069752e-05, + "loss": 1.1040514, + "memory(GiB)": 85.12, + "step": 4960, + "train_speed(iter/s)": 0.035258 + }, + { + "acc": 0.68399754, + "epoch": 3.89, + "learning_rate": 8.081010808777517e-05, + "loss": 1.01230736, + "memory(GiB)": 85.12, + "step": 4965, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.67364149, + "epoch": 3.89, + "learning_rate": 8.07675650258001e-05, + "loss": 1.05729342, + "memory(GiB)": 85.12, + "step": 4970, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.66905775, + "epoch": 3.9, + "learning_rate": 8.072498608438363e-05, + "loss": 1.10162392, + "memory(GiB)": 85.12, + "step": 4975, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.67838883, + "epoch": 3.9, + "learning_rate": 8.068237131317904e-05, + "loss": 1.03004761, + "memory(GiB)": 85.12, + "step": 4980, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.6654243, + "epoch": 3.91, + "learning_rate": 8.063972076188132e-05, + "loss": 1.09254379, + "memory(GiB)": 85.12, + "step": 4985, + "train_speed(iter/s)": 0.03526 + }, + { + "acc": 0.6707293, + "epoch": 3.91, + "learning_rate": 8.059703448022715e-05, + "loss": 1.06519508, + "memory(GiB)": 85.12, + "step": 4990, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.67212982, + "epoch": 3.91, + "learning_rate": 8.055431251799499e-05, + "loss": 1.09433947, + "memory(GiB)": 85.12, + "step": 4995, + "train_speed(iter/s)": 0.03526 + }, + { + "acc": 0.65150967, + "epoch": 3.92, + "learning_rate": 8.051155492500478e-05, + "loss": 1.13897228, + "memory(GiB)": 85.12, + "step": 5000, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.67477775, + "epoch": 3.92, + "learning_rate": 8.046876175111813e-05, + "loss": 1.07944336, + "memory(GiB)": 85.12, + "step": 5005, + "train_speed(iter/s)": 0.03526 + }, + { + "acc": 0.68616705, + "epoch": 3.93, + "learning_rate": 8.042593304623803e-05, + "loss": 1.05852995, + "memory(GiB)": 85.12, + "step": 5010, + "train_speed(iter/s)": 0.035261 + }, + { + "acc": 0.68202815, + "epoch": 3.93, + "learning_rate": 8.0383068860309e-05, + "loss": 1.05745516, + "memory(GiB)": 85.12, + "step": 5015, + "train_speed(iter/s)": 0.035261 + }, + { + "acc": 0.65037746, + "epoch": 3.93, + "learning_rate": 8.034016924331686e-05, + "loss": 1.1531786, + "memory(GiB)": 85.12, + "step": 5020, + "train_speed(iter/s)": 0.035262 + }, + { + "acc": 0.67385054, + "epoch": 3.94, + "learning_rate": 8.029723424528884e-05, + "loss": 1.07016153, + "memory(GiB)": 85.12, + "step": 5025, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.67603893, + "epoch": 3.94, + "learning_rate": 8.025426391629329e-05, + "loss": 1.06786518, + "memory(GiB)": 85.12, + "step": 5030, + "train_speed(iter/s)": 0.035262 + }, + { + "acc": 0.66948352, + "epoch": 3.95, + "learning_rate": 8.021125830643991e-05, + "loss": 1.09177036, + "memory(GiB)": 85.12, + "step": 5035, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.67932153, + "epoch": 3.95, + "learning_rate": 8.016821746587947e-05, + "loss": 1.08133307, + "memory(GiB)": 85.12, + "step": 5040, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.66458163, + "epoch": 3.95, + "learning_rate": 8.01251414448038e-05, + "loss": 1.0914546, + "memory(GiB)": 85.12, + "step": 5045, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.65853357, + "epoch": 3.96, + "learning_rate": 8.00820302934458e-05, + "loss": 1.12704811, + "memory(GiB)": 85.12, + "step": 5050, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.66499863, + "epoch": 3.96, + "learning_rate": 8.003888406207932e-05, + "loss": 1.1266118, + "memory(GiB)": 85.12, + "step": 5055, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.66503258, + "epoch": 3.97, + "learning_rate": 7.999570280101912e-05, + "loss": 1.10257483, + "memory(GiB)": 85.12, + "step": 5060, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.67188749, + "epoch": 3.97, + "learning_rate": 7.995248656062081e-05, + "loss": 1.0711792, + "memory(GiB)": 85.12, + "step": 5065, + "train_speed(iter/s)": 0.035262 + }, + { + "acc": 0.66948404, + "epoch": 3.97, + "learning_rate": 7.99092353912808e-05, + "loss": 1.09926367, + "memory(GiB)": 85.12, + "step": 5070, + "train_speed(iter/s)": 0.035262 + }, + { + "acc": 0.67658277, + "epoch": 3.98, + "learning_rate": 7.986594934343621e-05, + "loss": 1.06956873, + "memory(GiB)": 85.12, + "step": 5075, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.67322578, + "epoch": 3.98, + "learning_rate": 7.982262846756488e-05, + "loss": 1.07685366, + "memory(GiB)": 85.12, + "step": 5080, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.66474819, + "epoch": 3.99, + "learning_rate": 7.977927281418518e-05, + "loss": 1.12432756, + "memory(GiB)": 85.12, + "step": 5085, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.67171674, + "epoch": 3.99, + "learning_rate": 7.973588243385612e-05, + "loss": 1.09030991, + "memory(GiB)": 85.12, + "step": 5090, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.68104601, + "epoch": 3.99, + "learning_rate": 7.969245737717718e-05, + "loss": 1.03982849, + "memory(GiB)": 85.12, + "step": 5095, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.67410316, + "epoch": 4.0, + "learning_rate": 7.964899769478827e-05, + "loss": 1.07849312, + "memory(GiB)": 85.12, + "step": 5100, + "train_speed(iter/s)": 0.035265 + }, + { + "epoch": 4.0, + "eval_acc": 0.6908339594290007, + "eval_loss": 0.9954748749732971, + "eval_runtime": 84.9758, + "eval_samples_per_second": 1.094, + "eval_steps_per_second": 1.094, + "step": 5104 + }, + { + "acc": 0.68671699, + "epoch": 4.0, + "learning_rate": 7.960550343736965e-05, + "loss": 1.04726982, + "memory(GiB)": 85.12, + "step": 5105, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.68219004, + "epoch": 4.0, + "learning_rate": 7.956197465564197e-05, + "loss": 1.02617922, + "memory(GiB)": 85.12, + "step": 5110, + "train_speed(iter/s)": 0.035244 + }, + { + "acc": 0.68375196, + "epoch": 4.01, + "learning_rate": 7.951841140036606e-05, + "loss": 1.01291771, + "memory(GiB)": 85.12, + "step": 5115, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.66873488, + "epoch": 4.01, + "learning_rate": 7.947481372234303e-05, + "loss": 1.0916275, + "memory(GiB)": 85.12, + "step": 5120, + "train_speed(iter/s)": 0.035242 + }, + { + "acc": 0.67837057, + "epoch": 4.02, + "learning_rate": 7.943118167241406e-05, + "loss": 1.07693701, + "memory(GiB)": 85.12, + "step": 5125, + "train_speed(iter/s)": 0.035243 + }, + { + "acc": 0.66617122, + "epoch": 4.02, + "learning_rate": 7.938751530146047e-05, + "loss": 1.08172598, + "memory(GiB)": 85.12, + "step": 5130, + "train_speed(iter/s)": 0.035242 + }, + { + "acc": 0.68194261, + "epoch": 4.02, + "learning_rate": 7.934381466040356e-05, + "loss": 1.04084024, + "memory(GiB)": 85.12, + "step": 5135, + "train_speed(iter/s)": 0.035243 + }, + { + "acc": 0.68469057, + "epoch": 4.03, + "learning_rate": 7.930007980020464e-05, + "loss": 1.02090225, + "memory(GiB)": 85.12, + "step": 5140, + "train_speed(iter/s)": 0.035243 + }, + { + "acc": 0.68128481, + "epoch": 4.03, + "learning_rate": 7.925631077186487e-05, + "loss": 1.04276752, + "memory(GiB)": 85.12, + "step": 5145, + "train_speed(iter/s)": 0.035244 + }, + { + "acc": 0.67515278, + "epoch": 4.04, + "learning_rate": 7.92125076264253e-05, + "loss": 1.05011406, + "memory(GiB)": 85.12, + "step": 5150, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.68157773, + "epoch": 4.04, + "learning_rate": 7.916867041496674e-05, + "loss": 1.08598862, + "memory(GiB)": 85.12, + "step": 5155, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.67785926, + "epoch": 4.04, + "learning_rate": 7.912479918860974e-05, + "loss": 1.05256701, + "memory(GiB)": 85.12, + "step": 5160, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.68705544, + "epoch": 4.05, + "learning_rate": 7.908089399851448e-05, + "loss": 1.04519091, + "memory(GiB)": 85.12, + "step": 5165, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.67090273, + "epoch": 4.05, + "learning_rate": 7.903695489588083e-05, + "loss": 1.08642006, + "memory(GiB)": 85.12, + "step": 5170, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.67382889, + "epoch": 4.06, + "learning_rate": 7.899298193194811e-05, + "loss": 1.07724438, + "memory(GiB)": 85.12, + "step": 5175, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.69452157, + "epoch": 4.06, + "learning_rate": 7.894897515799518e-05, + "loss": 0.98748884, + "memory(GiB)": 85.12, + "step": 5180, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.66965647, + "epoch": 4.06, + "learning_rate": 7.890493462534034e-05, + "loss": 1.06002617, + "memory(GiB)": 85.12, + "step": 5185, + "train_speed(iter/s)": 0.035243 + }, + { + "acc": 0.6691617, + "epoch": 4.07, + "learning_rate": 7.886086038534122e-05, + "loss": 1.05882254, + "memory(GiB)": 85.12, + "step": 5190, + "train_speed(iter/s)": 0.035244 + }, + { + "acc": 0.67491331, + "epoch": 4.07, + "learning_rate": 7.881675248939476e-05, + "loss": 1.06821861, + "memory(GiB)": 85.12, + "step": 5195, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.68093872, + "epoch": 4.08, + "learning_rate": 7.877261098893719e-05, + "loss": 1.04844933, + "memory(GiB)": 85.12, + "step": 5200, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.67266617, + "epoch": 4.08, + "learning_rate": 7.872843593544388e-05, + "loss": 1.07000122, + "memory(GiB)": 85.12, + "step": 5205, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.67317371, + "epoch": 4.08, + "learning_rate": 7.868422738042935e-05, + "loss": 1.09329424, + "memory(GiB)": 85.12, + "step": 5210, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.66858578, + "epoch": 4.09, + "learning_rate": 7.863998537544719e-05, + "loss": 1.08054285, + "memory(GiB)": 85.12, + "step": 5215, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.66879354, + "epoch": 4.09, + "learning_rate": 7.859570997208998e-05, + "loss": 1.08357344, + "memory(GiB)": 85.12, + "step": 5220, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.66112986, + "epoch": 4.09, + "learning_rate": 7.855140122198927e-05, + "loss": 1.1116806, + "memory(GiB)": 85.12, + "step": 5225, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.67013917, + "epoch": 4.1, + "learning_rate": 7.850705917681549e-05, + "loss": 1.0776103, + "memory(GiB)": 85.12, + "step": 5230, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.67974381, + "epoch": 4.1, + "learning_rate": 7.846268388827789e-05, + "loss": 1.06719589, + "memory(GiB)": 85.12, + "step": 5235, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.6868772, + "epoch": 4.11, + "learning_rate": 7.841827540812447e-05, + "loss": 1.02366819, + "memory(GiB)": 85.12, + "step": 5240, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.67168031, + "epoch": 4.11, + "learning_rate": 7.837383378814197e-05, + "loss": 1.08055744, + "memory(GiB)": 85.12, + "step": 5245, + "train_speed(iter/s)": 0.035243 + }, + { + "acc": 0.67559457, + "epoch": 4.11, + "learning_rate": 7.832935908015578e-05, + "loss": 1.04622164, + "memory(GiB)": 85.12, + "step": 5250, + "train_speed(iter/s)": 0.035244 + }, + { + "acc": 0.66986046, + "epoch": 4.12, + "learning_rate": 7.828485133602981e-05, + "loss": 1.088204, + "memory(GiB)": 85.12, + "step": 5255, + "train_speed(iter/s)": 0.035244 + }, + { + "acc": 0.68791199, + "epoch": 4.12, + "learning_rate": 7.824031060766662e-05, + "loss": 1.02517748, + "memory(GiB)": 85.12, + "step": 5260, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.67395439, + "epoch": 4.13, + "learning_rate": 7.819573694700707e-05, + "loss": 1.04129019, + "memory(GiB)": 85.12, + "step": 5265, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.67076187, + "epoch": 4.13, + "learning_rate": 7.815113040603057e-05, + "loss": 1.09466763, + "memory(GiB)": 85.12, + "step": 5270, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.68308926, + "epoch": 4.13, + "learning_rate": 7.810649103675478e-05, + "loss": 1.01522207, + "memory(GiB)": 85.12, + "step": 5275, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.66764603, + "epoch": 4.14, + "learning_rate": 7.80618188912357e-05, + "loss": 1.10394659, + "memory(GiB)": 85.12, + "step": 5280, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.6767786, + "epoch": 4.14, + "learning_rate": 7.801711402156752e-05, + "loss": 1.06015568, + "memory(GiB)": 85.12, + "step": 5285, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.69466491, + "epoch": 4.15, + "learning_rate": 7.797237647988259e-05, + "loss": 1.00177612, + "memory(GiB)": 85.12, + "step": 5290, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.6722373, + "epoch": 4.15, + "learning_rate": 7.792760631835138e-05, + "loss": 1.09138288, + "memory(GiB)": 85.12, + "step": 5295, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.68212829, + "epoch": 4.15, + "learning_rate": 7.788280358918239e-05, + "loss": 1.0298542, + "memory(GiB)": 85.12, + "step": 5300, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.68358073, + "epoch": 4.16, + "learning_rate": 7.783796834462208e-05, + "loss": 1.04165964, + "memory(GiB)": 85.12, + "step": 5305, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.67844944, + "epoch": 4.16, + "learning_rate": 7.779310063695486e-05, + "loss": 1.06625547, + "memory(GiB)": 85.12, + "step": 5310, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.67359776, + "epoch": 4.17, + "learning_rate": 7.7748200518503e-05, + "loss": 1.08562546, + "memory(GiB)": 85.12, + "step": 5315, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.68680096, + "epoch": 4.17, + "learning_rate": 7.77032680416265e-05, + "loss": 1.01254492, + "memory(GiB)": 85.12, + "step": 5320, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.67717962, + "epoch": 4.17, + "learning_rate": 7.765830325872318e-05, + "loss": 1.03918819, + "memory(GiB)": 85.12, + "step": 5325, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.68738456, + "epoch": 4.18, + "learning_rate": 7.761330622222849e-05, + "loss": 1.01561775, + "memory(GiB)": 85.12, + "step": 5330, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.66343398, + "epoch": 4.18, + "learning_rate": 7.75682769846155e-05, + "loss": 1.08996143, + "memory(GiB)": 85.12, + "step": 5335, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.68137712, + "epoch": 4.18, + "learning_rate": 7.75232155983948e-05, + "loss": 1.06495123, + "memory(GiB)": 85.12, + "step": 5340, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.67577925, + "epoch": 4.19, + "learning_rate": 7.747812211611454e-05, + "loss": 1.0828968, + "memory(GiB)": 85.12, + "step": 5345, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.6752552, + "epoch": 4.19, + "learning_rate": 7.743299659036023e-05, + "loss": 1.05694551, + "memory(GiB)": 85.12, + "step": 5350, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.67123895, + "epoch": 4.2, + "learning_rate": 7.73878390737548e-05, + "loss": 1.07847023, + "memory(GiB)": 85.12, + "step": 5355, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.68291116, + "epoch": 4.2, + "learning_rate": 7.734264961895843e-05, + "loss": 1.05738621, + "memory(GiB)": 85.12, + "step": 5360, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.68122358, + "epoch": 4.2, + "learning_rate": 7.72974282786686e-05, + "loss": 1.03916407, + "memory(GiB)": 85.12, + "step": 5365, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.67760181, + "epoch": 4.21, + "learning_rate": 7.725217510561993e-05, + "loss": 1.05562716, + "memory(GiB)": 85.12, + "step": 5370, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.6754374, + "epoch": 4.21, + "learning_rate": 7.72068901525842e-05, + "loss": 1.04640961, + "memory(GiB)": 85.12, + "step": 5375, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.6791121, + "epoch": 4.22, + "learning_rate": 7.716157347237022e-05, + "loss": 1.04016552, + "memory(GiB)": 85.12, + "step": 5380, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.67782917, + "epoch": 4.22, + "learning_rate": 7.71162251178238e-05, + "loss": 1.05207224, + "memory(GiB)": 85.12, + "step": 5385, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.66641583, + "epoch": 4.22, + "learning_rate": 7.707084514182772e-05, + "loss": 1.09165554, + "memory(GiB)": 85.12, + "step": 5390, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.66752553, + "epoch": 4.23, + "learning_rate": 7.702543359730158e-05, + "loss": 1.08811426, + "memory(GiB)": 85.12, + "step": 5395, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.67671351, + "epoch": 4.23, + "learning_rate": 7.697999053720185e-05, + "loss": 1.03720827, + "memory(GiB)": 85.12, + "step": 5400, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.67424908, + "epoch": 4.24, + "learning_rate": 7.693451601452173e-05, + "loss": 1.05379906, + "memory(GiB)": 85.12, + "step": 5405, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.67244482, + "epoch": 4.24, + "learning_rate": 7.688901008229107e-05, + "loss": 1.09903154, + "memory(GiB)": 85.12, + "step": 5410, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.67872109, + "epoch": 4.24, + "learning_rate": 7.684347279357642e-05, + "loss": 1.06607389, + "memory(GiB)": 85.12, + "step": 5415, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.67639685, + "epoch": 4.25, + "learning_rate": 7.679790420148084e-05, + "loss": 1.06388321, + "memory(GiB)": 85.12, + "step": 5420, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.66824012, + "epoch": 4.25, + "learning_rate": 7.675230435914394e-05, + "loss": 1.08031435, + "memory(GiB)": 85.12, + "step": 5425, + "train_speed(iter/s)": 0.035244 + }, + { + "acc": 0.67619176, + "epoch": 4.26, + "learning_rate": 7.670667331974171e-05, + "loss": 1.05007677, + "memory(GiB)": 85.12, + "step": 5430, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.66911144, + "epoch": 4.26, + "learning_rate": 7.666101113648658e-05, + "loss": 1.06066961, + "memory(GiB)": 85.12, + "step": 5435, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.69307575, + "epoch": 4.26, + "learning_rate": 7.661531786262728e-05, + "loss": 1.00854187, + "memory(GiB)": 85.12, + "step": 5440, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.67891207, + "epoch": 4.27, + "learning_rate": 7.656959355144879e-05, + "loss": 1.05966101, + "memory(GiB)": 85.12, + "step": 5445, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.67878551, + "epoch": 4.27, + "learning_rate": 7.652383825627226e-05, + "loss": 1.0579504, + "memory(GiB)": 85.12, + "step": 5450, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.66832333, + "epoch": 4.28, + "learning_rate": 7.647805203045504e-05, + "loss": 1.08938274, + "memory(GiB)": 85.12, + "step": 5455, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.67352867, + "epoch": 4.28, + "learning_rate": 7.643223492739048e-05, + "loss": 1.07296839, + "memory(GiB)": 85.12, + "step": 5460, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.68797994, + "epoch": 4.28, + "learning_rate": 7.638638700050796e-05, + "loss": 1.03214712, + "memory(GiB)": 85.12, + "step": 5465, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.67085338, + "epoch": 4.29, + "learning_rate": 7.634050830327282e-05, + "loss": 1.07635412, + "memory(GiB)": 85.12, + "step": 5470, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.67759714, + "epoch": 4.29, + "learning_rate": 7.629459888918627e-05, + "loss": 1.06018639, + "memory(GiB)": 85.12, + "step": 5475, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.67132416, + "epoch": 4.29, + "learning_rate": 7.624865881178535e-05, + "loss": 1.08437328, + "memory(GiB)": 85.12, + "step": 5480, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.6840992, + "epoch": 4.3, + "learning_rate": 7.620268812464284e-05, + "loss": 1.03912249, + "memory(GiB)": 85.12, + "step": 5485, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.68261967, + "epoch": 4.3, + "learning_rate": 7.615668688136724e-05, + "loss": 1.07205544, + "memory(GiB)": 85.12, + "step": 5490, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.68234777, + "epoch": 4.31, + "learning_rate": 7.611065513560264e-05, + "loss": 1.03525667, + "memory(GiB)": 85.12, + "step": 5495, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.6711832, + "epoch": 4.31, + "learning_rate": 7.606459294102876e-05, + "loss": 1.08924789, + "memory(GiB)": 85.12, + "step": 5500, + "train_speed(iter/s)": 0.035244 + }, + { + "acc": 0.68011131, + "epoch": 4.31, + "learning_rate": 7.601850035136078e-05, + "loss": 1.03413877, + "memory(GiB)": 85.12, + "step": 5505, + "train_speed(iter/s)": 0.035244 + }, + { + "acc": 0.67553554, + "epoch": 4.32, + "learning_rate": 7.597237742034938e-05, + "loss": 1.05302973, + "memory(GiB)": 85.12, + "step": 5510, + "train_speed(iter/s)": 0.035243 + }, + { + "acc": 0.68478012, + "epoch": 4.32, + "learning_rate": 7.59262242017805e-05, + "loss": 1.01622705, + "memory(GiB)": 85.12, + "step": 5515, + "train_speed(iter/s)": 0.035244 + }, + { + "acc": 0.67683172, + "epoch": 4.33, + "learning_rate": 7.588004074947556e-05, + "loss": 1.07921562, + "memory(GiB)": 85.12, + "step": 5520, + "train_speed(iter/s)": 0.035244 + }, + { + "acc": 0.67525272, + "epoch": 4.33, + "learning_rate": 7.583382711729114e-05, + "loss": 1.06174011, + "memory(GiB)": 85.12, + "step": 5525, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.69840279, + "epoch": 4.33, + "learning_rate": 7.578758335911901e-05, + "loss": 0.98411026, + "memory(GiB)": 85.12, + "step": 5530, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.68036718, + "epoch": 4.34, + "learning_rate": 7.574130952888614e-05, + "loss": 1.07546577, + "memory(GiB)": 85.12, + "step": 5535, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.67380986, + "epoch": 4.34, + "learning_rate": 7.569500568055448e-05, + "loss": 1.06016655, + "memory(GiB)": 85.12, + "step": 5540, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.67196646, + "epoch": 4.35, + "learning_rate": 7.564867186812105e-05, + "loss": 1.07078009, + "memory(GiB)": 85.12, + "step": 5545, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.68534956, + "epoch": 4.35, + "learning_rate": 7.560230814561781e-05, + "loss": 1.0424099, + "memory(GiB)": 85.12, + "step": 5550, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.67195516, + "epoch": 4.35, + "learning_rate": 7.555591456711157e-05, + "loss": 1.06724882, + "memory(GiB)": 85.12, + "step": 5555, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.68624582, + "epoch": 4.36, + "learning_rate": 7.550949118670395e-05, + "loss": 1.03637371, + "memory(GiB)": 85.12, + "step": 5560, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.67780466, + "epoch": 4.36, + "learning_rate": 7.546303805853136e-05, + "loss": 1.0577466, + "memory(GiB)": 85.12, + "step": 5565, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.66886325, + "epoch": 4.37, + "learning_rate": 7.541655523676489e-05, + "loss": 1.08753948, + "memory(GiB)": 85.12, + "step": 5570, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.68775806, + "epoch": 4.37, + "learning_rate": 7.537004277561022e-05, + "loss": 1.02740803, + "memory(GiB)": 85.12, + "step": 5575, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.67876277, + "epoch": 4.37, + "learning_rate": 7.532350072930764e-05, + "loss": 1.02870722, + "memory(GiB)": 85.12, + "step": 5580, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.67490358, + "epoch": 4.38, + "learning_rate": 7.527692915213193e-05, + "loss": 1.07739782, + "memory(GiB)": 85.12, + "step": 5585, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.67852893, + "epoch": 4.38, + "learning_rate": 7.52303280983923e-05, + "loss": 1.04966307, + "memory(GiB)": 85.12, + "step": 5590, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.68157201, + "epoch": 4.38, + "learning_rate": 7.518369762243232e-05, + "loss": 1.0376194, + "memory(GiB)": 85.12, + "step": 5595, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.67769365, + "epoch": 4.39, + "learning_rate": 7.51370377786299e-05, + "loss": 1.05899029, + "memory(GiB)": 85.12, + "step": 5600, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.68378305, + "epoch": 4.39, + "learning_rate": 7.509034862139717e-05, + "loss": 1.06501207, + "memory(GiB)": 85.12, + "step": 5605, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.68093605, + "epoch": 4.4, + "learning_rate": 7.504363020518046e-05, + "loss": 1.05337543, + "memory(GiB)": 85.12, + "step": 5610, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.6819272, + "epoch": 4.4, + "learning_rate": 7.499688258446024e-05, + "loss": 1.03589249, + "memory(GiB)": 85.12, + "step": 5615, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.66478381, + "epoch": 4.4, + "learning_rate": 7.495010581375097e-05, + "loss": 1.11290102, + "memory(GiB)": 85.12, + "step": 5620, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.69163618, + "epoch": 4.41, + "learning_rate": 7.490329994760118e-05, + "loss": 0.99750299, + "memory(GiB)": 85.12, + "step": 5625, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.68824978, + "epoch": 4.41, + "learning_rate": 7.485646504059328e-05, + "loss": 1.0366888, + "memory(GiB)": 85.12, + "step": 5630, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.6706286, + "epoch": 4.42, + "learning_rate": 7.480960114734357e-05, + "loss": 1.09517231, + "memory(GiB)": 85.12, + "step": 5635, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.66850762, + "epoch": 4.42, + "learning_rate": 7.476270832250213e-05, + "loss": 1.09369583, + "memory(GiB)": 85.12, + "step": 5640, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.68807869, + "epoch": 4.42, + "learning_rate": 7.471578662075281e-05, + "loss": 1.04609451, + "memory(GiB)": 85.12, + "step": 5645, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.67306752, + "epoch": 4.43, + "learning_rate": 7.46688360968131e-05, + "loss": 1.05134382, + "memory(GiB)": 85.12, + "step": 5650, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.68500929, + "epoch": 4.43, + "learning_rate": 7.462185680543413e-05, + "loss": 1.03070574, + "memory(GiB)": 85.12, + "step": 5655, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.67167764, + "epoch": 4.44, + "learning_rate": 7.457484880140057e-05, + "loss": 1.08116226, + "memory(GiB)": 85.12, + "step": 5660, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.67725725, + "epoch": 4.44, + "learning_rate": 7.452781213953054e-05, + "loss": 1.05323343, + "memory(GiB)": 85.12, + "step": 5665, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.68552766, + "epoch": 4.44, + "learning_rate": 7.448074687467564e-05, + "loss": 1.03019152, + "memory(GiB)": 85.12, + "step": 5670, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.67342439, + "epoch": 4.45, + "learning_rate": 7.443365306172076e-05, + "loss": 1.07240591, + "memory(GiB)": 85.12, + "step": 5675, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.676577, + "epoch": 4.45, + "learning_rate": 7.438653075558412e-05, + "loss": 1.0539855, + "memory(GiB)": 85.12, + "step": 5680, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.67203903, + "epoch": 4.46, + "learning_rate": 7.433938001121719e-05, + "loss": 1.06724186, + "memory(GiB)": 85.12, + "step": 5685, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.67773714, + "epoch": 4.46, + "learning_rate": 7.429220088360456e-05, + "loss": 1.08628159, + "memory(GiB)": 85.12, + "step": 5690, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.6735105, + "epoch": 4.46, + "learning_rate": 7.424499342776392e-05, + "loss": 1.08572884, + "memory(GiB)": 85.12, + "step": 5695, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.68572245, + "epoch": 4.47, + "learning_rate": 7.419775769874601e-05, + "loss": 1.02478113, + "memory(GiB)": 85.12, + "step": 5700, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.67862587, + "epoch": 4.47, + "learning_rate": 7.415049375163455e-05, + "loss": 1.04869251, + "memory(GiB)": 85.12, + "step": 5705, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.67932053, + "epoch": 4.47, + "learning_rate": 7.410320164154614e-05, + "loss": 1.04899778, + "memory(GiB)": 85.12, + "step": 5710, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.6694593, + "epoch": 4.48, + "learning_rate": 7.405588142363026e-05, + "loss": 1.09497614, + "memory(GiB)": 85.12, + "step": 5715, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.69002652, + "epoch": 4.48, + "learning_rate": 7.40085331530691e-05, + "loss": 1.02802773, + "memory(GiB)": 85.12, + "step": 5720, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.67512789, + "epoch": 4.49, + "learning_rate": 7.396115688507766e-05, + "loss": 1.0342535, + "memory(GiB)": 85.12, + "step": 5725, + "train_speed(iter/s)": 0.035245 + }, + { + "acc": 0.68088655, + "epoch": 4.49, + "learning_rate": 7.39137526749035e-05, + "loss": 1.05951233, + "memory(GiB)": 85.12, + "step": 5730, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.6625989, + "epoch": 4.49, + "learning_rate": 7.386632057782683e-05, + "loss": 1.0969574, + "memory(GiB)": 85.12, + "step": 5735, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.67329836, + "epoch": 4.5, + "learning_rate": 7.381886064916031e-05, + "loss": 1.09573812, + "memory(GiB)": 85.12, + "step": 5740, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.66656322, + "epoch": 4.5, + "learning_rate": 7.377137294424914e-05, + "loss": 1.07542658, + "memory(GiB)": 85.12, + "step": 5745, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.68572235, + "epoch": 4.51, + "learning_rate": 7.372385751847084e-05, + "loss": 1.01502714, + "memory(GiB)": 85.12, + "step": 5750, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.67641573, + "epoch": 4.51, + "learning_rate": 7.367631442723531e-05, + "loss": 1.08796015, + "memory(GiB)": 85.12, + "step": 5755, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.66859589, + "epoch": 4.51, + "learning_rate": 7.362874372598465e-05, + "loss": 1.09382992, + "memory(GiB)": 85.12, + "step": 5760, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.67875175, + "epoch": 4.52, + "learning_rate": 7.358114547019325e-05, + "loss": 1.03851318, + "memory(GiB)": 85.12, + "step": 5765, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.66146779, + "epoch": 4.52, + "learning_rate": 7.353351971536753e-05, + "loss": 1.13547573, + "memory(GiB)": 85.12, + "step": 5770, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.66437259, + "epoch": 4.53, + "learning_rate": 7.348586651704603e-05, + "loss": 1.09721699, + "memory(GiB)": 85.12, + "step": 5775, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.68509789, + "epoch": 4.53, + "learning_rate": 7.343818593079929e-05, + "loss": 1.04163866, + "memory(GiB)": 85.12, + "step": 5780, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.6822547, + "epoch": 4.53, + "learning_rate": 7.339047801222982e-05, + "loss": 1.0457943, + "memory(GiB)": 85.12, + "step": 5785, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.67156363, + "epoch": 4.54, + "learning_rate": 7.334274281697193e-05, + "loss": 1.05314388, + "memory(GiB)": 85.12, + "step": 5790, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.68432736, + "epoch": 4.54, + "learning_rate": 7.329498040069179e-05, + "loss": 1.03443298, + "memory(GiB)": 85.12, + "step": 5795, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.69290547, + "epoch": 4.55, + "learning_rate": 7.324719081908731e-05, + "loss": 0.98860283, + "memory(GiB)": 85.12, + "step": 5800, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.67324476, + "epoch": 4.55, + "learning_rate": 7.319937412788804e-05, + "loss": 1.0707695, + "memory(GiB)": 85.12, + "step": 5805, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.67157531, + "epoch": 4.55, + "learning_rate": 7.315153038285522e-05, + "loss": 1.08696251, + "memory(GiB)": 85.12, + "step": 5810, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.69197111, + "epoch": 4.56, + "learning_rate": 7.310365963978157e-05, + "loss": 1.00116425, + "memory(GiB)": 85.12, + "step": 5815, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.67103248, + "epoch": 4.56, + "learning_rate": 7.305576195449131e-05, + "loss": 1.07649136, + "memory(GiB)": 85.12, + "step": 5820, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.68711181, + "epoch": 4.57, + "learning_rate": 7.30078373828401e-05, + "loss": 1.0179204, + "memory(GiB)": 85.12, + "step": 5825, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.6692019, + "epoch": 4.57, + "learning_rate": 7.29598859807149e-05, + "loss": 1.09381266, + "memory(GiB)": 85.12, + "step": 5830, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.67294335, + "epoch": 4.57, + "learning_rate": 7.291190780403406e-05, + "loss": 1.07299709, + "memory(GiB)": 85.12, + "step": 5835, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.67547169, + "epoch": 4.58, + "learning_rate": 7.286390290874703e-05, + "loss": 1.057125, + "memory(GiB)": 85.12, + "step": 5840, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.67364817, + "epoch": 4.58, + "learning_rate": 7.281587135083452e-05, + "loss": 1.06245632, + "memory(GiB)": 85.12, + "step": 5845, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.67187681, + "epoch": 4.58, + "learning_rate": 7.276781318630826e-05, + "loss": 1.05665264, + "memory(GiB)": 85.12, + "step": 5850, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.66608486, + "epoch": 4.59, + "learning_rate": 7.271972847121108e-05, + "loss": 1.06528816, + "memory(GiB)": 85.12, + "step": 5855, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.66735435, + "epoch": 4.59, + "learning_rate": 7.267161726161668e-05, + "loss": 1.08798895, + "memory(GiB)": 85.12, + "step": 5860, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.68197312, + "epoch": 4.6, + "learning_rate": 7.262347961362972e-05, + "loss": 1.05005894, + "memory(GiB)": 85.12, + "step": 5865, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.69323926, + "epoch": 4.6, + "learning_rate": 7.257531558338569e-05, + "loss": 1.01712046, + "memory(GiB)": 85.12, + "step": 5870, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.67442312, + "epoch": 4.6, + "learning_rate": 7.252712522705082e-05, + "loss": 1.0723731, + "memory(GiB)": 85.12, + "step": 5875, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.67650232, + "epoch": 4.61, + "learning_rate": 7.247890860082206e-05, + "loss": 1.06735249, + "memory(GiB)": 85.12, + "step": 5880, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.6892065, + "epoch": 4.61, + "learning_rate": 7.243066576092696e-05, + "loss": 1.02709103, + "memory(GiB)": 85.12, + "step": 5885, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.67309418, + "epoch": 4.62, + "learning_rate": 7.238239676362372e-05, + "loss": 1.07167187, + "memory(GiB)": 85.12, + "step": 5890, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.67252998, + "epoch": 4.62, + "learning_rate": 7.233410166520093e-05, + "loss": 1.0607296, + "memory(GiB)": 85.12, + "step": 5895, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.68240318, + "epoch": 4.62, + "learning_rate": 7.228578052197771e-05, + "loss": 1.04523249, + "memory(GiB)": 85.12, + "step": 5900, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.68594875, + "epoch": 4.63, + "learning_rate": 7.223743339030352e-05, + "loss": 1.04490318, + "memory(GiB)": 85.12, + "step": 5905, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.68417301, + "epoch": 4.63, + "learning_rate": 7.21890603265581e-05, + "loss": 1.02140636, + "memory(GiB)": 85.12, + "step": 5910, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.67098885, + "epoch": 4.64, + "learning_rate": 7.214066138715148e-05, + "loss": 1.07261381, + "memory(GiB)": 85.12, + "step": 5915, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.68442526, + "epoch": 4.64, + "learning_rate": 7.209223662852382e-05, + "loss": 1.02172146, + "memory(GiB)": 85.12, + "step": 5920, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.68503671, + "epoch": 4.64, + "learning_rate": 7.204378610714544e-05, + "loss": 1.03891659, + "memory(GiB)": 85.12, + "step": 5925, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.66351156, + "epoch": 4.65, + "learning_rate": 7.199530987951662e-05, + "loss": 1.09004173, + "memory(GiB)": 85.12, + "step": 5930, + "train_speed(iter/s)": 0.035246 + }, + { + "acc": 0.6784656, + "epoch": 4.65, + "learning_rate": 7.194680800216773e-05, + "loss": 1.04306164, + "memory(GiB)": 85.12, + "step": 5935, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.66837859, + "epoch": 4.66, + "learning_rate": 7.189828053165895e-05, + "loss": 1.09347191, + "memory(GiB)": 85.12, + "step": 5940, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.68225694, + "epoch": 4.66, + "learning_rate": 7.184972752458034e-05, + "loss": 1.01862364, + "memory(GiB)": 85.12, + "step": 5945, + "train_speed(iter/s)": 0.035247 + }, + { + "acc": 0.67672114, + "epoch": 4.66, + "learning_rate": 7.180114903755178e-05, + "loss": 1.05343723, + "memory(GiB)": 85.12, + "step": 5950, + "train_speed(iter/s)": 0.035248 + }, + { + "acc": 0.67013016, + "epoch": 4.67, + "learning_rate": 7.175254512722281e-05, + "loss": 1.09360905, + "memory(GiB)": 85.12, + "step": 5955, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.6726356, + "epoch": 4.67, + "learning_rate": 7.170391585027263e-05, + "loss": 1.06504726, + "memory(GiB)": 85.12, + "step": 5960, + "train_speed(iter/s)": 0.035249 + }, + { + "acc": 0.67619967, + "epoch": 4.67, + "learning_rate": 7.165526126341004e-05, + "loss": 1.07366638, + "memory(GiB)": 85.12, + "step": 5965, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.68048444, + "epoch": 4.68, + "learning_rate": 7.160658142337333e-05, + "loss": 1.0334444, + "memory(GiB)": 85.12, + "step": 5970, + "train_speed(iter/s)": 0.03525 + }, + { + "acc": 0.67715273, + "epoch": 4.68, + "learning_rate": 7.155787638693026e-05, + "loss": 1.05196743, + "memory(GiB)": 85.12, + "step": 5975, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.67561464, + "epoch": 4.69, + "learning_rate": 7.150914621087797e-05, + "loss": 1.04411011, + "memory(GiB)": 85.12, + "step": 5980, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.67420325, + "epoch": 4.69, + "learning_rate": 7.146039095204288e-05, + "loss": 1.05389662, + "memory(GiB)": 85.12, + "step": 5985, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.67851129, + "epoch": 4.69, + "learning_rate": 7.141161066728072e-05, + "loss": 1.06385975, + "memory(GiB)": 85.12, + "step": 5990, + "train_speed(iter/s)": 0.035251 + }, + { + "acc": 0.67380395, + "epoch": 4.7, + "learning_rate": 7.136280541347638e-05, + "loss": 1.0779253, + "memory(GiB)": 85.12, + "step": 5995, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.66957092, + "epoch": 4.7, + "learning_rate": 7.131397524754381e-05, + "loss": 1.09954481, + "memory(GiB)": 85.12, + "step": 6000, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.67146378, + "epoch": 4.71, + "learning_rate": 7.126512022642613e-05, + "loss": 1.06633472, + "memory(GiB)": 85.12, + "step": 6005, + "train_speed(iter/s)": 0.035252 + }, + { + "acc": 0.68832026, + "epoch": 4.71, + "learning_rate": 7.121624040709533e-05, + "loss": 1.00683203, + "memory(GiB)": 85.12, + "step": 6010, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.66239319, + "epoch": 4.71, + "learning_rate": 7.116733584655237e-05, + "loss": 1.10340586, + "memory(GiB)": 85.12, + "step": 6015, + "train_speed(iter/s)": 0.035253 + }, + { + "acc": 0.68091083, + "epoch": 4.72, + "learning_rate": 7.11184066018271e-05, + "loss": 1.04083786, + "memory(GiB)": 85.12, + "step": 6020, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.68418207, + "epoch": 4.72, + "learning_rate": 7.106945272997807e-05, + "loss": 1.03925686, + "memory(GiB)": 85.12, + "step": 6025, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.68394904, + "epoch": 4.73, + "learning_rate": 7.102047428809259e-05, + "loss": 1.03902893, + "memory(GiB)": 85.12, + "step": 6030, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.6748229, + "epoch": 4.73, + "learning_rate": 7.097147133328666e-05, + "loss": 1.05747595, + "memory(GiB)": 85.12, + "step": 6035, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.65920019, + "epoch": 4.73, + "learning_rate": 7.092244392270477e-05, + "loss": 1.11649303, + "memory(GiB)": 85.12, + "step": 6040, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.67655163, + "epoch": 4.74, + "learning_rate": 7.087339211352005e-05, + "loss": 1.05948544, + "memory(GiB)": 85.12, + "step": 6045, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.68559122, + "epoch": 4.74, + "learning_rate": 7.0824315962934e-05, + "loss": 1.01261806, + "memory(GiB)": 85.12, + "step": 6050, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.67522321, + "epoch": 4.75, + "learning_rate": 7.077521552817651e-05, + "loss": 1.06603298, + "memory(GiB)": 85.12, + "step": 6055, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.68662744, + "epoch": 4.75, + "learning_rate": 7.072609086650582e-05, + "loss": 1.03524733, + "memory(GiB)": 85.12, + "step": 6060, + "train_speed(iter/s)": 0.035254 + }, + { + "acc": 0.67083483, + "epoch": 4.75, + "learning_rate": 7.067694203520841e-05, + "loss": 1.07945566, + "memory(GiB)": 85.12, + "step": 6065, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.68091211, + "epoch": 4.76, + "learning_rate": 7.062776909159893e-05, + "loss": 1.07089872, + "memory(GiB)": 85.12, + "step": 6070, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.68336306, + "epoch": 4.76, + "learning_rate": 7.057857209302017e-05, + "loss": 1.03715401, + "memory(GiB)": 85.12, + "step": 6075, + "train_speed(iter/s)": 0.035256 + }, + { + "acc": 0.66016645, + "epoch": 4.76, + "learning_rate": 7.0529351096843e-05, + "loss": 1.12012835, + "memory(GiB)": 85.12, + "step": 6080, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.66435328, + "epoch": 4.77, + "learning_rate": 7.048010616046614e-05, + "loss": 1.10283918, + "memory(GiB)": 85.12, + "step": 6085, + "train_speed(iter/s)": 0.035256 + }, + { + "acc": 0.68103104, + "epoch": 4.77, + "learning_rate": 7.043083734131643e-05, + "loss": 1.06891689, + "memory(GiB)": 85.12, + "step": 6090, + "train_speed(iter/s)": 0.035256 + }, + { + "acc": 0.66451955, + "epoch": 4.78, + "learning_rate": 7.038154469684838e-05, + "loss": 1.10513678, + "memory(GiB)": 85.12, + "step": 6095, + "train_speed(iter/s)": 0.035256 + }, + { + "acc": 0.6662466, + "epoch": 4.78, + "learning_rate": 7.033222828454442e-05, + "loss": 1.07707005, + "memory(GiB)": 85.12, + "step": 6100, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.6689785, + "epoch": 4.78, + "learning_rate": 7.028288816191457e-05, + "loss": 1.07475443, + "memory(GiB)": 85.12, + "step": 6105, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.6744, + "epoch": 4.79, + "learning_rate": 7.023352438649662e-05, + "loss": 1.07835417, + "memory(GiB)": 85.12, + "step": 6110, + "train_speed(iter/s)": 0.035258 + }, + { + "acc": 0.6828012, + "epoch": 4.79, + "learning_rate": 7.018413701585587e-05, + "loss": 1.0427166, + "memory(GiB)": 85.12, + "step": 6115, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.67784457, + "epoch": 4.8, + "learning_rate": 7.013472610758515e-05, + "loss": 1.03452606, + "memory(GiB)": 85.12, + "step": 6120, + "train_speed(iter/s)": 0.035258 + }, + { + "acc": 0.68209443, + "epoch": 4.8, + "learning_rate": 7.008529171930476e-05, + "loss": 1.05084362, + "memory(GiB)": 85.12, + "step": 6125, + "train_speed(iter/s)": 0.035258 + }, + { + "acc": 0.6858211, + "epoch": 4.8, + "learning_rate": 7.003583390866234e-05, + "loss": 1.00880671, + "memory(GiB)": 85.12, + "step": 6130, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.67488031, + "epoch": 4.81, + "learning_rate": 6.998635273333289e-05, + "loss": 1.06708899, + "memory(GiB)": 85.12, + "step": 6135, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.68384433, + "epoch": 4.81, + "learning_rate": 6.99368482510186e-05, + "loss": 1.01454229, + "memory(GiB)": 85.12, + "step": 6140, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.68791485, + "epoch": 4.82, + "learning_rate": 6.98873205194489e-05, + "loss": 1.03954144, + "memory(GiB)": 85.12, + "step": 6145, + "train_speed(iter/s)": 0.03526 + }, + { + "acc": 0.67316933, + "epoch": 4.82, + "learning_rate": 6.983776959638032e-05, + "loss": 1.05018234, + "memory(GiB)": 85.12, + "step": 6150, + "train_speed(iter/s)": 0.03526 + }, + { + "acc": 0.6828548, + "epoch": 4.82, + "learning_rate": 6.97881955395964e-05, + "loss": 1.02742395, + "memory(GiB)": 85.12, + "step": 6155, + "train_speed(iter/s)": 0.035261 + }, + { + "acc": 0.67206783, + "epoch": 4.83, + "learning_rate": 6.973859840690766e-05, + "loss": 1.08918076, + "memory(GiB)": 85.12, + "step": 6160, + "train_speed(iter/s)": 0.035261 + }, + { + "acc": 0.67323599, + "epoch": 4.83, + "learning_rate": 6.968897825615158e-05, + "loss": 1.07985773, + "memory(GiB)": 85.12, + "step": 6165, + "train_speed(iter/s)": 0.035261 + }, + { + "acc": 0.67930789, + "epoch": 4.84, + "learning_rate": 6.963933514519243e-05, + "loss": 1.04254446, + "memory(GiB)": 85.12, + "step": 6170, + "train_speed(iter/s)": 0.035261 + }, + { + "acc": 0.69380798, + "epoch": 4.84, + "learning_rate": 6.958966913192127e-05, + "loss": 1.00592508, + "memory(GiB)": 85.12, + "step": 6175, + "train_speed(iter/s)": 0.035261 + }, + { + "acc": 0.67768106, + "epoch": 4.84, + "learning_rate": 6.95399802742559e-05, + "loss": 1.04340172, + "memory(GiB)": 85.12, + "step": 6180, + "train_speed(iter/s)": 0.03526 + }, + { + "acc": 0.66789145, + "epoch": 4.85, + "learning_rate": 6.94902686301407e-05, + "loss": 1.09427404, + "memory(GiB)": 85.12, + "step": 6185, + "train_speed(iter/s)": 0.03526 + }, + { + "acc": 0.65930495, + "epoch": 4.85, + "learning_rate": 6.944053425754668e-05, + "loss": 1.10919495, + "memory(GiB)": 85.12, + "step": 6190, + "train_speed(iter/s)": 0.035261 + }, + { + "acc": 0.68507056, + "epoch": 4.86, + "learning_rate": 6.939077721447129e-05, + "loss": 1.0415493, + "memory(GiB)": 85.12, + "step": 6195, + "train_speed(iter/s)": 0.035261 + }, + { + "acc": 0.67345014, + "epoch": 4.86, + "learning_rate": 6.93409975589385e-05, + "loss": 1.07625484, + "memory(GiB)": 85.12, + "step": 6200, + "train_speed(iter/s)": 0.035261 + }, + { + "acc": 0.66527452, + "epoch": 4.86, + "learning_rate": 6.92911953489986e-05, + "loss": 1.08373318, + "memory(GiB)": 85.12, + "step": 6205, + "train_speed(iter/s)": 0.035261 + }, + { + "acc": 0.68124013, + "epoch": 4.87, + "learning_rate": 6.924137064272815e-05, + "loss": 1.05780458, + "memory(GiB)": 85.12, + "step": 6210, + "train_speed(iter/s)": 0.035261 + }, + { + "acc": 0.67599878, + "epoch": 4.87, + "learning_rate": 6.919152349822999e-05, + "loss": 1.06544428, + "memory(GiB)": 85.12, + "step": 6215, + "train_speed(iter/s)": 0.035262 + }, + { + "acc": 0.67822771, + "epoch": 4.87, + "learning_rate": 6.914165397363318e-05, + "loss": 1.08813448, + "memory(GiB)": 85.12, + "step": 6220, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.67499804, + "epoch": 4.88, + "learning_rate": 6.909176212709272e-05, + "loss": 1.04310112, + "memory(GiB)": 85.12, + "step": 6225, + "train_speed(iter/s)": 0.035262 + }, + { + "acc": 0.68530326, + "epoch": 4.88, + "learning_rate": 6.90418480167898e-05, + "loss": 1.02356024, + "memory(GiB)": 85.12, + "step": 6230, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.67177353, + "epoch": 4.89, + "learning_rate": 6.899191170093148e-05, + "loss": 1.09755363, + "memory(GiB)": 85.12, + "step": 6235, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.68792162, + "epoch": 4.89, + "learning_rate": 6.894195323775078e-05, + "loss": 1.01962185, + "memory(GiB)": 85.12, + "step": 6240, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.68935556, + "epoch": 4.89, + "learning_rate": 6.889197268550648e-05, + "loss": 1.00933504, + "memory(GiB)": 85.12, + "step": 6245, + "train_speed(iter/s)": 0.035262 + }, + { + "acc": 0.67911396, + "epoch": 4.9, + "learning_rate": 6.884197010248314e-05, + "loss": 1.0349247, + "memory(GiB)": 85.12, + "step": 6250, + "train_speed(iter/s)": 0.035262 + }, + { + "acc": 0.67767315, + "epoch": 4.9, + "learning_rate": 6.879194554699106e-05, + "loss": 1.04797144, + "memory(GiB)": 85.12, + "step": 6255, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.67949104, + "epoch": 4.91, + "learning_rate": 6.874189907736608e-05, + "loss": 1.0555562, + "memory(GiB)": 85.12, + "step": 6260, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.66246786, + "epoch": 4.91, + "learning_rate": 6.869183075196968e-05, + "loss": 1.08616772, + "memory(GiB)": 85.12, + "step": 6265, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.67599144, + "epoch": 4.91, + "learning_rate": 6.864174062918875e-05, + "loss": 1.05308266, + "memory(GiB)": 85.12, + "step": 6270, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.68216491, + "epoch": 4.92, + "learning_rate": 6.859162876743565e-05, + "loss": 1.05794802, + "memory(GiB)": 85.12, + "step": 6275, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.68431473, + "epoch": 4.92, + "learning_rate": 6.85414952251481e-05, + "loss": 1.03531227, + "memory(GiB)": 85.12, + "step": 6280, + "train_speed(iter/s)": 0.035266 + }, + { + "acc": 0.66319323, + "epoch": 4.93, + "learning_rate": 6.849134006078904e-05, + "loss": 1.10098467, + "memory(GiB)": 85.12, + "step": 6285, + "train_speed(iter/s)": 0.035266 + }, + { + "acc": 0.66633544, + "epoch": 4.93, + "learning_rate": 6.84411633328467e-05, + "loss": 1.0953721, + "memory(GiB)": 85.12, + "step": 6290, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.69023266, + "epoch": 4.93, + "learning_rate": 6.839096509983436e-05, + "loss": 1.01781502, + "memory(GiB)": 85.12, + "step": 6295, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.68030787, + "epoch": 4.94, + "learning_rate": 6.83407454202905e-05, + "loss": 1.07228956, + "memory(GiB)": 85.12, + "step": 6300, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.66776667, + "epoch": 4.94, + "learning_rate": 6.82905043527785e-05, + "loss": 1.0892725, + "memory(GiB)": 85.12, + "step": 6305, + "train_speed(iter/s)": 0.035266 + }, + { + "acc": 0.68062611, + "epoch": 4.95, + "learning_rate": 6.824024195588677e-05, + "loss": 1.04044657, + "memory(GiB)": 85.12, + "step": 6310, + "train_speed(iter/s)": 0.035266 + }, + { + "acc": 0.6697052, + "epoch": 4.95, + "learning_rate": 6.818995828822852e-05, + "loss": 1.07682076, + "memory(GiB)": 85.12, + "step": 6315, + "train_speed(iter/s)": 0.035267 + }, + { + "acc": 0.67304358, + "epoch": 4.95, + "learning_rate": 6.813965340844183e-05, + "loss": 1.09571772, + "memory(GiB)": 85.12, + "step": 6320, + "train_speed(iter/s)": 0.035267 + }, + { + "acc": 0.68612056, + "epoch": 4.96, + "learning_rate": 6.808932737518944e-05, + "loss": 1.02444048, + "memory(GiB)": 85.12, + "step": 6325, + "train_speed(iter/s)": 0.035268 + }, + { + "acc": 0.67951617, + "epoch": 4.96, + "learning_rate": 6.803898024715884e-05, + "loss": 1.07699089, + "memory(GiB)": 85.12, + "step": 6330, + "train_speed(iter/s)": 0.035268 + }, + { + "acc": 0.66642966, + "epoch": 4.96, + "learning_rate": 6.798861208306204e-05, + "loss": 1.09788532, + "memory(GiB)": 85.12, + "step": 6335, + "train_speed(iter/s)": 0.035268 + }, + { + "acc": 0.68518648, + "epoch": 4.97, + "learning_rate": 6.793822294163565e-05, + "loss": 1.03690128, + "memory(GiB)": 85.12, + "step": 6340, + "train_speed(iter/s)": 0.035268 + }, + { + "acc": 0.68445034, + "epoch": 4.97, + "learning_rate": 6.788781288164072e-05, + "loss": 1.04183044, + "memory(GiB)": 85.12, + "step": 6345, + "train_speed(iter/s)": 0.035268 + }, + { + "acc": 0.67292199, + "epoch": 4.98, + "learning_rate": 6.783738196186267e-05, + "loss": 1.06181889, + "memory(GiB)": 85.12, + "step": 6350, + "train_speed(iter/s)": 0.035269 + }, + { + "acc": 0.66322756, + "epoch": 4.98, + "learning_rate": 6.778693024111128e-05, + "loss": 1.11033144, + "memory(GiB)": 85.12, + "step": 6355, + "train_speed(iter/s)": 0.03527 + }, + { + "acc": 0.6683476, + "epoch": 4.98, + "learning_rate": 6.773645777822054e-05, + "loss": 1.09244795, + "memory(GiB)": 85.12, + "step": 6360, + "train_speed(iter/s)": 0.03527 + }, + { + "acc": 0.67698259, + "epoch": 4.99, + "learning_rate": 6.76859646320487e-05, + "loss": 1.05961075, + "memory(GiB)": 85.12, + "step": 6365, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.69028482, + "epoch": 4.99, + "learning_rate": 6.763545086147806e-05, + "loss": 1.02525082, + "memory(GiB)": 85.12, + "step": 6370, + "train_speed(iter/s)": 0.03527 + }, + { + "acc": 0.67674527, + "epoch": 5.0, + "learning_rate": 6.758491652541499e-05, + "loss": 1.06389141, + "memory(GiB)": 85.12, + "step": 6375, + "train_speed(iter/s)": 0.03527 + }, + { + "acc": 0.66953359, + "epoch": 5.0, + "learning_rate": 6.75343616827899e-05, + "loss": 1.06004562, + "memory(GiB)": 85.12, + "step": 6380, + "train_speed(iter/s)": 0.035272 + }, + { + "epoch": 5.0, + "eval_acc": 0.6952166291009266, + "eval_loss": 0.9776778817176819, + "eval_runtime": 84.814, + "eval_samples_per_second": 1.097, + "eval_steps_per_second": 1.097, + "step": 6380 + }, + { + "acc": 0.66123757, + "epoch": 5.0, + "learning_rate": 6.748378639255701e-05, + "loss": 1.09309893, + "memory(GiB)": 85.12, + "step": 6385, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.6866303, + "epoch": 5.01, + "learning_rate": 6.74331907136945e-05, + "loss": 1.01490898, + "memory(GiB)": 85.12, + "step": 6390, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.68321543, + "epoch": 5.01, + "learning_rate": 6.73825747052042e-05, + "loss": 1.04613428, + "memory(GiB)": 85.12, + "step": 6395, + "train_speed(iter/s)": 0.035255 + }, + { + "acc": 0.69075665, + "epoch": 5.02, + "learning_rate": 6.733193842611176e-05, + "loss": 1.02780151, + "memory(GiB)": 85.12, + "step": 6400, + "train_speed(iter/s)": 0.035256 + }, + { + "acc": 0.69020853, + "epoch": 5.02, + "learning_rate": 6.72812819354664e-05, + "loss": 1.0208807, + "memory(GiB)": 85.12, + "step": 6405, + "train_speed(iter/s)": 0.035256 + }, + { + "acc": 0.67573576, + "epoch": 5.02, + "learning_rate": 6.723060529234095e-05, + "loss": 1.03973875, + "memory(GiB)": 85.12, + "step": 6410, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.67599573, + "epoch": 5.03, + "learning_rate": 6.717990855583171e-05, + "loss": 1.048526, + "memory(GiB)": 85.12, + "step": 6415, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.68013263, + "epoch": 5.03, + "learning_rate": 6.712919178505846e-05, + "loss": 1.04929171, + "memory(GiB)": 85.12, + "step": 6420, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.67604647, + "epoch": 5.04, + "learning_rate": 6.707845503916424e-05, + "loss": 1.02617359, + "memory(GiB)": 85.12, + "step": 6425, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.67967114, + "epoch": 5.04, + "learning_rate": 6.70276983773155e-05, + "loss": 1.05347996, + "memory(GiB)": 85.12, + "step": 6430, + "train_speed(iter/s)": 0.035258 + }, + { + "acc": 0.69580359, + "epoch": 5.04, + "learning_rate": 6.697692185870185e-05, + "loss": 1.0092021, + "memory(GiB)": 85.12, + "step": 6435, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.68660007, + "epoch": 5.05, + "learning_rate": 6.692612554253607e-05, + "loss": 1.00932379, + "memory(GiB)": 85.12, + "step": 6440, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.67771325, + "epoch": 5.05, + "learning_rate": 6.687530948805404e-05, + "loss": 1.06307144, + "memory(GiB)": 85.12, + "step": 6445, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.68935843, + "epoch": 5.05, + "learning_rate": 6.682447375451463e-05, + "loss": 0.98286209, + "memory(GiB)": 85.12, + "step": 6450, + "train_speed(iter/s)": 0.03526 + }, + { + "acc": 0.68584781, + "epoch": 5.06, + "learning_rate": 6.67736184011997e-05, + "loss": 1.02812233, + "memory(GiB)": 85.12, + "step": 6455, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.68117771, + "epoch": 5.06, + "learning_rate": 6.672274348741396e-05, + "loss": 1.04238253, + "memory(GiB)": 85.12, + "step": 6460, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.67923732, + "epoch": 5.07, + "learning_rate": 6.667184907248493e-05, + "loss": 1.03050461, + "memory(GiB)": 85.12, + "step": 6465, + "train_speed(iter/s)": 0.035258 + }, + { + "acc": 0.687995, + "epoch": 5.07, + "learning_rate": 6.662093521576285e-05, + "loss": 1.00678883, + "memory(GiB)": 85.12, + "step": 6470, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.67301135, + "epoch": 5.07, + "learning_rate": 6.657000197662068e-05, + "loss": 1.06638184, + "memory(GiB)": 85.12, + "step": 6475, + "train_speed(iter/s)": 0.035257 + }, + { + "acc": 0.6931222, + "epoch": 5.08, + "learning_rate": 6.651904941445398e-05, + "loss": 1.01922817, + "memory(GiB)": 85.12, + "step": 6480, + "train_speed(iter/s)": 0.035258 + }, + { + "acc": 0.68098392, + "epoch": 5.08, + "learning_rate": 6.64680775886808e-05, + "loss": 1.04567719, + "memory(GiB)": 85.12, + "step": 6485, + "train_speed(iter/s)": 0.035258 + }, + { + "acc": 0.68202472, + "epoch": 5.09, + "learning_rate": 6.641708655874169e-05, + "loss": 1.04754171, + "memory(GiB)": 85.12, + "step": 6490, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.67911615, + "epoch": 5.09, + "learning_rate": 6.636607638409956e-05, + "loss": 1.06853676, + "memory(GiB)": 85.12, + "step": 6495, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.68118539, + "epoch": 5.09, + "learning_rate": 6.63150471242397e-05, + "loss": 1.04338474, + "memory(GiB)": 85.12, + "step": 6500, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.67814703, + "epoch": 5.1, + "learning_rate": 6.62639988386696e-05, + "loss": 1.02098122, + "memory(GiB)": 85.12, + "step": 6505, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.68116179, + "epoch": 5.1, + "learning_rate": 6.6212931586919e-05, + "loss": 1.04279861, + "memory(GiB)": 85.12, + "step": 6510, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.68639112, + "epoch": 5.11, + "learning_rate": 6.616184542853965e-05, + "loss": 1.00777941, + "memory(GiB)": 85.12, + "step": 6515, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.67839465, + "epoch": 5.11, + "learning_rate": 6.611074042310549e-05, + "loss": 1.03657656, + "memory(GiB)": 85.12, + "step": 6520, + "train_speed(iter/s)": 0.035259 + }, + { + "acc": 0.68946548, + "epoch": 5.11, + "learning_rate": 6.605961663021233e-05, + "loss": 1.01957436, + "memory(GiB)": 85.12, + "step": 6525, + "train_speed(iter/s)": 0.03526 + }, + { + "acc": 0.69059048, + "epoch": 5.12, + "learning_rate": 6.600847410947794e-05, + "loss": 1.01516457, + "memory(GiB)": 85.12, + "step": 6530, + "train_speed(iter/s)": 0.03526 + }, + { + "acc": 0.66432734, + "epoch": 5.12, + "learning_rate": 6.595731292054187e-05, + "loss": 1.08365002, + "memory(GiB)": 85.12, + "step": 6535, + "train_speed(iter/s)": 0.035261 + }, + { + "acc": 0.67369056, + "epoch": 5.13, + "learning_rate": 6.590613312306555e-05, + "loss": 1.0672245, + "memory(GiB)": 85.12, + "step": 6540, + "train_speed(iter/s)": 0.035261 + }, + { + "acc": 0.68721709, + "epoch": 5.13, + "learning_rate": 6.5854934776732e-05, + "loss": 1.00315237, + "memory(GiB)": 85.12, + "step": 6545, + "train_speed(iter/s)": 0.03526 + }, + { + "acc": 0.67401967, + "epoch": 5.13, + "learning_rate": 6.580371794124592e-05, + "loss": 1.04892883, + "memory(GiB)": 85.12, + "step": 6550, + "train_speed(iter/s)": 0.035261 + }, + { + "acc": 0.69203424, + "epoch": 5.14, + "learning_rate": 6.575248267633357e-05, + "loss": 1.01215448, + "memory(GiB)": 85.12, + "step": 6555, + "train_speed(iter/s)": 0.035261 + }, + { + "acc": 0.68317304, + "epoch": 5.14, + "learning_rate": 6.57012290417427e-05, + "loss": 1.0137826, + "memory(GiB)": 85.12, + "step": 6560, + "train_speed(iter/s)": 0.035262 + }, + { + "acc": 0.67414193, + "epoch": 5.14, + "learning_rate": 6.564995709724246e-05, + "loss": 1.05723057, + "memory(GiB)": 85.12, + "step": 6565, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.68139715, + "epoch": 5.15, + "learning_rate": 6.55986669026234e-05, + "loss": 1.02423239, + "memory(GiB)": 85.12, + "step": 6570, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.67808838, + "epoch": 5.15, + "learning_rate": 6.554735851769729e-05, + "loss": 1.04498472, + "memory(GiB)": 85.12, + "step": 6575, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.67794299, + "epoch": 5.16, + "learning_rate": 6.549603200229717e-05, + "loss": 1.06934061, + "memory(GiB)": 85.12, + "step": 6580, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.67949061, + "epoch": 5.16, + "learning_rate": 6.54446874162772e-05, + "loss": 1.03536024, + "memory(GiB)": 85.12, + "step": 6585, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.68172569, + "epoch": 5.16, + "learning_rate": 6.539332481951261e-05, + "loss": 1.03228369, + "memory(GiB)": 85.12, + "step": 6590, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.67113338, + "epoch": 5.17, + "learning_rate": 6.534194427189961e-05, + "loss": 1.07480278, + "memory(GiB)": 85.12, + "step": 6595, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.68143373, + "epoch": 5.17, + "learning_rate": 6.529054583335538e-05, + "loss": 1.04739819, + "memory(GiB)": 85.12, + "step": 6600, + "train_speed(iter/s)": 0.035266 + }, + { + "acc": 0.68343229, + "epoch": 5.18, + "learning_rate": 6.523912956381797e-05, + "loss": 1.04386349, + "memory(GiB)": 85.12, + "step": 6605, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.68208308, + "epoch": 5.18, + "learning_rate": 6.518769552324619e-05, + "loss": 1.03945503, + "memory(GiB)": 85.12, + "step": 6610, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.68102374, + "epoch": 5.18, + "learning_rate": 6.513624377161957e-05, + "loss": 1.06787729, + "memory(GiB)": 85.12, + "step": 6615, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.68312049, + "epoch": 5.19, + "learning_rate": 6.508477436893835e-05, + "loss": 1.01988068, + "memory(GiB)": 85.12, + "step": 6620, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.68279829, + "epoch": 5.19, + "learning_rate": 6.503328737522327e-05, + "loss": 1.03501797, + "memory(GiB)": 85.12, + "step": 6625, + "train_speed(iter/s)": 0.035266 + }, + { + "acc": 0.68076296, + "epoch": 5.2, + "learning_rate": 6.498178285051567e-05, + "loss": 1.02725744, + "memory(GiB)": 85.12, + "step": 6630, + "train_speed(iter/s)": 0.035267 + }, + { + "acc": 0.70253773, + "epoch": 5.2, + "learning_rate": 6.493026085487725e-05, + "loss": 0.96801195, + "memory(GiB)": 85.12, + "step": 6635, + "train_speed(iter/s)": 0.035267 + }, + { + "acc": 0.69069552, + "epoch": 5.2, + "learning_rate": 6.487872144839018e-05, + "loss": 1.02977905, + "memory(GiB)": 85.12, + "step": 6640, + "train_speed(iter/s)": 0.035266 + }, + { + "acc": 0.68076792, + "epoch": 5.21, + "learning_rate": 6.482716469115685e-05, + "loss": 1.05776606, + "memory(GiB)": 85.12, + "step": 6645, + "train_speed(iter/s)": 0.035267 + }, + { + "acc": 0.68598046, + "epoch": 5.21, + "learning_rate": 6.47755906432999e-05, + "loss": 1.00461483, + "memory(GiB)": 85.12, + "step": 6650, + "train_speed(iter/s)": 0.035267 + }, + { + "acc": 0.67754989, + "epoch": 5.22, + "learning_rate": 6.472399936496219e-05, + "loss": 1.06693382, + "memory(GiB)": 85.12, + "step": 6655, + "train_speed(iter/s)": 0.035267 + }, + { + "acc": 0.703161, + "epoch": 5.22, + "learning_rate": 6.467239091630657e-05, + "loss": 0.96703682, + "memory(GiB)": 85.12, + "step": 6660, + "train_speed(iter/s)": 0.035267 + }, + { + "acc": 0.6778832, + "epoch": 5.22, + "learning_rate": 6.462076535751603e-05, + "loss": 1.05017014, + "memory(GiB)": 85.12, + "step": 6665, + "train_speed(iter/s)": 0.035268 + }, + { + "acc": 0.67984619, + "epoch": 5.23, + "learning_rate": 6.456912274879339e-05, + "loss": 1.02873812, + "memory(GiB)": 85.12, + "step": 6670, + "train_speed(iter/s)": 0.035267 + }, + { + "acc": 0.67118473, + "epoch": 5.23, + "learning_rate": 6.451746315036149e-05, + "loss": 1.05280457, + "memory(GiB)": 85.12, + "step": 6675, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.67446933, + "epoch": 5.24, + "learning_rate": 6.446578662246287e-05, + "loss": 1.04249983, + "memory(GiB)": 85.12, + "step": 6680, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.67916312, + "epoch": 5.24, + "learning_rate": 6.44140932253599e-05, + "loss": 1.04621458, + "memory(GiB)": 85.12, + "step": 6685, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.68936515, + "epoch": 5.24, + "learning_rate": 6.43623830193345e-05, + "loss": 0.99246674, + "memory(GiB)": 85.12, + "step": 6690, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.67838211, + "epoch": 5.25, + "learning_rate": 6.431065606468832e-05, + "loss": 1.06191158, + "memory(GiB)": 85.12, + "step": 6695, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.67258978, + "epoch": 5.25, + "learning_rate": 6.425891242174247e-05, + "loss": 1.08182993, + "memory(GiB)": 85.12, + "step": 6700, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.67531633, + "epoch": 5.25, + "learning_rate": 6.420715215083755e-05, + "loss": 1.0620513, + "memory(GiB)": 85.12, + "step": 6705, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.68797979, + "epoch": 5.26, + "learning_rate": 6.415537531233354e-05, + "loss": 1.03437891, + "memory(GiB)": 85.12, + "step": 6710, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.68858671, + "epoch": 5.26, + "learning_rate": 6.410358196660972e-05, + "loss": 1.0127037, + "memory(GiB)": 85.12, + "step": 6715, + "train_speed(iter/s)": 0.035266 + }, + { + "acc": 0.68950157, + "epoch": 5.27, + "learning_rate": 6.405177217406467e-05, + "loss": 1.00921545, + "memory(GiB)": 85.12, + "step": 6720, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.69257259, + "epoch": 5.27, + "learning_rate": 6.399994599511607e-05, + "loss": 1.01077251, + "memory(GiB)": 85.12, + "step": 6725, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.68077116, + "epoch": 5.27, + "learning_rate": 6.394810349020083e-05, + "loss": 1.0404789, + "memory(GiB)": 85.12, + "step": 6730, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.68140783, + "epoch": 5.28, + "learning_rate": 6.389624471977476e-05, + "loss": 1.0449604, + "memory(GiB)": 85.12, + "step": 6735, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.69492321, + "epoch": 5.28, + "learning_rate": 6.384436974431274e-05, + "loss": 1.00443935, + "memory(GiB)": 85.12, + "step": 6740, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.67667093, + "epoch": 5.29, + "learning_rate": 6.379247862430851e-05, + "loss": 1.05137119, + "memory(GiB)": 85.12, + "step": 6745, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.68313727, + "epoch": 5.29, + "learning_rate": 6.374057142027463e-05, + "loss": 1.03881302, + "memory(GiB)": 85.12, + "step": 6750, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.68495326, + "epoch": 5.29, + "learning_rate": 6.368864819274243e-05, + "loss": 1.03522501, + "memory(GiB)": 85.12, + "step": 6755, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.68464856, + "epoch": 5.3, + "learning_rate": 6.363670900226191e-05, + "loss": 1.03497791, + "memory(GiB)": 85.12, + "step": 6760, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.68600035, + "epoch": 5.3, + "learning_rate": 6.358475390940172e-05, + "loss": 1.02388697, + "memory(GiB)": 85.12, + "step": 6765, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.69201202, + "epoch": 5.31, + "learning_rate": 6.3532782974749e-05, + "loss": 1.02907257, + "memory(GiB)": 85.12, + "step": 6770, + "train_speed(iter/s)": 0.035262 + }, + { + "acc": 0.67293754, + "epoch": 5.31, + "learning_rate": 6.348079625890943e-05, + "loss": 1.0643261, + "memory(GiB)": 85.12, + "step": 6775, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.67401209, + "epoch": 5.31, + "learning_rate": 6.342879382250701e-05, + "loss": 1.0822813, + "memory(GiB)": 85.12, + "step": 6780, + "train_speed(iter/s)": 0.035262 + }, + { + "acc": 0.68836966, + "epoch": 5.32, + "learning_rate": 6.337677572618417e-05, + "loss": 1.03913021, + "memory(GiB)": 85.12, + "step": 6785, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.68064456, + "epoch": 5.32, + "learning_rate": 6.332474203060155e-05, + "loss": 1.02623987, + "memory(GiB)": 85.12, + "step": 6790, + "train_speed(iter/s)": 0.035262 + }, + { + "acc": 0.67679968, + "epoch": 5.33, + "learning_rate": 6.327269279643792e-05, + "loss": 1.06826124, + "memory(GiB)": 85.12, + "step": 6795, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.68307304, + "epoch": 5.33, + "learning_rate": 6.322062808439029e-05, + "loss": 1.02487335, + "memory(GiB)": 85.12, + "step": 6800, + "train_speed(iter/s)": 0.035262 + }, + { + "acc": 0.67592635, + "epoch": 5.33, + "learning_rate": 6.316854795517364e-05, + "loss": 1.07259035, + "memory(GiB)": 85.12, + "step": 6805, + "train_speed(iter/s)": 0.035262 + }, + { + "acc": 0.68332787, + "epoch": 5.34, + "learning_rate": 6.311645246952097e-05, + "loss": 1.00476055, + "memory(GiB)": 85.12, + "step": 6810, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.67505136, + "epoch": 5.34, + "learning_rate": 6.306434168818315e-05, + "loss": 1.07261286, + "memory(GiB)": 85.12, + "step": 6815, + "train_speed(iter/s)": 0.035262 + }, + { + "acc": 0.68755107, + "epoch": 5.34, + "learning_rate": 6.301221567192892e-05, + "loss": 1.01972666, + "memory(GiB)": 85.12, + "step": 6820, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.688377, + "epoch": 5.35, + "learning_rate": 6.296007448154475e-05, + "loss": 1.04110975, + "memory(GiB)": 85.12, + "step": 6825, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.67587614, + "epoch": 5.35, + "learning_rate": 6.290791817783486e-05, + "loss": 1.05248299, + "memory(GiB)": 85.12, + "step": 6830, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.69134288, + "epoch": 5.36, + "learning_rate": 6.285574682162103e-05, + "loss": 1.00044346, + "memory(GiB)": 85.12, + "step": 6835, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.67581382, + "epoch": 5.36, + "learning_rate": 6.280356047374264e-05, + "loss": 1.05379066, + "memory(GiB)": 85.12, + "step": 6840, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.68169432, + "epoch": 5.36, + "learning_rate": 6.275135919505655e-05, + "loss": 1.02964487, + "memory(GiB)": 85.12, + "step": 6845, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.68080144, + "epoch": 5.37, + "learning_rate": 6.269914304643698e-05, + "loss": 1.02860794, + "memory(GiB)": 85.12, + "step": 6850, + "train_speed(iter/s)": 0.035263 + }, + { + "acc": 0.68284755, + "epoch": 5.37, + "learning_rate": 6.264691208877558e-05, + "loss": 1.02458563, + "memory(GiB)": 85.12, + "step": 6855, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.67072563, + "epoch": 5.38, + "learning_rate": 6.259466638298118e-05, + "loss": 1.07109718, + "memory(GiB)": 85.12, + "step": 6860, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.69171076, + "epoch": 5.38, + "learning_rate": 6.254240598997985e-05, + "loss": 1.0028264, + "memory(GiB)": 85.12, + "step": 6865, + "train_speed(iter/s)": 0.035264 + }, + { + "acc": 0.67135153, + "epoch": 5.38, + "learning_rate": 6.24901309707148e-05, + "loss": 1.08797283, + "memory(GiB)": 85.12, + "step": 6870, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.6757844, + "epoch": 5.39, + "learning_rate": 6.243784138614627e-05, + "loss": 1.04371614, + "memory(GiB)": 85.12, + "step": 6875, + "train_speed(iter/s)": 0.035265 + }, + { + "acc": 0.67799335, + "epoch": 5.39, + "learning_rate": 6.238553729725151e-05, + "loss": 1.02313776, + "memory(GiB)": 85.12, + "step": 6880, + "train_speed(iter/s)": 0.035266 + }, + { + "acc": 0.67846012, + "epoch": 5.4, + "learning_rate": 6.233321876502468e-05, + "loss": 1.04524956, + "memory(GiB)": 85.12, + "step": 6885, + "train_speed(iter/s)": 0.035266 + }, + { + "acc": 0.68262143, + "epoch": 5.4, + "learning_rate": 6.228088585047673e-05, + "loss": 1.02941513, + "memory(GiB)": 85.12, + "step": 6890, + "train_speed(iter/s)": 0.035267 + }, + { + "acc": 0.67653875, + "epoch": 5.4, + "learning_rate": 6.222853861463546e-05, + "loss": 1.07044706, + "memory(GiB)": 85.12, + "step": 6895, + "train_speed(iter/s)": 0.035266 + }, + { + "acc": 0.68358874, + "epoch": 5.41, + "learning_rate": 6.217617711854534e-05, + "loss": 1.0135643, + "memory(GiB)": 85.12, + "step": 6900, + "train_speed(iter/s)": 0.035266 + }, + { + "acc": 0.68347077, + "epoch": 5.41, + "learning_rate": 6.212380142326743e-05, + "loss": 1.06620531, + "memory(GiB)": 85.12, + "step": 6905, + "train_speed(iter/s)": 0.035267 + }, + { + "acc": 0.68109732, + "epoch": 5.42, + "learning_rate": 6.207141158987943e-05, + "loss": 1.0621707, + "memory(GiB)": 85.12, + "step": 6910, + "train_speed(iter/s)": 0.035266 + }, + { + "acc": 0.66551132, + "epoch": 5.42, + "learning_rate": 6.201900767947544e-05, + "loss": 1.09948444, + "memory(GiB)": 85.12, + "step": 6915, + "train_speed(iter/s)": 0.035267 + }, + { + "acc": 0.6836462, + "epoch": 5.42, + "learning_rate": 6.196658975316604e-05, + "loss": 1.04037647, + "memory(GiB)": 85.12, + "step": 6920, + "train_speed(iter/s)": 0.035267 + }, + { + "acc": 0.68927107, + "epoch": 5.43, + "learning_rate": 6.191415787207813e-05, + "loss": 1.01255808, + "memory(GiB)": 85.12, + "step": 6925, + "train_speed(iter/s)": 0.035268 + }, + { + "acc": 0.67622333, + "epoch": 5.43, + "learning_rate": 6.186171209735489e-05, + "loss": 1.06019592, + "memory(GiB)": 85.12, + "step": 6930, + "train_speed(iter/s)": 0.035267 + }, + { + "acc": 0.68418527, + "epoch": 5.43, + "learning_rate": 6.180925249015566e-05, + "loss": 1.03336248, + "memory(GiB)": 85.12, + "step": 6935, + "train_speed(iter/s)": 0.035267 + }, + { + "acc": 0.68626051, + "epoch": 5.44, + "learning_rate": 6.175677911165599e-05, + "loss": 1.03925867, + "memory(GiB)": 85.12, + "step": 6940, + "train_speed(iter/s)": 0.035268 + }, + { + "acc": 0.68999286, + "epoch": 5.44, + "learning_rate": 6.170429202304744e-05, + "loss": 1.00964413, + "memory(GiB)": 85.12, + "step": 6945, + "train_speed(iter/s)": 0.035268 + }, + { + "acc": 0.68267813, + "epoch": 5.45, + "learning_rate": 6.165179128553754e-05, + "loss": 1.02514906, + "memory(GiB)": 85.12, + "step": 6950, + "train_speed(iter/s)": 0.035269 + }, + { + "acc": 0.67859344, + "epoch": 5.45, + "learning_rate": 6.15992769603498e-05, + "loss": 1.08015528, + "memory(GiB)": 85.12, + "step": 6955, + "train_speed(iter/s)": 0.035269 + }, + { + "acc": 0.67712188, + "epoch": 5.45, + "learning_rate": 6.15467491087235e-05, + "loss": 1.05515985, + "memory(GiB)": 85.12, + "step": 6960, + "train_speed(iter/s)": 0.035269 + }, + { + "acc": 0.68022337, + "epoch": 5.46, + "learning_rate": 6.149420779191373e-05, + "loss": 1.05591021, + "memory(GiB)": 85.12, + "step": 6965, + "train_speed(iter/s)": 0.03527 + }, + { + "acc": 0.6749332, + "epoch": 5.46, + "learning_rate": 6.144165307119129e-05, + "loss": 1.07103643, + "memory(GiB)": 85.12, + "step": 6970, + "train_speed(iter/s)": 0.03527 + }, + { + "acc": 0.67788014, + "epoch": 5.47, + "learning_rate": 6.138908500784265e-05, + "loss": 1.05876656, + "memory(GiB)": 85.12, + "step": 6975, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.67351999, + "epoch": 5.47, + "learning_rate": 6.133650366316972e-05, + "loss": 1.08339548, + "memory(GiB)": 85.12, + "step": 6980, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.67984676, + "epoch": 5.47, + "learning_rate": 6.128390909849004e-05, + "loss": 1.04141293, + "memory(GiB)": 85.12, + "step": 6985, + "train_speed(iter/s)": 0.035272 + }, + { + "acc": 0.68225818, + "epoch": 5.48, + "learning_rate": 6.123130137513642e-05, + "loss": 1.01653795, + "memory(GiB)": 85.12, + "step": 6990, + "train_speed(iter/s)": 0.035272 + }, + { + "acc": 0.67328138, + "epoch": 5.48, + "learning_rate": 6.117868055445715e-05, + "loss": 1.04721413, + "memory(GiB)": 85.12, + "step": 6995, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.67902741, + "epoch": 5.49, + "learning_rate": 6.112604669781572e-05, + "loss": 1.03500223, + "memory(GiB)": 85.12, + "step": 7000, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.68537979, + "epoch": 5.49, + "learning_rate": 6.107339986659084e-05, + "loss": 1.05380325, + "memory(GiB)": 85.12, + "step": 7005, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.6760211, + "epoch": 5.49, + "learning_rate": 6.1020740122176343e-05, + "loss": 1.06433525, + "memory(GiB)": 85.12, + "step": 7010, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.67585163, + "epoch": 5.5, + "learning_rate": 6.096806752598112e-05, + "loss": 1.08780317, + "memory(GiB)": 85.12, + "step": 7015, + "train_speed(iter/s)": 0.035272 + }, + { + "acc": 0.67884717, + "epoch": 5.5, + "learning_rate": 6.091538213942908e-05, + "loss": 1.03859797, + "memory(GiB)": 85.12, + "step": 7020, + "train_speed(iter/s)": 0.035272 + }, + { + "acc": 0.6934917, + "epoch": 5.51, + "learning_rate": 6.086268402395898e-05, + "loss": 1.00850601, + "memory(GiB)": 85.12, + "step": 7025, + "train_speed(iter/s)": 0.035272 + }, + { + "acc": 0.70368981, + "epoch": 5.51, + "learning_rate": 6.080997324102449e-05, + "loss": 0.97637157, + "memory(GiB)": 85.12, + "step": 7030, + "train_speed(iter/s)": 0.035272 + }, + { + "acc": 0.69735985, + "epoch": 5.51, + "learning_rate": 6.0757249852094026e-05, + "loss": 0.98540497, + "memory(GiB)": 85.12, + "step": 7035, + "train_speed(iter/s)": 0.035272 + }, + { + "acc": 0.67899513, + "epoch": 5.52, + "learning_rate": 6.07045139186507e-05, + "loss": 1.01754208, + "memory(GiB)": 85.12, + "step": 7040, + "train_speed(iter/s)": 0.035273 + }, + { + "acc": 0.68062968, + "epoch": 5.52, + "learning_rate": 6.065176550219226e-05, + "loss": 1.04695129, + "memory(GiB)": 85.12, + "step": 7045, + "train_speed(iter/s)": 0.035272 + }, + { + "acc": 0.67595897, + "epoch": 5.53, + "learning_rate": 6.0599004664230984e-05, + "loss": 1.05315809, + "memory(GiB)": 85.12, + "step": 7050, + "train_speed(iter/s)": 0.035272 + }, + { + "acc": 0.67158957, + "epoch": 5.53, + "learning_rate": 6.054623146629368e-05, + "loss": 1.07019091, + "memory(GiB)": 85.12, + "step": 7055, + "train_speed(iter/s)": 0.035272 + }, + { + "acc": 0.68817263, + "epoch": 5.53, + "learning_rate": 6.049344596992153e-05, + "loss": 1.00896044, + "memory(GiB)": 85.12, + "step": 7060, + "train_speed(iter/s)": 0.03527 + }, + { + "acc": 0.67556105, + "epoch": 5.54, + "learning_rate": 6.04406482366701e-05, + "loss": 1.09273539, + "memory(GiB)": 85.12, + "step": 7065, + "train_speed(iter/s)": 0.03527 + }, + { + "acc": 0.69138508, + "epoch": 5.54, + "learning_rate": 6.038783832810918e-05, + "loss": 0.99918337, + "memory(GiB)": 85.12, + "step": 7070, + "train_speed(iter/s)": 0.03527 + }, + { + "acc": 0.67930202, + "epoch": 5.54, + "learning_rate": 6.03350163058228e-05, + "loss": 1.04020901, + "memory(GiB)": 85.12, + "step": 7075, + "train_speed(iter/s)": 0.035269 + }, + { + "acc": 0.67458458, + "epoch": 5.55, + "learning_rate": 6.028218223140908e-05, + "loss": 1.06615458, + "memory(GiB)": 85.12, + "step": 7080, + "train_speed(iter/s)": 0.03527 + }, + { + "acc": 0.67609491, + "epoch": 5.55, + "learning_rate": 6.022933616648021e-05, + "loss": 1.06678152, + "memory(GiB)": 85.12, + "step": 7085, + "train_speed(iter/s)": 0.03527 + }, + { + "acc": 0.67319765, + "epoch": 5.56, + "learning_rate": 6.017647817266236e-05, + "loss": 1.06721945, + "memory(GiB)": 85.12, + "step": 7090, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.67800503, + "epoch": 5.56, + "learning_rate": 6.012360831159565e-05, + "loss": 1.04036112, + "memory(GiB)": 85.12, + "step": 7095, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.6719317, + "epoch": 5.56, + "learning_rate": 6.007072664493395e-05, + "loss": 1.07102108, + "memory(GiB)": 85.12, + "step": 7100, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.67592273, + "epoch": 5.57, + "learning_rate": 6.0017833234344963e-05, + "loss": 1.0262372, + "memory(GiB)": 85.12, + "step": 7105, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.68355751, + "epoch": 5.57, + "learning_rate": 5.996492814151011e-05, + "loss": 1.02106323, + "memory(GiB)": 85.12, + "step": 7110, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.68380432, + "epoch": 5.58, + "learning_rate": 5.991201142812436e-05, + "loss": 1.03930197, + "memory(GiB)": 85.12, + "step": 7115, + "train_speed(iter/s)": 0.035272 + }, + { + "acc": 0.67916827, + "epoch": 5.58, + "learning_rate": 5.98590831558963e-05, + "loss": 1.03912115, + "memory(GiB)": 85.12, + "step": 7120, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.68918715, + "epoch": 5.58, + "learning_rate": 5.980614338654794e-05, + "loss": 1.01146679, + "memory(GiB)": 85.12, + "step": 7125, + "train_speed(iter/s)": 0.03527 + }, + { + "acc": 0.69327388, + "epoch": 5.59, + "learning_rate": 5.975319218181474e-05, + "loss": 1.01032009, + "memory(GiB)": 85.12, + "step": 7130, + "train_speed(iter/s)": 0.03527 + }, + { + "acc": 0.66974945, + "epoch": 5.59, + "learning_rate": 5.970022960344549e-05, + "loss": 1.06768141, + "memory(GiB)": 85.12, + "step": 7135, + "train_speed(iter/s)": 0.03527 + }, + { + "acc": 0.67780514, + "epoch": 5.6, + "learning_rate": 5.9647255713202234e-05, + "loss": 1.064604, + "memory(GiB)": 85.12, + "step": 7140, + "train_speed(iter/s)": 0.035269 + }, + { + "acc": 0.67822738, + "epoch": 5.6, + "learning_rate": 5.959427057286019e-05, + "loss": 1.04480143, + "memory(GiB)": 85.12, + "step": 7145, + "train_speed(iter/s)": 0.035269 + }, + { + "acc": 0.68090706, + "epoch": 5.6, + "learning_rate": 5.954127424420773e-05, + "loss": 1.03140087, + "memory(GiB)": 85.12, + "step": 7150, + "train_speed(iter/s)": 0.03527 + }, + { + "acc": 0.68836522, + "epoch": 5.61, + "learning_rate": 5.9488266789046255e-05, + "loss": 1.02142658, + "memory(GiB)": 85.12, + "step": 7155, + "train_speed(iter/s)": 0.03527 + }, + { + "acc": 0.67817101, + "epoch": 5.61, + "learning_rate": 5.943524826919013e-05, + "loss": 1.04541025, + "memory(GiB)": 85.12, + "step": 7160, + "train_speed(iter/s)": 0.03527 + }, + { + "acc": 0.67876797, + "epoch": 5.62, + "learning_rate": 5.9382218746466634e-05, + "loss": 1.07195463, + "memory(GiB)": 85.12, + "step": 7165, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.67407222, + "epoch": 5.62, + "learning_rate": 5.93291782827159e-05, + "loss": 1.05578661, + "memory(GiB)": 85.12, + "step": 7170, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.67716932, + "epoch": 5.62, + "learning_rate": 5.927612693979079e-05, + "loss": 1.0597784, + "memory(GiB)": 85.12, + "step": 7175, + "train_speed(iter/s)": 0.035272 + }, + { + "acc": 0.68023987, + "epoch": 5.63, + "learning_rate": 5.9223064779556846e-05, + "loss": 1.0568635, + "memory(GiB)": 85.12, + "step": 7180, + "train_speed(iter/s)": 0.035272 + }, + { + "acc": 0.67762904, + "epoch": 5.63, + "learning_rate": 5.916999186389227e-05, + "loss": 1.05888271, + "memory(GiB)": 85.12, + "step": 7185, + "train_speed(iter/s)": 0.035273 + }, + { + "acc": 0.68073959, + "epoch": 5.63, + "learning_rate": 5.911690825468774e-05, + "loss": 1.01983681, + "memory(GiB)": 85.12, + "step": 7190, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.68627586, + "epoch": 5.64, + "learning_rate": 5.9063814013846475e-05, + "loss": 1.00290499, + "memory(GiB)": 85.12, + "step": 7195, + "train_speed(iter/s)": 0.03527 + }, + { + "acc": 0.68013487, + "epoch": 5.64, + "learning_rate": 5.901070920328402e-05, + "loss": 1.06031981, + "memory(GiB)": 85.12, + "step": 7200, + "train_speed(iter/s)": 0.03527 + }, + { + "acc": 0.68101711, + "epoch": 5.65, + "learning_rate": 5.89575938849283e-05, + "loss": 1.05255985, + "memory(GiB)": 85.12, + "step": 7205, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.68656335, + "epoch": 5.65, + "learning_rate": 5.8904468120719506e-05, + "loss": 1.04291906, + "memory(GiB)": 85.12, + "step": 7210, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.6808826, + "epoch": 5.65, + "learning_rate": 5.885133197260993e-05, + "loss": 1.05651436, + "memory(GiB)": 85.12, + "step": 7215, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.68361244, + "epoch": 5.66, + "learning_rate": 5.879818550256405e-05, + "loss": 1.05124416, + "memory(GiB)": 85.12, + "step": 7220, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.68785329, + "epoch": 5.66, + "learning_rate": 5.874502877255835e-05, + "loss": 1.00955372, + "memory(GiB)": 85.12, + "step": 7225, + "train_speed(iter/s)": 0.035271 + }, + { + "acc": 0.67910919, + "epoch": 5.67, + "learning_rate": 5.8691861844581295e-05, + "loss": 1.06434069, + "memory(GiB)": 85.12, + "step": 7230, + "train_speed(iter/s)": 0.035272 + }, + { + "acc": 0.68168969, + "epoch": 5.67, + "learning_rate": 5.8638684780633216e-05, + "loss": 1.03852062, + "memory(GiB)": 85.12, + "step": 7235, + "train_speed(iter/s)": 0.035272 + }, + { + "acc": 0.69188733, + "epoch": 5.67, + "learning_rate": 5.858549764272629e-05, + "loss": 1.01275368, + "memory(GiB)": 85.12, + "step": 7240, + "train_speed(iter/s)": 0.035272 + }, + { + "acc": 0.68451958, + "epoch": 5.68, + "learning_rate": 5.853230049288443e-05, + "loss": 1.02619667, + "memory(GiB)": 85.12, + "step": 7245, + "train_speed(iter/s)": 0.035273 + }, + { + "acc": 0.67939177, + "epoch": 5.68, + "learning_rate": 5.847909339314322e-05, + "loss": 1.04042921, + "memory(GiB)": 85.12, + "step": 7250, + "train_speed(iter/s)": 0.035273 + }, + { + "acc": 0.66912894, + "epoch": 5.69, + "learning_rate": 5.842587640554986e-05, + "loss": 1.08952456, + "memory(GiB)": 85.12, + "step": 7255, + "train_speed(iter/s)": 0.035274 + }, + { + "acc": 0.68664956, + "epoch": 5.69, + "learning_rate": 5.8372649592163056e-05, + "loss": 1.02034445, + "memory(GiB)": 85.12, + "step": 7260, + "train_speed(iter/s)": 0.035273 + }, + { + "acc": 0.68716116, + "epoch": 5.69, + "learning_rate": 5.8319413015052993e-05, + "loss": 1.017033, + "memory(GiB)": 85.12, + "step": 7265, + "train_speed(iter/s)": 0.035274 + }, + { + "acc": 0.66364636, + "epoch": 5.7, + "learning_rate": 5.826616673630125e-05, + "loss": 1.09431334, + "memory(GiB)": 85.12, + "step": 7270, + "train_speed(iter/s)": 0.035274 + }, + { + "acc": 0.67621994, + "epoch": 5.7, + "learning_rate": 5.821291081800071e-05, + "loss": 1.07470884, + "memory(GiB)": 85.12, + "step": 7275, + "train_speed(iter/s)": 0.035274 + }, + { + "acc": 0.6812994, + "epoch": 5.71, + "learning_rate": 5.8159645322255475e-05, + "loss": 1.07116871, + "memory(GiB)": 85.12, + "step": 7280, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.684375, + "epoch": 5.71, + "learning_rate": 5.810637031118086e-05, + "loss": 1.02568693, + "memory(GiB)": 85.12, + "step": 7285, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.68658953, + "epoch": 5.71, + "learning_rate": 5.805308584690321e-05, + "loss": 1.04678984, + "memory(GiB)": 85.12, + "step": 7290, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.69045858, + "epoch": 5.72, + "learning_rate": 5.799979199155998e-05, + "loss": 1.01404943, + "memory(GiB)": 85.12, + "step": 7295, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.68868189, + "epoch": 5.72, + "learning_rate": 5.794648880729952e-05, + "loss": 1.01892052, + "memory(GiB)": 85.12, + "step": 7300, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.68861918, + "epoch": 5.72, + "learning_rate": 5.7893176356281056e-05, + "loss": 1.0252737, + "memory(GiB)": 85.12, + "step": 7305, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.68878713, + "epoch": 5.73, + "learning_rate": 5.7839854700674655e-05, + "loss": 1.01528711, + "memory(GiB)": 85.12, + "step": 7310, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.67168713, + "epoch": 5.73, + "learning_rate": 5.778652390266107e-05, + "loss": 1.0582777, + "memory(GiB)": 85.12, + "step": 7315, + "train_speed(iter/s)": 0.035277 + }, + { + "acc": 0.68114805, + "epoch": 5.74, + "learning_rate": 5.773318402443177e-05, + "loss": 1.05835133, + "memory(GiB)": 85.12, + "step": 7320, + "train_speed(iter/s)": 0.035277 + }, + { + "acc": 0.67773662, + "epoch": 5.74, + "learning_rate": 5.767983512818877e-05, + "loss": 1.04364738, + "memory(GiB)": 85.12, + "step": 7325, + "train_speed(iter/s)": 0.035278 + }, + { + "acc": 0.68714108, + "epoch": 5.74, + "learning_rate": 5.762647727614462e-05, + "loss": 1.0286314, + "memory(GiB)": 85.12, + "step": 7330, + "train_speed(iter/s)": 0.035278 + }, + { + "acc": 0.69304905, + "epoch": 5.75, + "learning_rate": 5.757311053052232e-05, + "loss": 1.0080122, + "memory(GiB)": 85.12, + "step": 7335, + "train_speed(iter/s)": 0.035277 + }, + { + "acc": 0.68668628, + "epoch": 5.75, + "learning_rate": 5.7519734953555225e-05, + "loss": 1.04978542, + "memory(GiB)": 85.12, + "step": 7340, + "train_speed(iter/s)": 0.035277 + }, + { + "acc": 0.68079734, + "epoch": 5.76, + "learning_rate": 5.7466350607486994e-05, + "loss": 1.03145504, + "memory(GiB)": 85.12, + "step": 7345, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.6759232, + "epoch": 5.76, + "learning_rate": 5.7412957554571535e-05, + "loss": 1.04577227, + "memory(GiB)": 85.12, + "step": 7350, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.67788, + "epoch": 5.76, + "learning_rate": 5.7359555857072865e-05, + "loss": 1.01738691, + "memory(GiB)": 85.12, + "step": 7355, + "train_speed(iter/s)": 0.035277 + }, + { + "acc": 0.67940884, + "epoch": 5.77, + "learning_rate": 5.730614557726509e-05, + "loss": 1.0438838, + "memory(GiB)": 85.12, + "step": 7360, + "train_speed(iter/s)": 0.035277 + }, + { + "acc": 0.67440863, + "epoch": 5.77, + "learning_rate": 5.725272677743238e-05, + "loss": 1.04039993, + "memory(GiB)": 85.12, + "step": 7365, + "train_speed(iter/s)": 0.035278 + }, + { + "acc": 0.66977754, + "epoch": 5.78, + "learning_rate": 5.719929951986875e-05, + "loss": 1.0763092, + "memory(GiB)": 85.12, + "step": 7370, + "train_speed(iter/s)": 0.035278 + }, + { + "acc": 0.68106508, + "epoch": 5.78, + "learning_rate": 5.71458638668782e-05, + "loss": 1.05014811, + "memory(GiB)": 85.12, + "step": 7375, + "train_speed(iter/s)": 0.035279 + }, + { + "acc": 0.68356209, + "epoch": 5.78, + "learning_rate": 5.7092419880774384e-05, + "loss": 1.04596844, + "memory(GiB)": 85.12, + "step": 7380, + "train_speed(iter/s)": 0.035279 + }, + { + "acc": 0.67028294, + "epoch": 5.79, + "learning_rate": 5.7038967623880766e-05, + "loss": 1.07842445, + "memory(GiB)": 85.12, + "step": 7385, + "train_speed(iter/s)": 0.03528 + }, + { + "acc": 0.69012942, + "epoch": 5.79, + "learning_rate": 5.698550715853041e-05, + "loss": 1.01788101, + "memory(GiB)": 85.12, + "step": 7390, + "train_speed(iter/s)": 0.035277 + }, + { + "acc": 0.68352227, + "epoch": 5.8, + "learning_rate": 5.6932038547065994e-05, + "loss": 1.02470961, + "memory(GiB)": 85.12, + "step": 7395, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.68133144, + "epoch": 5.8, + "learning_rate": 5.687856185183964e-05, + "loss": 1.00813093, + "memory(GiB)": 85.12, + "step": 7400, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.67967892, + "epoch": 5.8, + "learning_rate": 5.682507713521297e-05, + "loss": 1.04251871, + "memory(GiB)": 85.12, + "step": 7405, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.67965193, + "epoch": 5.81, + "learning_rate": 5.677158445955688e-05, + "loss": 1.04245062, + "memory(GiB)": 85.12, + "step": 7410, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.68244319, + "epoch": 5.81, + "learning_rate": 5.6718083887251585e-05, + "loss": 1.04257526, + "memory(GiB)": 85.12, + "step": 7415, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.67110071, + "epoch": 5.82, + "learning_rate": 5.666457548068653e-05, + "loss": 1.04886589, + "memory(GiB)": 85.12, + "step": 7420, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.68169503, + "epoch": 5.82, + "learning_rate": 5.661105930226027e-05, + "loss": 1.03461304, + "memory(GiB)": 85.12, + "step": 7425, + "train_speed(iter/s)": 0.035273 + }, + { + "acc": 0.68312588, + "epoch": 5.82, + "learning_rate": 5.65575354143804e-05, + "loss": 1.03809738, + "memory(GiB)": 85.12, + "step": 7430, + "train_speed(iter/s)": 0.035274 + }, + { + "acc": 0.67332888, + "epoch": 5.83, + "learning_rate": 5.650400387946358e-05, + "loss": 1.04974604, + "memory(GiB)": 85.12, + "step": 7435, + "train_speed(iter/s)": 0.035274 + }, + { + "acc": 0.67180338, + "epoch": 5.83, + "learning_rate": 5.6450464759935306e-05, + "loss": 1.07695293, + "memory(GiB)": 85.12, + "step": 7440, + "train_speed(iter/s)": 0.035274 + }, + { + "acc": 0.67879071, + "epoch": 5.83, + "learning_rate": 5.6396918118229954e-05, + "loss": 1.07525129, + "memory(GiB)": 85.12, + "step": 7445, + "train_speed(iter/s)": 0.035274 + }, + { + "acc": 0.69320011, + "epoch": 5.84, + "learning_rate": 5.63433640167907e-05, + "loss": 0.99239464, + "memory(GiB)": 85.12, + "step": 7450, + "train_speed(iter/s)": 0.035274 + }, + { + "acc": 0.68558059, + "epoch": 5.84, + "learning_rate": 5.628980251806937e-05, + "loss": 1.02958326, + "memory(GiB)": 85.12, + "step": 7455, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.68014541, + "epoch": 5.85, + "learning_rate": 5.6236233684526416e-05, + "loss": 1.01991968, + "memory(GiB)": 85.12, + "step": 7460, + "train_speed(iter/s)": 0.035274 + }, + { + "acc": 0.68883257, + "epoch": 5.85, + "learning_rate": 5.6182657578630896e-05, + "loss": 1.00945797, + "memory(GiB)": 85.12, + "step": 7465, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.68561568, + "epoch": 5.85, + "learning_rate": 5.6129074262860304e-05, + "loss": 1.03655548, + "memory(GiB)": 85.12, + "step": 7470, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.68775282, + "epoch": 5.86, + "learning_rate": 5.607548379970056e-05, + "loss": 1.00257397, + "memory(GiB)": 85.12, + "step": 7475, + "train_speed(iter/s)": 0.035274 + }, + { + "acc": 0.68747077, + "epoch": 5.86, + "learning_rate": 5.602188625164591e-05, + "loss": 1.02046089, + "memory(GiB)": 85.12, + "step": 7480, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.67813239, + "epoch": 5.87, + "learning_rate": 5.5968281681198864e-05, + "loss": 1.02797394, + "memory(GiB)": 85.12, + "step": 7485, + "train_speed(iter/s)": 0.035274 + }, + { + "acc": 0.68199043, + "epoch": 5.87, + "learning_rate": 5.591467015087012e-05, + "loss": 1.03519802, + "memory(GiB)": 85.12, + "step": 7490, + "train_speed(iter/s)": 0.035274 + }, + { + "acc": 0.67812767, + "epoch": 5.87, + "learning_rate": 5.5861051723178494e-05, + "loss": 1.052808, + "memory(GiB)": 85.12, + "step": 7495, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.68303499, + "epoch": 5.88, + "learning_rate": 5.580742646065085e-05, + "loss": 1.02687483, + "memory(GiB)": 85.12, + "step": 7500, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.67364564, + "epoch": 5.88, + "learning_rate": 5.575379442582203e-05, + "loss": 1.05254545, + "memory(GiB)": 85.12, + "step": 7505, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.67409277, + "epoch": 5.89, + "learning_rate": 5.570015568123475e-05, + "loss": 1.04328775, + "memory(GiB)": 85.12, + "step": 7510, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.684273, + "epoch": 5.89, + "learning_rate": 5.564651028943956e-05, + "loss": 1.04421234, + "memory(GiB)": 85.12, + "step": 7515, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.68276038, + "epoch": 5.89, + "learning_rate": 5.559285831299477e-05, + "loss": 1.05293627, + "memory(GiB)": 85.12, + "step": 7520, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.68299809, + "epoch": 5.9, + "learning_rate": 5.553919981446635e-05, + "loss": 1.0334218, + "memory(GiB)": 85.12, + "step": 7525, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.68169241, + "epoch": 5.9, + "learning_rate": 5.548553485642789e-05, + "loss": 1.03117504, + "memory(GiB)": 85.12, + "step": 7530, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.69697251, + "epoch": 5.91, + "learning_rate": 5.543186350146053e-05, + "loss": 0.98402576, + "memory(GiB)": 85.12, + "step": 7535, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.68771691, + "epoch": 5.91, + "learning_rate": 5.537818581215285e-05, + "loss": 1.01845856, + "memory(GiB)": 85.12, + "step": 7540, + "train_speed(iter/s)": 0.035277 + }, + { + "acc": 0.67708225, + "epoch": 5.91, + "learning_rate": 5.53245018511008e-05, + "loss": 1.03625803, + "memory(GiB)": 85.12, + "step": 7545, + "train_speed(iter/s)": 0.035277 + }, + { + "acc": 0.68522434, + "epoch": 5.92, + "learning_rate": 5.527081168090767e-05, + "loss": 1.0206852, + "memory(GiB)": 85.12, + "step": 7550, + "train_speed(iter/s)": 0.035277 + }, + { + "acc": 0.69737639, + "epoch": 5.92, + "learning_rate": 5.521711536418398e-05, + "loss": 0.96795731, + "memory(GiB)": 85.12, + "step": 7555, + "train_speed(iter/s)": 0.035277 + }, + { + "acc": 0.68795233, + "epoch": 5.92, + "learning_rate": 5.5163412963547425e-05, + "loss": 1.00420456, + "memory(GiB)": 85.12, + "step": 7560, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.67716355, + "epoch": 5.93, + "learning_rate": 5.5109704541622787e-05, + "loss": 1.05512428, + "memory(GiB)": 85.12, + "step": 7565, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.69076524, + "epoch": 5.93, + "learning_rate": 5.505599016104187e-05, + "loss": 0.99612122, + "memory(GiB)": 85.12, + "step": 7570, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.66488171, + "epoch": 5.94, + "learning_rate": 5.5002269884443433e-05, + "loss": 1.08279037, + "memory(GiB)": 85.12, + "step": 7575, + "train_speed(iter/s)": 0.035277 + }, + { + "acc": 0.68213077, + "epoch": 5.94, + "learning_rate": 5.4948543774473105e-05, + "loss": 1.0349185, + "memory(GiB)": 85.12, + "step": 7580, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.67493806, + "epoch": 5.94, + "learning_rate": 5.4894811893783316e-05, + "loss": 1.06746645, + "memory(GiB)": 85.12, + "step": 7585, + "train_speed(iter/s)": 0.035277 + }, + { + "acc": 0.67195811, + "epoch": 5.95, + "learning_rate": 5.484107430503322e-05, + "loss": 1.07974176, + "memory(GiB)": 85.12, + "step": 7590, + "train_speed(iter/s)": 0.035277 + }, + { + "acc": 0.68496284, + "epoch": 5.95, + "learning_rate": 5.4787331070888656e-05, + "loss": 1.03015051, + "memory(GiB)": 85.12, + "step": 7595, + "train_speed(iter/s)": 0.035277 + }, + { + "acc": 0.67029195, + "epoch": 5.96, + "learning_rate": 5.473358225402202e-05, + "loss": 1.07754288, + "memory(GiB)": 85.12, + "step": 7600, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.67144132, + "epoch": 5.96, + "learning_rate": 5.467982791711224e-05, + "loss": 1.04674873, + "memory(GiB)": 85.12, + "step": 7605, + "train_speed(iter/s)": 0.035276 + }, + { + "acc": 0.69102864, + "epoch": 5.96, + "learning_rate": 5.4626068122844634e-05, + "loss": 1.03087606, + "memory(GiB)": 85.12, + "step": 7610, + "train_speed(iter/s)": 0.035274 + }, + { + "acc": 0.68264012, + "epoch": 5.97, + "learning_rate": 5.4572302933910926e-05, + "loss": 1.03752575, + "memory(GiB)": 85.12, + "step": 7615, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.68130126, + "epoch": 5.97, + "learning_rate": 5.451853241300913e-05, + "loss": 1.01153135, + "memory(GiB)": 85.12, + "step": 7620, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.66839213, + "epoch": 5.98, + "learning_rate": 5.446475662284346e-05, + "loss": 1.09095182, + "memory(GiB)": 85.12, + "step": 7625, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.67351193, + "epoch": 5.98, + "learning_rate": 5.4410975626124284e-05, + "loss": 1.07571201, + "memory(GiB)": 85.12, + "step": 7630, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.68798418, + "epoch": 5.98, + "learning_rate": 5.435718948556804e-05, + "loss": 1.00712776, + "memory(GiB)": 85.12, + "step": 7635, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.70076885, + "epoch": 5.99, + "learning_rate": 5.430339826389719e-05, + "loss": 0.9588829, + "memory(GiB)": 85.12, + "step": 7640, + "train_speed(iter/s)": 0.035274 + }, + { + "acc": 0.67537951, + "epoch": 5.99, + "learning_rate": 5.424960202384006e-05, + "loss": 1.05870562, + "memory(GiB)": 85.12, + "step": 7645, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.69180627, + "epoch": 6.0, + "learning_rate": 5.419580082813089e-05, + "loss": 1.00422449, + "memory(GiB)": 85.12, + "step": 7650, + "train_speed(iter/s)": 0.035275 + }, + { + "acc": 0.66602154, + "epoch": 6.0, + "learning_rate": 5.414199473950967e-05, + "loss": 1.09426003, + "memory(GiB)": 85.12, + "step": 7655, + "train_speed(iter/s)": 0.035276 + }, + { + "epoch": 6.0, + "eval_acc": 0.6978462309040822, + "eval_loss": 0.9628272652626038, + "eval_runtime": 84.9357, + "eval_samples_per_second": 1.095, + "eval_steps_per_second": 1.095, + "step": 7656 + } + ], + "logging_steps": 5, + "max_steps": 15312, + "num_input_tokens_seen": 0, + "num_train_epochs": 12, + "save_steps": 1, + "total_flos": 3.3314641424613103e+22, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}