|
{ |
|
"best_metric": 0.7472190257000384, |
|
"best_model_checkpoint": "vit-base-patch16-224-in21k-finetuned-crop-classification/checkpoint-1468", |
|
"epoch": 9.97275204359673, |
|
"eval_steps": 500, |
|
"global_step": 1830, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.73224043715847e-06, |
|
"loss": 1.6369, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 5.46448087431694e-06, |
|
"loss": 1.6111, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 8.196721311475409e-06, |
|
"loss": 1.5398, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.092896174863388e-05, |
|
"loss": 1.4457, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.3661202185792351e-05, |
|
"loss": 1.3015, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.6393442622950818e-05, |
|
"loss": 1.1684, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.912568306010929e-05, |
|
"loss": 1.0962, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 2.185792349726776e-05, |
|
"loss": 1.0177, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.459016393442623e-05, |
|
"loss": 0.9627, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.7322404371584703e-05, |
|
"loss": 0.8997, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 3.005464480874317e-05, |
|
"loss": 0.8822, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.2786885245901635e-05, |
|
"loss": 0.8726, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.551912568306011e-05, |
|
"loss": 0.8288, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.825136612021858e-05, |
|
"loss": 0.8337, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.098360655737705e-05, |
|
"loss": 0.829, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.371584699453552e-05, |
|
"loss": 0.7949, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 4.644808743169399e-05, |
|
"loss": 0.7856, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 4.918032786885246e-05, |
|
"loss": 0.8031, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7050249328730341, |
|
"eval_loss": 0.7602581977844238, |
|
"eval_runtime": 93.216, |
|
"eval_samples_per_second": 27.967, |
|
"eval_steps_per_second": 0.88, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.9787492410443234e-05, |
|
"loss": 0.78, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.948391013964785e-05, |
|
"loss": 0.7994, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 4.918032786885246e-05, |
|
"loss": 0.7665, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 4.8876745598057074e-05, |
|
"loss": 0.7866, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 4.857316332726169e-05, |
|
"loss": 0.7733, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 4.8269581056466304e-05, |
|
"loss": 0.7504, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 4.7965998785670915e-05, |
|
"loss": 0.7805, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.766241651487553e-05, |
|
"loss": 0.7671, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 4.7358834244080144e-05, |
|
"loss": 0.7012, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 4.705525197328476e-05, |
|
"loss": 0.7305, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 4.6751669702489374e-05, |
|
"loss": 0.7441, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 4.644808743169399e-05, |
|
"loss": 0.7758, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 4.61445051608986e-05, |
|
"loss": 0.731, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 4.584092289010322e-05, |
|
"loss": 0.7303, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 4.553734061930783e-05, |
|
"loss": 0.7265, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 4.523375834851245e-05, |
|
"loss": 0.7385, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 4.493017607771706e-05, |
|
"loss": 0.7396, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 4.462659380692168e-05, |
|
"loss": 0.7311, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7249712313003452, |
|
"eval_loss": 0.7046979069709778, |
|
"eval_runtime": 93.2263, |
|
"eval_samples_per_second": 27.964, |
|
"eval_steps_per_second": 0.88, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 4.432301153612629e-05, |
|
"loss": 0.7129, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 4.401942926533091e-05, |
|
"loss": 0.7103, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 4.371584699453552e-05, |
|
"loss": 0.6994, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 4.341226472374014e-05, |
|
"loss": 0.6637, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 4.310868245294475e-05, |
|
"loss": 0.7107, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 4.280510018214937e-05, |
|
"loss": 0.7326, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 4.250151791135398e-05, |
|
"loss": 0.7229, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 4.21979356405586e-05, |
|
"loss": 0.7064, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 4.189435336976321e-05, |
|
"loss": 0.6781, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 4.1590771098967827e-05, |
|
"loss": 0.7207, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 4.128718882817244e-05, |
|
"loss": 0.7244, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 4.098360655737705e-05, |
|
"loss": 0.6972, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 4.068002428658167e-05, |
|
"loss": 0.7148, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 4.037644201578628e-05, |
|
"loss": 0.7168, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 4.007285974499089e-05, |
|
"loss": 0.7146, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 3.976927747419551e-05, |
|
"loss": 0.6583, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 3.946569520340012e-05, |
|
"loss": 0.7119, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 3.916211293260474e-05, |
|
"loss": 0.6891, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.885853066180935e-05, |
|
"loss": 0.7144, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7211354046797085, |
|
"eval_loss": 0.6968041062355042, |
|
"eval_runtime": 91.8623, |
|
"eval_samples_per_second": 28.379, |
|
"eval_steps_per_second": 0.893, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"learning_rate": 3.8554948391013967e-05, |
|
"loss": 0.7037, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"learning_rate": 3.825136612021858e-05, |
|
"loss": 0.6613, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 3.7947783849423196e-05, |
|
"loss": 0.6462, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 3.764420157862781e-05, |
|
"loss": 0.6484, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 3.7340619307832425e-05, |
|
"loss": 0.6747, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.7036, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 3.6733454766241655e-05, |
|
"loss": 0.6444, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 3.6429872495446266e-05, |
|
"loss": 0.6337, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 3.49, |
|
"learning_rate": 3.6126290224650884e-05, |
|
"loss": 0.6715, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 3.5822707953855495e-05, |
|
"loss": 0.6583, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 3.551912568306011e-05, |
|
"loss": 0.663, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"learning_rate": 3.5215543412264725e-05, |
|
"loss": 0.6901, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 3.491196114146934e-05, |
|
"loss": 0.6582, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 3.76, |
|
"learning_rate": 3.4608378870673954e-05, |
|
"loss": 0.6868, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 3.430479659987857e-05, |
|
"loss": 0.7168, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 3.400121432908318e-05, |
|
"loss": 0.6522, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 3.36976320582878e-05, |
|
"loss": 0.6337, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 3.339404978749241e-05, |
|
"loss": 0.6516, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7376294591484465, |
|
"eval_loss": 0.6568663716316223, |
|
"eval_runtime": 93.5303, |
|
"eval_samples_per_second": 27.873, |
|
"eval_steps_per_second": 0.877, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 3.3090467516697024e-05, |
|
"loss": 0.6547, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 3.2786885245901635e-05, |
|
"loss": 0.6444, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"learning_rate": 3.248330297510625e-05, |
|
"loss": 0.6687, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"learning_rate": 3.2179720704310865e-05, |
|
"loss": 0.6634, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"learning_rate": 3.187613843351548e-05, |
|
"loss": 0.6334, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 3.1572556162720094e-05, |
|
"loss": 0.6095, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 4.36, |
|
"learning_rate": 3.126897389192471e-05, |
|
"loss": 0.6467, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"learning_rate": 3.096539162112932e-05, |
|
"loss": 0.6532, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 3.066180935033394e-05, |
|
"loss": 0.6544, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 4.52, |
|
"learning_rate": 3.0358227079538553e-05, |
|
"loss": 0.6161, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 3.005464480874317e-05, |
|
"loss": 0.6092, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 2.9751062537947782e-05, |
|
"loss": 0.6323, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"learning_rate": 2.94474802671524e-05, |
|
"loss": 0.6593, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 2.9143897996357018e-05, |
|
"loss": 0.6493, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"learning_rate": 2.884031572556163e-05, |
|
"loss": 0.6475, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 2.8536733454766244e-05, |
|
"loss": 0.6196, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"learning_rate": 2.823315118397086e-05, |
|
"loss": 0.6257, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 4.96, |
|
"learning_rate": 2.7929568913175473e-05, |
|
"loss": 0.6371, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7376294591484465, |
|
"eval_loss": 0.6482810974121094, |
|
"eval_runtime": 93.1536, |
|
"eval_samples_per_second": 27.986, |
|
"eval_steps_per_second": 0.88, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 2.7625986642380085e-05, |
|
"loss": 0.6049, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 2.7322404371584703e-05, |
|
"loss": 0.6072, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 2.7018822100789314e-05, |
|
"loss": 0.5548, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 5.18, |
|
"learning_rate": 2.6715239829993932e-05, |
|
"loss": 0.5689, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 5.23, |
|
"learning_rate": 2.6411657559198543e-05, |
|
"loss": 0.6459, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"learning_rate": 2.610807528840316e-05, |
|
"loss": 0.6033, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 5.34, |
|
"learning_rate": 2.5804493017607773e-05, |
|
"loss": 0.5981, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"learning_rate": 2.550091074681239e-05, |
|
"loss": 0.6201, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"learning_rate": 2.5197328476017002e-05, |
|
"loss": 0.6121, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"learning_rate": 2.4893746205221617e-05, |
|
"loss": 0.6147, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"learning_rate": 2.459016393442623e-05, |
|
"loss": 0.6007, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 5.61, |
|
"learning_rate": 2.4286581663630846e-05, |
|
"loss": 0.6087, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 2.3982999392835457e-05, |
|
"loss": 0.6195, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 2.3679417122040072e-05, |
|
"loss": 0.5848, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 2.3375834851244687e-05, |
|
"loss": 0.6066, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"learning_rate": 2.30722525804493e-05, |
|
"loss": 0.6386, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 2.2768670309653916e-05, |
|
"loss": 0.6196, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 2.246508803885853e-05, |
|
"loss": 0.6127, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 5.99, |
|
"learning_rate": 2.2161505768063146e-05, |
|
"loss": 0.6246, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7364787111622555, |
|
"eval_loss": 0.6492410898208618, |
|
"eval_runtime": 92.6582, |
|
"eval_samples_per_second": 28.136, |
|
"eval_steps_per_second": 0.885, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"learning_rate": 2.185792349726776e-05, |
|
"loss": 0.5467, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 2.1554341226472375e-05, |
|
"loss": 0.577, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 2.125075895567699e-05, |
|
"loss": 0.6047, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 6.21, |
|
"learning_rate": 2.0947176684881604e-05, |
|
"loss": 0.5822, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 2.064359441408622e-05, |
|
"loss": 0.605, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 2.0340012143290834e-05, |
|
"loss": 0.5743, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 6.38, |
|
"learning_rate": 2.0036429872495445e-05, |
|
"loss": 0.5658, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"learning_rate": 1.973284760170006e-05, |
|
"loss": 0.5686, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 6.49, |
|
"learning_rate": 1.9429265330904674e-05, |
|
"loss": 0.5884, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 6.54, |
|
"learning_rate": 1.912568306010929e-05, |
|
"loss": 0.544, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 6.59, |
|
"learning_rate": 1.8822100789313904e-05, |
|
"loss": 0.5888, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.5377, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 1.8214936247723133e-05, |
|
"loss": 0.5665, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 6.76, |
|
"learning_rate": 1.7911353976927748e-05, |
|
"loss": 0.6032, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 6.81, |
|
"learning_rate": 1.7607771706132362e-05, |
|
"loss": 0.5905, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 1.7304189435336977e-05, |
|
"loss": 0.5831, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 6.92, |
|
"learning_rate": 1.700060716454159e-05, |
|
"loss": 0.5762, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 6.98, |
|
"learning_rate": 1.6697024893746206e-05, |
|
"loss": 0.5659, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7410817031070196, |
|
"eval_loss": 0.6481292843818665, |
|
"eval_runtime": 93.3242, |
|
"eval_samples_per_second": 27.935, |
|
"eval_steps_per_second": 0.879, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"learning_rate": 1.6393442622950818e-05, |
|
"loss": 0.5738, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 1.6089860352155432e-05, |
|
"loss": 0.5339, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 1.5786278081360047e-05, |
|
"loss": 0.5405, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 7.19, |
|
"learning_rate": 1.548269581056466e-05, |
|
"loss": 0.5792, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 1.5179113539769276e-05, |
|
"loss": 0.5677, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"learning_rate": 1.4875531268973891e-05, |
|
"loss": 0.5319, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 7.36, |
|
"learning_rate": 1.4571948998178509e-05, |
|
"loss": 0.5693, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 1.4268366727383122e-05, |
|
"loss": 0.5532, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 1.3964784456587737e-05, |
|
"loss": 0.565, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 1.3661202185792351e-05, |
|
"loss": 0.536, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"learning_rate": 1.3357619914996966e-05, |
|
"loss": 0.5408, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 1.305403764420158e-05, |
|
"loss": 0.5835, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 1.2750455373406195e-05, |
|
"loss": 0.5353, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 7.74, |
|
"learning_rate": 1.2446873102610808e-05, |
|
"loss": 0.5568, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 1.2143290831815423e-05, |
|
"loss": 0.5531, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"learning_rate": 1.1839708561020036e-05, |
|
"loss": 0.555, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"learning_rate": 1.153612629022465e-05, |
|
"loss": 0.5478, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 7.96, |
|
"learning_rate": 1.1232544019429265e-05, |
|
"loss": 0.533, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7472190257000384, |
|
"eval_loss": 0.6449919939041138, |
|
"eval_runtime": 93.8292, |
|
"eval_samples_per_second": 27.785, |
|
"eval_steps_per_second": 0.874, |
|
"step": 1468 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 1.092896174863388e-05, |
|
"loss": 0.5692, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 1.0625379477838495e-05, |
|
"loss": 0.5422, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"learning_rate": 1.032179720704311e-05, |
|
"loss": 0.5154, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"learning_rate": 1.0018214936247722e-05, |
|
"loss": 0.5471, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.23, |
|
"learning_rate": 9.714632665452337e-06, |
|
"loss": 0.5253, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 8.28, |
|
"learning_rate": 9.411050394656952e-06, |
|
"loss": 0.526, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 8.34, |
|
"learning_rate": 9.107468123861566e-06, |
|
"loss": 0.5038, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 8.803885853066181e-06, |
|
"loss": 0.5109, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 8.500303582270796e-06, |
|
"loss": 0.4941, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"learning_rate": 8.196721311475409e-06, |
|
"loss": 0.5616, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 7.893139040680023e-06, |
|
"loss": 0.5514, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 8.61, |
|
"learning_rate": 7.589556769884638e-06, |
|
"loss": 0.5203, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"learning_rate": 7.2859744990892545e-06, |
|
"loss": 0.5351, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"learning_rate": 6.982392228293868e-06, |
|
"loss": 0.5334, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 8.77, |
|
"learning_rate": 6.678809957498483e-06, |
|
"loss": 0.5162, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 8.83, |
|
"learning_rate": 6.375227686703098e-06, |
|
"loss": 0.5482, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 8.88, |
|
"learning_rate": 6.0716454159077115e-06, |
|
"loss": 0.5077, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 8.94, |
|
"learning_rate": 5.768063145112325e-06, |
|
"loss": 0.5392, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"learning_rate": 5.46448087431694e-06, |
|
"loss": 0.5416, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.74530111238972, |
|
"eval_loss": 0.638206422328949, |
|
"eval_runtime": 94.5986, |
|
"eval_samples_per_second": 27.559, |
|
"eval_steps_per_second": 0.867, |
|
"step": 1651 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 5.160898603521555e-06, |
|
"loss": 0.541, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"learning_rate": 4.8573163327261686e-06, |
|
"loss": 0.5393, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 4.553734061930783e-06, |
|
"loss": 0.488, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"learning_rate": 4.250151791135398e-06, |
|
"loss": 0.5104, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 3.946569520340012e-06, |
|
"loss": 0.5151, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 3.6429872495446273e-06, |
|
"loss": 0.5165, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 3.3394049787492415e-06, |
|
"loss": 0.5116, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 3.0358227079538558e-06, |
|
"loss": 0.5174, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 9.48, |
|
"learning_rate": 2.73224043715847e-06, |
|
"loss": 0.5321, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 9.54, |
|
"learning_rate": 2.4286581663630843e-06, |
|
"loss": 0.4799, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"learning_rate": 2.125075895567699e-06, |
|
"loss": 0.4766, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 9.65, |
|
"learning_rate": 1.8214936247723136e-06, |
|
"loss": 0.5425, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 1.5179113539769279e-06, |
|
"loss": 0.4936, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 1.2143290831815421e-06, |
|
"loss": 0.5408, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 9.81, |
|
"learning_rate": 9.107468123861568e-07, |
|
"loss": 0.5223, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 6.071645415907711e-07, |
|
"loss": 0.5074, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 9.92, |
|
"learning_rate": 3.0358227079538554e-07, |
|
"loss": 0.5151, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"learning_rate": 0.0, |
|
"loss": 0.5062, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"eval_accuracy": 0.7460682777138473, |
|
"eval_loss": 0.6395400166511536, |
|
"eval_runtime": 94.039, |
|
"eval_samples_per_second": 27.723, |
|
"eval_steps_per_second": 0.872, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"step": 1830, |
|
"total_flos": 1.8133380652001642e+19, |
|
"train_loss": 0.6611971782204883, |
|
"train_runtime": 13354.5549, |
|
"train_samples_per_second": 17.568, |
|
"train_steps_per_second": 0.137 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1830, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"total_flos": 1.8133380652001642e+19, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|