|
{ |
|
"best_metric": 0.7965201735496521, |
|
"best_model_checkpoint": "lora-Vicuna/checkpoint-1000", |
|
"epoch": 2.1394319439321285, |
|
"global_step": 11600, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.9999999999999995e-05, |
|
"loss": 2.041, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00011999999999999999, |
|
"loss": 1.5971, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00017999999999999998, |
|
"loss": 1.2446, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00023999999999999998, |
|
"loss": 1.1737, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0003, |
|
"loss": 1.1234, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0002996287817855596, |
|
"loss": 1.0991, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002992575635711192, |
|
"loss": 1.089, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002988863453566788, |
|
"loss": 1.0685, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002985151271422384, |
|
"loss": 1.0466, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00029814390892779805, |
|
"loss": 1.0441, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_loss": 0.8649595379829407, |
|
"eval_runtime": 1.3492, |
|
"eval_samples_per_second": 0.741, |
|
"eval_steps_per_second": 0.741, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002977726907133576, |
|
"loss": 1.0417, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00029740147249891725, |
|
"loss": 1.0269, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002970302542844769, |
|
"loss": 1.015, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002966590360700365, |
|
"loss": 1.0089, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002962878178555961, |
|
"loss": 1.0269, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002959165996411557, |
|
"loss": 1.0187, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00029554538142671533, |
|
"loss": 1.0142, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002951741632122749, |
|
"loss": 1.0077, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00029480294499783453, |
|
"loss": 1.004, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00029443172678339416, |
|
"loss": 1.0042, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 0.8426274061203003, |
|
"eval_runtime": 1.3288, |
|
"eval_samples_per_second": 0.753, |
|
"eval_steps_per_second": 0.753, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00029406050856895373, |
|
"loss": 1.0061, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00029368929035451335, |
|
"loss": 1.0152, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000293318072140073, |
|
"loss": 1.0049, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002929468539256326, |
|
"loss": 1.0011, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00029257563571119223, |
|
"loss": 0.9932, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002922044174967518, |
|
"loss": 0.9926, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00029183319928231143, |
|
"loss": 0.9971, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.000291461981067871, |
|
"loss": 0.9866, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00029109076285343063, |
|
"loss": 0.9871, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00029071954463899026, |
|
"loss": 0.9906, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_loss": 0.8352808356285095, |
|
"eval_runtime": 1.3169, |
|
"eval_samples_per_second": 0.759, |
|
"eval_steps_per_second": 0.759, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002903483264245499, |
|
"loss": 0.9797, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002899771082101095, |
|
"loss": 0.989, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002896058899956691, |
|
"loss": 0.9931, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002892346717812287, |
|
"loss": 0.9859, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00028886345356678834, |
|
"loss": 0.9874, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002884922353523479, |
|
"loss": 0.9888, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00028812101713790754, |
|
"loss": 0.9798, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00028774979892346716, |
|
"loss": 0.981, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00028737858070902674, |
|
"loss": 0.9824, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00028700736249458636, |
|
"loss": 0.9922, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 0.8162809610366821, |
|
"eval_runtime": 1.2603, |
|
"eval_samples_per_second": 0.793, |
|
"eval_steps_per_second": 0.793, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.000286636144280146, |
|
"loss": 0.9942, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002862649260657056, |
|
"loss": 0.9878, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00028589370785126524, |
|
"loss": 0.9855, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002855224896368248, |
|
"loss": 0.9803, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00028515127142238444, |
|
"loss": 0.9793, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.000284780053207944, |
|
"loss": 0.9939, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00028440883499350364, |
|
"loss": 0.9806, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00028403761677906327, |
|
"loss": 0.9864, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002836663985646229, |
|
"loss": 0.9839, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002832951803501825, |
|
"loss": 0.9816, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 0.7965201735496521, |
|
"eval_runtime": 1.3017, |
|
"eval_samples_per_second": 0.768, |
|
"eval_steps_per_second": 0.768, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002829239621357421, |
|
"loss": 0.9848, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002825527439213017, |
|
"loss": 0.9908, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00028218152570686134, |
|
"loss": 0.9885, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002818103074924209, |
|
"loss": 0.9772, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00028143908927798054, |
|
"loss": 0.9948, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00028106787106354017, |
|
"loss": 0.9778, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00028069665284909974, |
|
"loss": 0.9909, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00028032543463465937, |
|
"loss": 0.9802, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.000279954216420219, |
|
"loss": 0.9849, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002795829982057786, |
|
"loss": 0.9788, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 0.8706400990486145, |
|
"eval_runtime": 1.3141, |
|
"eval_samples_per_second": 0.761, |
|
"eval_steps_per_second": 0.761, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00027921177999133825, |
|
"loss": 0.9766, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0002788405617768978, |
|
"loss": 0.9853, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00027846934356245745, |
|
"loss": 0.9814, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.000278098125348017, |
|
"loss": 0.9747, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00027772690713357665, |
|
"loss": 0.9715, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0002773556889191363, |
|
"loss": 0.9903, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002769844707046959, |
|
"loss": 0.9862, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002766132524902555, |
|
"loss": 0.9791, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002762420342758151, |
|
"loss": 0.9834, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002758708160613747, |
|
"loss": 0.9752, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_loss": 0.8170408010482788, |
|
"eval_runtime": 1.2722, |
|
"eval_samples_per_second": 0.786, |
|
"eval_steps_per_second": 0.786, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00027549959784693435, |
|
"loss": 0.9803, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002751283796324939, |
|
"loss": 0.9968, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00027475716141805355, |
|
"loss": 0.98, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002743859432036132, |
|
"loss": 0.9819, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00027401472498917275, |
|
"loss": 0.9855, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002736435067747324, |
|
"loss": 0.9852, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.000273272288560292, |
|
"loss": 0.977, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00027290107034585163, |
|
"loss": 0.9918, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00027252985213141126, |
|
"loss": 0.9965, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00027215863391697083, |
|
"loss": 0.9721, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 0.8074629902839661, |
|
"eval_runtime": 1.2561, |
|
"eval_samples_per_second": 0.796, |
|
"eval_steps_per_second": 0.796, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00027178741570253045, |
|
"loss": 0.984, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0002714533193095341, |
|
"loss": 0.9858, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0002711006620058157, |
|
"loss": 1.011, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00027072944379137533, |
|
"loss": 0.9876, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00027035822557693495, |
|
"loss": 0.9941, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0002699870073624946, |
|
"loss": 0.9935, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00026961578914805415, |
|
"loss": 0.9818, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0002692445709336138, |
|
"loss": 0.9876, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002688733527191734, |
|
"loss": 0.9628, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00026850213450473303, |
|
"loss": 0.983, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_loss": 0.8400484919548035, |
|
"eval_runtime": 1.261, |
|
"eval_samples_per_second": 0.793, |
|
"eval_steps_per_second": 0.793, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0002681309162902926, |
|
"loss": 0.9845, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00026775969807585223, |
|
"loss": 0.98, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0002673884798614118, |
|
"loss": 0.9649, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00026701726164697143, |
|
"loss": 0.9847, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00026664604343253106, |
|
"loss": 0.9824, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0002662748252180907, |
|
"loss": 0.9777, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0002659036070036503, |
|
"loss": 0.9781, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00026553238878920994, |
|
"loss": 0.9795, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0002651611705747695, |
|
"loss": 0.9875, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00026478995236032914, |
|
"loss": 0.9741, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 0.8619368672370911, |
|
"eval_runtime": 1.2019, |
|
"eval_samples_per_second": 0.832, |
|
"eval_steps_per_second": 0.832, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0002644187341458887, |
|
"loss": 0.9841, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00026404751593144834, |
|
"loss": 0.9813, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00026367629771700796, |
|
"loss": 0.9777, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0002633050795025676, |
|
"loss": 0.972, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00026293386128812716, |
|
"loss": 0.97, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0002625626430736868, |
|
"loss": 0.9866, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0002621914248592464, |
|
"loss": 0.9742, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00026182020664480604, |
|
"loss": 0.9843, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0002614489884303656, |
|
"loss": 0.9616, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00026107777021592524, |
|
"loss": 0.9813, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 0.8606493473052979, |
|
"eval_runtime": 1.2145, |
|
"eval_samples_per_second": 0.823, |
|
"eval_steps_per_second": 0.823, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0002607065520014848, |
|
"loss": 0.9716, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00026033533378704444, |
|
"loss": 0.9822, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00025996411557260407, |
|
"loss": 0.9857, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0002595928973581637, |
|
"loss": 0.9977, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0002592216791437233, |
|
"loss": 0.9715, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00025885046092928294, |
|
"loss": 0.9623, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0002584792427148425, |
|
"loss": 0.9637, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00025810802450040214, |
|
"loss": 0.978, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002577368062859617, |
|
"loss": 0.9802, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00025736558807152134, |
|
"loss": 0.9807, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 0.8513050079345703, |
|
"eval_runtime": 1.2201, |
|
"eval_samples_per_second": 0.82, |
|
"eval_steps_per_second": 0.82, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00025699436985708097, |
|
"loss": 0.9841, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0002566231516426406, |
|
"loss": 0.9714, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00025625193342820017, |
|
"loss": 0.9693, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0002558807152137598, |
|
"loss": 0.9763, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0002555094969993194, |
|
"loss": 0.9779, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00025513827878487905, |
|
"loss": 0.9736, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0002547670605704386, |
|
"loss": 0.9788, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00025439584235599825, |
|
"loss": 0.9789, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0002540246241415578, |
|
"loss": 0.9729, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00025365340592711745, |
|
"loss": 0.9752, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 0.8586015105247498, |
|
"eval_runtime": 1.2961, |
|
"eval_samples_per_second": 0.772, |
|
"eval_steps_per_second": 0.772, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00025328218771267707, |
|
"loss": 0.9734, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0002529109694982367, |
|
"loss": 0.9767, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0002525397512837963, |
|
"loss": 0.962, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00025216853306935595, |
|
"loss": 0.9706, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0002517973148549155, |
|
"loss": 0.975, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00025142609664047515, |
|
"loss": 0.9653, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0002510548784260347, |
|
"loss": 0.976, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00025068366021159435, |
|
"loss": 0.9768, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.000250312441997154, |
|
"loss": 0.9764, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0002499412237827136, |
|
"loss": 0.9741, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_loss": 0.8531299829483032, |
|
"eval_runtime": 1.2788, |
|
"eval_samples_per_second": 0.782, |
|
"eval_steps_per_second": 0.782, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0002495700055682732, |
|
"loss": 0.9786, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0002491987873538328, |
|
"loss": 0.9737, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00024882756913939243, |
|
"loss": 0.9648, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00024845635092495206, |
|
"loss": 0.9717, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00024808513271051163, |
|
"loss": 0.9706, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00024771391449607125, |
|
"loss": 0.9662, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0002473426962816308, |
|
"loss": 0.9719, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00024697147806719045, |
|
"loss": 0.9729, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0002466002598527501, |
|
"loss": 0.9679, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0002462290416383097, |
|
"loss": 0.9782, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 0.8357999324798584, |
|
"eval_runtime": 1.3862, |
|
"eval_samples_per_second": 0.721, |
|
"eval_steps_per_second": 0.721, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00024585782342386933, |
|
"loss": 0.9636, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00024548660520942896, |
|
"loss": 0.9779, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00024511538699498853, |
|
"loss": 0.9718, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00024474416878054816, |
|
"loss": 0.9756, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00024437295056610773, |
|
"loss": 0.983, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00024400173235166736, |
|
"loss": 0.9735, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00024363051413722698, |
|
"loss": 0.976, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0002432592959227866, |
|
"loss": 0.9691, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0002428880777083462, |
|
"loss": 0.9644, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0002425168594939058, |
|
"loss": 0.9768, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_loss": 0.8302448987960815, |
|
"eval_runtime": 1.362, |
|
"eval_samples_per_second": 0.734, |
|
"eval_steps_per_second": 0.734, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0002421456412794654, |
|
"loss": 0.9709, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00024177442306502504, |
|
"loss": 0.9696, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00024140320485058466, |
|
"loss": 0.9669, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00024103198663614426, |
|
"loss": 0.965, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0002406607684217039, |
|
"loss": 0.9757, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00024028955020726346, |
|
"loss": 0.9717, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0002399183319928231, |
|
"loss": 0.963, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00023954711377838271, |
|
"loss": 0.9519, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0002391758955639423, |
|
"loss": 0.964, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00023880467734950194, |
|
"loss": 0.967, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"eval_loss": 0.87740159034729, |
|
"eval_runtime": 1.4495, |
|
"eval_samples_per_second": 0.69, |
|
"eval_steps_per_second": 0.69, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00023843345913506157, |
|
"loss": 0.9666, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00023806224092062114, |
|
"loss": 0.9616, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00023769102270618077, |
|
"loss": 0.9663, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00023731980449174036, |
|
"loss": 0.9665, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0002369485862773, |
|
"loss": 0.9662, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00023657736806285962, |
|
"loss": 0.9656, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00023620614984841922, |
|
"loss": 0.9587, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00023583493163397882, |
|
"loss": 0.9639, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00023546371341953842, |
|
"loss": 0.9727, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00023509249520509804, |
|
"loss": 0.954, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 0.8274036645889282, |
|
"eval_runtime": 1.399, |
|
"eval_samples_per_second": 0.715, |
|
"eval_steps_per_second": 0.715, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00023472127699065767, |
|
"loss": 0.9614, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00023435005877621727, |
|
"loss": 0.9573, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0002339788405617769, |
|
"loss": 0.9606, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00023360762234733647, |
|
"loss": 0.962, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0002332364041328961, |
|
"loss": 0.9591, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00023286518591845572, |
|
"loss": 0.9547, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00023249396770401532, |
|
"loss": 0.9627, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00023212274948957495, |
|
"loss": 0.9578, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00023175153127513457, |
|
"loss": 0.9623, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00023138031306069415, |
|
"loss": 0.946, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_loss": 0.8205245137214661, |
|
"eval_runtime": 1.3808, |
|
"eval_samples_per_second": 0.724, |
|
"eval_steps_per_second": 0.724, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00023100909484625377, |
|
"loss": 0.9734, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00023063787663181337, |
|
"loss": 0.964, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.000230266658417373, |
|
"loss": 0.9576, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00022989544020293263, |
|
"loss": 0.9512, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00022952422198849222, |
|
"loss": 0.9555, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00022915300377405182, |
|
"loss": 0.95, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00022878178555961142, |
|
"loss": 0.9617, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00022841056734517105, |
|
"loss": 0.9636, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00022803934913073068, |
|
"loss": 0.955, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00022766813091629028, |
|
"loss": 0.9516, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 0.8116470575332642, |
|
"eval_runtime": 1.3156, |
|
"eval_samples_per_second": 0.76, |
|
"eval_steps_per_second": 0.76, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0002272969127018499, |
|
"loss": 0.9517, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00022692569448740948, |
|
"loss": 0.9551, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0002265544762729691, |
|
"loss": 0.9578, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00022618325805852873, |
|
"loss": 0.9613, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00022581203984408833, |
|
"loss": 0.9526, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00022544082162964795, |
|
"loss": 0.966, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00022506960341520755, |
|
"loss": 0.9504, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00022469838520076715, |
|
"loss": 0.9458, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00022432716698632678, |
|
"loss": 0.9693, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00022395594877188638, |
|
"loss": 0.9566, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 0.8783697485923767, |
|
"eval_runtime": 1.3242, |
|
"eval_samples_per_second": 0.755, |
|
"eval_steps_per_second": 0.755, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.000223584730557446, |
|
"loss": 0.9736, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0002232135123430056, |
|
"loss": 0.9454, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00022284229412856523, |
|
"loss": 0.9547, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00022247107591412483, |
|
"loss": 0.9521, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00022209985769968443, |
|
"loss": 0.9546, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00022172863948524406, |
|
"loss": 0.9557, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00022135742127080366, |
|
"loss": 0.947, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00022098620305636328, |
|
"loss": 0.9576, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0002206149848419229, |
|
"loss": 0.9506, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00022024376662748248, |
|
"loss": 0.9537, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 0.8563671112060547, |
|
"eval_runtime": 1.3666, |
|
"eval_samples_per_second": 0.732, |
|
"eval_steps_per_second": 0.732, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0002198725484130421, |
|
"loss": 0.955, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0002195013301986017, |
|
"loss": 0.9653, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00021913011198416133, |
|
"loss": 0.9601, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00021875889376972096, |
|
"loss": 0.9589, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00021838767555528056, |
|
"loss": 0.9448, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00021801645734084016, |
|
"loss": 0.9538, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00021764523912639976, |
|
"loss": 0.958, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0002172740209119594, |
|
"loss": 0.9466, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.000216902802697519, |
|
"loss": 0.9544, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0002165315844830786, |
|
"loss": 0.9456, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 0.8733828663825989, |
|
"eval_runtime": 1.3225, |
|
"eval_samples_per_second": 0.756, |
|
"eval_steps_per_second": 0.756, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00021616036626863824, |
|
"loss": 0.956, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0002157891480541978, |
|
"loss": 0.9614, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00021541792983975744, |
|
"loss": 0.9568, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00021504671162531706, |
|
"loss": 0.9606, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00021467549341087666, |
|
"loss": 0.9375, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0002143042751964363, |
|
"loss": 0.938, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00021393305698199592, |
|
"loss": 0.9647, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.0002135618387675555, |
|
"loss": 0.96, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00021319062055311512, |
|
"loss": 0.9458, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00021281940233867472, |
|
"loss": 0.9492, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_loss": 0.869820773601532, |
|
"eval_runtime": 1.3761, |
|
"eval_samples_per_second": 0.727, |
|
"eval_steps_per_second": 0.727, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00021244818412423434, |
|
"loss": 0.9536, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00021207696590979397, |
|
"loss": 0.9522, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00021170574769535357, |
|
"loss": 0.9501, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00021133452948091317, |
|
"loss": 0.958, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00021096331126647277, |
|
"loss": 0.9585, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.0002105920930520324, |
|
"loss": 0.9529, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00021022087483759202, |
|
"loss": 0.9463, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00020984965662315162, |
|
"loss": 0.9558, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00020947843840871125, |
|
"loss": 0.9476, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00020910722019427087, |
|
"loss": 0.9423, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_loss": 0.8774290084838867, |
|
"eval_runtime": 1.3657, |
|
"eval_samples_per_second": 0.732, |
|
"eval_steps_per_second": 0.732, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00020873600197983045, |
|
"loss": 0.9463, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00020836478376539007, |
|
"loss": 0.9389, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00020799356555094967, |
|
"loss": 0.9443, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.0002076223473365093, |
|
"loss": 0.9412, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00020725112912206892, |
|
"loss": 0.9417, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00020687991090762852, |
|
"loss": 0.9485, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00020650869269318812, |
|
"loss": 0.9479, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00020613747447874772, |
|
"loss": 0.9541, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00020576625626430735, |
|
"loss": 0.9472, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00020539503804986698, |
|
"loss": 0.9382, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_loss": 0.9030184745788574, |
|
"eval_runtime": 1.4724, |
|
"eval_samples_per_second": 0.679, |
|
"eval_steps_per_second": 0.679, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00020502381983542658, |
|
"loss": 0.9323, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0002046526016209862, |
|
"loss": 0.9435, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00020428138340654577, |
|
"loss": 0.9417, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0002039101651921054, |
|
"loss": 0.9479, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00020353894697766503, |
|
"loss": 0.9418, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00020316772876322463, |
|
"loss": 0.946, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00020279651054878425, |
|
"loss": 0.9475, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00020242529233434388, |
|
"loss": 0.94, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00020205407411990345, |
|
"loss": 0.943, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00020168285590546308, |
|
"loss": 0.9358, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 0.8716119527816772, |
|
"eval_runtime": 1.3296, |
|
"eval_samples_per_second": 0.752, |
|
"eval_steps_per_second": 0.752, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00020131163769102268, |
|
"loss": 0.9351, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0002009404194765823, |
|
"loss": 0.9412, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00020056920126214193, |
|
"loss": 0.9346, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00020019798304770153, |
|
"loss": 0.9427, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00019982676483326113, |
|
"loss": 0.9553, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00019945554661882073, |
|
"loss": 0.9443, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00019908432840438036, |
|
"loss": 0.9446, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00019871311018993998, |
|
"loss": 0.9319, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00019834189197549958, |
|
"loss": 0.9333, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0001979706737610592, |
|
"loss": 0.9405, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_loss": 0.8719614744186401, |
|
"eval_runtime": 1.2911, |
|
"eval_samples_per_second": 0.775, |
|
"eval_steps_per_second": 0.775, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00019759945554661878, |
|
"loss": 0.9367, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0001972282373321784, |
|
"loss": 0.9352, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00019685701911773803, |
|
"loss": 0.9422, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00019648580090329763, |
|
"loss": 0.9543, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00019611458268885726, |
|
"loss": 0.9308, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0001957433644744169, |
|
"loss": 0.9471, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00019537214625997646, |
|
"loss": 0.933, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00019500092804553609, |
|
"loss": 0.9475, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00019462970983109569, |
|
"loss": 0.9368, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.0001942584916166553, |
|
"loss": 0.945, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_loss": 0.9158815145492554, |
|
"eval_runtime": 1.3387, |
|
"eval_samples_per_second": 0.747, |
|
"eval_steps_per_second": 0.747, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00019388727340221494, |
|
"loss": 0.9407, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00019351605518777454, |
|
"loss": 0.9345, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00019314483697333414, |
|
"loss": 0.9355, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00019277361875889374, |
|
"loss": 0.9368, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00019240240054445336, |
|
"loss": 0.9339, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.000192031182330013, |
|
"loss": 0.9315, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.0001916599641155726, |
|
"loss": 0.9382, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00019128874590113222, |
|
"loss": 0.9391, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.0001909175276866918, |
|
"loss": 0.9324, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00019054630947225142, |
|
"loss": 0.9346, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_loss": 0.8685809969902039, |
|
"eval_runtime": 1.2797, |
|
"eval_samples_per_second": 0.781, |
|
"eval_steps_per_second": 0.781, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00019017509125781104, |
|
"loss": 0.9302, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00018980387304337064, |
|
"loss": 0.9375, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00018943265482893027, |
|
"loss": 0.9265, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00018906143661448987, |
|
"loss": 0.9421, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00018869021840004947, |
|
"loss": 0.9382, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0001883190001856091, |
|
"loss": 0.9376, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0001879477819711687, |
|
"loss": 0.9315, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00018757656375672832, |
|
"loss": 0.9326, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00018720534554228792, |
|
"loss": 0.9283, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00018683412732784755, |
|
"loss": 0.9252, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_loss": 0.8812684416770935, |
|
"eval_runtime": 1.2442, |
|
"eval_samples_per_second": 0.804, |
|
"eval_steps_per_second": 0.804, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00018646290911340715, |
|
"loss": 0.929, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00018609169089896674, |
|
"loss": 0.9303, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00018572047268452637, |
|
"loss": 0.9288, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00018534925447008597, |
|
"loss": 0.9308, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0001849780362556456, |
|
"loss": 0.9247, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00018460681804120522, |
|
"loss": 0.939, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0001842355998267648, |
|
"loss": 0.927, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00018386438161232442, |
|
"loss": 0.9227, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00018349316339788402, |
|
"loss": 0.9275, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00018312194518344365, |
|
"loss": 0.9377, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_loss": 0.9376851916313171, |
|
"eval_runtime": 1.3094, |
|
"eval_samples_per_second": 0.764, |
|
"eval_steps_per_second": 0.764, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00018275072696900328, |
|
"loss": 0.9386, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00018237950875456287, |
|
"loss": 0.9389, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00018200829054012247, |
|
"loss": 0.9323, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00018163707232568207, |
|
"loss": 0.9304, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0001812658541112417, |
|
"loss": 0.9382, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00018089463589680133, |
|
"loss": 0.9344, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00018052341768236093, |
|
"loss": 0.9382, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00018015219946792055, |
|
"loss": 0.9298, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00017978098125348013, |
|
"loss": 0.9246, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00017940976303903975, |
|
"loss": 0.9349, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"eval_loss": 0.8311364650726318, |
|
"eval_runtime": 1.2953, |
|
"eval_samples_per_second": 0.772, |
|
"eval_steps_per_second": 0.772, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00017903854482459938, |
|
"loss": 0.9309, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00017866732661015898, |
|
"loss": 0.9311, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.0001782961083957186, |
|
"loss": 0.938, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00017792489018127823, |
|
"loss": 0.9324, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00017755367196683783, |
|
"loss": 0.9128, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00017718245375239743, |
|
"loss": 0.9407, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00017681123553795703, |
|
"loss": 0.9286, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00017644001732351666, |
|
"loss": 0.9298, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00017606879910907628, |
|
"loss": 0.9329, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00017569758089463588, |
|
"loss": 0.9331, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_loss": 0.8751674890518188, |
|
"eval_runtime": 1.3065, |
|
"eval_samples_per_second": 0.765, |
|
"eval_steps_per_second": 0.765, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.0001753263626801955, |
|
"loss": 0.9264, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00017495514446575508, |
|
"loss": 0.9345, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.0001745839262513147, |
|
"loss": 0.925, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00017421270803687433, |
|
"loss": 0.9227, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00017384148982243393, |
|
"loss": 0.9237, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00017347027160799356, |
|
"loss": 0.9365, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0001730990533935532, |
|
"loss": 0.9356, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00017272783517911276, |
|
"loss": 0.9242, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00017235661696467239, |
|
"loss": 0.9297, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00017198539875023199, |
|
"loss": 0.9232, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_loss": 0.8359224796295166, |
|
"eval_runtime": 1.2767, |
|
"eval_samples_per_second": 0.783, |
|
"eval_steps_per_second": 0.783, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.0001716141805357916, |
|
"loss": 0.9324, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00017124296232135124, |
|
"loss": 0.9222, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00017087174410691084, |
|
"loss": 0.9214, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00017050052589247044, |
|
"loss": 0.9214, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00017012930767803004, |
|
"loss": 0.931, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00016975808946358966, |
|
"loss": 0.9218, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0001693868712491493, |
|
"loss": 0.9136, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0001690156530347089, |
|
"loss": 0.9284, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00016864443482026852, |
|
"loss": 0.921, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.0001682732166058281, |
|
"loss": 0.9245, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_loss": 0.8561541438102722, |
|
"eval_runtime": 1.3583, |
|
"eval_samples_per_second": 0.736, |
|
"eval_steps_per_second": 0.736, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00016790199839138771, |
|
"loss": 0.9259, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00016753078017694734, |
|
"loss": 0.9238, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00016715956196250694, |
|
"loss": 0.9237, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00016678834374806657, |
|
"loss": 0.9111, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.0001664171255336262, |
|
"loss": 0.9257, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00016604590731918577, |
|
"loss": 0.9128, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.0001656746891047454, |
|
"loss": 0.9219, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.000165303470890305, |
|
"loss": 0.9163, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00016493225267586462, |
|
"loss": 0.9195, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00016456103446142425, |
|
"loss": 0.9283, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"eval_loss": 0.8775261044502258, |
|
"eval_runtime": 1.3808, |
|
"eval_samples_per_second": 0.724, |
|
"eval_steps_per_second": 0.724, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00016418981624698385, |
|
"loss": 0.9177, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00016381859803254344, |
|
"loss": 0.9113, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00016344737981810304, |
|
"loss": 0.9189, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00016307616160366267, |
|
"loss": 0.9096, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0001627049433892223, |
|
"loss": 0.9191, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.0001623337251747819, |
|
"loss": 0.9107, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00016196250696034152, |
|
"loss": 0.9173, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.0001615912887459011, |
|
"loss": 0.9187, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00016122007053146072, |
|
"loss": 0.9187, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00016084885231702035, |
|
"loss": 0.9211, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_loss": 0.8760462999343872, |
|
"eval_runtime": 1.4851, |
|
"eval_samples_per_second": 0.673, |
|
"eval_steps_per_second": 0.673, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00016047763410257995, |
|
"loss": 0.9116, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00016010641588813957, |
|
"loss": 0.9117, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.0001597351976736992, |
|
"loss": 0.9184, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00015936397945925877, |
|
"loss": 0.9122, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0001589927612448184, |
|
"loss": 0.9155, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.000158621543030378, |
|
"loss": 0.9186, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00015825032481593763, |
|
"loss": 0.9144, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00015787910660149725, |
|
"loss": 0.9224, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00015750788838705685, |
|
"loss": 0.9192, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00015713667017261645, |
|
"loss": 0.9106, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 0.8746755719184875, |
|
"eval_runtime": 1.3658, |
|
"eval_samples_per_second": 0.732, |
|
"eval_steps_per_second": 0.732, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00015676545195817605, |
|
"loss": 0.9106, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00015639423374373568, |
|
"loss": 0.9024, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0001560230155292953, |
|
"loss": 0.9055, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0001556517973148549, |
|
"loss": 0.9106, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00015528057910041453, |
|
"loss": 0.9219, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0001549093608859741, |
|
"loss": 0.9015, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00015453814267153373, |
|
"loss": 0.9029, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00015416692445709336, |
|
"loss": 0.9095, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00015379570624265296, |
|
"loss": 0.9284, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00015342448802821258, |
|
"loss": 0.9047, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 0.8788416981697083, |
|
"eval_runtime": 1.3646, |
|
"eval_samples_per_second": 0.733, |
|
"eval_steps_per_second": 0.733, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00015305326981377218, |
|
"loss": 0.9151, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00015268205159933178, |
|
"loss": 0.9076, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.0001523108333848914, |
|
"loss": 0.9134, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.000151939615170451, |
|
"loss": 0.9136, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00015156839695601063, |
|
"loss": 0.9073, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00015119717874157023, |
|
"loss": 0.9054, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00015082596052712986, |
|
"loss": 0.9169, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00015045474231268946, |
|
"loss": 0.9249, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00015008352409824906, |
|
"loss": 0.9032, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00014971230588380869, |
|
"loss": 0.9089, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"eval_loss": 0.8750318288803101, |
|
"eval_runtime": 1.361, |
|
"eval_samples_per_second": 0.735, |
|
"eval_steps_per_second": 0.735, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00014934108766936828, |
|
"loss": 0.903, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.0001489698694549279, |
|
"loss": 0.9045, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.0001485986512404875, |
|
"loss": 0.9146, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00014822743302604714, |
|
"loss": 0.9085, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00014785621481160674, |
|
"loss": 0.9105, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00014748499659716634, |
|
"loss": 0.9034, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00014711377838272596, |
|
"loss": 0.906, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.0001467425601682856, |
|
"loss": 0.9055, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.0001463713419538452, |
|
"loss": 0.9013, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.0001460001237394048, |
|
"loss": 0.9136, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 0.8716868162155151, |
|
"eval_runtime": 1.3893, |
|
"eval_samples_per_second": 0.72, |
|
"eval_steps_per_second": 0.72, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00014562890552496441, |
|
"loss": 0.8991, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00014525768731052401, |
|
"loss": 0.9072, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00014488646909608364, |
|
"loss": 0.9185, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00014451525088164324, |
|
"loss": 0.9097, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00014414403266720284, |
|
"loss": 0.9044, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00014377281445276247, |
|
"loss": 0.9095, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.0001434015962383221, |
|
"loss": 0.9119, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.0001430303780238817, |
|
"loss": 0.8971, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.0001426591598094413, |
|
"loss": 0.9011, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00014228794159500092, |
|
"loss": 0.9048, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_loss": 0.8536492586135864, |
|
"eval_runtime": 1.2569, |
|
"eval_samples_per_second": 0.796, |
|
"eval_steps_per_second": 0.796, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00014191672338056052, |
|
"loss": 0.9031, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00014154550516612014, |
|
"loss": 0.9078, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00014117428695167974, |
|
"loss": 0.9115, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00014080306873723934, |
|
"loss": 0.9018, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00014043185052279897, |
|
"loss": 0.8974, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.0001400606323083586, |
|
"loss": 0.9084, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.0001396894140939182, |
|
"loss": 0.9041, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.0001393181958794778, |
|
"loss": 0.8967, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00013894697766503742, |
|
"loss": 0.8969, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00013857575945059702, |
|
"loss": 0.8993, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_loss": 0.8307236433029175, |
|
"eval_runtime": 1.2771, |
|
"eval_samples_per_second": 0.783, |
|
"eval_steps_per_second": 0.783, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00013820454123615665, |
|
"loss": 0.9031, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00013783332302171625, |
|
"loss": 0.9008, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00013746210480727585, |
|
"loss": 0.9039, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00013709088659283547, |
|
"loss": 0.9063, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.0001367196683783951, |
|
"loss": 0.901, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.0001363484501639547, |
|
"loss": 0.8973, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.0001359772319495143, |
|
"loss": 0.9057, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00013560601373507393, |
|
"loss": 0.898, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00013523479552063353, |
|
"loss": 0.9027, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00013486357730619315, |
|
"loss": 0.9084, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_loss": 0.8812193274497986, |
|
"eval_runtime": 1.2199, |
|
"eval_samples_per_second": 0.82, |
|
"eval_steps_per_second": 0.82, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00013449235909175275, |
|
"loss": 0.9043, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00013412114087731235, |
|
"loss": 0.9021, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00013374992266287198, |
|
"loss": 0.8951, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.0001333787044484316, |
|
"loss": 0.8865, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0001330074862339912, |
|
"loss": 0.9027, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0001326362680195508, |
|
"loss": 0.8976, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00013226504980511043, |
|
"loss": 0.8911, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00013189383159067003, |
|
"loss": 0.8973, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00013152261337622966, |
|
"loss": 0.9076, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00013115139516178925, |
|
"loss": 0.899, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"eval_loss": 0.82787024974823, |
|
"eval_runtime": 1.2643, |
|
"eval_samples_per_second": 0.791, |
|
"eval_steps_per_second": 0.791, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00013078017694734885, |
|
"loss": 0.9022, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00013040895873290848, |
|
"loss": 0.8969, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.0001300377405184681, |
|
"loss": 0.895, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.0001296665223040277, |
|
"loss": 0.9034, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.0001292953040895873, |
|
"loss": 0.8892, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00012892408587514693, |
|
"loss": 0.8956, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00012855286766070656, |
|
"loss": 0.8952, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00012818164944626616, |
|
"loss": 0.89, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00012781043123182576, |
|
"loss": 0.9029, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00012743921301738539, |
|
"loss": 0.8954, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_loss": 0.8729041218757629, |
|
"eval_runtime": 1.2221, |
|
"eval_samples_per_second": 0.818, |
|
"eval_steps_per_second": 0.818, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.00012706799480294498, |
|
"loss": 0.8805, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.0001266967765885046, |
|
"loss": 0.8936, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.0001263255583740642, |
|
"loss": 0.8893, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0001259543401596238, |
|
"loss": 0.8953, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00012558312194518344, |
|
"loss": 0.8943, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00012521190373074306, |
|
"loss": 0.8766, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00012484068551630266, |
|
"loss": 0.899, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00012446946730186226, |
|
"loss": 0.8949, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.0001240982490874219, |
|
"loss": 0.8849, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.0001237270308729815, |
|
"loss": 0.894, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"eval_loss": 0.8376001715660095, |
|
"eval_runtime": 1.3277, |
|
"eval_samples_per_second": 0.753, |
|
"eval_steps_per_second": 0.753, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00012335581265854111, |
|
"loss": 0.9029, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00012298459444410071, |
|
"loss": 0.898, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00012261337622966031, |
|
"loss": 0.8848, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00012224215801521994, |
|
"loss": 0.8932, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00012187093980077955, |
|
"loss": 0.891, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00012149972158633915, |
|
"loss": 0.8936, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00012112850337189877, |
|
"loss": 0.8907, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00012075728515745839, |
|
"loss": 0.8958, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00012038606694301799, |
|
"loss": 0.8912, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.0001200148487285776, |
|
"loss": 0.9032, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_loss": 0.8445655703544617, |
|
"eval_runtime": 1.3427, |
|
"eval_samples_per_second": 0.745, |
|
"eval_steps_per_second": 0.745, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00011964363051413722, |
|
"loss": 0.8918, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00011927241229969682, |
|
"loss": 0.8865, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00011890119408525644, |
|
"loss": 0.8893, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00011852997587081606, |
|
"loss": 0.8863, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00011815875765637566, |
|
"loss": 0.8978, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00011778753944193527, |
|
"loss": 0.886, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.0001174163212274949, |
|
"loss": 0.8789, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.0001170451030130545, |
|
"loss": 0.8944, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00011667388479861411, |
|
"loss": 0.8964, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00011630266658417372, |
|
"loss": 0.8921, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"eval_loss": 0.8549759387969971, |
|
"eval_runtime": 1.2407, |
|
"eval_samples_per_second": 0.806, |
|
"eval_steps_per_second": 0.806, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.00011593144836973332, |
|
"loss": 0.8918, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.00011556023015529295, |
|
"loss": 0.8966, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00011518901194085256, |
|
"loss": 0.8919, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00011481779372641216, |
|
"loss": 0.8963, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00011444657551197177, |
|
"loss": 0.8805, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.0001140753572975314, |
|
"loss": 0.8837, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.000113704139083091, |
|
"loss": 0.8919, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00011333292086865061, |
|
"loss": 0.8904, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00011296170265421023, |
|
"loss": 0.8765, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00011259048443976982, |
|
"loss": 0.8846, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"eval_loss": 0.8317739963531494, |
|
"eval_runtime": 1.3125, |
|
"eval_samples_per_second": 0.762, |
|
"eval_steps_per_second": 0.762, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00011221926622532945, |
|
"loss": 0.8754, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00011184804801088906, |
|
"loss": 0.8824, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00011147682979644866, |
|
"loss": 0.888, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.00011110561158200828, |
|
"loss": 0.8986, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.0001107343933675679, |
|
"loss": 0.8764, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.0001103631751531275, |
|
"loss": 0.8901, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00010999195693868712, |
|
"loss": 0.8859, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00010962073872424673, |
|
"loss": 0.8868, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00010924952050980633, |
|
"loss": 0.8827, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00010887830229536595, |
|
"loss": 0.8911, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_loss": 0.8604958057403564, |
|
"eval_runtime": 1.2674, |
|
"eval_samples_per_second": 0.789, |
|
"eval_steps_per_second": 0.789, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00010850708408092557, |
|
"loss": 0.8888, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00010813586586648517, |
|
"loss": 0.8878, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00010776464765204478, |
|
"loss": 0.8853, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.0001073934294376044, |
|
"loss": 0.8831, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.000107022211223164, |
|
"loss": 0.8906, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00010665099300872362, |
|
"loss": 0.8869, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00010627977479428323, |
|
"loss": 0.882, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00010590855657984283, |
|
"loss": 0.8789, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00010553733836540246, |
|
"loss": 0.8819, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00010516612015096207, |
|
"loss": 0.8765, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_loss": 0.8246310949325562, |
|
"eval_runtime": 1.3918, |
|
"eval_samples_per_second": 0.719, |
|
"eval_steps_per_second": 0.719, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00010479490193652167, |
|
"loss": 0.8754, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00010442368372208128, |
|
"loss": 0.8876, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0001040524655076409, |
|
"loss": 0.8871, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00010368124729320051, |
|
"loss": 0.879, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00010331002907876012, |
|
"loss": 0.8809, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00010293881086431974, |
|
"loss": 0.8847, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00010256759264987934, |
|
"loss": 0.8861, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00010219637443543895, |
|
"loss": 0.8758, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00010182515622099858, |
|
"loss": 0.8781, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00010145393800655817, |
|
"loss": 0.8726, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_loss": 0.846712052822113, |
|
"eval_runtime": 1.3944, |
|
"eval_samples_per_second": 0.717, |
|
"eval_steps_per_second": 0.717, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00010108271979211779, |
|
"loss": 0.8774, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0001007115015776774, |
|
"loss": 0.8755, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.000100340283363237, |
|
"loss": 0.8814, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.996906514879663e-05, |
|
"loss": 0.8694, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.959784693435624e-05, |
|
"loss": 0.8808, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 9.922662871991584e-05, |
|
"loss": 0.8824, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.885541050547545e-05, |
|
"loss": 0.8669, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 9.848419229103508e-05, |
|
"loss": 0.8702, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.811297407659468e-05, |
|
"loss": 0.8722, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.774175586215429e-05, |
|
"loss": 0.8844, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_loss": 0.8575500845909119, |
|
"eval_runtime": 1.622, |
|
"eval_samples_per_second": 0.617, |
|
"eval_steps_per_second": 0.617, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 9.73705376477139e-05, |
|
"loss": 0.877, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.699931943327353e-05, |
|
"loss": 0.8635, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.662810121883313e-05, |
|
"loss": 0.8729, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 9.625688300439274e-05, |
|
"loss": 0.865, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.588566478995236e-05, |
|
"loss": 0.8811, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.551444657551196e-05, |
|
"loss": 0.8729, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 9.514322836107158e-05, |
|
"loss": 0.8865, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.47720101466312e-05, |
|
"loss": 0.8747, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 9.44007919321908e-05, |
|
"loss": 0.8779, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.402957371775041e-05, |
|
"loss": 0.8764, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"eval_loss": 0.8142430186271667, |
|
"eval_runtime": 1.4953, |
|
"eval_samples_per_second": 0.669, |
|
"eval_steps_per_second": 0.669, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.365835550331003e-05, |
|
"loss": 0.8779, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 9.328713728886963e-05, |
|
"loss": 0.8702, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.291591907442925e-05, |
|
"loss": 0.8832, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.254470085998886e-05, |
|
"loss": 0.8718, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 9.217348264554846e-05, |
|
"loss": 0.8726, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.180226443110809e-05, |
|
"loss": 0.8677, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 9.14310462166677e-05, |
|
"loss": 0.8748, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.10598280022273e-05, |
|
"loss": 0.8694, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.068860978778691e-05, |
|
"loss": 0.8613, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 9.031739157334652e-05, |
|
"loss": 0.8687, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"eval_loss": 0.827206552028656, |
|
"eval_runtime": 1.6259, |
|
"eval_samples_per_second": 0.615, |
|
"eval_steps_per_second": 0.615, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.994617335890614e-05, |
|
"loss": 0.8735, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.957495514446575e-05, |
|
"loss": 0.8691, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 8.920373693002536e-05, |
|
"loss": 0.8605, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 8.883251871558496e-05, |
|
"loss": 0.8715, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 8.846130050114459e-05, |
|
"loss": 0.8541, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 8.80900822867042e-05, |
|
"loss": 0.8668, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 8.77188640722638e-05, |
|
"loss": 0.8745, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 8.734764585782342e-05, |
|
"loss": 0.8781, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 8.697642764338303e-05, |
|
"loss": 0.8782, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 8.660520942894264e-05, |
|
"loss": 0.8733, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_loss": 0.8250707983970642, |
|
"eval_runtime": 1.5297, |
|
"eval_samples_per_second": 0.654, |
|
"eval_steps_per_second": 0.654, |
|
"step": 11600 |
|
} |
|
], |
|
"max_steps": 16263, |
|
"num_train_epochs": 3, |
|
"total_flos": 7.693572394031514e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|