diff --git "a/log/debug_0.log" "b/log/debug_0.log" --- "a/log/debug_0.log" +++ "b/log/debug_0.log" @@ -60345,3 +60345,7461 @@ Use FP16 precision: False 03/05/2022 12:28:46 - INFO - codeparrot_training - Skipping example with length 237 (seq_length=1024) 03/05/2022 12:28:51 - INFO - codeparrot_training - Step 39999: {'lr': 0.00042298854844183476, 'samples': 20480000, 'steps': 39999, 'loss/train': 1.882041096687317} 03/05/2022 12:28:51 - INFO - codeparrot_training - Evaluating and saving model checkpoint +03/05/2022 12:29:06 - WARNING - huggingface_hub.repository - Several commits (8) will be pushed upstream. +03/05/2022 12:29:06 - WARNING - huggingface_hub.repository - The progress bars may be unreliable. +03/05/2022 12:29:29 - WARNING - huggingface_hub.repository - To https://huggingface.co/ncoop57/cm_code_clippy + eeb78f1..7a07393 glowing-puddle-3 -> glowing-puddle-3 + +03/05/2022 12:29:33 - INFO - codeparrot_training - Step 40000: {'lr': 0.0004229847172443866, 'samples': 20480512, 'steps': 40000, 'loss/train': 1.982017993927002} +03/05/2022 12:29:34 - INFO - codeparrot_training - Skipping example with length 712 (seq_length=1024) +03/05/2022 12:29:38 - INFO - codeparrot_training - Step 40001: {'lr': 0.0004229808859689941, 'samples': 20481024, 'steps': 40001, 'loss/train': 1.7813818454742432} +03/05/2022 12:29:41 - INFO - codeparrot_training - Step 40002: {'lr': 0.0004229770546156592, 'samples': 20481536, 'steps': 40002, 'loss/train': 1.8193222284317017} +03/05/2022 12:29:42 - INFO - codeparrot_training - Skipping example with length 429 (seq_length=1024) +03/05/2022 12:29:47 - INFO - codeparrot_training - Step 40003: {'lr': 0.00042297322318438345, 'samples': 20482048, 'steps': 40003, 'loss/train': 0.20749032497406006} +03/05/2022 12:29:50 - INFO - codeparrot_training - Step 40004: {'lr': 0.0004229693916751687, 'samples': 20482560, 'steps': 40004, 'loss/train': 0.14177154004573822} +03/05/2022 12:29:51 - INFO - codeparrot_training - Skipping example with length 617 (seq_length=1024) +03/05/2022 12:29:55 - INFO - codeparrot_training - Step 40005: {'lr': 0.00042296556008801663, 'samples': 20483072, 'steps': 40005, 'loss/train': 1.8099523782730103} +03/05/2022 12:29:59 - INFO - codeparrot_training - Step 40006: {'lr': 0.0004229617284229289, 'samples': 20483584, 'steps': 40006, 'loss/train': 1.6427102088928223} +03/05/2022 12:29:59 - INFO - codeparrot_training - Skipping example with length 253 (seq_length=1024) +03/05/2022 12:30:04 - INFO - codeparrot_training - Step 40007: {'lr': 0.00042295789667990726, 'samples': 20484096, 'steps': 40007, 'loss/train': 0.9013746976852417} +03/05/2022 12:30:07 - INFO - codeparrot_training - Step 40008: {'lr': 0.00042295406485895346, 'samples': 20484608, 'steps': 40008, 'loss/train': 1.6865116357803345} +03/05/2022 12:30:08 - INFO - codeparrot_training - Skipping example with length 382 (seq_length=1024) +03/05/2022 12:30:13 - INFO - codeparrot_training - Step 40009: {'lr': 0.0004229502329600692, 'samples': 20485120, 'steps': 40009, 'loss/train': 1.8931599855422974} +03/05/2022 12:30:16 - INFO - codeparrot_training - Step 40010: {'lr': 0.0004229464009832563, 'samples': 20485632, 'steps': 40010, 'loss/train': 2.002829074859619} +03/05/2022 12:30:16 - INFO - codeparrot_training - Skipping example with length 793 (seq_length=1024) +03/05/2022 12:30:21 - INFO - codeparrot_training - Step 40011: {'lr': 0.0004229425689285163, 'samples': 20486144, 'steps': 40011, 'loss/train': 1.4560149908065796} +03/05/2022 12:30:24 - INFO - codeparrot_training - Step 40012: {'lr': 0.00042293873679585125, 'samples': 20486656, 'steps': 40012, 'loss/train': 1.9236010313034058} +03/05/2022 12:30:25 - INFO - codeparrot_training - Skipping example with length 976 (seq_length=1024) +03/05/2022 12:30:30 - INFO - codeparrot_training - Step 40013: {'lr': 0.00042293490458526257, 'samples': 20487168, 'steps': 40013, 'loss/train': 1.3040850162506104} +03/05/2022 12:30:33 - INFO - codeparrot_training - Step 40014: {'lr': 0.0004229310722967521, 'samples': 20487680, 'steps': 40014, 'loss/train': 1.6971408128738403} +03/05/2022 12:30:33 - INFO - codeparrot_training - Skipping example with length 816 (seq_length=1024) +03/05/2022 12:30:38 - INFO - codeparrot_training - Step 40015: {'lr': 0.00042292723993032157, 'samples': 20488192, 'steps': 40015, 'loss/train': 7.169426918029785} +03/05/2022 12:30:42 - INFO - codeparrot_training - Step 40016: {'lr': 0.0004229234074859726, 'samples': 20488704, 'steps': 40016, 'loss/train': 1.8417553901672363} +03/05/2022 12:30:43 - INFO - codeparrot_training - Skipping example with length 938 (seq_length=1024) +03/05/2022 12:30:47 - INFO - codeparrot_training - Step 40017: {'lr': 0.00042291957496370713, 'samples': 20489216, 'steps': 40017, 'loss/train': 1.4970208406448364} +03/05/2022 12:30:50 - INFO - codeparrot_training - Step 40018: {'lr': 0.0004229157423635267, 'samples': 20489728, 'steps': 40018, 'loss/train': 1.6064833402633667} +03/05/2022 12:30:51 - INFO - codeparrot_training - Skipping example with length 837 (seq_length=1024) +03/05/2022 12:30:55 - INFO - codeparrot_training - Step 40019: {'lr': 0.00042291190968543315, 'samples': 20490240, 'steps': 40019, 'loss/train': 1.817595362663269} +03/05/2022 12:30:58 - INFO - codeparrot_training - Step 40020: {'lr': 0.0004229080769294281, 'samples': 20490752, 'steps': 40020, 'loss/train': 2.1051650047302246} +03/05/2022 12:30:59 - INFO - codeparrot_training - Skipping example with length 475 (seq_length=1024) +03/05/2022 12:31:04 - INFO - codeparrot_training - Step 40021: {'lr': 0.00042290424409551343, 'samples': 20491264, 'steps': 40021, 'loss/train': 2.0216541290283203} +03/05/2022 12:31:07 - INFO - codeparrot_training - Step 40022: {'lr': 0.0004229004111836907, 'samples': 20491776, 'steps': 40022, 'loss/train': 0.9492252469062805} +03/05/2022 12:31:08 - INFO - codeparrot_training - Skipping example with length 484 (seq_length=1024) +03/05/2022 12:31:12 - INFO - codeparrot_training - Step 40023: {'lr': 0.0004228965781939617, 'samples': 20492288, 'steps': 40023, 'loss/train': 1.4902007579803467} +03/05/2022 12:31:15 - INFO - codeparrot_training - Step 40024: {'lr': 0.00042289274512632817, 'samples': 20492800, 'steps': 40024, 'loss/train': 1.2674927711486816} +03/05/2022 12:31:16 - INFO - codeparrot_training - Skipping example with length 270 (seq_length=1024) +03/05/2022 12:31:21 - INFO - codeparrot_training - Step 40025: {'lr': 0.00042288891198079194, 'samples': 20493312, 'steps': 40025, 'loss/train': 1.5982645750045776} +03/05/2022 12:31:24 - INFO - codeparrot_training - Step 40026: {'lr': 0.00042288507875735455, 'samples': 20493824, 'steps': 40026, 'loss/train': 1.1172744035720825} +03/05/2022 12:31:25 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/05/2022 12:31:29 - INFO - codeparrot_training - Step 40027: {'lr': 0.0004228812454560178, 'samples': 20494336, 'steps': 40027, 'loss/train': 1.5279732942581177} +03/05/2022 12:31:32 - INFO - codeparrot_training - Step 40028: {'lr': 0.0004228774120767835, 'samples': 20494848, 'steps': 40028, 'loss/train': 1.5613210201263428} +03/05/2022 12:31:33 - INFO - codeparrot_training - Skipping example with length 790 (seq_length=1024) +03/05/2022 12:31:37 - INFO - codeparrot_training - Step 40029: {'lr': 0.00042287357861965326, 'samples': 20495360, 'steps': 40029, 'loss/train': 2.1684770584106445} +03/05/2022 12:31:41 - INFO - codeparrot_training - Step 40030: {'lr': 0.00042286974508462885, 'samples': 20495872, 'steps': 40030, 'loss/train': 1.9535554647445679} +03/05/2022 12:31:41 - INFO - codeparrot_training - Skipping example with length 831 (seq_length=1024) +03/05/2022 12:31:46 - INFO - codeparrot_training - Step 40031: {'lr': 0.000422865911471712, 'samples': 20496384, 'steps': 40031, 'loss/train': 1.8874787092208862} +03/05/2022 12:31:49 - INFO - codeparrot_training - Step 40032: {'lr': 0.00042286207778090447, 'samples': 20496896, 'steps': 40032, 'loss/train': 1.6239120960235596} +03/05/2022 12:31:49 - INFO - codeparrot_training - Skipping example with length 531 (seq_length=1024) +03/05/2022 12:31:54 - INFO - codeparrot_training - Step 40033: {'lr': 0.00042285824401220787, 'samples': 20497408, 'steps': 40033, 'loss/train': 1.7153030633926392} +03/05/2022 12:31:57 - INFO - codeparrot_training - Step 40034: {'lr': 0.0004228544101656241, 'samples': 20497920, 'steps': 40034, 'loss/train': 1.1692639589309692} +03/05/2022 12:31:58 - INFO - codeparrot_training - Skipping example with length 923 (seq_length=1024) +03/05/2022 12:32:03 - INFO - codeparrot_training - Step 40035: {'lr': 0.00042285057624115473, 'samples': 20498432, 'steps': 40035, 'loss/train': 1.8297556638717651} +03/05/2022 12:32:06 - INFO - codeparrot_training - Step 40036: {'lr': 0.0004228467422388016, 'samples': 20498944, 'steps': 40036, 'loss/train': 2.065459728240967} +03/05/2022 12:32:07 - INFO - codeparrot_training - Skipping example with length 217 (seq_length=1024) +03/05/2022 12:32:11 - INFO - codeparrot_training - Step 40037: {'lr': 0.0004228429081585664, 'samples': 20499456, 'steps': 40037, 'loss/train': 1.5635398626327515} +03/05/2022 12:32:14 - INFO - codeparrot_training - Step 40038: {'lr': 0.00042283907400045084, 'samples': 20499968, 'steps': 40038, 'loss/train': 2.088895082473755} +03/05/2022 12:32:15 - INFO - codeparrot_training - Skipping example with length 545 (seq_length=1024) +03/05/2022 12:32:20 - INFO - codeparrot_training - Step 40039: {'lr': 0.0004228352397644567, 'samples': 20500480, 'steps': 40039, 'loss/train': 0.4609100818634033} +03/05/2022 12:32:23 - INFO - codeparrot_training - Step 40040: {'lr': 0.0004228314054505856, 'samples': 20500992, 'steps': 40040, 'loss/train': 1.338619589805603} +03/05/2022 12:32:24 - INFO - codeparrot_training - Skipping example with length 898 (seq_length=1024) +03/05/2022 12:32:28 - INFO - codeparrot_training - Step 40041: {'lr': 0.0004228275710588394, 'samples': 20501504, 'steps': 40041, 'loss/train': 2.011685848236084} +03/05/2022 12:32:31 - INFO - codeparrot_training - Step 40042: {'lr': 0.0004228237365892197, 'samples': 20502016, 'steps': 40042, 'loss/train': 0.653678834438324} +03/05/2022 12:32:32 - INFO - codeparrot_training - Skipping example with length 577 (seq_length=1024) +03/05/2022 12:32:36 - INFO - codeparrot_training - Step 40043: {'lr': 0.00042281990204172837, 'samples': 20502528, 'steps': 40043, 'loss/train': 2.355133295059204} +03/05/2022 12:32:40 - INFO - codeparrot_training - Step 40044: {'lr': 0.000422816067416367, 'samples': 20503040, 'steps': 40044, 'loss/train': 2.189216375350952} +03/05/2022 12:32:40 - INFO - codeparrot_training - Skipping example with length 423 (seq_length=1024) +03/05/2022 12:32:45 - INFO - codeparrot_training - Step 40045: {'lr': 0.00042281223271313734, 'samples': 20503552, 'steps': 40045, 'loss/train': 1.884752869606018} +03/05/2022 12:32:48 - INFO - codeparrot_training - Step 40046: {'lr': 0.0004228083979320412, 'samples': 20504064, 'steps': 40046, 'loss/train': 1.824157953262329} +03/05/2022 12:32:49 - INFO - codeparrot_training - Skipping example with length 706 (seq_length=1024) +03/05/2022 12:32:53 - INFO - codeparrot_training - Step 40047: {'lr': 0.00042280456307308034, 'samples': 20504576, 'steps': 40047, 'loss/train': 2.1382288932800293} +03/05/2022 12:32:57 - INFO - codeparrot_training - Step 40048: {'lr': 0.0004228007281362563, 'samples': 20505088, 'steps': 40048, 'loss/train': 2.0494191646575928} +03/05/2022 12:32:58 - INFO - codeparrot_training - Skipping example with length 396 (seq_length=1024) +03/05/2022 12:33:02 - INFO - codeparrot_training - Step 40049: {'lr': 0.0004227968931215709, 'samples': 20505600, 'steps': 40049, 'loss/train': 1.1707737445831299} +03/05/2022 12:33:05 - INFO - codeparrot_training - Step 40050: {'lr': 0.000422793058029026, 'samples': 20506112, 'steps': 40050, 'loss/train': 1.9375284910202026} +03/05/2022 12:33:06 - INFO - codeparrot_training - Skipping example with length 993 (seq_length=1024) +03/05/2022 12:33:10 - INFO - codeparrot_training - Step 40051: {'lr': 0.0004227892228586231, 'samples': 20506624, 'steps': 40051, 'loss/train': 2.786980628967285} +03/05/2022 12:33:14 - INFO - codeparrot_training - Step 40052: {'lr': 0.0004227853876103641, 'samples': 20507136, 'steps': 40052, 'loss/train': 1.707208514213562} +03/05/2022 12:33:15 - INFO - codeparrot_training - Skipping example with length 945 (seq_length=1024) +03/05/2022 12:33:19 - INFO - codeparrot_training - Step 40053: {'lr': 0.0004227815522842507, 'samples': 20507648, 'steps': 40053, 'loss/train': 1.6560243368148804} +03/05/2022 12:33:22 - INFO - codeparrot_training - Step 40054: {'lr': 0.00042277771688028457, 'samples': 20508160, 'steps': 40054, 'loss/train': 1.6504963636398315} +03/05/2022 12:33:23 - INFO - codeparrot_training - Skipping example with length 651 (seq_length=1024) +03/05/2022 12:33:28 - INFO - codeparrot_training - Step 40055: {'lr': 0.0004227738813984675, 'samples': 20508672, 'steps': 40055, 'loss/train': 1.3405498266220093} +03/05/2022 12:33:31 - INFO - codeparrot_training - Step 40056: {'lr': 0.0004227700458388011, 'samples': 20509184, 'steps': 40056, 'loss/train': 1.5012426376342773} +03/05/2022 12:33:32 - INFO - codeparrot_training - Skipping example with length 308 (seq_length=1024) +03/05/2022 12:33:36 - INFO - codeparrot_training - Step 40057: {'lr': 0.00042276621020128724, 'samples': 20509696, 'steps': 40057, 'loss/train': 1.8666256666183472} +03/05/2022 12:33:39 - INFO - codeparrot_training - Step 40058: {'lr': 0.0004227623744859276, 'samples': 20510208, 'steps': 40058, 'loss/train': 2.1589877605438232} +03/05/2022 12:33:40 - INFO - codeparrot_training - Skipping example with length 61 (seq_length=1024) +03/05/2022 12:33:44 - INFO - codeparrot_training - Step 40059: {'lr': 0.0004227585386927239, 'samples': 20510720, 'steps': 40059, 'loss/train': 1.2905285358428955} +03/05/2022 12:33:47 - INFO - codeparrot_training - Step 40060: {'lr': 0.0004227547028216778, 'samples': 20511232, 'steps': 40060, 'loss/train': 1.4985536336898804} +03/05/2022 12:33:48 - INFO - codeparrot_training - Skipping example with length 817 (seq_length=1024) +03/05/2022 12:33:53 - INFO - codeparrot_training - Step 40061: {'lr': 0.00042275086687279116, 'samples': 20511744, 'steps': 40061, 'loss/train': 1.3976259231567383} +03/05/2022 12:33:56 - INFO - codeparrot_training - Step 40062: {'lr': 0.0004227470308460657, 'samples': 20512256, 'steps': 40062, 'loss/train': 1.9555387496948242} +03/05/2022 12:33:57 - INFO - codeparrot_training - Skipping example with length 598 (seq_length=1024) +03/05/2022 12:34:01 - INFO - codeparrot_training - Step 40063: {'lr': 0.000422743194741503, 'samples': 20512768, 'steps': 40063, 'loss/train': 0.6598406434059143} +03/05/2022 12:34:04 - INFO - codeparrot_training - Step 40064: {'lr': 0.00042273935855910487, 'samples': 20513280, 'steps': 40064, 'loss/train': 1.7276569604873657} +03/05/2022 12:34:05 - INFO - codeparrot_training - Skipping example with length 541 (seq_length=1024) +03/05/2022 12:34:09 - INFO - codeparrot_training - Step 40065: {'lr': 0.00042273552229887313, 'samples': 20513792, 'steps': 40065, 'loss/train': 1.597737431526184} +03/05/2022 12:34:13 - INFO - codeparrot_training - Step 40066: {'lr': 0.00042273168596080934, 'samples': 20514304, 'steps': 40066, 'loss/train': 2.3827953338623047} +03/05/2022 12:34:13 - INFO - codeparrot_training - Skipping example with length 991 (seq_length=1024) +03/05/2022 12:34:18 - INFO - codeparrot_training - Step 40067: {'lr': 0.0004227278495449154, 'samples': 20514816, 'steps': 40067, 'loss/train': 1.7025450468063354} +03/05/2022 12:34:21 - INFO - codeparrot_training - Step 40068: {'lr': 0.0004227240130511929, 'samples': 20515328, 'steps': 40068, 'loss/train': 1.2661598920822144} +03/05/2022 12:34:22 - INFO - codeparrot_training - Skipping example with length 338 (seq_length=1024) +03/05/2022 12:34:26 - INFO - codeparrot_training - Step 40069: {'lr': 0.0004227201764796437, 'samples': 20515840, 'steps': 40069, 'loss/train': 1.8638112545013428} +03/05/2022 12:34:29 - INFO - codeparrot_training - Step 40070: {'lr': 0.00042271633983026935, 'samples': 20516352, 'steps': 40070, 'loss/train': 1.9659042358398438} +03/05/2022 12:34:30 - INFO - codeparrot_training - Skipping example with length 356 (seq_length=1024) +03/05/2022 12:34:35 - INFO - codeparrot_training - Step 40071: {'lr': 0.00042271250310307174, 'samples': 20516864, 'steps': 40071, 'loss/train': 1.7575346231460571} +03/05/2022 12:34:38 - INFO - codeparrot_training - Step 40072: {'lr': 0.0004227086662980525, 'samples': 20517376, 'steps': 40072, 'loss/train': 2.0858993530273438} +03/05/2022 12:34:39 - INFO - codeparrot_training - Skipping example with length 346 (seq_length=1024) +03/05/2022 12:34:43 - INFO - codeparrot_training - Step 40073: {'lr': 0.00042270482941521347, 'samples': 20517888, 'steps': 40073, 'loss/train': 1.6693480014801025} +03/05/2022 12:34:46 - INFO - codeparrot_training - Step 40074: {'lr': 0.0004227009924545563, 'samples': 20518400, 'steps': 40074, 'loss/train': 1.7536224126815796} +03/05/2022 12:34:47 - INFO - codeparrot_training - Skipping example with length 204 (seq_length=1024) +03/05/2022 12:34:52 - INFO - codeparrot_training - Step 40075: {'lr': 0.00042269715541608265, 'samples': 20518912, 'steps': 40075, 'loss/train': 1.4964795112609863} +03/05/2022 12:34:55 - INFO - codeparrot_training - Step 40076: {'lr': 0.0004226933182997944, 'samples': 20519424, 'steps': 40076, 'loss/train': 1.1757622957229614} +03/05/2022 12:34:56 - INFO - codeparrot_training - Skipping example with length 125 (seq_length=1024) +03/05/2022 12:35:00 - INFO - codeparrot_training - Step 40077: {'lr': 0.00042268948110569317, 'samples': 20519936, 'steps': 40077, 'loss/train': 1.8408674001693726} +03/05/2022 12:35:04 - INFO - codeparrot_training - Step 40078: {'lr': 0.00042268564383378073, 'samples': 20520448, 'steps': 40078, 'loss/train': 1.9779270887374878} +03/05/2022 12:35:06 - INFO - codeparrot_training - Skipping example with length 468 (seq_length=1024) +03/05/2022 12:35:09 - INFO - codeparrot_training - Step 40079: {'lr': 0.00042268180648405884, 'samples': 20520960, 'steps': 40079, 'loss/train': 1.5238009691238403} +03/05/2022 12:35:12 - INFO - codeparrot_training - Step 40080: {'lr': 0.00042267796905652924, 'samples': 20521472, 'steps': 40080, 'loss/train': 1.7198408842086792} +03/05/2022 12:35:15 - INFO - codeparrot_training - Skipping example with length 727 (seq_length=1024) +03/05/2022 12:35:18 - INFO - codeparrot_training - Step 40081: {'lr': 0.0004226741315511935, 'samples': 20521984, 'steps': 40081, 'loss/train': 1.4137829542160034} +03/05/2022 12:35:21 - INFO - codeparrot_training - Step 40082: {'lr': 0.00042267029396805345, 'samples': 20522496, 'steps': 40082, 'loss/train': 1.7004022598266602} +03/05/2022 12:35:23 - INFO - codeparrot_training - Skipping example with length 753 (seq_length=1024) +03/05/2022 12:35:26 - INFO - codeparrot_training - Step 40083: {'lr': 0.0004226664563071109, 'samples': 20523008, 'steps': 40083, 'loss/train': 1.3843481540679932} +03/05/2022 12:35:29 - INFO - codeparrot_training - Step 40084: {'lr': 0.0004226626185683675, 'samples': 20523520, 'steps': 40084, 'loss/train': 1.5956250429153442} +03/05/2022 12:35:32 - INFO - codeparrot_training - Skipping example with length 304 (seq_length=1024) +03/05/2022 12:35:35 - INFO - codeparrot_training - Step 40085: {'lr': 0.00042265878075182497, 'samples': 20524032, 'steps': 40085, 'loss/train': 3.077409029006958} +03/05/2022 12:35:38 - INFO - codeparrot_training - Step 40086: {'lr': 0.0004226549428574851, 'samples': 20524544, 'steps': 40086, 'loss/train': 1.7898083925247192} +03/05/2022 12:35:40 - INFO - codeparrot_training - Skipping example with length 888 (seq_length=1024) +03/05/2022 12:35:43 - INFO - codeparrot_training - Step 40087: {'lr': 0.0004226511048853495, 'samples': 20525056, 'steps': 40087, 'loss/train': 1.8427422046661377} +03/05/2022 12:35:46 - INFO - codeparrot_training - Step 40088: {'lr': 0.00042264726683542, 'samples': 20525568, 'steps': 40088, 'loss/train': 1.1510858535766602} +03/05/2022 12:35:48 - INFO - codeparrot_training - Skipping example with length 907 (seq_length=1024) +03/05/2022 12:35:51 - INFO - codeparrot_training - Step 40089: {'lr': 0.00042264342870769835, 'samples': 20526080, 'steps': 40089, 'loss/train': 1.1904371976852417} +03/05/2022 12:35:55 - INFO - codeparrot_training - Step 40090: {'lr': 0.0004226395905021862, 'samples': 20526592, 'steps': 40090, 'loss/train': 3.0607223510742188} +03/05/2022 12:35:57 - INFO - codeparrot_training - Skipping example with length 274 (seq_length=1024) +03/05/2022 12:36:00 - INFO - codeparrot_training - Step 40091: {'lr': 0.0004226357522188853, 'samples': 20527104, 'steps': 40091, 'loss/train': 1.9740082025527954} +03/05/2022 12:36:03 - INFO - codeparrot_training - Step 40092: {'lr': 0.0004226319138577974, 'samples': 20527616, 'steps': 40092, 'loss/train': 0.8607341647148132} +03/05/2022 12:36:05 - INFO - codeparrot_training - Skipping example with length 705 (seq_length=1024) +03/05/2022 12:36:08 - INFO - codeparrot_training - Step 40093: {'lr': 0.0004226280754189243, 'samples': 20528128, 'steps': 40093, 'loss/train': 2.1337058544158936} +03/05/2022 12:36:11 - INFO - codeparrot_training - Step 40094: {'lr': 0.0004226242369022676, 'samples': 20528640, 'steps': 40094, 'loss/train': 1.6986380815505981} +03/05/2022 12:36:14 - INFO - codeparrot_training - Skipping example with length 190 (seq_length=1024) +03/05/2022 12:36:17 - INFO - codeparrot_training - Step 40095: {'lr': 0.00042262039830782906, 'samples': 20529152, 'steps': 40095, 'loss/train': 1.4544637203216553} +03/05/2022 12:36:20 - INFO - codeparrot_training - Step 40096: {'lr': 0.00042261655963561043, 'samples': 20529664, 'steps': 40096, 'loss/train': 0.5990719199180603} +03/05/2022 12:36:22 - INFO - codeparrot_training - Skipping example with length 798 (seq_length=1024) +03/05/2022 12:36:25 - INFO - codeparrot_training - Step 40097: {'lr': 0.0004226127208856134, 'samples': 20530176, 'steps': 40097, 'loss/train': 1.8629103899002075} +03/05/2022 12:36:28 - INFO - codeparrot_training - Step 40098: {'lr': 0.0004226088820578399, 'samples': 20530688, 'steps': 40098, 'loss/train': 2.90800142288208} +03/05/2022 12:36:30 - INFO - codeparrot_training - Skipping example with length 564 (seq_length=1024) +03/05/2022 12:36:34 - INFO - codeparrot_training - Step 40099: {'lr': 0.00042260504315229136, 'samples': 20531200, 'steps': 40099, 'loss/train': 2.4964165687561035} +03/05/2022 12:36:37 - INFO - codeparrot_training - Step 40100: {'lr': 0.00042260120416896975, 'samples': 20531712, 'steps': 40100, 'loss/train': 1.4463250637054443} +03/05/2022 12:36:39 - INFO - codeparrot_training - Skipping example with length 136 (seq_length=1024) +03/05/2022 12:36:42 - INFO - codeparrot_training - Step 40101: {'lr': 0.0004225973651078766, 'samples': 20532224, 'steps': 40101, 'loss/train': 2.009486198425293} +03/05/2022 12:36:45 - INFO - codeparrot_training - Step 40102: {'lr': 0.0004225935259690138, 'samples': 20532736, 'steps': 40102, 'loss/train': 6.392016410827637} +03/05/2022 12:36:49 - INFO - codeparrot_training - Step 40103: {'lr': 0.00042258968675238295, 'samples': 20533248, 'steps': 40103, 'loss/train': 1.937026858329773} +03/05/2022 12:36:49 - INFO - codeparrot_training - Skipping example with length 241 (seq_length=1024) +03/05/2022 12:36:54 - INFO - codeparrot_training - Step 40104: {'lr': 0.00042258584745798595, 'samples': 20533760, 'steps': 40104, 'loss/train': 2.071216344833374} +03/05/2022 12:36:57 - INFO - codeparrot_training - Step 40105: {'lr': 0.00042258200808582434, 'samples': 20534272, 'steps': 40105, 'loss/train': 1.7090741395950317} +03/05/2022 12:36:57 - INFO - codeparrot_training - Skipping example with length 983 (seq_length=1024) +03/05/2022 12:37:02 - INFO - codeparrot_training - Step 40106: {'lr': 0.00042257816863590006, 'samples': 20534784, 'steps': 40106, 'loss/train': 2.1346094608306885} +03/05/2022 12:37:05 - INFO - codeparrot_training - Step 40107: {'lr': 0.0004225743291082146, 'samples': 20535296, 'steps': 40107, 'loss/train': 1.561100721359253} +03/05/2022 12:37:06 - INFO - codeparrot_training - Skipping example with length 67 (seq_length=1024) +03/05/2022 12:37:11 - INFO - codeparrot_training - Step 40108: {'lr': 0.0004225704895027699, 'samples': 20535808, 'steps': 40108, 'loss/train': 1.9655472040176392} +03/05/2022 12:37:14 - INFO - codeparrot_training - Step 40109: {'lr': 0.0004225666498195675, 'samples': 20536320, 'steps': 40109, 'loss/train': 1.828470230102539} +03/05/2022 12:37:15 - INFO - codeparrot_training - Skipping example with length 224 (seq_length=1024) +03/05/2022 12:37:19 - INFO - codeparrot_training - Step 40110: {'lr': 0.0004225628100586093, 'samples': 20536832, 'steps': 40110, 'loss/train': 2.2743496894836426} +03/05/2022 12:37:22 - INFO - codeparrot_training - Step 40111: {'lr': 0.00042255897021989695, 'samples': 20537344, 'steps': 40111, 'loss/train': 0.999409019947052} +03/05/2022 12:37:23 - INFO - codeparrot_training - Skipping example with length 773 (seq_length=1024) +03/05/2022 12:37:28 - INFO - codeparrot_training - Step 40112: {'lr': 0.0004225551303034322, 'samples': 20537856, 'steps': 40112, 'loss/train': 3.1471493244171143} +03/05/2022 12:37:31 - INFO - codeparrot_training - Step 40113: {'lr': 0.00042255129030921673, 'samples': 20538368, 'steps': 40113, 'loss/train': 0.32940083742141724} +03/05/2022 12:37:32 - INFO - codeparrot_training - Skipping example with length 516 (seq_length=1024) +03/05/2022 12:37:36 - INFO - codeparrot_training - Step 40114: {'lr': 0.0004225474502372524, 'samples': 20538880, 'steps': 40114, 'loss/train': 1.0865380764007568} +03/05/2022 12:37:39 - INFO - codeparrot_training - Step 40115: {'lr': 0.00042254361008754076, 'samples': 20539392, 'steps': 40115, 'loss/train': 1.8155457973480225} +03/05/2022 12:37:44 - INFO - codeparrot_training - Step 40116: {'lr': 0.0004225397698600837, 'samples': 20539904, 'steps': 40116, 'loss/train': 2.086334228515625} +03/05/2022 12:37:48 - INFO - codeparrot_training - Step 40117: {'lr': 0.0004225359295548828, 'samples': 20540416, 'steps': 40117, 'loss/train': 1.52240788936615} +03/05/2022 12:37:49 - INFO - codeparrot_training - Skipping example with length 426 (seq_length=1024) +03/05/2022 12:37:53 - INFO - codeparrot_training - Step 40118: {'lr': 0.0004225320891719399, 'samples': 20540928, 'steps': 40118, 'loss/train': 1.6929336786270142} +03/05/2022 12:37:56 - INFO - codeparrot_training - Step 40119: {'lr': 0.0004225282487112567, 'samples': 20541440, 'steps': 40119, 'loss/train': 2.1643548011779785} +03/05/2022 12:37:57 - INFO - codeparrot_training - Skipping example with length 916 (seq_length=1024) +03/05/2022 12:38:01 - INFO - codeparrot_training - Step 40120: {'lr': 0.000422524408172835, 'samples': 20541952, 'steps': 40120, 'loss/train': 1.766913652420044} +03/05/2022 12:38:05 - INFO - codeparrot_training - Step 40121: {'lr': 0.0004225205675566765, 'samples': 20542464, 'steps': 40121, 'loss/train': 1.7677667140960693} +03/05/2022 12:38:05 - INFO - codeparrot_training - Skipping example with length 30 (seq_length=1024) +03/05/2022 12:38:10 - INFO - codeparrot_training - Step 40122: {'lr': 0.00042251672686278275, 'samples': 20542976, 'steps': 40122, 'loss/train': 1.4917700290679932} +03/05/2022 12:38:13 - INFO - codeparrot_training - Step 40123: {'lr': 0.0004225128860911557, 'samples': 20543488, 'steps': 40123, 'loss/train': 1.9835234880447388} +03/05/2022 12:38:14 - INFO - codeparrot_training - Skipping example with length 369 (seq_length=1024) +03/05/2022 12:38:18 - INFO - codeparrot_training - Step 40124: {'lr': 0.00042250904524179697, 'samples': 20544000, 'steps': 40124, 'loss/train': 2.0760698318481445} +03/05/2022 12:38:21 - INFO - codeparrot_training - Step 40125: {'lr': 0.00042250520431470827, 'samples': 20544512, 'steps': 40125, 'loss/train': 2.111795425415039} +03/05/2022 12:38:22 - INFO - codeparrot_training - Skipping example with length 531 (seq_length=1024) +03/05/2022 12:38:27 - INFO - codeparrot_training - Step 40126: {'lr': 0.00042250136330989154, 'samples': 20545024, 'steps': 40126, 'loss/train': 1.3340188264846802} +03/05/2022 12:38:30 - INFO - codeparrot_training - Step 40127: {'lr': 0.00042249752222734826, 'samples': 20545536, 'steps': 40127, 'loss/train': 0.8474777936935425} +03/05/2022 12:38:31 - INFO - codeparrot_training - Skipping example with length 681 (seq_length=1024) +03/05/2022 12:38:35 - INFO - codeparrot_training - Step 40128: {'lr': 0.0004224936810670803, 'samples': 20546048, 'steps': 40128, 'loss/train': 1.2117540836334229} +03/05/2022 12:38:38 - INFO - codeparrot_training - Step 40129: {'lr': 0.0004224898398290893, 'samples': 20546560, 'steps': 40129, 'loss/train': 1.5462563037872314} +03/05/2022 12:38:39 - INFO - codeparrot_training - Skipping example with length 911 (seq_length=1024) +03/05/2022 12:38:44 - INFO - codeparrot_training - Step 40130: {'lr': 0.0004224859985133771, 'samples': 20547072, 'steps': 40130, 'loss/train': 0.8715566992759705} +03/05/2022 12:38:47 - INFO - codeparrot_training - Step 40131: {'lr': 0.0004224821571199453, 'samples': 20547584, 'steps': 40131, 'loss/train': 1.8059865236282349} +03/05/2022 12:38:48 - INFO - codeparrot_training - Skipping example with length 213 (seq_length=1024) +03/05/2022 12:38:52 - INFO - codeparrot_training - Step 40132: {'lr': 0.0004224783156487958, 'samples': 20548096, 'steps': 40132, 'loss/train': 2.555755853652954} +03/05/2022 12:38:55 - INFO - codeparrot_training - Step 40133: {'lr': 0.0004224744740999302, 'samples': 20548608, 'steps': 40133, 'loss/train': 1.2180461883544922} +03/05/2022 12:38:56 - INFO - codeparrot_training - Skipping example with length 642 (seq_length=1024) +03/05/2022 12:39:00 - INFO - codeparrot_training - Step 40134: {'lr': 0.0004224706324733502, 'samples': 20549120, 'steps': 40134, 'loss/train': 1.474763035774231} +03/05/2022 12:39:04 - INFO - codeparrot_training - Step 40135: {'lr': 0.00042246679076905763, 'samples': 20549632, 'steps': 40135, 'loss/train': 1.5892252922058105} +03/05/2022 12:39:04 - INFO - codeparrot_training - Skipping example with length 879 (seq_length=1024) +03/05/2022 12:39:09 - INFO - codeparrot_training - Step 40136: {'lr': 0.00042246294898705416, 'samples': 20550144, 'steps': 40136, 'loss/train': 2.395535945892334} +03/05/2022 12:39:12 - INFO - codeparrot_training - Step 40137: {'lr': 0.0004224591071273416, 'samples': 20550656, 'steps': 40137, 'loss/train': 2.1740994453430176} +03/05/2022 12:39:13 - INFO - codeparrot_training - Skipping example with length 852 (seq_length=1024) +03/05/2022 12:39:17 - INFO - codeparrot_training - Step 40138: {'lr': 0.00042245526518992164, 'samples': 20551168, 'steps': 40138, 'loss/train': 1.3885573148727417} +03/05/2022 12:39:21 - INFO - codeparrot_training - Step 40139: {'lr': 0.0004224514231747959, 'samples': 20551680, 'steps': 40139, 'loss/train': 0.794738233089447} +03/05/2022 12:39:21 - INFO - codeparrot_training - Skipping example with length 619 (seq_length=1024) +03/05/2022 12:39:26 - INFO - codeparrot_training - Step 40140: {'lr': 0.00042244758108196635, 'samples': 20552192, 'steps': 40140, 'loss/train': 1.0422838926315308} +03/05/2022 12:39:29 - INFO - codeparrot_training - Step 40141: {'lr': 0.00042244373891143453, 'samples': 20552704, 'steps': 40141, 'loss/train': 1.9386508464813232} +03/05/2022 12:39:29 - INFO - codeparrot_training - Skipping example with length 177 (seq_length=1024) +03/05/2022 12:39:34 - INFO - codeparrot_training - Step 40142: {'lr': 0.00042243989666320217, 'samples': 20553216, 'steps': 40142, 'loss/train': 1.5855708122253418} +03/05/2022 12:39:37 - INFO - codeparrot_training - Step 40143: {'lr': 0.00042243605433727106, 'samples': 20553728, 'steps': 40143, 'loss/train': 1.4137191772460938} +03/05/2022 12:39:38 - INFO - codeparrot_training - Skipping example with length 302 (seq_length=1024) +03/05/2022 12:39:43 - INFO - codeparrot_training - Step 40144: {'lr': 0.0004224322119336429, 'samples': 20554240, 'steps': 40144, 'loss/train': 1.9725868701934814} +03/05/2022 12:39:46 - INFO - codeparrot_training - Step 40145: {'lr': 0.0004224283694523195, 'samples': 20554752, 'steps': 40145, 'loss/train': 1.1569421291351318} +03/05/2022 12:39:46 - INFO - codeparrot_training - Skipping example with length 541 (seq_length=1024) +03/05/2022 12:39:51 - INFO - codeparrot_training - Step 40146: {'lr': 0.0004224245268933025, 'samples': 20555264, 'steps': 40146, 'loss/train': 1.5679454803466797} +03/05/2022 12:39:54 - INFO - codeparrot_training - Step 40147: {'lr': 0.0004224206842565937, 'samples': 20555776, 'steps': 40147, 'loss/train': 1.7620419263839722} +03/05/2022 12:39:54 - INFO - codeparrot_training - Skipping example with length 608 (seq_length=1024) +03/05/2022 12:40:00 - INFO - codeparrot_training - Step 40148: {'lr': 0.0004224168415421948, 'samples': 20556288, 'steps': 40148, 'loss/train': 1.6999926567077637} +03/05/2022 12:40:03 - INFO - codeparrot_training - Step 40149: {'lr': 0.0004224129987501075, 'samples': 20556800, 'steps': 40149, 'loss/train': 1.3701801300048828} +03/05/2022 12:40:03 - INFO - codeparrot_training - Skipping example with length 111 (seq_length=1024) +03/05/2022 12:40:08 - INFO - codeparrot_training - Step 40150: {'lr': 0.0004224091558803337, 'samples': 20557312, 'steps': 40150, 'loss/train': 0.7844430804252625} +03/05/2022 12:40:11 - INFO - codeparrot_training - Step 40151: {'lr': 0.0004224053129328748, 'samples': 20557824, 'steps': 40151, 'loss/train': 0.7366156578063965} +03/05/2022 12:40:11 - INFO - codeparrot_training - Skipping example with length 188 (seq_length=1024) +03/05/2022 12:40:16 - INFO - codeparrot_training - Step 40152: {'lr': 0.0004224014699077329, 'samples': 20558336, 'steps': 40152, 'loss/train': 1.378142237663269} +03/05/2022 12:40:19 - INFO - codeparrot_training - Step 40153: {'lr': 0.00042239762680490944, 'samples': 20558848, 'steps': 40153, 'loss/train': 1.6801011562347412} +03/05/2022 12:40:20 - INFO - codeparrot_training - Skipping example with length 343 (seq_length=1024) +03/05/2022 12:40:25 - INFO - codeparrot_training - Step 40154: {'lr': 0.00042239378362440627, 'samples': 20559360, 'steps': 40154, 'loss/train': 0.9654422998428345} +03/05/2022 12:40:28 - INFO - codeparrot_training - Step 40155: {'lr': 0.0004223899403662251, 'samples': 20559872, 'steps': 40155, 'loss/train': 2.0935287475585938} +03/05/2022 12:40:28 - INFO - codeparrot_training - Skipping example with length 677 (seq_length=1024) +03/05/2022 12:40:33 - INFO - codeparrot_training - Step 40156: {'lr': 0.0004223860970303678, 'samples': 20560384, 'steps': 40156, 'loss/train': 1.410354495048523} +03/05/2022 12:40:36 - INFO - codeparrot_training - Step 40157: {'lr': 0.00042238225361683593, 'samples': 20560896, 'steps': 40157, 'loss/train': 1.3500657081604004} +03/05/2022 12:40:36 - INFO - codeparrot_training - Skipping example with length 401 (seq_length=1024) +03/05/2022 12:40:42 - INFO - codeparrot_training - Step 40158: {'lr': 0.00042237841012563126, 'samples': 20561408, 'steps': 40158, 'loss/train': 1.7577975988388062} +03/05/2022 12:40:45 - INFO - codeparrot_training - Step 40159: {'lr': 0.00042237456655675555, 'samples': 20561920, 'steps': 40159, 'loss/train': 2.228916883468628} +03/05/2022 12:40:45 - INFO - codeparrot_training - Skipping example with length 289 (seq_length=1024) +03/05/2022 12:40:50 - INFO - codeparrot_training - Step 40160: {'lr': 0.0004223707229102105, 'samples': 20562432, 'steps': 40160, 'loss/train': 1.876413106918335} +03/05/2022 12:40:53 - INFO - codeparrot_training - Step 40161: {'lr': 0.0004223668791859979, 'samples': 20562944, 'steps': 40161, 'loss/train': 2.4925155639648438} +03/05/2022 12:40:53 - INFO - codeparrot_training - Skipping example with length 277 (seq_length=1024) +03/05/2022 12:40:59 - INFO - codeparrot_training - Step 40162: {'lr': 0.00042236303538411934, 'samples': 20563456, 'steps': 40162, 'loss/train': 1.9710724353790283} +03/05/2022 12:41:02 - INFO - codeparrot_training - Step 40163: {'lr': 0.0004223591915045768, 'samples': 20563968, 'steps': 40163, 'loss/train': 1.6714404821395874} +03/05/2022 12:41:02 - INFO - codeparrot_training - Skipping example with length 924 (seq_length=1024) +03/05/2022 12:41:07 - INFO - codeparrot_training - Step 40164: {'lr': 0.0004223553475473718, 'samples': 20564480, 'steps': 40164, 'loss/train': 1.1864397525787354} +03/05/2022 12:41:10 - INFO - codeparrot_training - Step 40165: {'lr': 0.00042235150351250617, 'samples': 20564992, 'steps': 40165, 'loss/train': 1.390030026435852} +03/05/2022 12:41:10 - INFO - codeparrot_training - Skipping example with length 342 (seq_length=1024) +03/05/2022 12:41:15 - INFO - codeparrot_training - Step 40166: {'lr': 0.00042234765939998156, 'samples': 20565504, 'steps': 40166, 'loss/train': 1.4653346538543701} +03/05/2022 12:41:18 - INFO - codeparrot_training - Step 40167: {'lr': 0.00042234381520979983, 'samples': 20566016, 'steps': 40167, 'loss/train': 1.2761303186416626} +03/05/2022 12:41:24 - INFO - codeparrot_training - Step 40168: {'lr': 0.0004223399709419625, 'samples': 20566528, 'steps': 40168, 'loss/train': 1.78235924243927} +03/05/2022 12:41:27 - INFO - codeparrot_training - Step 40169: {'lr': 0.0004223361265964716, 'samples': 20567040, 'steps': 40169, 'loss/train': 1.2955230474472046} +03/05/2022 12:41:27 - INFO - codeparrot_training - Skipping example with length 735 (seq_length=1024) +03/05/2022 12:41:32 - INFO - codeparrot_training - Step 40170: {'lr': 0.0004223322821733286, 'samples': 20567552, 'steps': 40170, 'loss/train': 2.638183355331421} +03/05/2022 12:41:35 - INFO - codeparrot_training - Step 40171: {'lr': 0.0004223284376725354, 'samples': 20568064, 'steps': 40171, 'loss/train': 2.005826711654663} +03/05/2022 12:41:36 - INFO - codeparrot_training - Skipping example with length 896 (seq_length=1024) +03/05/2022 12:41:41 - INFO - codeparrot_training - Step 40172: {'lr': 0.00042232459309409355, 'samples': 20568576, 'steps': 40172, 'loss/train': 1.723232626914978} +03/05/2022 12:41:44 - INFO - codeparrot_training - Step 40173: {'lr': 0.00042232074843800494, 'samples': 20569088, 'steps': 40173, 'loss/train': 2.195824146270752} +03/05/2022 12:41:44 - INFO - codeparrot_training - Skipping example with length 456 (seq_length=1024) +03/05/2022 12:41:49 - INFO - codeparrot_training - Step 40174: {'lr': 0.00042231690370427135, 'samples': 20569600, 'steps': 40174, 'loss/train': 1.312303900718689} +03/05/2022 12:41:52 - INFO - codeparrot_training - Step 40175: {'lr': 0.00042231305889289437, 'samples': 20570112, 'steps': 40175, 'loss/train': 1.787569284439087} +03/05/2022 12:41:52 - INFO - codeparrot_training - Skipping example with length 308 (seq_length=1024) +03/05/2022 12:41:58 - INFO - codeparrot_training - Step 40176: {'lr': 0.00042230921400387576, 'samples': 20570624, 'steps': 40176, 'loss/train': 2.1782748699188232} +03/05/2022 12:42:00 - INFO - codeparrot_training - Skipping example with length 939 (seq_length=1024) +03/05/2022 12:42:03 - INFO - codeparrot_training - Step 40177: {'lr': 0.0004223053690372173, 'samples': 20571136, 'steps': 40177, 'loss/train': 1.4381171464920044} +03/05/2022 12:42:06 - INFO - codeparrot_training - Step 40178: {'lr': 0.00042230152399292065, 'samples': 20571648, 'steps': 40178, 'loss/train': 1.6184061765670776} +03/05/2022 12:42:10 - INFO - codeparrot_training - Step 40179: {'lr': 0.00042229767887098766, 'samples': 20572160, 'steps': 40179, 'loss/train': 2.133863687515259} +03/05/2022 12:42:11 - INFO - codeparrot_training - Skipping example with length 11 (seq_length=1024) +03/05/2022 12:42:15 - INFO - codeparrot_training - Step 40180: {'lr': 0.00042229383367142, 'samples': 20572672, 'steps': 40180, 'loss/train': 1.8218458890914917} +03/05/2022 12:42:18 - INFO - codeparrot_training - Step 40181: {'lr': 0.0004222899883942194, 'samples': 20573184, 'steps': 40181, 'loss/train': 1.4018020629882812} +03/05/2022 12:42:20 - INFO - codeparrot_training - Skipping example with length 741 (seq_length=1024) +03/05/2022 12:42:23 - INFO - codeparrot_training - Step 40182: {'lr': 0.0004222861430393875, 'samples': 20573696, 'steps': 40182, 'loss/train': 2.0200421810150146} +03/05/2022 12:42:27 - INFO - codeparrot_training - Step 40183: {'lr': 0.0004222822976069262, 'samples': 20574208, 'steps': 40183, 'loss/train': 1.0286396741867065} +03/05/2022 12:42:28 - INFO - codeparrot_training - Skipping example with length 94 (seq_length=1024) +03/05/2022 12:42:32 - INFO - codeparrot_training - Step 40184: {'lr': 0.0004222784520968371, 'samples': 20574720, 'steps': 40184, 'loss/train': 1.369282603263855} +03/05/2022 12:42:35 - INFO - codeparrot_training - Step 40185: {'lr': 0.0004222746065091221, 'samples': 20575232, 'steps': 40185, 'loss/train': 1.4660123586654663} +03/05/2022 12:42:36 - INFO - codeparrot_training - Skipping example with length 269 (seq_length=1024) +03/05/2022 12:42:40 - INFO - codeparrot_training - Step 40186: {'lr': 0.0004222707608437827, 'samples': 20575744, 'steps': 40186, 'loss/train': 1.8770731687545776} +03/05/2022 12:42:44 - INFO - codeparrot_training - Step 40187: {'lr': 0.00042226691510082083, 'samples': 20576256, 'steps': 40187, 'loss/train': 2.175034523010254} +03/05/2022 12:42:45 - INFO - codeparrot_training - Skipping example with length 410 (seq_length=1024) +03/05/2022 12:42:49 - INFO - codeparrot_training - Step 40188: {'lr': 0.0004222630692802381, 'samples': 20576768, 'steps': 40188, 'loss/train': 0.8981433510780334} +03/05/2022 12:42:52 - INFO - codeparrot_training - Step 40189: {'lr': 0.00042225922338203625, 'samples': 20577280, 'steps': 40189, 'loss/train': 1.7281867265701294} +03/05/2022 12:42:53 - INFO - codeparrot_training - Skipping example with length 839 (seq_length=1024) +03/05/2022 12:42:57 - INFO - codeparrot_training - Step 40190: {'lr': 0.00042225537740621713, 'samples': 20577792, 'steps': 40190, 'loss/train': 1.393416166305542} +03/05/2022 12:43:01 - INFO - codeparrot_training - Step 40191: {'lr': 0.00042225153135278236, 'samples': 20578304, 'steps': 40191, 'loss/train': 1.6467995643615723} +03/05/2022 12:43:02 - INFO - codeparrot_training - Skipping example with length 882 (seq_length=1024) +03/05/2022 12:43:06 - INFO - codeparrot_training - Step 40192: {'lr': 0.00042224768522173374, 'samples': 20578816, 'steps': 40192, 'loss/train': 2.472520351409912} +03/05/2022 12:43:09 - INFO - codeparrot_training - Step 40193: {'lr': 0.00042224383901307293, 'samples': 20579328, 'steps': 40193, 'loss/train': 0.19825619459152222} +03/05/2022 12:43:11 - INFO - codeparrot_training - Skipping example with length 971 (seq_length=1024) +03/05/2022 12:43:15 - INFO - codeparrot_training - Step 40194: {'lr': 0.0004222399927268018, 'samples': 20579840, 'steps': 40194, 'loss/train': 1.455000400543213} +03/05/2022 12:43:18 - INFO - codeparrot_training - Step 40195: {'lr': 0.0004222361463629218, 'samples': 20580352, 'steps': 40195, 'loss/train': 2.1388959884643555} +03/05/2022 12:43:20 - INFO - codeparrot_training - Skipping example with length 341 (seq_length=1024) +03/05/2022 12:43:23 - INFO - codeparrot_training - Step 40196: {'lr': 0.00042223229992143505, 'samples': 20580864, 'steps': 40196, 'loss/train': 2.0254151821136475} +03/05/2022 12:43:26 - INFO - codeparrot_training - Step 40197: {'lr': 0.00042222845340234293, 'samples': 20581376, 'steps': 40197, 'loss/train': 0.4651538133621216} +03/05/2022 12:43:28 - INFO - codeparrot_training - Skipping example with length 890 (seq_length=1024) +03/05/2022 12:43:31 - INFO - codeparrot_training - Step 40198: {'lr': 0.00042222460680564747, 'samples': 20581888, 'steps': 40198, 'loss/train': 2.1400110721588135} +03/05/2022 12:43:35 - INFO - codeparrot_training - Step 40199: {'lr': 0.0004222207601313501, 'samples': 20582400, 'steps': 40199, 'loss/train': 1.599306344985962} +03/05/2022 12:43:37 - INFO - codeparrot_training - Skipping example with length 167 (seq_length=1024) +03/05/2022 12:43:40 - INFO - codeparrot_training - Step 40200: {'lr': 0.00042221691337945285, 'samples': 20582912, 'steps': 40200, 'loss/train': 1.5169196128845215} +03/05/2022 12:43:43 - INFO - codeparrot_training - Step 40201: {'lr': 0.0004222130665499573, 'samples': 20583424, 'steps': 40201, 'loss/train': 3.047896146774292} +03/05/2022 12:43:45 - INFO - codeparrot_training - Skipping example with length 319 (seq_length=1024) +03/05/2022 12:43:48 - INFO - codeparrot_training - Step 40202: {'lr': 0.0004222092196428651, 'samples': 20583936, 'steps': 40202, 'loss/train': 2.149799108505249} +03/05/2022 12:43:52 - INFO - codeparrot_training - Step 40203: {'lr': 0.0004222053726581782, 'samples': 20584448, 'steps': 40203, 'loss/train': 1.1042876243591309} +03/05/2022 12:43:54 - INFO - codeparrot_training - Skipping example with length 454 (seq_length=1024) +03/05/2022 12:43:57 - INFO - codeparrot_training - Step 40204: {'lr': 0.0004222015255958981, 'samples': 20584960, 'steps': 40204, 'loss/train': 1.1554042100906372} +03/05/2022 12:44:00 - INFO - codeparrot_training - Step 40205: {'lr': 0.0004221976784560267, 'samples': 20585472, 'steps': 40205, 'loss/train': 1.6719025373458862} +03/05/2022 12:44:02 - INFO - codeparrot_training - Skipping example with length 939 (seq_length=1024) +03/05/2022 12:44:05 - INFO - codeparrot_training - Step 40206: {'lr': 0.0004221938312385657, 'samples': 20585984, 'steps': 40206, 'loss/train': 0.94319748878479} +03/05/2022 12:44:09 - INFO - codeparrot_training - Step 40207: {'lr': 0.00042218998394351684, 'samples': 20586496, 'steps': 40207, 'loss/train': 1.8789873123168945} +03/05/2022 12:44:10 - INFO - codeparrot_training - Skipping example with length 74 (seq_length=1024) +03/05/2022 12:44:14 - INFO - codeparrot_training - Step 40208: {'lr': 0.0004221861365708818, 'samples': 20587008, 'steps': 40208, 'loss/train': 1.6031830310821533} +03/05/2022 12:44:17 - INFO - codeparrot_training - Step 40209: {'lr': 0.0004221822891206623, 'samples': 20587520, 'steps': 40209, 'loss/train': 2.490661859512329} +03/05/2022 12:44:20 - INFO - codeparrot_training - Skipping example with length 472 (seq_length=1024) +03/05/2022 12:44:22 - INFO - codeparrot_training - Step 40210: {'lr': 0.00042217844159286015, 'samples': 20588032, 'steps': 40210, 'loss/train': 2.1415233612060547} +03/05/2022 12:44:26 - INFO - codeparrot_training - Step 40211: {'lr': 0.00042217459398747703, 'samples': 20588544, 'steps': 40211, 'loss/train': 1.0633940696716309} +03/05/2022 12:44:28 - INFO - codeparrot_training - Skipping example with length 84 (seq_length=1024) +03/05/2022 12:44:31 - INFO - codeparrot_training - Step 40212: {'lr': 0.0004221707463045148, 'samples': 20589056, 'steps': 40212, 'loss/train': 2.316683053970337} +03/05/2022 12:44:34 - INFO - codeparrot_training - Step 40213: {'lr': 0.0004221668985439749, 'samples': 20589568, 'steps': 40213, 'loss/train': 1.1005949974060059} +03/05/2022 12:44:37 - INFO - codeparrot_training - Skipping example with length 365 (seq_length=1024) +03/05/2022 12:44:39 - INFO - codeparrot_training - Step 40214: {'lr': 0.00042216305070585946, 'samples': 20590080, 'steps': 40214, 'loss/train': 1.2847903966903687} +03/05/2022 12:44:42 - INFO - codeparrot_training - Step 40215: {'lr': 0.00042215920279016993, 'samples': 20590592, 'steps': 40215, 'loss/train': 2.0846872329711914} +03/05/2022 12:44:45 - INFO - codeparrot_training - Skipping example with length 926 (seq_length=1024) +03/05/2022 12:44:48 - INFO - codeparrot_training - Step 40216: {'lr': 0.00042215535479690807, 'samples': 20591104, 'steps': 40216, 'loss/train': 0.8156063556671143} +03/05/2022 12:44:51 - INFO - codeparrot_training - Step 40217: {'lr': 0.0004221515067260757, 'samples': 20591616, 'steps': 40217, 'loss/train': 0.7461385130882263} +03/05/2022 12:44:56 - INFO - codeparrot_training - Step 40218: {'lr': 0.0004221476585776745, 'samples': 20592128, 'steps': 40218, 'loss/train': 1.6894958019256592} +03/05/2022 12:44:59 - INFO - codeparrot_training - Step 40219: {'lr': 0.00042214381035170624, 'samples': 20592640, 'steps': 40219, 'loss/train': 2.781083106994629} +03/05/2022 12:45:02 - INFO - codeparrot_training - Skipping example with length 734 (seq_length=1024) +03/05/2022 12:45:05 - INFO - codeparrot_training - Step 40220: {'lr': 0.0004221399620481726, 'samples': 20593152, 'steps': 40220, 'loss/train': 1.8164409399032593} +03/05/2022 12:45:08 - INFO - codeparrot_training - Step 40221: {'lr': 0.00042213611366707547, 'samples': 20593664, 'steps': 40221, 'loss/train': 1.8897675275802612} +03/05/2022 12:45:11 - INFO - codeparrot_training - Skipping example with length 225 (seq_length=1024) +03/05/2022 12:45:13 - INFO - codeparrot_training - Step 40222: {'lr': 0.0004221322652084163, 'samples': 20594176, 'steps': 40222, 'loss/train': 1.7072744369506836} +03/05/2022 12:45:16 - INFO - codeparrot_training - Step 40223: {'lr': 0.0004221284166721971, 'samples': 20594688, 'steps': 40223, 'loss/train': 1.7959158420562744} +03/05/2022 12:45:19 - INFO - codeparrot_training - Skipping example with length 346 (seq_length=1024) +03/05/2022 12:45:21 - INFO - codeparrot_training - Step 40224: {'lr': 0.00042212456805841944, 'samples': 20595200, 'steps': 40224, 'loss/train': 1.5631266832351685} +03/05/2022 12:45:25 - INFO - codeparrot_training - Step 40225: {'lr': 0.00042212071936708506, 'samples': 20595712, 'steps': 40225, 'loss/train': 1.8987339735031128} +03/05/2022 12:45:28 - INFO - codeparrot_training - Step 40226: {'lr': 0.0004221168705981958, 'samples': 20596224, 'steps': 40226, 'loss/train': 2.4004874229431152} +03/05/2022 12:45:28 - INFO - codeparrot_training - Skipping example with length 177 (seq_length=1024) +03/05/2022 12:45:33 - INFO - codeparrot_training - Step 40227: {'lr': 0.00042211302175175334, 'samples': 20596736, 'steps': 40227, 'loss/train': 1.5915488004684448} +03/05/2022 12:45:36 - INFO - codeparrot_training - Step 40228: {'lr': 0.0004221091728277595, 'samples': 20597248, 'steps': 40228, 'loss/train': 1.2016637325286865} +03/05/2022 12:45:37 - INFO - codeparrot_training - Skipping example with length 436 (seq_length=1024) +03/05/2022 12:45:42 - INFO - codeparrot_training - Step 40229: {'lr': 0.0004221053238262158, 'samples': 20597760, 'steps': 40229, 'loss/train': 2.1468186378479004} +03/05/2022 12:45:45 - INFO - codeparrot_training - Step 40230: {'lr': 0.0004221014747471241, 'samples': 20598272, 'steps': 40230, 'loss/train': 1.2492948770523071} +03/05/2022 12:45:45 - INFO - codeparrot_training - Skipping example with length 601 (seq_length=1024) +03/05/2022 12:45:50 - INFO - codeparrot_training - Step 40231: {'lr': 0.0004220976255904861, 'samples': 20598784, 'steps': 40231, 'loss/train': 1.9653582572937012} +03/05/2022 12:45:53 - INFO - codeparrot_training - Step 40232: {'lr': 0.00042209377635630364, 'samples': 20599296, 'steps': 40232, 'loss/train': 2.0514402389526367} +03/05/2022 12:45:54 - INFO - codeparrot_training - Skipping example with length 1020 (seq_length=1024) +03/05/2022 12:45:59 - INFO - codeparrot_training - Step 40233: {'lr': 0.00042208992704457837, 'samples': 20599808, 'steps': 40233, 'loss/train': 1.984248161315918} +03/05/2022 12:46:02 - INFO - codeparrot_training - Step 40234: {'lr': 0.00042208607765531204, 'samples': 20600320, 'steps': 40234, 'loss/train': 0.4056980609893799} +03/05/2022 12:46:02 - INFO - codeparrot_training - Skipping example with length 909 (seq_length=1024) +03/05/2022 12:46:07 - INFO - codeparrot_training - Step 40235: {'lr': 0.00042208222818850634, 'samples': 20600832, 'steps': 40235, 'loss/train': 1.567301630973816} +03/05/2022 12:46:10 - INFO - codeparrot_training - Step 40236: {'lr': 0.0004220783786441631, 'samples': 20601344, 'steps': 40236, 'loss/train': 1.6031955480575562} +03/05/2022 12:46:10 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/05/2022 12:46:16 - INFO - codeparrot_training - Step 40237: {'lr': 0.0004220745290222839, 'samples': 20601856, 'steps': 40237, 'loss/train': 1.9657427072525024} +03/05/2022 12:46:19 - INFO - codeparrot_training - Step 40238: {'lr': 0.00042207067932287066, 'samples': 20602368, 'steps': 40238, 'loss/train': 0.7697015404701233} +03/05/2022 12:46:19 - INFO - codeparrot_training - Skipping example with length 386 (seq_length=1024) +03/05/2022 12:46:24 - INFO - codeparrot_training - Step 40239: {'lr': 0.00042206682954592503, 'samples': 20602880, 'steps': 40239, 'loss/train': 1.86000394821167} +03/05/2022 12:46:29 - INFO - codeparrot_training - Step 40240: {'lr': 0.0004220629796914487, 'samples': 20603392, 'steps': 40240, 'loss/train': 1.21883225440979} +03/05/2022 12:46:32 - INFO - codeparrot_training - Step 40241: {'lr': 0.00042205912975944344, 'samples': 20603904, 'steps': 40241, 'loss/train': 2.1045234203338623} +03/05/2022 12:46:35 - INFO - codeparrot_training - Skipping example with length 253 (seq_length=1024) +03/05/2022 12:46:38 - INFO - codeparrot_training - Step 40242: {'lr': 0.00042205527974991096, 'samples': 20604416, 'steps': 40242, 'loss/train': 1.9751604795455933} +03/05/2022 12:46:41 - INFO - codeparrot_training - Step 40243: {'lr': 0.00042205142966285315, 'samples': 20604928, 'steps': 40243, 'loss/train': 1.3504664897918701} +03/05/2022 12:46:44 - INFO - codeparrot_training - Step 40244: {'lr': 0.0004220475794982716, 'samples': 20605440, 'steps': 40244, 'loss/train': 1.7863941192626953} +03/05/2022 12:46:44 - INFO - codeparrot_training - Skipping example with length 867 (seq_length=1024) +03/05/2022 12:46:50 - INFO - codeparrot_training - Step 40245: {'lr': 0.00042204372925616797, 'samples': 20605952, 'steps': 40245, 'loss/train': 1.7033005952835083} +03/05/2022 12:46:53 - INFO - codeparrot_training - Step 40246: {'lr': 0.0004220398789365441, 'samples': 20606464, 'steps': 40246, 'loss/train': 2.4130101203918457} +03/05/2022 12:46:53 - INFO - codeparrot_training - Skipping example with length 689 (seq_length=1024) +03/05/2022 12:46:58 - INFO - codeparrot_training - Step 40247: {'lr': 0.0004220360285394017, 'samples': 20606976, 'steps': 40247, 'loss/train': 0.6802616715431213} +03/05/2022 12:47:01 - INFO - codeparrot_training - Step 40248: {'lr': 0.0004220321780647426, 'samples': 20607488, 'steps': 40248, 'loss/train': 1.4052786827087402} +03/05/2022 12:47:01 - INFO - codeparrot_training - Skipping example with length 373 (seq_length=1024) +03/05/2022 12:47:07 - INFO - codeparrot_training - Step 40249: {'lr': 0.00042202832751256846, 'samples': 20608000, 'steps': 40249, 'loss/train': 2.135979175567627} +03/05/2022 12:47:10 - INFO - codeparrot_training - Step 40250: {'lr': 0.0004220244768828809, 'samples': 20608512, 'steps': 40250, 'loss/train': 1.4819645881652832} +03/05/2022 12:47:10 - INFO - codeparrot_training - Skipping example with length 961 (seq_length=1024) +03/05/2022 12:47:15 - INFO - codeparrot_training - Step 40251: {'lr': 0.0004220206261756819, 'samples': 20609024, 'steps': 40251, 'loss/train': 1.1301878690719604} +03/05/2022 12:47:18 - INFO - codeparrot_training - Step 40252: {'lr': 0.00042201677539097294, 'samples': 20609536, 'steps': 40252, 'loss/train': 1.7582886219024658} +03/05/2022 12:47:18 - INFO - codeparrot_training - Skipping example with length 848 (seq_length=1024) +03/05/2022 12:47:24 - INFO - codeparrot_training - Step 40253: {'lr': 0.00042201292452875595, 'samples': 20610048, 'steps': 40253, 'loss/train': 1.7708183526992798} +03/05/2022 12:47:27 - INFO - codeparrot_training - Step 40254: {'lr': 0.00042200907358903264, 'samples': 20610560, 'steps': 40254, 'loss/train': 1.255233883857727} +03/05/2022 12:47:27 - INFO - codeparrot_training - Skipping example with length 9 (seq_length=1024) +03/05/2022 12:47:32 - INFO - codeparrot_training - Step 40255: {'lr': 0.0004220052225718046, 'samples': 20611072, 'steps': 40255, 'loss/train': 1.8493973016738892} +03/05/2022 12:47:35 - INFO - codeparrot_training - Skipping example with length 717 (seq_length=1024) +03/05/2022 12:47:37 - INFO - codeparrot_training - Step 40256: {'lr': 0.0004220013714770737, 'samples': 20611584, 'steps': 40256, 'loss/train': 1.7493174076080322} +03/05/2022 12:47:41 - INFO - codeparrot_training - Step 40257: {'lr': 0.0004219975203048416, 'samples': 20612096, 'steps': 40257, 'loss/train': 1.768437385559082} +03/05/2022 12:47:43 - INFO - codeparrot_training - Skipping example with length 385 (seq_length=1024) +03/05/2022 12:47:46 - INFO - codeparrot_training - Step 40258: {'lr': 0.0004219936690551101, 'samples': 20612608, 'steps': 40258, 'loss/train': 1.8031048774719238} +03/05/2022 12:47:49 - INFO - codeparrot_training - Step 40259: {'lr': 0.0004219898177278809, 'samples': 20613120, 'steps': 40259, 'loss/train': 1.0127263069152832} +03/05/2022 12:47:52 - INFO - codeparrot_training - Step 40260: {'lr': 0.00042198596632315576, 'samples': 20613632, 'steps': 40260, 'loss/train': 2.4111196994781494} +03/05/2022 12:47:52 - INFO - codeparrot_training - Skipping example with length 77 (seq_length=1024) +03/05/2022 12:47:58 - INFO - codeparrot_training - Step 40261: {'lr': 0.0004219821148409364, 'samples': 20614144, 'steps': 40261, 'loss/train': 1.9793587923049927} +03/05/2022 12:48:01 - INFO - codeparrot_training - Step 40262: {'lr': 0.00042197826328122456, 'samples': 20614656, 'steps': 40262, 'loss/train': 1.9395338296890259} +03/05/2022 12:48:02 - INFO - codeparrot_training - Skipping example with length 334 (seq_length=1024) +03/05/2022 12:48:07 - INFO - codeparrot_training - Step 40263: {'lr': 0.00042197441164402197, 'samples': 20615168, 'steps': 40263, 'loss/train': 1.5725336074829102} +03/05/2022 12:48:10 - INFO - codeparrot_training - Step 40264: {'lr': 0.0004219705599293303, 'samples': 20615680, 'steps': 40264, 'loss/train': 1.6913716793060303} +03/05/2022 12:48:11 - INFO - codeparrot_training - Skipping example with length 845 (seq_length=1024) +03/05/2022 12:48:15 - INFO - codeparrot_training - Step 40265: {'lr': 0.00042196670813715137, 'samples': 20616192, 'steps': 40265, 'loss/train': 0.9745018482208252} +03/05/2022 12:48:18 - INFO - codeparrot_training - Step 40266: {'lr': 0.0004219628562674869, 'samples': 20616704, 'steps': 40266, 'loss/train': 0.7311116456985474} +03/05/2022 12:48:19 - INFO - codeparrot_training - Skipping example with length 34 (seq_length=1024) +03/05/2022 12:48:23 - INFO - codeparrot_training - Step 40267: {'lr': 0.00042195900432033865, 'samples': 20617216, 'steps': 40267, 'loss/train': 1.4652647972106934} +03/05/2022 12:48:27 - INFO - codeparrot_training - Step 40268: {'lr': 0.00042195515229570833, 'samples': 20617728, 'steps': 40268, 'loss/train': 1.67008376121521} +03/05/2022 12:48:27 - INFO - codeparrot_training - Skipping example with length 50 (seq_length=1024) +03/05/2022 12:48:32 - INFO - codeparrot_training - Step 40269: {'lr': 0.0004219513001935976, 'samples': 20618240, 'steps': 40269, 'loss/train': 1.4139922857284546} +03/05/2022 12:48:35 - INFO - codeparrot_training - Step 40270: {'lr': 0.00042194744801400837, 'samples': 20618752, 'steps': 40270, 'loss/train': 2.2401371002197266} +03/05/2022 12:48:37 - INFO - codeparrot_training - Skipping example with length 486 (seq_length=1024) +03/05/2022 12:48:41 - INFO - codeparrot_training - Step 40271: {'lr': 0.0004219435957569422, 'samples': 20619264, 'steps': 40271, 'loss/train': 1.7601525783538818} +03/05/2022 12:48:44 - INFO - codeparrot_training - Step 40272: {'lr': 0.0004219397434224009, 'samples': 20619776, 'steps': 40272, 'loss/train': 1.7022475004196167} +03/05/2022 12:48:46 - INFO - codeparrot_training - Skipping example with length 234 (seq_length=1024) +03/05/2022 12:48:49 - INFO - codeparrot_training - Step 40273: {'lr': 0.0004219358910103862, 'samples': 20620288, 'steps': 40273, 'loss/train': 1.2593765258789062} +03/05/2022 12:48:52 - INFO - codeparrot_training - Step 40274: {'lr': 0.00042193203852089993, 'samples': 20620800, 'steps': 40274, 'loss/train': 1.4984902143478394} +03/05/2022 12:48:54 - INFO - codeparrot_training - Skipping example with length 391 (seq_length=1024) +03/05/2022 12:48:57 - INFO - codeparrot_training - Step 40275: {'lr': 0.00042192818595394367, 'samples': 20621312, 'steps': 40275, 'loss/train': 1.2564425468444824} +03/05/2022 12:49:01 - INFO - codeparrot_training - Step 40276: {'lr': 0.00042192433330951926, 'samples': 20621824, 'steps': 40276, 'loss/train': 1.3420233726501465} +03/05/2022 12:49:02 - INFO - codeparrot_training - Skipping example with length 86 (seq_length=1024) +03/05/2022 12:49:06 - INFO - codeparrot_training - Step 40277: {'lr': 0.00042192048058762834, 'samples': 20622336, 'steps': 40277, 'loss/train': 1.660277009010315} +03/05/2022 12:49:09 - INFO - codeparrot_training - Step 40278: {'lr': 0.00042191662778827275, 'samples': 20622848, 'steps': 40278, 'loss/train': 1.7961219549179077} +03/05/2022 12:49:11 - INFO - codeparrot_training - Skipping example with length 1013 (seq_length=1024) +03/05/2022 12:49:14 - INFO - codeparrot_training - Step 40279: {'lr': 0.0004219127749114541, 'samples': 20623360, 'steps': 40279, 'loss/train': 0.9085057377815247} +03/05/2022 12:49:17 - INFO - codeparrot_training - Step 40280: {'lr': 0.00042190892195717426, 'samples': 20623872, 'steps': 40280, 'loss/train': 1.5079188346862793} +03/05/2022 12:49:19 - INFO - codeparrot_training - Skipping example with length 361 (seq_length=1024) +03/05/2022 12:49:23 - INFO - codeparrot_training - Step 40281: {'lr': 0.000421905068925435, 'samples': 20624384, 'steps': 40281, 'loss/train': 1.2070930004119873} +03/05/2022 12:49:26 - INFO - codeparrot_training - Step 40282: {'lr': 0.00042190121581623784, 'samples': 20624896, 'steps': 40282, 'loss/train': 1.6784605979919434} +03/05/2022 12:49:27 - INFO - codeparrot_training - Skipping example with length 605 (seq_length=1024) +03/05/2022 12:49:31 - INFO - codeparrot_training - Step 40283: {'lr': 0.0004218973626295847, 'samples': 20625408, 'steps': 40283, 'loss/train': 1.5749303102493286} +03/05/2022 12:49:34 - INFO - codeparrot_training - Step 40284: {'lr': 0.0004218935093654772, 'samples': 20625920, 'steps': 40284, 'loss/train': 1.6060492992401123} +03/05/2022 12:49:36 - INFO - codeparrot_training - Skipping example with length 894 (seq_length=1024) +03/05/2022 12:49:40 - INFO - codeparrot_training - Step 40285: {'lr': 0.00042188965602391726, 'samples': 20626432, 'steps': 40285, 'loss/train': 2.050286054611206} +03/05/2022 12:49:43 - INFO - codeparrot_training - Step 40286: {'lr': 0.0004218858026049064, 'samples': 20626944, 'steps': 40286, 'loss/train': 0.8168420791625977} +03/05/2022 12:49:44 - INFO - codeparrot_training - Skipping example with length 606 (seq_length=1024) +03/05/2022 12:49:48 - INFO - codeparrot_training - Step 40287: {'lr': 0.00042188194910844644, 'samples': 20627456, 'steps': 40287, 'loss/train': 1.77615225315094} +03/05/2022 12:49:51 - INFO - codeparrot_training - Step 40288: {'lr': 0.0004218780955345392, 'samples': 20627968, 'steps': 40288, 'loss/train': 1.5234456062316895} +03/05/2022 12:49:52 - INFO - codeparrot_training - Skipping example with length 705 (seq_length=1024) +03/05/2022 12:49:57 - INFO - codeparrot_training - Step 40289: {'lr': 0.0004218742418831863, 'samples': 20628480, 'steps': 40289, 'loss/train': 1.285285234451294} +03/05/2022 12:50:00 - INFO - codeparrot_training - Step 40290: {'lr': 0.0004218703881543895, 'samples': 20628992, 'steps': 40290, 'loss/train': 1.875503659248352} +03/05/2022 12:50:01 - INFO - codeparrot_training - Skipping example with length 704 (seq_length=1024) +03/05/2022 12:50:05 - INFO - codeparrot_training - Step 40291: {'lr': 0.0004218665343481506, 'samples': 20629504, 'steps': 40291, 'loss/train': 1.5325589179992676} +03/05/2022 12:50:08 - INFO - codeparrot_training - Step 40292: {'lr': 0.00042186268046447124, 'samples': 20630016, 'steps': 40292, 'loss/train': 1.9097596406936646} +03/05/2022 12:50:09 - INFO - codeparrot_training - Skipping example with length 775 (seq_length=1024) +03/05/2022 12:50:13 - INFO - codeparrot_training - Step 40293: {'lr': 0.0004218588265033533, 'samples': 20630528, 'steps': 40293, 'loss/train': 2.6795494556427} +03/05/2022 12:50:17 - INFO - codeparrot_training - Step 40294: {'lr': 0.0004218549724647983, 'samples': 20631040, 'steps': 40294, 'loss/train': 1.7871052026748657} +03/05/2022 12:50:17 - INFO - codeparrot_training - Skipping example with length 412 (seq_length=1024) +03/05/2022 12:50:22 - INFO - codeparrot_training - Step 40295: {'lr': 0.0004218511183488082, 'samples': 20631552, 'steps': 40295, 'loss/train': 2.957859516143799} +03/05/2022 12:50:25 - INFO - codeparrot_training - Step 40296: {'lr': 0.00042184726415538457, 'samples': 20632064, 'steps': 40296, 'loss/train': 1.789513111114502} +03/05/2022 12:50:25 - INFO - codeparrot_training - Skipping example with length 662 (seq_length=1024) +03/05/2022 12:50:30 - INFO - codeparrot_training - Step 40297: {'lr': 0.00042184340988452924, 'samples': 20632576, 'steps': 40297, 'loss/train': 1.6110070943832397} +03/05/2022 12:50:33 - INFO - codeparrot_training - Step 40298: {'lr': 0.00042183955553624393, 'samples': 20633088, 'steps': 40298, 'loss/train': 0.9351043701171875} +03/05/2022 12:50:33 - INFO - codeparrot_training - Skipping example with length 62 (seq_length=1024) +03/05/2022 12:50:39 - INFO - codeparrot_training - Step 40299: {'lr': 0.0004218357011105304, 'samples': 20633600, 'steps': 40299, 'loss/train': 1.9298216104507446} +03/05/2022 12:50:42 - INFO - codeparrot_training - Step 40300: {'lr': 0.00042183184660739027, 'samples': 20634112, 'steps': 40300, 'loss/train': 1.3387579917907715} +03/05/2022 12:50:42 - INFO - codeparrot_training - Skipping example with length 828 (seq_length=1024) +03/05/2022 12:50:47 - INFO - codeparrot_training - Step 40301: {'lr': 0.00042182799202682543, 'samples': 20634624, 'steps': 40301, 'loss/train': 1.7560116052627563} +03/05/2022 12:50:50 - INFO - codeparrot_training - Step 40302: {'lr': 0.0004218241373688375, 'samples': 20635136, 'steps': 40302, 'loss/train': 0.708766520023346} +03/05/2022 12:50:50 - INFO - codeparrot_training - Skipping example with length 421 (seq_length=1024) +03/05/2022 12:50:55 - INFO - codeparrot_training - Step 40303: {'lr': 0.0004218202826334283, 'samples': 20635648, 'steps': 40303, 'loss/train': 0.47029706835746765} +03/05/2022 12:50:58 - INFO - codeparrot_training - Skipping example with length 235 (seq_length=1024) +03/05/2022 12:51:01 - INFO - codeparrot_training - Step 40304: {'lr': 0.0004218164278205995, 'samples': 20636160, 'steps': 40304, 'loss/train': 2.161125421524048} +03/05/2022 12:51:04 - INFO - codeparrot_training - Step 40305: {'lr': 0.00042181257293035293, 'samples': 20636672, 'steps': 40305, 'loss/train': 1.8054277896881104} +03/05/2022 12:51:07 - INFO - codeparrot_training - Skipping example with length 897 (seq_length=1024) +03/05/2022 12:51:09 - INFO - codeparrot_training - Step 40306: {'lr': 0.00042180871796269025, 'samples': 20637184, 'steps': 40306, 'loss/train': 0.5323250889778137} +03/05/2022 12:51:12 - INFO - codeparrot_training - Step 40307: {'lr': 0.00042180486291761314, 'samples': 20637696, 'steps': 40307, 'loss/train': 1.5488289594650269} +03/05/2022 12:51:15 - INFO - codeparrot_training - Skipping example with length 969 (seq_length=1024) +03/05/2022 12:51:18 - INFO - codeparrot_training - Step 40308: {'lr': 0.0004218010077951235, 'samples': 20638208, 'steps': 40308, 'loss/train': 1.853540062904358} +03/05/2022 12:51:21 - INFO - codeparrot_training - Step 40309: {'lr': 0.00042179715259522293, 'samples': 20638720, 'steps': 40309, 'loss/train': 2.5182223320007324} +03/05/2022 12:51:24 - INFO - codeparrot_training - Skipping example with length 862 (seq_length=1024) +03/05/2022 12:51:26 - INFO - codeparrot_training - Step 40310: {'lr': 0.00042179329731791324, 'samples': 20639232, 'steps': 40310, 'loss/train': 2.00529146194458} +03/05/2022 12:51:29 - INFO - codeparrot_training - Step 40311: {'lr': 0.0004217894419631961, 'samples': 20639744, 'steps': 40311, 'loss/train': 1.5842262506484985} +03/05/2022 12:51:33 - INFO - codeparrot_training - Step 40312: {'lr': 0.00042178558653107337, 'samples': 20640256, 'steps': 40312, 'loss/train': 1.5957057476043701} +03/05/2022 12:51:33 - INFO - codeparrot_training - Skipping example with length 619 (seq_length=1024) +03/05/2022 12:51:38 - INFO - codeparrot_training - Step 40313: {'lr': 0.0004217817310215466, 'samples': 20640768, 'steps': 40313, 'loss/train': 2.4080498218536377} +03/05/2022 12:51:41 - INFO - codeparrot_training - Skipping example with length 453 (seq_length=1024) +03/05/2022 12:51:43 - INFO - codeparrot_training - Step 40314: {'lr': 0.00042177787543461767, 'samples': 20641280, 'steps': 40314, 'loss/train': 1.6943702697753906} +03/05/2022 12:51:46 - INFO - codeparrot_training - Step 40315: {'lr': 0.0004217740197702883, 'samples': 20641792, 'steps': 40315, 'loss/train': 2.04622220993042} +03/05/2022 12:51:49 - INFO - codeparrot_training - Skipping example with length 642 (seq_length=1024) +03/05/2022 12:51:51 - INFO - codeparrot_training - Step 40316: {'lr': 0.00042177016402856023, 'samples': 20642304, 'steps': 40316, 'loss/train': 1.220918893814087} +03/05/2022 12:51:55 - INFO - codeparrot_training - Step 40317: {'lr': 0.00042176630820943515, 'samples': 20642816, 'steps': 40317, 'loss/train': 2.8290717601776123} +03/05/2022 12:51:57 - INFO - codeparrot_training - Skipping example with length 159 (seq_length=1024) +03/05/2022 12:52:00 - INFO - codeparrot_training - Step 40318: {'lr': 0.0004217624523129148, 'samples': 20643328, 'steps': 40318, 'loss/train': 2.869656562805176} +03/05/2022 12:52:03 - INFO - codeparrot_training - Step 40319: {'lr': 0.0004217585963390009, 'samples': 20643840, 'steps': 40319, 'loss/train': 1.237679362297058} +03/05/2022 12:52:06 - INFO - codeparrot_training - Skipping example with length 723 (seq_length=1024) +03/05/2022 12:52:08 - INFO - codeparrot_training - Step 40320: {'lr': 0.00042175474028769534, 'samples': 20644352, 'steps': 40320, 'loss/train': 1.5363487005233765} +03/05/2022 12:52:11 - INFO - codeparrot_training - Step 40321: {'lr': 0.00042175088415899963, 'samples': 20644864, 'steps': 40321, 'loss/train': 2.3435394763946533} +03/05/2022 12:52:14 - INFO - codeparrot_training - Skipping example with length 10 (seq_length=1024) +03/05/2022 12:52:17 - INFO - codeparrot_training - Step 40322: {'lr': 0.00042174702795291574, 'samples': 20645376, 'steps': 40322, 'loss/train': 1.8248348236083984} +03/05/2022 12:52:20 - INFO - codeparrot_training - Step 40323: {'lr': 0.0004217431716694452, 'samples': 20645888, 'steps': 40323, 'loss/train': 1.5584944486618042} +03/05/2022 12:52:23 - INFO - codeparrot_training - Skipping example with length 388 (seq_length=1024) +03/05/2022 12:52:25 - INFO - codeparrot_training - Step 40324: {'lr': 0.00042173931530858986, 'samples': 20646400, 'steps': 40324, 'loss/train': 1.779521107673645} +03/05/2022 12:52:28 - INFO - codeparrot_training - Step 40325: {'lr': 0.00042173545887035145, 'samples': 20646912, 'steps': 40325, 'loss/train': 1.5679527521133423} +03/05/2022 12:52:31 - INFO - codeparrot_training - Skipping example with length 89 (seq_length=1024) +03/05/2022 12:52:34 - INFO - codeparrot_training - Step 40326: {'lr': 0.0004217316023547317, 'samples': 20647424, 'steps': 40326, 'loss/train': 2.856717109680176} +03/05/2022 12:52:37 - INFO - codeparrot_training - Step 40327: {'lr': 0.00042172774576173226, 'samples': 20647936, 'steps': 40327, 'loss/train': 1.3751329183578491} +03/05/2022 12:52:40 - INFO - codeparrot_training - Step 40328: {'lr': 0.00042172388909135505, 'samples': 20648448, 'steps': 40328, 'loss/train': 1.240923285484314} +03/05/2022 12:52:40 - INFO - codeparrot_training - Skipping example with length 415 (seq_length=1024) +03/05/2022 12:52:45 - INFO - codeparrot_training - Step 40329: {'lr': 0.0004217200323436017, 'samples': 20648960, 'steps': 40329, 'loss/train': 1.105275273323059} +03/05/2022 12:52:49 - INFO - codeparrot_training - Step 40330: {'lr': 0.00042171617551847387, 'samples': 20649472, 'steps': 40330, 'loss/train': 1.6173230409622192} +03/05/2022 12:52:49 - INFO - codeparrot_training - Skipping example with length 234 (seq_length=1024) +03/05/2022 12:52:54 - INFO - codeparrot_training - Step 40331: {'lr': 0.0004217123186159735, 'samples': 20649984, 'steps': 40331, 'loss/train': 3.351693630218506} +03/05/2022 12:52:58 - INFO - codeparrot_training - Step 40332: {'lr': 0.0004217084616361021, 'samples': 20650496, 'steps': 40332, 'loss/train': 1.0710575580596924} +03/05/2022 12:53:00 - INFO - codeparrot_training - Skipping example with length 245 (seq_length=1024) +03/05/2022 12:53:03 - INFO - codeparrot_training - Step 40333: {'lr': 0.0004217046045788615, 'samples': 20651008, 'steps': 40333, 'loss/train': 0.9402898550033569} +03/05/2022 12:53:06 - INFO - codeparrot_training - Step 40334: {'lr': 0.0004217007474442535, 'samples': 20651520, 'steps': 40334, 'loss/train': 1.787232756614685} +03/05/2022 12:53:08 - INFO - codeparrot_training - Skipping example with length 273 (seq_length=1024) +03/05/2022 12:53:11 - INFO - codeparrot_training - Step 40335: {'lr': 0.00042169689023227987, 'samples': 20652032, 'steps': 40335, 'loss/train': 2.142812728881836} +03/05/2022 12:53:14 - INFO - codeparrot_training - Step 40336: {'lr': 0.00042169303294294216, 'samples': 20652544, 'steps': 40336, 'loss/train': 0.7325757741928101} +03/05/2022 12:53:16 - INFO - codeparrot_training - Skipping example with length 419 (seq_length=1024) +03/05/2022 12:53:20 - INFO - codeparrot_training - Step 40337: {'lr': 0.0004216891755762423, 'samples': 20653056, 'steps': 40337, 'loss/train': 2.296633243560791} +03/05/2022 12:53:23 - INFO - codeparrot_training - Step 40338: {'lr': 0.00042168531813218193, 'samples': 20653568, 'steps': 40338, 'loss/train': 1.2176541090011597} +03/05/2022 12:53:25 - INFO - codeparrot_training - Skipping example with length 803 (seq_length=1024) +03/05/2022 12:53:28 - INFO - codeparrot_training - Step 40339: {'lr': 0.0004216814606107627, 'samples': 20654080, 'steps': 40339, 'loss/train': 2.2222280502319336} +03/05/2022 12:53:31 - INFO - codeparrot_training - Step 40340: {'lr': 0.00042167760301198656, 'samples': 20654592, 'steps': 40340, 'loss/train': 1.4149153232574463} +03/05/2022 12:53:33 - INFO - codeparrot_training - Skipping example with length 675 (seq_length=1024) +03/05/2022 12:53:37 - INFO - codeparrot_training - Step 40341: {'lr': 0.0004216737453358551, 'samples': 20655104, 'steps': 40341, 'loss/train': 1.9043769836425781} +03/05/2022 12:53:40 - INFO - codeparrot_training - Step 40342: {'lr': 0.00042166988758237013, 'samples': 20655616, 'steps': 40342, 'loss/train': 2.356032371520996} +03/05/2022 12:53:42 - INFO - codeparrot_training - Skipping example with length 707 (seq_length=1024) +03/05/2022 12:53:45 - INFO - codeparrot_training - Step 40343: {'lr': 0.00042166602975153333, 'samples': 20656128, 'steps': 40343, 'loss/train': 2.0854787826538086} +03/05/2022 12:53:48 - INFO - codeparrot_training - Step 40344: {'lr': 0.0004216621718433465, 'samples': 20656640, 'steps': 40344, 'loss/train': 1.160915732383728} +03/05/2022 12:53:51 - INFO - codeparrot_training - Skipping example with length 547 (seq_length=1024) +03/05/2022 12:53:54 - INFO - codeparrot_training - Step 40345: {'lr': 0.0004216583138578113, 'samples': 20657152, 'steps': 40345, 'loss/train': 1.7056429386138916} +03/05/2022 12:53:57 - INFO - codeparrot_training - Step 40346: {'lr': 0.00042165445579492956, 'samples': 20657664, 'steps': 40346, 'loss/train': 1.211523175239563} +03/05/2022 12:53:59 - INFO - codeparrot_training - Skipping example with length 335 (seq_length=1024) +03/05/2022 12:54:02 - INFO - codeparrot_training - Step 40347: {'lr': 0.00042165059765470294, 'samples': 20658176, 'steps': 40347, 'loss/train': 1.269286870956421} +03/05/2022 12:54:05 - INFO - codeparrot_training - Step 40348: {'lr': 0.0004216467394371333, 'samples': 20658688, 'steps': 40348, 'loss/train': 2.2034244537353516} +03/05/2022 12:54:08 - INFO - codeparrot_training - Skipping example with length 313 (seq_length=1024) +03/05/2022 12:54:11 - INFO - codeparrot_training - Step 40349: {'lr': 0.00042164288114222213, 'samples': 20659200, 'steps': 40349, 'loss/train': 0.9977911710739136} +03/05/2022 12:54:14 - INFO - codeparrot_training - Step 40350: {'lr': 0.0004216390227699714, 'samples': 20659712, 'steps': 40350, 'loss/train': 1.3264919519424438} +03/05/2022 12:54:17 - INFO - codeparrot_training - Step 40351: {'lr': 0.0004216351643203828, 'samples': 20660224, 'steps': 40351, 'loss/train': 1.4736515283584595} +03/05/2022 12:54:18 - INFO - codeparrot_training - Skipping example with length 820 (seq_length=1024) +03/05/2022 12:54:23 - INFO - codeparrot_training - Step 40352: {'lr': 0.000421631305793458, 'samples': 20660736, 'steps': 40352, 'loss/train': 2.7394800186157227} +03/05/2022 12:54:26 - INFO - codeparrot_training - Step 40353: {'lr': 0.00042162744718919875, 'samples': 20661248, 'steps': 40353, 'loss/train': 1.6738702058792114} +03/05/2022 12:54:27 - INFO - codeparrot_training - Skipping example with length 81 (seq_length=1024) +03/05/2022 12:54:31 - INFO - codeparrot_training - Step 40354: {'lr': 0.0004216235885076069, 'samples': 20661760, 'steps': 40354, 'loss/train': 0.9687190055847168} +03/05/2022 12:54:34 - INFO - codeparrot_training - Step 40355: {'lr': 0.00042161972974868415, 'samples': 20662272, 'steps': 40355, 'loss/train': 2.260641574859619} +03/05/2022 12:54:35 - INFO - codeparrot_training - Skipping example with length 129 (seq_length=1024) +03/05/2022 12:54:40 - INFO - codeparrot_training - Step 40356: {'lr': 0.00042161587091243215, 'samples': 20662784, 'steps': 40356, 'loss/train': 0.9091039896011353} +03/05/2022 12:54:43 - INFO - codeparrot_training - Step 40357: {'lr': 0.00042161201199885257, 'samples': 20663296, 'steps': 40357, 'loss/train': 1.64747154712677} +03/05/2022 12:54:44 - INFO - codeparrot_training - Skipping example with length 418 (seq_length=1024) +03/05/2022 12:54:48 - INFO - codeparrot_training - Step 40358: {'lr': 0.0004216081530079474, 'samples': 20663808, 'steps': 40358, 'loss/train': 1.711816430091858} +03/05/2022 12:54:51 - INFO - codeparrot_training - Step 40359: {'lr': 0.0004216042939397182, 'samples': 20664320, 'steps': 40359, 'loss/train': 1.868990421295166} +03/05/2022 12:54:52 - INFO - codeparrot_training - Skipping example with length 231 (seq_length=1024) +03/05/2022 12:54:57 - INFO - codeparrot_training - Step 40360: {'lr': 0.00042160043479416676, 'samples': 20664832, 'steps': 40360, 'loss/train': 1.6425817012786865} +03/05/2022 12:55:00 - INFO - codeparrot_training - Step 40361: {'lr': 0.00042159657557129483, 'samples': 20665344, 'steps': 40361, 'loss/train': 1.5550633668899536} +03/05/2022 12:55:00 - INFO - codeparrot_training - Skipping example with length 816 (seq_length=1024) +03/05/2022 12:55:05 - INFO - codeparrot_training - Step 40362: {'lr': 0.0004215927162711041, 'samples': 20665856, 'steps': 40362, 'loss/train': 1.6908413171768188} +03/05/2022 12:55:08 - INFO - codeparrot_training - Step 40363: {'lr': 0.00042158885689359637, 'samples': 20666368, 'steps': 40363, 'loss/train': 3.022095203399658} +03/05/2022 12:55:08 - INFO - codeparrot_training - Skipping example with length 903 (seq_length=1024) +03/05/2022 12:55:13 - INFO - codeparrot_training - Step 40364: {'lr': 0.0004215849974387733, 'samples': 20666880, 'steps': 40364, 'loss/train': 1.7659708261489868} +03/05/2022 12:55:17 - INFO - codeparrot_training - Step 40365: {'lr': 0.0004215811379066367, 'samples': 20667392, 'steps': 40365, 'loss/train': 1.6003681421279907} +03/05/2022 12:55:17 - INFO - codeparrot_training - Skipping example with length 888 (seq_length=1024) +03/05/2022 12:55:22 - INFO - codeparrot_training - Step 40366: {'lr': 0.00042157727829718827, 'samples': 20667904, 'steps': 40366, 'loss/train': 2.088017225265503} +03/05/2022 12:55:25 - INFO - codeparrot_training - Step 40367: {'lr': 0.00042157341861042986, 'samples': 20668416, 'steps': 40367, 'loss/train': 2.4340786933898926} +03/05/2022 12:55:25 - INFO - codeparrot_training - Skipping example with length 326 (seq_length=1024) +03/05/2022 12:55:30 - INFO - codeparrot_training - Step 40368: {'lr': 0.00042156955884636307, 'samples': 20668928, 'steps': 40368, 'loss/train': 1.2669000625610352} +03/05/2022 12:55:34 - INFO - codeparrot_training - Step 40369: {'lr': 0.0004215656990049896, 'samples': 20669440, 'steps': 40369, 'loss/train': 0.949313759803772} +03/05/2022 12:55:34 - INFO - codeparrot_training - Skipping example with length 874 (seq_length=1024) +03/05/2022 12:55:39 - INFO - codeparrot_training - Step 40370: {'lr': 0.0004215618390863114, 'samples': 20669952, 'steps': 40370, 'loss/train': 1.9419571161270142} +03/05/2022 12:55:42 - INFO - codeparrot_training - Step 40371: {'lr': 0.00042155797909033, 'samples': 20670464, 'steps': 40371, 'loss/train': 1.8753588199615479} +03/05/2022 12:55:42 - INFO - codeparrot_training - Skipping example with length 782 (seq_length=1024) +03/05/2022 12:55:47 - INFO - codeparrot_training - Step 40372: {'lr': 0.00042155411901704723, 'samples': 20670976, 'steps': 40372, 'loss/train': 1.2074916362762451} +03/05/2022 12:55:51 - INFO - codeparrot_training - Step 40373: {'lr': 0.0004215502588664648, 'samples': 20671488, 'steps': 40373, 'loss/train': 1.6162254810333252} +03/05/2022 12:55:51 - INFO - codeparrot_training - Skipping example with length 992 (seq_length=1024) +03/05/2022 12:55:56 - INFO - codeparrot_training - Step 40374: {'lr': 0.0004215463986385845, 'samples': 20672000, 'steps': 40374, 'loss/train': 1.6806952953338623} +03/05/2022 12:55:59 - INFO - codeparrot_training - Step 40375: {'lr': 0.0004215425383334081, 'samples': 20672512, 'steps': 40375, 'loss/train': 1.0073401927947998} +03/05/2022 12:55:59 - INFO - codeparrot_training - Skipping example with length 206 (seq_length=1024) +03/05/2022 12:56:05 - INFO - codeparrot_training - Step 40376: {'lr': 0.00042153867795093714, 'samples': 20673024, 'steps': 40376, 'loss/train': 1.6950125694274902} +03/05/2022 12:56:08 - INFO - codeparrot_training - Step 40377: {'lr': 0.0004215348174911736, 'samples': 20673536, 'steps': 40377, 'loss/train': 0.7287610173225403} +03/05/2022 12:56:08 - INFO - codeparrot_training - Skipping example with length 225 (seq_length=1024) +03/05/2022 12:56:13 - INFO - codeparrot_training - Step 40378: {'lr': 0.0004215309569541191, 'samples': 20674048, 'steps': 40378, 'loss/train': 1.9431819915771484} +03/05/2022 12:56:16 - INFO - codeparrot_training - Step 40379: {'lr': 0.00042152709633977545, 'samples': 20674560, 'steps': 40379, 'loss/train': 1.4730685949325562} +03/05/2022 12:56:16 - INFO - codeparrot_training - Skipping example with length 811 (seq_length=1024) +03/05/2022 12:56:22 - INFO - codeparrot_training - Step 40380: {'lr': 0.0004215232356481442, 'samples': 20675072, 'steps': 40380, 'loss/train': 1.224008560180664} +03/05/2022 12:56:25 - INFO - codeparrot_training - Step 40381: {'lr': 0.0004215193748792273, 'samples': 20675584, 'steps': 40381, 'loss/train': 1.7239350080490112} +03/05/2022 12:56:25 - INFO - codeparrot_training - Skipping example with length 805 (seq_length=1024) +03/05/2022 12:56:30 - INFO - codeparrot_training - Step 40382: {'lr': 0.00042151551403302645, 'samples': 20676096, 'steps': 40382, 'loss/train': 1.7135679721832275} +03/05/2022 12:56:33 - INFO - codeparrot_training - Step 40383: {'lr': 0.00042151165310954335, 'samples': 20676608, 'steps': 40383, 'loss/train': 0.8786811828613281} +03/05/2022 12:56:33 - INFO - codeparrot_training - Skipping example with length 205 (seq_length=1024) +03/05/2022 12:56:39 - INFO - codeparrot_training - Step 40384: {'lr': 0.0004215077921087798, 'samples': 20677120, 'steps': 40384, 'loss/train': 1.348458170890808} +03/05/2022 12:56:42 - INFO - codeparrot_training - Step 40385: {'lr': 0.00042150393103073736, 'samples': 20677632, 'steps': 40385, 'loss/train': 1.6739588975906372} +03/05/2022 12:56:43 - INFO - codeparrot_training - Skipping example with length 650 (seq_length=1024) +03/05/2022 12:56:47 - INFO - codeparrot_training - Step 40386: {'lr': 0.00042150006987541795, 'samples': 20678144, 'steps': 40386, 'loss/train': 1.393949031829834} +03/05/2022 12:56:50 - INFO - codeparrot_training - Step 40387: {'lr': 0.0004214962086428232, 'samples': 20678656, 'steps': 40387, 'loss/train': 1.1483789682388306} +03/05/2022 12:56:51 - INFO - codeparrot_training - Skipping example with length 857 (seq_length=1024) +03/05/2022 12:56:55 - INFO - codeparrot_training - Step 40388: {'lr': 0.00042149234733295497, 'samples': 20679168, 'steps': 40388, 'loss/train': 2.563614845275879} +03/05/2022 12:56:59 - INFO - codeparrot_training - Step 40389: {'lr': 0.00042148848594581503, 'samples': 20679680, 'steps': 40389, 'loss/train': 1.7633692026138306} +03/05/2022 12:56:59 - INFO - codeparrot_training - Skipping example with length 872 (seq_length=1024) +03/05/2022 12:57:04 - INFO - codeparrot_training - Step 40390: {'lr': 0.00042148462448140487, 'samples': 20680192, 'steps': 40390, 'loss/train': 1.609235405921936} +03/05/2022 12:57:07 - INFO - codeparrot_training - Step 40391: {'lr': 0.0004214807629397264, 'samples': 20680704, 'steps': 40391, 'loss/train': 1.716700553894043} +03/05/2022 12:57:08 - INFO - codeparrot_training - Skipping example with length 244 (seq_length=1024) +03/05/2022 12:57:13 - INFO - codeparrot_training - Step 40392: {'lr': 0.00042147690132078136, 'samples': 20681216, 'steps': 40392, 'loss/train': 1.8902121782302856} +03/05/2022 12:57:16 - INFO - codeparrot_training - Step 40393: {'lr': 0.0004214730396245715, 'samples': 20681728, 'steps': 40393, 'loss/train': 1.286687970161438} +03/05/2022 12:57:16 - INFO - codeparrot_training - Skipping example with length 329 (seq_length=1024) +03/05/2022 12:57:21 - INFO - codeparrot_training - Step 40394: {'lr': 0.0004214691778510985, 'samples': 20682240, 'steps': 40394, 'loss/train': 1.6278773546218872} +03/05/2022 12:57:24 - INFO - codeparrot_training - Step 40395: {'lr': 0.0004214653160003642, 'samples': 20682752, 'steps': 40395, 'loss/train': 1.3448028564453125} +03/05/2022 12:57:24 - INFO - codeparrot_training - Skipping example with length 954 (seq_length=1024) +03/05/2022 12:57:29 - INFO - codeparrot_training - Step 40396: {'lr': 0.00042146145407237023, 'samples': 20683264, 'steps': 40396, 'loss/train': 1.7101707458496094} +03/05/2022 12:57:33 - INFO - codeparrot_training - Step 40397: {'lr': 0.00042145759206711834, 'samples': 20683776, 'steps': 40397, 'loss/train': 1.7709565162658691} +03/05/2022 12:57:33 - INFO - codeparrot_training - Skipping example with length 105 (seq_length=1024) +03/05/2022 12:57:39 - INFO - codeparrot_training - Step 40398: {'lr': 0.0004214537299846104, 'samples': 20684288, 'steps': 40398, 'loss/train': 1.4574381113052368} +03/05/2022 12:57:42 - INFO - codeparrot_training - Step 40399: {'lr': 0.00042144986782484796, 'samples': 20684800, 'steps': 40399, 'loss/train': 2.3374555110931396} +03/05/2022 12:57:44 - INFO - codeparrot_training - Skipping example with length 1010 (seq_length=1024) +03/05/2022 12:57:47 - INFO - codeparrot_training - Step 40400: {'lr': 0.00042144600558783284, 'samples': 20685312, 'steps': 40400, 'loss/train': 1.3583827018737793} +03/05/2022 12:57:50 - INFO - codeparrot_training - Step 40401: {'lr': 0.0004214421432735669, 'samples': 20685824, 'steps': 40401, 'loss/train': 1.7995244264602661} +03/05/2022 12:57:53 - INFO - codeparrot_training - Skipping example with length 96 (seq_length=1024) +03/05/2022 12:57:55 - INFO - codeparrot_training - Step 40402: {'lr': 0.0004214382808820517, 'samples': 20686336, 'steps': 40402, 'loss/train': 1.208327054977417} +03/05/2022 12:57:59 - INFO - codeparrot_training - Step 40403: {'lr': 0.0004214344184132891, 'samples': 20686848, 'steps': 40403, 'loss/train': 2.945127487182617} +03/05/2022 12:58:01 - INFO - codeparrot_training - Skipping example with length 328 (seq_length=1024) +03/05/2022 12:58:04 - INFO - codeparrot_training - Step 40404: {'lr': 0.0004214305558672808, 'samples': 20687360, 'steps': 40404, 'loss/train': 1.7858608961105347} +03/05/2022 12:58:07 - INFO - codeparrot_training - Step 40405: {'lr': 0.0004214266932440285, 'samples': 20687872, 'steps': 40405, 'loss/train': 2.7339656352996826} +03/05/2022 12:58:10 - INFO - codeparrot_training - Skipping example with length 793 (seq_length=1024) +03/05/2022 12:58:13 - INFO - codeparrot_training - Step 40406: {'lr': 0.000421422830543534, 'samples': 20688384, 'steps': 40406, 'loss/train': 0.13135404884815216} +03/05/2022 12:58:16 - INFO - codeparrot_training - Step 40407: {'lr': 0.00042141896776579904, 'samples': 20688896, 'steps': 40407, 'loss/train': 1.8912774324417114} +03/05/2022 12:58:19 - INFO - codeparrot_training - Skipping example with length 827 (seq_length=1024) +03/05/2022 12:58:21 - INFO - codeparrot_training - Step 40408: {'lr': 0.0004214151049108252, 'samples': 20689408, 'steps': 40408, 'loss/train': 3.0950307846069336} +03/05/2022 12:58:24 - INFO - codeparrot_training - Step 40409: {'lr': 0.00042141124197861456, 'samples': 20689920, 'steps': 40409, 'loss/train': 1.602133870124817} +03/05/2022 12:58:27 - INFO - codeparrot_training - Skipping example with length 814 (seq_length=1024) +03/05/2022 12:58:30 - INFO - codeparrot_training - Step 40410: {'lr': 0.0004214073789691686, 'samples': 20690432, 'steps': 40410, 'loss/train': 1.0112121105194092} +03/05/2022 12:58:33 - INFO - codeparrot_training - Step 40411: {'lr': 0.00042140351588248906, 'samples': 20690944, 'steps': 40411, 'loss/train': 0.4828191101551056} +03/05/2022 12:58:35 - INFO - codeparrot_training - Skipping example with length 668 (seq_length=1024) +03/05/2022 12:58:38 - INFO - codeparrot_training - Step 40412: {'lr': 0.00042139965271857774, 'samples': 20691456, 'steps': 40412, 'loss/train': 1.8102251291275024} +03/05/2022 12:58:41 - INFO - codeparrot_training - Step 40413: {'lr': 0.0004213957894774364, 'samples': 20691968, 'steps': 40413, 'loss/train': 0.9536743760108948} +03/05/2022 12:58:44 - INFO - codeparrot_training - Skipping example with length 871 (seq_length=1024) +03/05/2022 12:58:47 - INFO - codeparrot_training - Step 40414: {'lr': 0.0004213919261590667, 'samples': 20692480, 'steps': 40414, 'loss/train': 2.5317165851593018} +03/05/2022 12:58:50 - INFO - codeparrot_training - Step 40415: {'lr': 0.0004213880627634705, 'samples': 20692992, 'steps': 40415, 'loss/train': 1.9899123907089233} +03/05/2022 12:58:52 - INFO - codeparrot_training - Skipping example with length 16 (seq_length=1024) +03/05/2022 12:58:55 - INFO - codeparrot_training - Step 40416: {'lr': 0.0004213841992906496, 'samples': 20693504, 'steps': 40416, 'loss/train': 1.7682116031646729} +03/05/2022 12:58:58 - INFO - codeparrot_training - Step 40417: {'lr': 0.0004213803357406055, 'samples': 20694016, 'steps': 40417, 'loss/train': 1.7396270036697388} +03/05/2022 12:59:00 - INFO - codeparrot_training - Skipping example with length 775 (seq_length=1024) +03/05/2022 12:59:03 - INFO - codeparrot_training - Step 40418: {'lr': 0.00042137647211334007, 'samples': 20694528, 'steps': 40418, 'loss/train': 1.9525171518325806} +03/05/2022 12:59:07 - INFO - codeparrot_training - Step 40419: {'lr': 0.000421372608408855, 'samples': 20695040, 'steps': 40419, 'loss/train': 1.0113167762756348} +03/05/2022 12:59:09 - INFO - codeparrot_training - Skipping example with length 1011 (seq_length=1024) +03/05/2022 12:59:12 - INFO - codeparrot_training - Step 40420: {'lr': 0.0004213687446271522, 'samples': 20695552, 'steps': 40420, 'loss/train': 2.3042619228363037} +03/05/2022 12:59:15 - INFO - codeparrot_training - Step 40421: {'lr': 0.0004213648807682332, 'samples': 20696064, 'steps': 40421, 'loss/train': 1.7643669843673706} +03/05/2022 12:59:18 - INFO - codeparrot_training - Skipping example with length 600 (seq_length=1024) +03/05/2022 12:59:20 - INFO - codeparrot_training - Step 40422: {'lr': 0.00042136101683209993, 'samples': 20696576, 'steps': 40422, 'loss/train': 1.6008896827697754} +03/05/2022 12:59:24 - INFO - codeparrot_training - Step 40423: {'lr': 0.00042135715281875393, 'samples': 20697088, 'steps': 40423, 'loss/train': 1.5414414405822754} +03/05/2022 12:59:26 - INFO - codeparrot_training - Skipping example with length 365 (seq_length=1024) +03/05/2022 12:59:29 - INFO - codeparrot_training - Step 40424: {'lr': 0.000421353288728197, 'samples': 20697600, 'steps': 40424, 'loss/train': 2.1668522357940674} +03/05/2022 12:59:32 - INFO - codeparrot_training - Step 40425: {'lr': 0.00042134942456043104, 'samples': 20698112, 'steps': 40425, 'loss/train': 1.9574054479599} +03/05/2022 12:59:35 - INFO - codeparrot_training - Skipping example with length 918 (seq_length=1024) +03/05/2022 12:59:37 - INFO - codeparrot_training - Step 40426: {'lr': 0.00042134556031545755, 'samples': 20698624, 'steps': 40426, 'loss/train': 1.439299464225769} +03/05/2022 12:59:41 - INFO - codeparrot_training - Step 40427: {'lr': 0.0004213416959932785, 'samples': 20699136, 'steps': 40427, 'loss/train': 1.8368465900421143} +03/05/2022 12:59:43 - INFO - codeparrot_training - Skipping example with length 375 (seq_length=1024) +03/05/2022 12:59:46 - INFO - codeparrot_training - Step 40428: {'lr': 0.0004213378315938955, 'samples': 20699648, 'steps': 40428, 'loss/train': 2.3524179458618164} +03/05/2022 12:59:49 - INFO - codeparrot_training - Step 40429: {'lr': 0.0004213339671173103, 'samples': 20700160, 'steps': 40429, 'loss/train': 1.7115963697433472} +03/05/2022 12:59:52 - INFO - codeparrot_training - Skipping example with length 555 (seq_length=1024) +03/05/2022 12:59:55 - INFO - codeparrot_training - Step 40430: {'lr': 0.00042133010256352466, 'samples': 20700672, 'steps': 40430, 'loss/train': 0.7688522338867188} +03/05/2022 12:59:58 - INFO - codeparrot_training - Step 40431: {'lr': 0.00042132623793254034, 'samples': 20701184, 'steps': 40431, 'loss/train': 2.027472496032715} +03/05/2022 13:00:01 - INFO - codeparrot_training - Step 40432: {'lr': 0.0004213223732243591, 'samples': 20701696, 'steps': 40432, 'loss/train': 2.3814525604248047} +03/05/2022 13:00:02 - INFO - codeparrot_training - Skipping example with length 663 (seq_length=1024) +03/05/2022 13:00:06 - INFO - codeparrot_training - Step 40433: {'lr': 0.00042131850843898255, 'samples': 20702208, 'steps': 40433, 'loss/train': 2.0063397884368896} +03/05/2022 13:00:09 - INFO - codeparrot_training - Step 40434: {'lr': 0.0004213146435764126, 'samples': 20702720, 'steps': 40434, 'loss/train': 1.459519624710083} +03/05/2022 13:00:10 - INFO - codeparrot_training - Skipping example with length 538 (seq_length=1024) +03/05/2022 13:00:15 - INFO - codeparrot_training - Step 40435: {'lr': 0.00042131077863665086, 'samples': 20703232, 'steps': 40435, 'loss/train': 1.7103214263916016} +03/05/2022 13:00:18 - INFO - codeparrot_training - Step 40436: {'lr': 0.00042130691361969914, 'samples': 20703744, 'steps': 40436, 'loss/train': 1.070001482963562} +03/05/2022 13:00:18 - INFO - codeparrot_training - Skipping example with length 633 (seq_length=1024) +03/05/2022 13:00:23 - INFO - codeparrot_training - Step 40437: {'lr': 0.00042130304852555916, 'samples': 20704256, 'steps': 40437, 'loss/train': 1.699519157409668} +03/05/2022 13:00:26 - INFO - codeparrot_training - Step 40438: {'lr': 0.00042129918335423265, 'samples': 20704768, 'steps': 40438, 'loss/train': 2.064394235610962} +03/05/2022 13:00:27 - INFO - codeparrot_training - Skipping example with length 938 (seq_length=1024) +03/05/2022 13:00:32 - INFO - codeparrot_training - Step 40439: {'lr': 0.0004212953181057214, 'samples': 20705280, 'steps': 40439, 'loss/train': 1.8346575498580933} +03/05/2022 13:00:35 - INFO - codeparrot_training - Step 40440: {'lr': 0.0004212914527800272, 'samples': 20705792, 'steps': 40440, 'loss/train': 1.8808594942092896} +03/05/2022 13:00:35 - INFO - codeparrot_training - Skipping example with length 23 (seq_length=1024) +03/05/2022 13:00:40 - INFO - codeparrot_training - Step 40441: {'lr': 0.0004212875873771516, 'samples': 20706304, 'steps': 40441, 'loss/train': 1.5219106674194336} +03/05/2022 13:00:43 - INFO - codeparrot_training - Step 40442: {'lr': 0.0004212837218970965, 'samples': 20706816, 'steps': 40442, 'loss/train': 2.1068472862243652} +03/05/2022 13:00:44 - INFO - codeparrot_training - Skipping example with length 521 (seq_length=1024) +03/05/2022 13:00:49 - INFO - codeparrot_training - Step 40443: {'lr': 0.00042127985633986365, 'samples': 20707328, 'steps': 40443, 'loss/train': 2.0398902893066406} +03/05/2022 13:00:52 - INFO - codeparrot_training - Step 40444: {'lr': 0.0004212759907054546, 'samples': 20707840, 'steps': 40444, 'loss/train': 2.1075222492218018} +03/05/2022 13:00:52 - INFO - codeparrot_training - Skipping example with length 327 (seq_length=1024) +03/05/2022 13:00:58 - INFO - codeparrot_training - Step 40445: {'lr': 0.00042127212499387136, 'samples': 20708352, 'steps': 40445, 'loss/train': 0.8281036019325256} +03/05/2022 13:01:01 - INFO - codeparrot_training - Step 40446: {'lr': 0.0004212682592051155, 'samples': 20708864, 'steps': 40446, 'loss/train': 0.5361807942390442} +03/05/2022 13:01:02 - INFO - codeparrot_training - Skipping example with length 775 (seq_length=1024) +03/05/2022 13:01:06 - INFO - codeparrot_training - Step 40447: {'lr': 0.0004212643933391888, 'samples': 20709376, 'steps': 40447, 'loss/train': 1.4625929594039917} +03/05/2022 13:01:09 - INFO - codeparrot_training - Step 40448: {'lr': 0.000421260527396093, 'samples': 20709888, 'steps': 40448, 'loss/train': 0.3280651569366455} +03/05/2022 13:01:11 - INFO - codeparrot_training - Skipping example with length 372 (seq_length=1024) +03/05/2022 13:01:15 - INFO - codeparrot_training - Step 40449: {'lr': 0.0004212566613758299, 'samples': 20710400, 'steps': 40449, 'loss/train': 1.0910710096359253} +03/05/2022 13:01:18 - INFO - codeparrot_training - Step 40450: {'lr': 0.00042125279527840124, 'samples': 20710912, 'steps': 40450, 'loss/train': 1.8031296730041504} +03/05/2022 13:01:20 - INFO - codeparrot_training - Skipping example with length 388 (seq_length=1024) +03/05/2022 13:01:23 - INFO - codeparrot_training - Step 40451: {'lr': 0.0004212489291038085, 'samples': 20711424, 'steps': 40451, 'loss/train': 1.6279146671295166} +03/05/2022 13:01:26 - INFO - codeparrot_training - Step 40452: {'lr': 0.0004212450628520538, 'samples': 20711936, 'steps': 40452, 'loss/train': 2.129652976989746} +03/05/2022 13:01:28 - INFO - codeparrot_training - Skipping example with length 754 (seq_length=1024) +03/05/2022 13:01:32 - INFO - codeparrot_training - Step 40453: {'lr': 0.0004212411965231387, 'samples': 20712448, 'steps': 40453, 'loss/train': 1.7844544649124146} +03/05/2022 13:01:35 - INFO - codeparrot_training - Step 40454: {'lr': 0.0004212373301170649, 'samples': 20712960, 'steps': 40454, 'loss/train': 0.6720020174980164} +03/05/2022 13:01:37 - INFO - codeparrot_training - Skipping example with length 592 (seq_length=1024) +03/05/2022 13:01:40 - INFO - codeparrot_training - Step 40455: {'lr': 0.00042123346363383426, 'samples': 20713472, 'steps': 40455, 'loss/train': 1.6479519605636597} +03/05/2022 13:01:43 - INFO - codeparrot_training - Step 40456: {'lr': 0.0004212295970734484, 'samples': 20713984, 'steps': 40456, 'loss/train': 1.8850681781768799} +03/05/2022 13:01:45 - INFO - codeparrot_training - Skipping example with length 179 (seq_length=1024) +03/05/2022 13:01:49 - INFO - codeparrot_training - Step 40457: {'lr': 0.00042122573043590925, 'samples': 20714496, 'steps': 40457, 'loss/train': 1.4840099811553955} +03/05/2022 13:01:52 - INFO - codeparrot_training - Step 40458: {'lr': 0.0004212218637212183, 'samples': 20715008, 'steps': 40458, 'loss/train': 1.8641413450241089} +03/05/2022 13:01:54 - INFO - codeparrot_training - Skipping example with length 697 (seq_length=1024) +03/05/2022 13:01:57 - INFO - codeparrot_training - Step 40459: {'lr': 0.00042121799692937747, 'samples': 20715520, 'steps': 40459, 'loss/train': 2.2151408195495605} +03/05/2022 13:02:00 - INFO - codeparrot_training - Step 40460: {'lr': 0.00042121413006038845, 'samples': 20716032, 'steps': 40460, 'loss/train': 2.5388576984405518} +03/05/2022 13:02:02 - INFO - codeparrot_training - Skipping example with length 97 (seq_length=1024) +03/05/2022 13:02:06 - INFO - codeparrot_training - Step 40461: {'lr': 0.000421210263114253, 'samples': 20716544, 'steps': 40461, 'loss/train': 0.24657876789569855} +03/05/2022 13:02:09 - INFO - codeparrot_training - Step 40462: {'lr': 0.00042120639609097277, 'samples': 20717056, 'steps': 40462, 'loss/train': 1.4923537969589233} +03/05/2022 13:02:11 - INFO - codeparrot_training - Skipping example with length 237 (seq_length=1024) +03/05/2022 13:02:14 - INFO - codeparrot_training - Step 40463: {'lr': 0.0004212025289905497, 'samples': 20717568, 'steps': 40463, 'loss/train': 1.5568737983703613} +03/05/2022 13:02:17 - INFO - codeparrot_training - Step 40464: {'lr': 0.0004211986618129854, 'samples': 20718080, 'steps': 40464, 'loss/train': 1.427642583847046} +03/05/2022 13:02:19 - INFO - codeparrot_training - Skipping example with length 287 (seq_length=1024) +03/05/2022 13:02:22 - INFO - codeparrot_training - Step 40465: {'lr': 0.00042119479455828153, 'samples': 20718592, 'steps': 40465, 'loss/train': 2.0156407356262207} +03/05/2022 13:02:26 - INFO - codeparrot_training - Step 40466: {'lr': 0.00042119092722644, 'samples': 20719104, 'steps': 40466, 'loss/train': 1.5152536630630493} +03/05/2022 13:02:27 - INFO - codeparrot_training - Skipping example with length 616 (seq_length=1024) +03/05/2022 13:02:31 - INFO - codeparrot_training - Step 40467: {'lr': 0.0004211870598174624, 'samples': 20719616, 'steps': 40467, 'loss/train': 1.8197591304779053} +03/05/2022 13:02:34 - INFO - codeparrot_training - Step 40468: {'lr': 0.0004211831923313506, 'samples': 20720128, 'steps': 40468, 'loss/train': 0.12048035115003586} +03/05/2022 13:02:36 - INFO - codeparrot_training - Skipping example with length 666 (seq_length=1024) +03/05/2022 13:02:40 - INFO - codeparrot_training - Step 40469: {'lr': 0.0004211793247681064, 'samples': 20720640, 'steps': 40469, 'loss/train': 2.8310811519622803} +03/05/2022 13:02:43 - INFO - codeparrot_training - Step 40470: {'lr': 0.0004211754571277313, 'samples': 20721152, 'steps': 40470, 'loss/train': 1.6204134225845337} +03/05/2022 13:02:45 - INFO - codeparrot_training - Skipping example with length 868 (seq_length=1024) +03/05/2022 13:02:48 - INFO - codeparrot_training - Step 40471: {'lr': 0.0004211715894102272, 'samples': 20721664, 'steps': 40471, 'loss/train': 0.06425996124744415} +03/05/2022 13:02:51 - INFO - codeparrot_training - Step 40472: {'lr': 0.00042116772161559585, 'samples': 20722176, 'steps': 40472, 'loss/train': 1.3094398975372314} +03/05/2022 13:02:54 - INFO - codeparrot_training - Skipping example with length 390 (seq_length=1024) +03/05/2022 13:02:57 - INFO - codeparrot_training - Step 40473: {'lr': 0.0004211638537438389, 'samples': 20722688, 'steps': 40473, 'loss/train': 2.0183467864990234} +03/05/2022 13:03:00 - INFO - codeparrot_training - Step 40474: {'lr': 0.0004211599857949583, 'samples': 20723200, 'steps': 40474, 'loss/train': 1.3488357067108154} +03/05/2022 13:03:03 - INFO - codeparrot_training - Skipping example with length 238 (seq_length=1024) +03/05/2022 13:03:05 - INFO - codeparrot_training - Step 40475: {'lr': 0.00042115611776895556, 'samples': 20723712, 'steps': 40475, 'loss/train': 1.3609415292739868} +03/05/2022 13:03:08 - INFO - codeparrot_training - Step 40476: {'lr': 0.00042115224966583255, 'samples': 20724224, 'steps': 40476, 'loss/train': 1.0609122514724731} +03/05/2022 13:03:11 - INFO - codeparrot_training - Skipping example with length 215 (seq_length=1024) +03/05/2022 13:03:14 - INFO - codeparrot_training - Step 40477: {'lr': 0.00042114838148559093, 'samples': 20724736, 'steps': 40477, 'loss/train': 2.4370596408843994} +03/05/2022 13:03:17 - INFO - codeparrot_training - Step 40478: {'lr': 0.0004211445132282325, 'samples': 20725248, 'steps': 40478, 'loss/train': 1.23561429977417} +03/05/2022 13:03:19 - INFO - codeparrot_training - Skipping example with length 720 (seq_length=1024) +03/05/2022 13:03:22 - INFO - codeparrot_training - Step 40479: {'lr': 0.000421140644893759, 'samples': 20725760, 'steps': 40479, 'loss/train': 1.4060800075531006} +03/05/2022 13:03:25 - INFO - codeparrot_training - Step 40480: {'lr': 0.0004211367764821722, 'samples': 20726272, 'steps': 40480, 'loss/train': 1.4225828647613525} +03/05/2022 13:03:28 - INFO - codeparrot_training - Skipping example with length 493 (seq_length=1024) +03/05/2022 13:03:31 - INFO - codeparrot_training - Step 40481: {'lr': 0.00042113290799347376, 'samples': 20726784, 'steps': 40481, 'loss/train': 1.8455241918563843} +03/05/2022 13:03:34 - INFO - codeparrot_training - Step 40482: {'lr': 0.00042112903942766546, 'samples': 20727296, 'steps': 40482, 'loss/train': 1.7216932773590088} +03/05/2022 13:03:36 - INFO - codeparrot_training - Skipping example with length 173 (seq_length=1024) +03/05/2022 13:03:39 - INFO - codeparrot_training - Step 40483: {'lr': 0.00042112517078474914, 'samples': 20727808, 'steps': 40483, 'loss/train': 1.8887310028076172} +03/05/2022 13:03:42 - INFO - codeparrot_training - Step 40484: {'lr': 0.0004211213020647264, 'samples': 20728320, 'steps': 40484, 'loss/train': 1.6480332612991333} +03/05/2022 13:03:44 - INFO - codeparrot_training - Skipping example with length 76 (seq_length=1024) +03/05/2022 13:03:48 - INFO - codeparrot_training - Step 40485: {'lr': 0.00042111743326759903, 'samples': 20728832, 'steps': 40485, 'loss/train': 1.9263304471969604} +03/05/2022 13:03:51 - INFO - codeparrot_training - Step 40486: {'lr': 0.00042111356439336877, 'samples': 20729344, 'steps': 40486, 'loss/train': 1.3970844745635986} +03/05/2022 13:03:53 - INFO - codeparrot_training - Skipping example with length 475 (seq_length=1024) +03/05/2022 13:03:56 - INFO - codeparrot_training - Step 40487: {'lr': 0.0004211096954420375, 'samples': 20729856, 'steps': 40487, 'loss/train': 2.2267203330993652} +03/05/2022 13:03:59 - INFO - codeparrot_training - Step 40488: {'lr': 0.0004211058264136067, 'samples': 20730368, 'steps': 40488, 'loss/train': 1.5073891878128052} +03/05/2022 13:04:02 - INFO - codeparrot_training - Skipping example with length 696 (seq_length=1024) +03/05/2022 13:04:05 - INFO - codeparrot_training - Step 40489: {'lr': 0.0004211019573080783, 'samples': 20730880, 'steps': 40489, 'loss/train': 2.1810362339019775} +03/05/2022 13:04:08 - INFO - codeparrot_training - Step 40490: {'lr': 0.00042109808812545405, 'samples': 20731392, 'steps': 40490, 'loss/train': 1.982108235359192} +03/05/2022 13:04:10 - INFO - codeparrot_training - Skipping example with length 307 (seq_length=1024) +03/05/2022 13:04:13 - INFO - codeparrot_training - Step 40491: {'lr': 0.0004210942188657356, 'samples': 20731904, 'steps': 40491, 'loss/train': 1.579054832458496} +03/05/2022 13:04:16 - INFO - codeparrot_training - Step 40492: {'lr': 0.00042109034952892473, 'samples': 20732416, 'steps': 40492, 'loss/train': 1.6329907178878784} +03/05/2022 13:04:18 - INFO - codeparrot_training - Skipping example with length 332 (seq_length=1024) +03/05/2022 13:04:22 - INFO - codeparrot_training - Step 40493: {'lr': 0.00042108648011502314, 'samples': 20732928, 'steps': 40493, 'loss/train': 0.17180968821048737} +03/05/2022 13:04:25 - INFO - codeparrot_training - Step 40494: {'lr': 0.00042108261062403276, 'samples': 20733440, 'steps': 40494, 'loss/train': 6.234177112579346} +03/05/2022 13:04:26 - INFO - codeparrot_training - Skipping example with length 391 (seq_length=1024) +03/05/2022 13:04:30 - INFO - codeparrot_training - Step 40495: {'lr': 0.00042107874105595507, 'samples': 20733952, 'steps': 40495, 'loss/train': 1.6370471715927124} +03/05/2022 13:04:33 - INFO - codeparrot_training - Step 40496: {'lr': 0.00042107487141079206, 'samples': 20734464, 'steps': 40496, 'loss/train': 1.680106282234192} +03/05/2022 13:04:35 - INFO - codeparrot_training - Skipping example with length 229 (seq_length=1024) +03/05/2022 13:04:39 - INFO - codeparrot_training - Step 40497: {'lr': 0.00042107100168854516, 'samples': 20734976, 'steps': 40497, 'loss/train': 1.5704518556594849} +03/05/2022 13:04:42 - INFO - codeparrot_training - Step 40498: {'lr': 0.00042106713188921647, 'samples': 20735488, 'steps': 40498, 'loss/train': 0.9190590977668762} +03/05/2022 13:04:45 - INFO - codeparrot_training - Skipping example with length 566 (seq_length=1024) +03/05/2022 13:04:47 - INFO - codeparrot_training - Step 40499: {'lr': 0.00042106326201280756, 'samples': 20736000, 'steps': 40499, 'loss/train': 1.778525710105896} +03/05/2022 13:04:50 - INFO - codeparrot_training - Step 40500: {'lr': 0.0004210593920593201, 'samples': 20736512, 'steps': 40500, 'loss/train': 2.459087371826172} +03/05/2022 13:04:53 - INFO - codeparrot_training - Skipping example with length 635 (seq_length=1024) +03/05/2022 13:04:56 - INFO - codeparrot_training - Step 40501: {'lr': 0.000421055522028756, 'samples': 20737024, 'steps': 40501, 'loss/train': 1.4212647676467896} +03/05/2022 13:04:59 - INFO - codeparrot_training - Step 40502: {'lr': 0.00042105165192111684, 'samples': 20737536, 'steps': 40502, 'loss/train': 1.6367316246032715} +03/05/2022 13:05:01 - INFO - codeparrot_training - Skipping example with length 80 (seq_length=1024) +03/05/2022 13:05:04 - INFO - codeparrot_training - Step 40503: {'lr': 0.00042104778173640453, 'samples': 20738048, 'steps': 40503, 'loss/train': 1.6660511493682861} +03/05/2022 13:05:07 - INFO - codeparrot_training - Step 40504: {'lr': 0.0004210439114746206, 'samples': 20738560, 'steps': 40504, 'loss/train': 1.747920036315918} +03/05/2022 13:05:10 - INFO - codeparrot_training - Skipping example with length 397 (seq_length=1024) +03/05/2022 13:05:13 - INFO - codeparrot_training - Step 40505: {'lr': 0.00042104004113576707, 'samples': 20739072, 'steps': 40505, 'loss/train': 1.8140813112258911} +03/05/2022 13:05:16 - INFO - codeparrot_training - Step 40506: {'lr': 0.00042103617071984544, 'samples': 20739584, 'steps': 40506, 'loss/train': 2.1517508029937744} +03/05/2022 13:05:18 - INFO - codeparrot_training - Skipping example with length 526 (seq_length=1024) +03/05/2022 13:05:21 - INFO - codeparrot_training - Step 40507: {'lr': 0.00042103230022685765, 'samples': 20740096, 'steps': 40507, 'loss/train': 1.3578181266784668} +03/05/2022 13:05:24 - INFO - codeparrot_training - Step 40508: {'lr': 0.0004210284296568052, 'samples': 20740608, 'steps': 40508, 'loss/train': 2.1557819843292236} +03/05/2022 13:05:26 - INFO - codeparrot_training - Skipping example with length 164 (seq_length=1024) +03/05/2022 13:05:29 - INFO - codeparrot_training - Step 40509: {'lr': 0.0004210245590096901, 'samples': 20741120, 'steps': 40509, 'loss/train': 1.4781718254089355} +03/05/2022 13:05:33 - INFO - codeparrot_training - Step 40510: {'lr': 0.000421020688285514, 'samples': 20741632, 'steps': 40510, 'loss/train': 1.686828374862671} +03/05/2022 13:05:35 - INFO - codeparrot_training - Skipping example with length 648 (seq_length=1024) +03/05/2022 13:05:38 - INFO - codeparrot_training - Step 40511: {'lr': 0.0004210168174842785, 'samples': 20742144, 'steps': 40511, 'loss/train': 1.8903236389160156} +03/05/2022 13:05:41 - INFO - codeparrot_training - Step 40512: {'lr': 0.00042101294660598556, 'samples': 20742656, 'steps': 40512, 'loss/train': 0.7560727596282959} +03/05/2022 13:05:44 - INFO - codeparrot_training - Step 40513: {'lr': 0.0004210090756506367, 'samples': 20743168, 'steps': 40513, 'loss/train': 2.10577392578125} +03/05/2022 13:05:45 - INFO - codeparrot_training - Skipping example with length 402 (seq_length=1024) +03/05/2022 13:05:50 - INFO - codeparrot_training - Step 40514: {'lr': 0.0004210052046182339, 'samples': 20743680, 'steps': 40514, 'loss/train': 2.329699754714966} +03/05/2022 13:05:53 - INFO - codeparrot_training - Step 40515: {'lr': 0.0004210013335087787, 'samples': 20744192, 'steps': 40515, 'loss/train': 1.962345004081726} +03/05/2022 13:05:53 - INFO - codeparrot_training - Skipping example with length 434 (seq_length=1024) +03/05/2022 13:05:58 - INFO - codeparrot_training - Step 40516: {'lr': 0.000420997462322273, 'samples': 20744704, 'steps': 40516, 'loss/train': 1.3428285121917725} +03/05/2022 13:06:01 - INFO - codeparrot_training - Step 40517: {'lr': 0.00042099359105871856, 'samples': 20745216, 'steps': 40517, 'loss/train': 1.6272666454315186} +03/05/2022 13:06:02 - INFO - codeparrot_training - Skipping example with length 930 (seq_length=1024) +03/05/2022 13:06:07 - INFO - codeparrot_training - Step 40518: {'lr': 0.00042098971971811695, 'samples': 20745728, 'steps': 40518, 'loss/train': 1.5761278867721558} +03/05/2022 13:06:10 - INFO - codeparrot_training - Step 40519: {'lr': 0.00042098584830047004, 'samples': 20746240, 'steps': 40519, 'loss/train': 1.4719390869140625} +03/05/2022 13:06:10 - INFO - codeparrot_training - Skipping example with length 689 (seq_length=1024) +03/05/2022 13:06:15 - INFO - codeparrot_training - Step 40520: {'lr': 0.00042098197680577956, 'samples': 20746752, 'steps': 40520, 'loss/train': 0.7827234864234924} +03/05/2022 13:06:18 - INFO - codeparrot_training - Step 40521: {'lr': 0.00042097810523404714, 'samples': 20747264, 'steps': 40521, 'loss/train': 1.4584271907806396} +03/05/2022 13:06:19 - INFO - codeparrot_training - Skipping example with length 402 (seq_length=1024) +03/05/2022 13:06:24 - INFO - codeparrot_training - Step 40522: {'lr': 0.0004209742335852747, 'samples': 20747776, 'steps': 40522, 'loss/train': 1.4485782384872437} +03/05/2022 13:06:27 - INFO - codeparrot_training - Step 40523: {'lr': 0.0004209703618594639, 'samples': 20748288, 'steps': 40523, 'loss/train': 1.7003995180130005} +03/05/2022 13:06:27 - INFO - codeparrot_training - Skipping example with length 62 (seq_length=1024) +03/05/2022 13:06:32 - INFO - codeparrot_training - Step 40524: {'lr': 0.00042096649005661654, 'samples': 20748800, 'steps': 40524, 'loss/train': 1.6282175779342651} +03/05/2022 13:06:35 - INFO - codeparrot_training - Step 40525: {'lr': 0.00042096261817673423, 'samples': 20749312, 'steps': 40525, 'loss/train': 1.5166821479797363} +03/05/2022 13:06:36 - INFO - codeparrot_training - Skipping example with length 638 (seq_length=1024) +03/05/2022 13:06:41 - INFO - codeparrot_training - Step 40526: {'lr': 0.0004209587462198189, 'samples': 20749824, 'steps': 40526, 'loss/train': 1.4901609420776367} +03/05/2022 13:06:44 - INFO - codeparrot_training - Step 40527: {'lr': 0.0004209548741858721, 'samples': 20750336, 'steps': 40527, 'loss/train': 1.5285699367523193} +03/05/2022 13:06:46 - INFO - codeparrot_training - Skipping example with length 90 (seq_length=1024) +03/05/2022 13:06:49 - INFO - codeparrot_training - Step 40528: {'lr': 0.00042095100207489573, 'samples': 20750848, 'steps': 40528, 'loss/train': 1.6518371105194092} +03/05/2022 13:06:52 - INFO - codeparrot_training - Step 40529: {'lr': 0.0004209471298868914, 'samples': 20751360, 'steps': 40529, 'loss/train': 1.4010772705078125} +03/05/2022 13:06:54 - INFO - codeparrot_training - Skipping example with length 263 (seq_length=1024) +03/05/2022 13:06:58 - INFO - codeparrot_training - Step 40530: {'lr': 0.00042094325762186103, 'samples': 20751872, 'steps': 40530, 'loss/train': 1.8935110569000244} +03/05/2022 13:07:01 - INFO - codeparrot_training - Step 40531: {'lr': 0.0004209393852798062, 'samples': 20752384, 'steps': 40531, 'loss/train': 1.6770039796829224} +03/05/2022 13:07:02 - INFO - codeparrot_training - Skipping example with length 955 (seq_length=1024) +03/05/2022 13:07:06 - INFO - codeparrot_training - Step 40532: {'lr': 0.00042093551286072887, 'samples': 20752896, 'steps': 40532, 'loss/train': 1.8970285654067993} +03/05/2022 13:07:09 - INFO - codeparrot_training - Step 40533: {'lr': 0.00042093164036463045, 'samples': 20753408, 'steps': 40533, 'loss/train': 1.2001363039016724} +03/05/2022 13:07:11 - INFO - codeparrot_training - Skipping example with length 655 (seq_length=1024) +03/05/2022 13:07:15 - INFO - codeparrot_training - Step 40534: {'lr': 0.0004209277677915129, 'samples': 20753920, 'steps': 40534, 'loss/train': 0.7736743688583374} +03/05/2022 13:07:18 - INFO - codeparrot_training - Step 40535: {'lr': 0.000420923895141378, 'samples': 20754432, 'steps': 40535, 'loss/train': 0.4928356111049652} +03/05/2022 13:07:20 - INFO - codeparrot_training - Skipping example with length 187 (seq_length=1024) +03/05/2022 13:07:23 - INFO - codeparrot_training - Step 40536: {'lr': 0.0004209200224142274, 'samples': 20754944, 'steps': 40536, 'loss/train': 1.9714469909667969} +03/05/2022 13:07:26 - INFO - codeparrot_training - Step 40537: {'lr': 0.0004209161496100629, 'samples': 20755456, 'steps': 40537, 'loss/train': 1.5068094730377197} +03/05/2022 13:07:29 - INFO - codeparrot_training - Skipping example with length 957 (seq_length=1024) +03/05/2022 13:07:32 - INFO - codeparrot_training - Step 40538: {'lr': 0.00042091227672888624, 'samples': 20755968, 'steps': 40538, 'loss/train': 2.025617837905884} +03/05/2022 13:07:35 - INFO - codeparrot_training - Step 40539: {'lr': 0.00042090840377069906, 'samples': 20756480, 'steps': 40539, 'loss/train': 0.833613395690918} +03/05/2022 13:07:37 - INFO - codeparrot_training - Skipping example with length 765 (seq_length=1024) +03/05/2022 13:07:40 - INFO - codeparrot_training - Step 40540: {'lr': 0.00042090453073550323, 'samples': 20756992, 'steps': 40540, 'loss/train': 2.2700655460357666} +03/05/2022 13:07:43 - INFO - codeparrot_training - Step 40541: {'lr': 0.0004209006576233004, 'samples': 20757504, 'steps': 40541, 'loss/train': 1.6403255462646484} +03/05/2022 13:07:45 - INFO - codeparrot_training - Skipping example with length 899 (seq_length=1024) +03/05/2022 13:07:48 - INFO - codeparrot_training - Step 40542: {'lr': 0.0004208967844340925, 'samples': 20758016, 'steps': 40542, 'loss/train': 2.2528696060180664} +03/05/2022 13:07:52 - INFO - codeparrot_training - Step 40543: {'lr': 0.0004208929111678811, 'samples': 20758528, 'steps': 40543, 'loss/train': 1.7717382907867432} +03/05/2022 13:07:53 - INFO - codeparrot_training - Skipping example with length 41 (seq_length=1024) +03/05/2022 13:07:57 - INFO - codeparrot_training - Step 40544: {'lr': 0.0004208890378246679, 'samples': 20759040, 'steps': 40544, 'loss/train': 1.4949206113815308} +03/05/2022 13:08:00 - INFO - codeparrot_training - Step 40545: {'lr': 0.00042088516440445486, 'samples': 20759552, 'steps': 40545, 'loss/train': 1.7160180807113647} +03/05/2022 13:08:02 - INFO - codeparrot_training - Skipping example with length 293 (seq_length=1024) +03/05/2022 13:08:05 - INFO - codeparrot_training - Step 40546: {'lr': 0.0004208812909072435, 'samples': 20760064, 'steps': 40546, 'loss/train': 1.3404403924942017} +03/05/2022 13:08:09 - INFO - codeparrot_training - Step 40547: {'lr': 0.00042087741733303575, 'samples': 20760576, 'steps': 40547, 'loss/train': 1.8561149835586548} +03/05/2022 13:08:10 - INFO - codeparrot_training - Skipping example with length 325 (seq_length=1024) +03/05/2022 13:08:14 - INFO - codeparrot_training - Step 40548: {'lr': 0.00042087354368183316, 'samples': 20761088, 'steps': 40548, 'loss/train': 3.070607900619507} +03/05/2022 13:08:17 - INFO - codeparrot_training - Step 40549: {'lr': 0.00042086966995363774, 'samples': 20761600, 'steps': 40549, 'loss/train': 2.358048915863037} +03/05/2022 13:08:18 - INFO - codeparrot_training - Skipping example with length 704 (seq_length=1024) +03/05/2022 13:08:22 - INFO - codeparrot_training - Step 40550: {'lr': 0.000420865796148451, 'samples': 20762112, 'steps': 40550, 'loss/train': 1.846623420715332} +03/05/2022 13:08:25 - INFO - codeparrot_training - Step 40551: {'lr': 0.00042086192226627476, 'samples': 20762624, 'steps': 40551, 'loss/train': 1.8676114082336426} +03/05/2022 13:08:27 - INFO - codeparrot_training - Skipping example with length 729 (seq_length=1024) +03/05/2022 13:08:31 - INFO - codeparrot_training - Step 40552: {'lr': 0.00042085804830711084, 'samples': 20763136, 'steps': 40552, 'loss/train': 1.372423529624939} +03/05/2022 13:08:34 - INFO - codeparrot_training - Step 40553: {'lr': 0.00042085417427096085, 'samples': 20763648, 'steps': 40553, 'loss/train': 1.859683632850647} +03/05/2022 13:08:35 - INFO - codeparrot_training - Skipping example with length 518 (seq_length=1024) +03/05/2022 13:08:39 - INFO - codeparrot_training - Step 40554: {'lr': 0.0004208503001578266, 'samples': 20764160, 'steps': 40554, 'loss/train': 2.288742780685425} +03/05/2022 13:08:42 - INFO - codeparrot_training - Step 40555: {'lr': 0.00042084642596770984, 'samples': 20764672, 'steps': 40555, 'loss/train': 1.6476786136627197} +03/05/2022 13:08:44 - INFO - codeparrot_training - Skipping example with length 192 (seq_length=1024) +03/05/2022 13:08:48 - INFO - codeparrot_training - Step 40556: {'lr': 0.0004208425517006124, 'samples': 20765184, 'steps': 40556, 'loss/train': 2.4635915756225586} +03/05/2022 13:08:51 - INFO - codeparrot_training - Step 40557: {'lr': 0.0004208386773565359, 'samples': 20765696, 'steps': 40557, 'loss/train': 2.1452674865722656} +03/05/2022 13:08:52 - INFO - codeparrot_training - Skipping example with length 261 (seq_length=1024) +03/05/2022 13:08:56 - INFO - codeparrot_training - Step 40558: {'lr': 0.0004208348029354821, 'samples': 20766208, 'steps': 40558, 'loss/train': 1.2970952987670898} +03/05/2022 13:08:59 - INFO - codeparrot_training - Step 40559: {'lr': 0.00042083092843745275, 'samples': 20766720, 'steps': 40559, 'loss/train': 0.6324344277381897} +03/05/2022 13:09:01 - INFO - codeparrot_training - Skipping example with length 498 (seq_length=1024) +03/05/2022 13:09:05 - INFO - codeparrot_training - Step 40560: {'lr': 0.0004208270538624497, 'samples': 20767232, 'steps': 40560, 'loss/train': 2.1046700477600098} +03/05/2022 13:09:08 - INFO - codeparrot_training - Step 40561: {'lr': 0.00042082317921047455, 'samples': 20767744, 'steps': 40561, 'loss/train': 0.9006844162940979} +03/05/2022 13:09:09 - INFO - codeparrot_training - Skipping example with length 286 (seq_length=1024) +03/05/2022 13:09:13 - INFO - codeparrot_training - Step 40562: {'lr': 0.0004208193044815291, 'samples': 20768256, 'steps': 40562, 'loss/train': 0.652239978313446} +03/05/2022 13:09:16 - INFO - codeparrot_training - Step 40563: {'lr': 0.0004208154296756152, 'samples': 20768768, 'steps': 40563, 'loss/train': 1.7635574340820312} +03/05/2022 13:09:17 - INFO - codeparrot_training - Skipping example with length 87 (seq_length=1024) +03/05/2022 13:09:21 - INFO - codeparrot_training - Step 40564: {'lr': 0.0004208115547927345, 'samples': 20769280, 'steps': 40564, 'loss/train': 1.8689101934432983} +03/05/2022 13:09:25 - INFO - codeparrot_training - Step 40565: {'lr': 0.0004208076798328886, 'samples': 20769792, 'steps': 40565, 'loss/train': 1.4304279088974} +03/05/2022 13:09:26 - INFO - codeparrot_training - Skipping example with length 758 (seq_length=1024) +03/05/2022 13:09:30 - INFO - codeparrot_training - Step 40566: {'lr': 0.00042080380479607947, 'samples': 20770304, 'steps': 40566, 'loss/train': 0.0964324027299881} +03/05/2022 13:09:33 - INFO - codeparrot_training - Step 40567: {'lr': 0.00042079992968230886, 'samples': 20770816, 'steps': 40567, 'loss/train': 0.8025344014167786} +03/05/2022 13:09:34 - INFO - codeparrot_training - Skipping example with length 555 (seq_length=1024) +03/05/2022 13:09:39 - INFO - codeparrot_training - Step 40568: {'lr': 0.0004207960544915784, 'samples': 20771328, 'steps': 40568, 'loss/train': 1.008409023284912} +03/05/2022 13:09:42 - INFO - codeparrot_training - Step 40569: {'lr': 0.0004207921792238898, 'samples': 20771840, 'steps': 40569, 'loss/train': 1.5095638036727905} +03/05/2022 13:09:43 - INFO - codeparrot_training - Skipping example with length 247 (seq_length=1024) +03/05/2022 13:09:47 - INFO - codeparrot_training - Step 40570: {'lr': 0.0004207883038792449, 'samples': 20772352, 'steps': 40570, 'loss/train': 2.0227723121643066} +03/05/2022 13:09:50 - INFO - codeparrot_training - Step 40571: {'lr': 0.0004207844284576455, 'samples': 20772864, 'steps': 40571, 'loss/train': 1.214285969734192} +03/05/2022 13:09:52 - INFO - codeparrot_training - Skipping example with length 662 (seq_length=1024) +03/05/2022 13:09:55 - INFO - codeparrot_training - Step 40572: {'lr': 0.0004207805529590932, 'samples': 20773376, 'steps': 40572, 'loss/train': 1.791153073310852} +03/05/2022 13:09:59 - INFO - codeparrot_training - Step 40573: {'lr': 0.0004207766773835899, 'samples': 20773888, 'steps': 40573, 'loss/train': 1.8055709600448608} +03/05/2022 13:10:00 - INFO - codeparrot_training - Skipping example with length 172 (seq_length=1024) +03/05/2022 13:10:04 - INFO - codeparrot_training - Step 40574: {'lr': 0.0004207728017311372, 'samples': 20774400, 'steps': 40574, 'loss/train': 1.912635326385498} +03/05/2022 13:10:07 - INFO - codeparrot_training - Step 40575: {'lr': 0.0004207689260017369, 'samples': 20774912, 'steps': 40575, 'loss/train': 1.6282713413238525} +03/05/2022 13:10:09 - INFO - codeparrot_training - Skipping example with length 594 (seq_length=1024) +03/05/2022 13:10:13 - INFO - codeparrot_training - Step 40576: {'lr': 0.0004207650501953908, 'samples': 20775424, 'steps': 40576, 'loss/train': 1.5444647073745728} +03/05/2022 13:10:16 - INFO - codeparrot_training - Step 40577: {'lr': 0.0004207611743121006, 'samples': 20775936, 'steps': 40577, 'loss/train': 0.8495638966560364} +03/05/2022 13:10:18 - INFO - codeparrot_training - Skipping example with length 173 (seq_length=1024) +03/05/2022 13:10:21 - INFO - codeparrot_training - Step 40578: {'lr': 0.00042075729835186807, 'samples': 20776448, 'steps': 40578, 'loss/train': 1.8111493587493896} +03/05/2022 13:10:24 - INFO - codeparrot_training - Step 40579: {'lr': 0.0004207534223146948, 'samples': 20776960, 'steps': 40579, 'loss/train': 2.057074546813965} +03/05/2022 13:10:26 - INFO - codeparrot_training - Skipping example with length 374 (seq_length=1024) +03/05/2022 13:10:30 - INFO - codeparrot_training - Step 40580: {'lr': 0.0004207495462005828, 'samples': 20777472, 'steps': 40580, 'loss/train': 2.1269750595092773} +03/05/2022 13:10:33 - INFO - codeparrot_training - Step 40581: {'lr': 0.0004207456700095337, 'samples': 20777984, 'steps': 40581, 'loss/train': 1.8115850687026978} +03/05/2022 13:10:34 - INFO - codeparrot_training - Skipping example with length 976 (seq_length=1024) +03/05/2022 13:10:38 - INFO - codeparrot_training - Step 40582: {'lr': 0.0004207417937415492, 'samples': 20778496, 'steps': 40582, 'loss/train': 1.5210784673690796} +03/05/2022 13:10:41 - INFO - codeparrot_training - Step 40583: {'lr': 0.000420737917396631, 'samples': 20779008, 'steps': 40583, 'loss/train': 2.3228540420532227} +03/05/2022 13:10:43 - INFO - codeparrot_training - Skipping example with length 629 (seq_length=1024) +03/05/2022 13:10:46 - INFO - codeparrot_training - Step 40584: {'lr': 0.00042073404097478105, 'samples': 20779520, 'steps': 40584, 'loss/train': 1.0096158981323242} +03/05/2022 13:10:50 - INFO - codeparrot_training - Step 40585: {'lr': 0.000420730164476001, 'samples': 20780032, 'steps': 40585, 'loss/train': 2.4979608058929443} +03/05/2022 13:10:51 - INFO - codeparrot_training - Skipping example with length 841 (seq_length=1024) +03/05/2022 13:10:55 - INFO - codeparrot_training - Step 40586: {'lr': 0.00042072628790029243, 'samples': 20780544, 'steps': 40586, 'loss/train': 1.0904587507247925} +03/05/2022 13:10:58 - INFO - codeparrot_training - Step 40587: {'lr': 0.0004207224112476573, 'samples': 20781056, 'steps': 40587, 'loss/train': 1.5106090307235718} +03/05/2022 13:10:59 - INFO - codeparrot_training - Skipping example with length 635 (seq_length=1024) +03/05/2022 13:11:03 - INFO - codeparrot_training - Step 40588: {'lr': 0.0004207185345180973, 'samples': 20781568, 'steps': 40588, 'loss/train': 1.3290677070617676} +03/05/2022 13:11:07 - INFO - codeparrot_training - Step 40589: {'lr': 0.00042071465771161416, 'samples': 20782080, 'steps': 40589, 'loss/train': 1.0604459047317505} +03/05/2022 13:11:08 - INFO - codeparrot_training - Skipping example with length 812 (seq_length=1024) +03/05/2022 13:11:12 - INFO - codeparrot_training - Step 40590: {'lr': 0.0004207107808282097, 'samples': 20782592, 'steps': 40590, 'loss/train': 2.583747386932373} +03/05/2022 13:11:15 - INFO - codeparrot_training - Step 40591: {'lr': 0.00042070690386788545, 'samples': 20783104, 'steps': 40591, 'loss/train': 2.405487298965454} +03/05/2022 13:11:16 - INFO - codeparrot_training - Skipping example with length 380 (seq_length=1024) +03/05/2022 13:11:20 - INFO - codeparrot_training - Step 40592: {'lr': 0.0004207030268306434, 'samples': 20783616, 'steps': 40592, 'loss/train': 1.3871694803237915} +03/05/2022 13:11:23 - INFO - codeparrot_training - Step 40593: {'lr': 0.00042069914971648516, 'samples': 20784128, 'steps': 40593, 'loss/train': 2.0643138885498047} +03/05/2022 13:11:24 - INFO - codeparrot_training - Skipping example with length 699 (seq_length=1024) +03/05/2022 13:11:29 - INFO - codeparrot_training - Step 40594: {'lr': 0.0004206952725254125, 'samples': 20784640, 'steps': 40594, 'loss/train': 2.036855697631836} +03/05/2022 13:11:32 - INFO - codeparrot_training - Step 40595: {'lr': 0.00042069139525742727, 'samples': 20785152, 'steps': 40595, 'loss/train': 1.753844976425171} +03/05/2022 13:11:33 - INFO - codeparrot_training - Skipping example with length 504 (seq_length=1024) +03/05/2022 13:11:37 - INFO - codeparrot_training - Step 40596: {'lr': 0.000420687517912531, 'samples': 20785664, 'steps': 40596, 'loss/train': 1.7587822675704956} +03/05/2022 13:11:40 - INFO - codeparrot_training - Step 40597: {'lr': 0.0004206836404907257, 'samples': 20786176, 'steps': 40597, 'loss/train': 1.781281590461731} +03/05/2022 13:11:41 - INFO - codeparrot_training - Skipping example with length 222 (seq_length=1024) +03/05/2022 13:11:46 - INFO - codeparrot_training - Step 40598: {'lr': 0.0004206797629920129, 'samples': 20786688, 'steps': 40598, 'loss/train': 2.0872039794921875} +03/05/2022 13:11:49 - INFO - codeparrot_training - Step 40599: {'lr': 0.0004206758854163945, 'samples': 20787200, 'steps': 40599, 'loss/train': 2.264943838119507} +03/05/2022 13:11:50 - INFO - codeparrot_training - Skipping example with length 57 (seq_length=1024) +03/05/2022 13:11:54 - INFO - codeparrot_training - Step 40600: {'lr': 0.00042067200776387215, 'samples': 20787712, 'steps': 40600, 'loss/train': 1.7265651226043701} +03/05/2022 13:11:57 - INFO - codeparrot_training - Step 40601: {'lr': 0.0004206681300344476, 'samples': 20788224, 'steps': 40601, 'loss/train': 2.4974310398101807} +03/05/2022 13:11:58 - INFO - codeparrot_training - Skipping example with length 130 (seq_length=1024) +03/05/2022 13:12:03 - INFO - codeparrot_training - Step 40602: {'lr': 0.0004206642522281227, 'samples': 20788736, 'steps': 40602, 'loss/train': 1.8538384437561035} +03/05/2022 13:12:06 - INFO - codeparrot_training - Step 40603: {'lr': 0.000420660374344899, 'samples': 20789248, 'steps': 40603, 'loss/train': 1.0465166568756104} +03/05/2022 13:12:06 - INFO - codeparrot_training - Skipping example with length 339 (seq_length=1024) +03/05/2022 13:12:11 - INFO - codeparrot_training - Step 40604: {'lr': 0.00042065649638477843, 'samples': 20789760, 'steps': 40604, 'loss/train': 0.7066277265548706} +03/05/2022 13:12:14 - INFO - codeparrot_training - Step 40605: {'lr': 0.0004206526183477627, 'samples': 20790272, 'steps': 40605, 'loss/train': 1.7076364755630493} +03/05/2022 13:12:15 - INFO - codeparrot_training - Skipping example with length 745 (seq_length=1024) +03/05/2022 13:12:20 - INFO - codeparrot_training - Step 40606: {'lr': 0.0004206487402338535, 'samples': 20790784, 'steps': 40606, 'loss/train': 1.8296763896942139} +03/05/2022 13:12:23 - INFO - codeparrot_training - Step 40607: {'lr': 0.00042064486204305263, 'samples': 20791296, 'steps': 40607, 'loss/train': 1.7912205457687378} +03/05/2022 13:12:23 - INFO - codeparrot_training - Skipping example with length 677 (seq_length=1024) +03/05/2022 13:12:28 - INFO - codeparrot_training - Step 40608: {'lr': 0.0004206409837753618, 'samples': 20791808, 'steps': 40608, 'loss/train': 1.1085683107376099} +03/05/2022 13:12:31 - INFO - codeparrot_training - Step 40609: {'lr': 0.00042063710543078283, 'samples': 20792320, 'steps': 40609, 'loss/train': 2.084911823272705} +03/05/2022 13:12:31 - INFO - codeparrot_training - Skipping example with length 81 (seq_length=1024) +03/05/2022 13:12:36 - INFO - codeparrot_training - Step 40610: {'lr': 0.00042063322700931733, 'samples': 20792832, 'steps': 40610, 'loss/train': 1.4777162075042725} +03/05/2022 13:12:40 - INFO - codeparrot_training - Step 40611: {'lr': 0.0004206293485109672, 'samples': 20793344, 'steps': 40611, 'loss/train': 1.5778974294662476} +03/05/2022 13:12:40 - INFO - codeparrot_training - Skipping example with length 570 (seq_length=1024) +03/05/2022 13:12:45 - INFO - codeparrot_training - Step 40612: {'lr': 0.0004206254699357341, 'samples': 20793856, 'steps': 40612, 'loss/train': 1.700527548789978} +03/05/2022 13:12:48 - INFO - codeparrot_training - Step 40613: {'lr': 0.00042062159128361976, 'samples': 20794368, 'steps': 40613, 'loss/train': 1.8158572912216187} +03/05/2022 13:12:48 - INFO - codeparrot_training - Skipping example with length 647 (seq_length=1024) +03/05/2022 13:12:53 - INFO - codeparrot_training - Step 40614: {'lr': 0.000420617712554626, 'samples': 20794880, 'steps': 40614, 'loss/train': 1.7934439182281494} +03/05/2022 13:12:57 - INFO - codeparrot_training - Step 40615: {'lr': 0.0004206138337487545, 'samples': 20795392, 'steps': 40615, 'loss/train': 1.6390564441680908} +03/05/2022 13:12:57 - INFO - codeparrot_training - Skipping example with length 1001 (seq_length=1024) +03/05/2022 13:13:02 - INFO - codeparrot_training - Step 40616: {'lr': 0.0004206099548660071, 'samples': 20795904, 'steps': 40616, 'loss/train': 2.2152841091156006} +03/05/2022 13:13:05 - INFO - codeparrot_training - Step 40617: {'lr': 0.00042060607590638547, 'samples': 20796416, 'steps': 40617, 'loss/train': 1.6333396434783936} +03/05/2022 13:13:06 - INFO - codeparrot_training - Skipping example with length 946 (seq_length=1024) +03/05/2022 13:13:10 - INFO - codeparrot_training - Step 40618: {'lr': 0.00042060219686989133, 'samples': 20796928, 'steps': 40618, 'loss/train': 1.8146079778671265} +03/05/2022 13:13:13 - INFO - codeparrot_training - Step 40619: {'lr': 0.00042059831775652644, 'samples': 20797440, 'steps': 40619, 'loss/train': 1.590161681175232} +03/05/2022 13:13:14 - INFO - codeparrot_training - Skipping example with length 582 (seq_length=1024) +03/05/2022 13:13:19 - INFO - codeparrot_training - Step 40620: {'lr': 0.00042059443856629265, 'samples': 20797952, 'steps': 40620, 'loss/train': 1.969254493713379} +03/05/2022 13:13:22 - INFO - codeparrot_training - Step 40621: {'lr': 0.00042059055929919163, 'samples': 20798464, 'steps': 40621, 'loss/train': 1.2693634033203125} +03/05/2022 13:13:23 - INFO - codeparrot_training - Skipping example with length 429 (seq_length=1024) +03/05/2022 13:13:27 - INFO - codeparrot_training - Step 40622: {'lr': 0.00042058667995522513, 'samples': 20798976, 'steps': 40622, 'loss/train': 0.133633092045784} +03/05/2022 13:13:30 - INFO - codeparrot_training - Step 40623: {'lr': 0.0004205828005343949, 'samples': 20799488, 'steps': 40623, 'loss/train': 2.295104742050171} +03/05/2022 13:13:31 - INFO - codeparrot_training - Skipping example with length 526 (seq_length=1024) +03/05/2022 13:13:36 - INFO - codeparrot_training - Step 40624: {'lr': 0.00042057892103670275, 'samples': 20800000, 'steps': 40624, 'loss/train': 2.154745578765869} +03/05/2022 13:13:39 - INFO - codeparrot_training - Step 40625: {'lr': 0.0004205750414621503, 'samples': 20800512, 'steps': 40625, 'loss/train': 2.454918146133423} +03/05/2022 13:13:39 - INFO - codeparrot_training - Skipping example with length 496 (seq_length=1024) +03/05/2022 13:13:44 - INFO - codeparrot_training - Step 40626: {'lr': 0.0004205711618107394, 'samples': 20801024, 'steps': 40626, 'loss/train': 1.6260501146316528} +03/05/2022 13:13:47 - INFO - codeparrot_training - Step 40627: {'lr': 0.00042056728208247175, 'samples': 20801536, 'steps': 40627, 'loss/train': 2.5091705322265625} +03/05/2022 13:13:48 - INFO - codeparrot_training - Skipping example with length 464 (seq_length=1024) +03/05/2022 13:13:52 - INFO - codeparrot_training - Step 40628: {'lr': 0.0004205634022773491, 'samples': 20802048, 'steps': 40628, 'loss/train': 1.998146891593933} +03/05/2022 13:13:56 - INFO - codeparrot_training - Step 40629: {'lr': 0.0004205595223953732, 'samples': 20802560, 'steps': 40629, 'loss/train': 0.7557739615440369} +03/05/2022 13:13:56 - INFO - codeparrot_training - Skipping example with length 755 (seq_length=1024) +03/05/2022 13:14:01 - INFO - codeparrot_training - Step 40630: {'lr': 0.0004205556424365459, 'samples': 20803072, 'steps': 40630, 'loss/train': 2.4227638244628906} +03/05/2022 13:14:04 - INFO - codeparrot_training - Step 40631: {'lr': 0.0004205517624008688, 'samples': 20803584, 'steps': 40631, 'loss/train': 1.3900846242904663} +03/05/2022 13:14:04 - INFO - codeparrot_training - Skipping example with length 466 (seq_length=1024) +03/05/2022 13:14:09 - INFO - codeparrot_training - Step 40632: {'lr': 0.00042054788228834374, 'samples': 20804096, 'steps': 40632, 'loss/train': 2.010648488998413} +03/05/2022 13:14:12 - INFO - codeparrot_training - Step 40633: {'lr': 0.0004205440020989724, 'samples': 20804608, 'steps': 40633, 'loss/train': 1.8523024320602417} +03/05/2022 13:14:12 - INFO - codeparrot_training - Skipping example with length 414 (seq_length=1024) +03/05/2022 13:14:18 - INFO - codeparrot_training - Step 40634: {'lr': 0.0004205401218327565, 'samples': 20805120, 'steps': 40634, 'loss/train': 0.818213701248169} +03/05/2022 13:14:21 - INFO - codeparrot_training - Step 40635: {'lr': 0.0004205362414896979, 'samples': 20805632, 'steps': 40635, 'loss/train': 2.254000663757324} +03/05/2022 13:14:22 - INFO - codeparrot_training - Skipping example with length 798 (seq_length=1024) +03/05/2022 13:14:27 - INFO - codeparrot_training - Step 40636: {'lr': 0.0004205323610697984, 'samples': 20806144, 'steps': 40636, 'loss/train': 1.425824522972107} +03/05/2022 13:14:30 - INFO - codeparrot_training - Step 40637: {'lr': 0.0004205284805730596, 'samples': 20806656, 'steps': 40637, 'loss/train': 1.9774761199951172} +03/05/2022 13:14:31 - INFO - codeparrot_training - Skipping example with length 748 (seq_length=1024) +03/05/2022 13:14:35 - INFO - codeparrot_training - Step 40638: {'lr': 0.00042052459999948323, 'samples': 20807168, 'steps': 40638, 'loss/train': 2.168917655944824} +03/05/2022 13:14:38 - INFO - codeparrot_training - Step 40639: {'lr': 0.00042052071934907116, 'samples': 20807680, 'steps': 40639, 'loss/train': 1.3629273176193237} +03/05/2022 13:14:39 - INFO - codeparrot_training - Skipping example with length 940 (seq_length=1024) +03/05/2022 13:14:43 - INFO - codeparrot_training - Step 40640: {'lr': 0.00042051683862182504, 'samples': 20808192, 'steps': 40640, 'loss/train': 1.6145477294921875} +03/05/2022 13:14:47 - INFO - codeparrot_training - Step 40641: {'lr': 0.0004205129578177467, 'samples': 20808704, 'steps': 40641, 'loss/train': 1.670817494392395} +03/05/2022 13:14:47 - INFO - codeparrot_training - Skipping example with length 319 (seq_length=1024) +03/05/2022 13:14:52 - INFO - codeparrot_training - Step 40642: {'lr': 0.0004205090769368379, 'samples': 20809216, 'steps': 40642, 'loss/train': 0.6168637275695801} +03/05/2022 13:14:55 - INFO - codeparrot_training - Step 40643: {'lr': 0.00042050519597910024, 'samples': 20809728, 'steps': 40643, 'loss/train': 2.1160097122192383} +03/05/2022 13:14:56 - INFO - codeparrot_training - Skipping example with length 88 (seq_length=1024) +03/05/2022 13:15:00 - INFO - codeparrot_training - Step 40644: {'lr': 0.00042050131494453567, 'samples': 20810240, 'steps': 40644, 'loss/train': 1.6015467643737793} +03/05/2022 13:15:03 - INFO - codeparrot_training - Step 40645: {'lr': 0.00042049743383314577, 'samples': 20810752, 'steps': 40645, 'loss/train': 1.812772512435913} +03/05/2022 13:15:04 - INFO - codeparrot_training - Skipping example with length 415 (seq_length=1024) +03/05/2022 13:15:09 - INFO - codeparrot_training - Step 40646: {'lr': 0.0004204935526449324, 'samples': 20811264, 'steps': 40646, 'loss/train': 2.5114407539367676} +03/05/2022 13:15:12 - INFO - codeparrot_training - Step 40647: {'lr': 0.0004204896713798972, 'samples': 20811776, 'steps': 40647, 'loss/train': 0.5237265825271606} +03/05/2022 13:15:12 - INFO - codeparrot_training - Skipping example with length 752 (seq_length=1024) +03/05/2022 13:15:17 - INFO - codeparrot_training - Step 40648: {'lr': 0.00042048579003804205, 'samples': 20812288, 'steps': 40648, 'loss/train': 2.0232467651367188} +03/05/2022 13:15:20 - INFO - codeparrot_training - Step 40649: {'lr': 0.00042048190861936866, 'samples': 20812800, 'steps': 40649, 'loss/train': 1.8211101293563843} +03/05/2022 13:15:22 - INFO - codeparrot_training - Skipping example with length 468 (seq_length=1024) +03/05/2022 13:15:26 - INFO - codeparrot_training - Step 40650: {'lr': 0.0004204780271238786, 'samples': 20813312, 'steps': 40650, 'loss/train': 1.5003325939178467} +03/05/2022 13:15:29 - INFO - codeparrot_training - Step 40651: {'lr': 0.00042047414555157394, 'samples': 20813824, 'steps': 40651, 'loss/train': 1.5937751531600952} +03/05/2022 13:15:30 - INFO - codeparrot_training - Skipping example with length 963 (seq_length=1024) +03/05/2022 13:15:34 - INFO - codeparrot_training - Step 40652: {'lr': 0.0004204702639024562, 'samples': 20814336, 'steps': 40652, 'loss/train': 6.6315016746521} +03/05/2022 13:15:38 - INFO - codeparrot_training - Step 40653: {'lr': 0.00042046638217652717, 'samples': 20814848, 'steps': 40653, 'loss/train': 1.817343831062317} +03/05/2022 13:15:39 - INFO - codeparrot_training - Skipping example with length 659 (seq_length=1024) +03/05/2022 13:15:43 - INFO - codeparrot_training - Step 40654: {'lr': 0.00042046250037378865, 'samples': 20815360, 'steps': 40654, 'loss/train': 1.4928358793258667} +03/05/2022 13:15:46 - INFO - codeparrot_training - Step 40655: {'lr': 0.0004204586184942423, 'samples': 20815872, 'steps': 40655, 'loss/train': 1.568943977355957} +03/05/2022 13:15:47 - INFO - codeparrot_training - Skipping example with length 79 (seq_length=1024) +03/05/2022 13:15:51 - INFO - codeparrot_training - Step 40656: {'lr': 0.00042045473653789004, 'samples': 20816384, 'steps': 40656, 'loss/train': 1.5283026695251465} +03/05/2022 13:15:54 - INFO - codeparrot_training - Step 40657: {'lr': 0.00042045085450473336, 'samples': 20816896, 'steps': 40657, 'loss/train': 1.713057279586792} +03/05/2022 13:15:56 - INFO - codeparrot_training - Skipping example with length 529 (seq_length=1024) +03/05/2022 13:16:00 - INFO - codeparrot_training - Step 40658: {'lr': 0.00042044697239477423, 'samples': 20817408, 'steps': 40658, 'loss/train': 1.8606091737747192} +03/05/2022 13:16:03 - INFO - codeparrot_training - Step 40659: {'lr': 0.00042044309020801434, 'samples': 20817920, 'steps': 40659, 'loss/train': 1.5933631658554077} +03/05/2022 13:16:04 - INFO - codeparrot_training - Skipping example with length 905 (seq_length=1024) +03/05/2022 13:16:08 - INFO - codeparrot_training - Step 40660: {'lr': 0.00042043920794445543, 'samples': 20818432, 'steps': 40660, 'loss/train': 2.512092351913452} +03/05/2022 13:16:12 - INFO - codeparrot_training - Step 40661: {'lr': 0.0004204353256040992, 'samples': 20818944, 'steps': 40661, 'loss/train': 1.8306958675384521} +03/05/2022 13:16:13 - INFO - codeparrot_training - Skipping example with length 238 (seq_length=1024) +03/05/2022 13:16:17 - INFO - codeparrot_training - Step 40662: {'lr': 0.0004204314431869475, 'samples': 20819456, 'steps': 40662, 'loss/train': 1.4826037883758545} +03/05/2022 13:16:20 - INFO - codeparrot_training - Step 40663: {'lr': 0.0004204275606930019, 'samples': 20819968, 'steps': 40663, 'loss/train': 1.8911564350128174} +03/05/2022 13:16:22 - INFO - codeparrot_training - Skipping example with length 462 (seq_length=1024) +03/05/2022 13:16:25 - INFO - codeparrot_training - Step 40664: {'lr': 0.00042042367812226446, 'samples': 20820480, 'steps': 40664, 'loss/train': 0.7729328274726868} +03/05/2022 13:16:28 - INFO - codeparrot_training - Step 40665: {'lr': 0.00042041979547473665, 'samples': 20820992, 'steps': 40665, 'loss/train': 1.8025481700897217} +03/05/2022 13:16:30 - INFO - codeparrot_training - Skipping example with length 594 (seq_length=1024) +03/05/2022 13:16:34 - INFO - codeparrot_training - Step 40666: {'lr': 0.0004204159127504202, 'samples': 20821504, 'steps': 40666, 'loss/train': 2.1365766525268555} +03/05/2022 13:16:37 - INFO - codeparrot_training - Step 40667: {'lr': 0.0004204120299493171, 'samples': 20822016, 'steps': 40667, 'loss/train': 1.489859938621521} +03/05/2022 13:16:39 - INFO - codeparrot_training - Skipping example with length 672 (seq_length=1024) +03/05/2022 13:16:42 - INFO - codeparrot_training - Step 40668: {'lr': 0.0004204081470714289, 'samples': 20822528, 'steps': 40668, 'loss/train': 1.5347779989242554} +03/05/2022 13:16:46 - INFO - codeparrot_training - Step 40669: {'lr': 0.00042040426411675747, 'samples': 20823040, 'steps': 40669, 'loss/train': 1.9578884840011597} +03/05/2022 13:16:48 - INFO - codeparrot_training - Skipping example with length 831 (seq_length=1024) +03/05/2022 13:16:51 - INFO - codeparrot_training - Step 40670: {'lr': 0.0004204003810853045, 'samples': 20823552, 'steps': 40670, 'loss/train': 2.253814935684204} +03/05/2022 13:16:54 - INFO - codeparrot_training - Step 40671: {'lr': 0.00042039649797707176, 'samples': 20824064, 'steps': 40671, 'loss/train': 2.0319814682006836} +03/05/2022 13:16:56 - INFO - codeparrot_training - Skipping example with length 71 (seq_length=1024) +03/05/2022 13:17:00 - INFO - codeparrot_training - Step 40672: {'lr': 0.0004203926147920609, 'samples': 20824576, 'steps': 40672, 'loss/train': 1.5479094982147217} +03/05/2022 13:17:03 - INFO - codeparrot_training - Step 40673: {'lr': 0.0004203887315302739, 'samples': 20825088, 'steps': 40673, 'loss/train': 1.3408563137054443} +03/05/2022 13:17:05 - INFO - codeparrot_training - Skipping example with length 887 (seq_length=1024) +03/05/2022 13:17:08 - INFO - codeparrot_training - Step 40674: {'lr': 0.0004203848481917122, 'samples': 20825600, 'steps': 40674, 'loss/train': 1.7435742616653442} +03/05/2022 13:17:11 - INFO - codeparrot_training - Step 40675: {'lr': 0.00042038096477637786, 'samples': 20826112, 'steps': 40675, 'loss/train': 2.027859687805176} +03/05/2022 13:17:13 - INFO - codeparrot_training - Skipping example with length 36 (seq_length=1024) +03/05/2022 13:17:17 - INFO - codeparrot_training - Step 40676: {'lr': 0.00042037708128427243, 'samples': 20826624, 'steps': 40676, 'loss/train': 2.1915855407714844} +03/05/2022 13:17:20 - INFO - codeparrot_training - Step 40677: {'lr': 0.00042037319771539775, 'samples': 20827136, 'steps': 40677, 'loss/train': 2.164445161819458} +03/05/2022 13:17:22 - INFO - codeparrot_training - Skipping example with length 855 (seq_length=1024) +03/05/2022 13:17:25 - INFO - codeparrot_training - Step 40678: {'lr': 0.00042036931406975547, 'samples': 20827648, 'steps': 40678, 'loss/train': 1.563879132270813} +03/05/2022 13:17:28 - INFO - codeparrot_training - Step 40679: {'lr': 0.0004203654303473474, 'samples': 20828160, 'steps': 40679, 'loss/train': 2.4409964084625244} +03/05/2022 13:17:31 - INFO - codeparrot_training - Skipping example with length 995 (seq_length=1024) +03/05/2022 13:17:34 - INFO - codeparrot_training - Step 40680: {'lr': 0.0004203615465481754, 'samples': 20828672, 'steps': 40680, 'loss/train': 2.276165008544922} +03/05/2022 13:17:37 - INFO - codeparrot_training - Step 40681: {'lr': 0.0004203576626722411, 'samples': 20829184, 'steps': 40681, 'loss/train': 2.1119842529296875} +03/05/2022 13:17:39 - INFO - codeparrot_training - Skipping example with length 311 (seq_length=1024) +03/05/2022 13:17:42 - INFO - codeparrot_training - Step 40682: {'lr': 0.00042035377871954614, 'samples': 20829696, 'steps': 40682, 'loss/train': 1.4772833585739136} +03/05/2022 13:17:45 - INFO - codeparrot_training - Step 40683: {'lr': 0.00042034989469009245, 'samples': 20830208, 'steps': 40683, 'loss/train': 1.0682467222213745} +03/05/2022 13:17:48 - INFO - codeparrot_training - Skipping example with length 392 (seq_length=1024) +03/05/2022 13:17:51 - INFO - codeparrot_training - Step 40684: {'lr': 0.0004203460105838818, 'samples': 20830720, 'steps': 40684, 'loss/train': 0.4374885559082031} +03/05/2022 13:17:54 - INFO - codeparrot_training - Step 40685: {'lr': 0.00042034212640091587, 'samples': 20831232, 'steps': 40685, 'loss/train': 1.3320356607437134} +03/05/2022 13:17:56 - INFO - codeparrot_training - Skipping example with length 412 (seq_length=1024) +03/05/2022 13:17:59 - INFO - codeparrot_training - Step 40686: {'lr': 0.00042033824214119633, 'samples': 20831744, 'steps': 40686, 'loss/train': 1.244914174079895} +03/05/2022 13:18:02 - INFO - codeparrot_training - Step 40687: {'lr': 0.00042033435780472494, 'samples': 20832256, 'steps': 40687, 'loss/train': 1.9542818069458008} +03/05/2022 13:18:04 - INFO - codeparrot_training - Skipping example with length 499 (seq_length=1024) +03/05/2022 13:18:08 - INFO - codeparrot_training - Step 40688: {'lr': 0.00042033047339150363, 'samples': 20832768, 'steps': 40688, 'loss/train': 1.0049687623977661} +03/05/2022 13:18:11 - INFO - codeparrot_training - Step 40689: {'lr': 0.00042032658890153404, 'samples': 20833280, 'steps': 40689, 'loss/train': 1.584216833114624} +03/05/2022 13:18:13 - INFO - codeparrot_training - Skipping example with length 958 (seq_length=1024) +03/05/2022 13:18:16 - INFO - codeparrot_training - Step 40690: {'lr': 0.0004203227043348179, 'samples': 20833792, 'steps': 40690, 'loss/train': 1.8469264507293701} +03/05/2022 13:18:19 - INFO - codeparrot_training - Step 40691: {'lr': 0.000420318819691357, 'samples': 20834304, 'steps': 40691, 'loss/train': 1.9487648010253906} +03/05/2022 13:18:21 - INFO - codeparrot_training - Skipping example with length 147 (seq_length=1024) +03/05/2022 13:18:25 - INFO - codeparrot_training - Step 40692: {'lr': 0.00042031493497115304, 'samples': 20834816, 'steps': 40692, 'loss/train': 1.5793026685714722} +03/05/2022 13:18:28 - INFO - codeparrot_training - Step 40693: {'lr': 0.0004203110501742078, 'samples': 20835328, 'steps': 40693, 'loss/train': 1.4378328323364258} +03/05/2022 13:18:30 - INFO - codeparrot_training - Skipping example with length 336 (seq_length=1024) +03/05/2022 13:18:33 - INFO - codeparrot_training - Step 40694: {'lr': 0.00042030716530052297, 'samples': 20835840, 'steps': 40694, 'loss/train': 0.900818407535553} +03/05/2022 13:18:36 - INFO - codeparrot_training - Step 40695: {'lr': 0.00042030328035010047, 'samples': 20836352, 'steps': 40695, 'loss/train': 1.3767000436782837} +03/05/2022 13:18:38 - INFO - codeparrot_training - Skipping example with length 939 (seq_length=1024) +03/05/2022 13:18:42 - INFO - codeparrot_training - Step 40696: {'lr': 0.0004202993953229418, 'samples': 20836864, 'steps': 40696, 'loss/train': 1.4423713684082031} +03/05/2022 13:18:45 - INFO - codeparrot_training - Step 40697: {'lr': 0.000420295510219049, 'samples': 20837376, 'steps': 40697, 'loss/train': 1.2700212001800537} +03/05/2022 13:18:47 - INFO - codeparrot_training - Skipping example with length 240 (seq_length=1024) +03/05/2022 13:18:50 - INFO - codeparrot_training - Step 40698: {'lr': 0.00042029162503842357, 'samples': 20837888, 'steps': 40698, 'loss/train': 2.078991651535034} +03/05/2022 13:18:53 - INFO - codeparrot_training - Step 40699: {'lr': 0.0004202877397810674, 'samples': 20838400, 'steps': 40699, 'loss/train': 2.2254419326782227} +03/05/2022 13:18:55 - INFO - codeparrot_training - Skipping example with length 939 (seq_length=1024) +03/05/2022 13:18:59 - INFO - codeparrot_training - Step 40700: {'lr': 0.0004202838544469822, 'samples': 20838912, 'steps': 40700, 'loss/train': 1.8732893466949463} +03/05/2022 13:19:02 - INFO - codeparrot_training - Step 40701: {'lr': 0.00042027996903616974, 'samples': 20839424, 'steps': 40701, 'loss/train': 2.293428659439087} +03/05/2022 13:19:04 - INFO - codeparrot_training - Skipping example with length 946 (seq_length=1024) +03/05/2022 13:19:07 - INFO - codeparrot_training - Step 40702: {'lr': 0.0004202760835486317, 'samples': 20839936, 'steps': 40702, 'loss/train': 1.854709506034851} +03/05/2022 13:19:10 - INFO - codeparrot_training - Step 40703: {'lr': 0.00042027219798436996, 'samples': 20840448, 'steps': 40703, 'loss/train': 2.026402711868286} +03/05/2022 13:19:12 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/05/2022 13:19:16 - INFO - codeparrot_training - Step 40704: {'lr': 0.00042026831234338614, 'samples': 20840960, 'steps': 40704, 'loss/train': 2.0766959190368652} +03/05/2022 13:19:19 - INFO - codeparrot_training - Step 40705: {'lr': 0.0004202644266256821, 'samples': 20841472, 'steps': 40705, 'loss/train': 0.8177947402000427} +03/05/2022 13:19:21 - INFO - codeparrot_training - Skipping example with length 872 (seq_length=1024) +03/05/2022 13:19:24 - INFO - codeparrot_training - Step 40706: {'lr': 0.00042026054083125943, 'samples': 20841984, 'steps': 40706, 'loss/train': 1.7729127407073975} +03/05/2022 13:19:28 - INFO - codeparrot_training - Step 40707: {'lr': 0.0004202566549601201, 'samples': 20842496, 'steps': 40707, 'loss/train': 1.4456254243850708} +03/05/2022 13:19:30 - INFO - codeparrot_training - Skipping example with length 9 (seq_length=1024) +03/05/2022 13:19:33 - INFO - codeparrot_training - Step 40708: {'lr': 0.00042025276901226573, 'samples': 20843008, 'steps': 40708, 'loss/train': 1.3970537185668945} +03/05/2022 13:19:36 - INFO - codeparrot_training - Step 40709: {'lr': 0.00042024888298769806, 'samples': 20843520, 'steps': 40709, 'loss/train': 1.4879401922225952} +03/05/2022 13:19:38 - INFO - codeparrot_training - Skipping example with length 771 (seq_length=1024) +03/05/2022 13:19:41 - INFO - codeparrot_training - Step 40710: {'lr': 0.0004202449968864188, 'samples': 20844032, 'steps': 40710, 'loss/train': 0.9619199633598328} +03/05/2022 13:19:45 - INFO - codeparrot_training - Step 40711: {'lr': 0.00042024111070842985, 'samples': 20844544, 'steps': 40711, 'loss/train': 2.071129560470581} +03/05/2022 13:19:47 - INFO - codeparrot_training - Skipping example with length 632 (seq_length=1024) +03/05/2022 13:19:50 - INFO - codeparrot_training - Step 40712: {'lr': 0.0004202372244537329, 'samples': 20845056, 'steps': 40712, 'loss/train': 1.5641220808029175} +03/05/2022 13:19:53 - INFO - codeparrot_training - Step 40713: {'lr': 0.00042023333812232967, 'samples': 20845568, 'steps': 40713, 'loss/train': 1.5630959272384644} +03/05/2022 13:19:55 - INFO - codeparrot_training - Skipping example with length 182 (seq_length=1024) +03/05/2022 13:19:58 - INFO - codeparrot_training - Step 40714: {'lr': 0.0004202294517142219, 'samples': 20846080, 'steps': 40714, 'loss/train': 0.6319063901901245} +03/05/2022 13:20:02 - INFO - codeparrot_training - Step 40715: {'lr': 0.0004202255652294114, 'samples': 20846592, 'steps': 40715, 'loss/train': 1.566847801208496} +03/05/2022 13:20:03 - INFO - codeparrot_training - Skipping example with length 212 (seq_length=1024) +03/05/2022 13:20:07 - INFO - codeparrot_training - Step 40716: {'lr': 0.00042022167866789985, 'samples': 20847104, 'steps': 40716, 'loss/train': 1.939487338066101} +03/05/2022 13:20:10 - INFO - codeparrot_training - Step 40717: {'lr': 0.00042021779202968903, 'samples': 20847616, 'steps': 40717, 'loss/train': 2.013498544692993} +03/05/2022 13:20:12 - INFO - codeparrot_training - Skipping example with length 704 (seq_length=1024) +03/05/2022 13:20:15 - INFO - codeparrot_training - Step 40718: {'lr': 0.0004202139053147808, 'samples': 20848128, 'steps': 40718, 'loss/train': 1.5227608680725098} +03/05/2022 13:20:18 - INFO - codeparrot_training - Step 40719: {'lr': 0.0004202100185231767, 'samples': 20848640, 'steps': 40719, 'loss/train': 2.346557855606079} +03/05/2022 13:20:20 - INFO - codeparrot_training - Skipping example with length 208 (seq_length=1024) +03/05/2022 13:20:24 - INFO - codeparrot_training - Step 40720: {'lr': 0.00042020613165487863, 'samples': 20849152, 'steps': 40720, 'loss/train': 0.7454448938369751} +03/05/2022 13:20:27 - INFO - codeparrot_training - Step 40721: {'lr': 0.0004202022447098883, 'samples': 20849664, 'steps': 40721, 'loss/train': 1.6782424449920654} +03/05/2022 13:20:29 - INFO - codeparrot_training - Skipping example with length 129 (seq_length=1024) +03/05/2022 13:20:32 - INFO - codeparrot_training - Step 40722: {'lr': 0.00042019835768820744, 'samples': 20850176, 'steps': 40722, 'loss/train': 1.8981515169143677} +03/05/2022 13:20:36 - INFO - codeparrot_training - Step 40723: {'lr': 0.00042019447058983786, 'samples': 20850688, 'steps': 40723, 'loss/train': 2.6000986099243164} +03/05/2022 13:20:37 - INFO - codeparrot_training - Skipping example with length 155 (seq_length=1024) +03/05/2022 13:20:41 - INFO - codeparrot_training - Step 40724: {'lr': 0.0004201905834147813, 'samples': 20851200, 'steps': 40724, 'loss/train': 1.7059260606765747} +03/05/2022 13:20:44 - INFO - codeparrot_training - Step 40725: {'lr': 0.0004201866961630395, 'samples': 20851712, 'steps': 40725, 'loss/train': 1.3375991582870483} +03/05/2022 13:20:45 - INFO - codeparrot_training - Skipping example with length 74 (seq_length=1024) +03/05/2022 13:20:49 - INFO - codeparrot_training - Step 40726: {'lr': 0.00042018280883461415, 'samples': 20852224, 'steps': 40726, 'loss/train': 1.8777796030044556} +03/05/2022 13:20:52 - INFO - codeparrot_training - Step 40727: {'lr': 0.000420178921429507, 'samples': 20852736, 'steps': 40727, 'loss/train': 1.6426701545715332} +03/05/2022 13:20:54 - INFO - codeparrot_training - Skipping example with length 539 (seq_length=1024) +03/05/2022 13:20:58 - INFO - codeparrot_training - Step 40728: {'lr': 0.00042017503394771997, 'samples': 20853248, 'steps': 40728, 'loss/train': 2.0072686672210693} +03/05/2022 13:21:01 - INFO - codeparrot_training - Step 40729: {'lr': 0.00042017114638925456, 'samples': 20853760, 'steps': 40729, 'loss/train': 1.1846346855163574} +03/05/2022 13:21:02 - INFO - codeparrot_training - Skipping example with length 855 (seq_length=1024) +03/05/2022 13:21:06 - INFO - codeparrot_training - Step 40730: {'lr': 0.00042016725875411274, 'samples': 20854272, 'steps': 40730, 'loss/train': 2.238532781600952} +03/05/2022 13:21:09 - INFO - codeparrot_training - Step 40731: {'lr': 0.0004201633710422962, 'samples': 20854784, 'steps': 40731, 'loss/train': 2.266195774078369} +03/05/2022 13:21:11 - INFO - codeparrot_training - Skipping example with length 542 (seq_length=1024) +03/05/2022 13:21:15 - INFO - codeparrot_training - Step 40732: {'lr': 0.0004201594832538067, 'samples': 20855296, 'steps': 40732, 'loss/train': 1.3179876804351807} +03/05/2022 13:21:18 - INFO - codeparrot_training - Step 40733: {'lr': 0.0004201555953886459, 'samples': 20855808, 'steps': 40733, 'loss/train': 1.3553972244262695} +03/05/2022 13:21:19 - INFO - codeparrot_training - Skipping example with length 258 (seq_length=1024) +03/05/2022 13:21:23 - INFO - codeparrot_training - Step 40734: {'lr': 0.00042015170744681566, 'samples': 20856320, 'steps': 40734, 'loss/train': 1.3523272275924683} +03/05/2022 13:21:26 - INFO - codeparrot_training - Step 40735: {'lr': 0.00042014781942831757, 'samples': 20856832, 'steps': 40735, 'loss/train': 0.9460923671722412} +03/05/2022 13:21:27 - INFO - codeparrot_training - Skipping example with length 980 (seq_length=1024) +03/05/2022 13:21:32 - INFO - codeparrot_training - Step 40736: {'lr': 0.00042014393133315366, 'samples': 20857344, 'steps': 40736, 'loss/train': 2.0228192806243896} +03/05/2022 13:21:35 - INFO - codeparrot_training - Step 40737: {'lr': 0.00042014004316132537, 'samples': 20857856, 'steps': 40737, 'loss/train': 2.1863186359405518} +03/05/2022 13:21:36 - INFO - codeparrot_training - Skipping example with length 940 (seq_length=1024) +03/05/2022 13:21:40 - INFO - codeparrot_training - Step 40738: {'lr': 0.0004201361549128347, 'samples': 20858368, 'steps': 40738, 'loss/train': 2.541367530822754} +03/05/2022 13:21:43 - INFO - codeparrot_training - Step 40739: {'lr': 0.00042013226658768333, 'samples': 20858880, 'steps': 40739, 'loss/train': 1.6246156692504883} +03/05/2022 13:21:45 - INFO - codeparrot_training - Skipping example with length 400 (seq_length=1024) +03/05/2022 13:21:49 - INFO - codeparrot_training - Step 40740: {'lr': 0.0004201283781858729, 'samples': 20859392, 'steps': 40740, 'loss/train': 3.0495142936706543} +03/05/2022 13:21:52 - INFO - codeparrot_training - Step 40741: {'lr': 0.00042012448970740523, 'samples': 20859904, 'steps': 40741, 'loss/train': 1.0537464618682861} +03/05/2022 13:21:53 - INFO - codeparrot_training - Skipping example with length 596 (seq_length=1024) +03/05/2022 13:21:57 - INFO - codeparrot_training - Step 40742: {'lr': 0.00042012060115228215, 'samples': 20860416, 'steps': 40742, 'loss/train': 1.5998913049697876} +03/05/2022 13:22:00 - INFO - codeparrot_training - Step 40743: {'lr': 0.0004201167125205054, 'samples': 20860928, 'steps': 40743, 'loss/train': 1.635398030281067} +03/05/2022 13:22:02 - INFO - codeparrot_training - Skipping example with length 741 (seq_length=1024) +03/05/2022 13:22:06 - INFO - codeparrot_training - Step 40744: {'lr': 0.0004201128238120766, 'samples': 20861440, 'steps': 40744, 'loss/train': 1.9442859888076782} +03/05/2022 13:22:09 - INFO - codeparrot_training - Step 40745: {'lr': 0.00042010893502699765, 'samples': 20861952, 'steps': 40745, 'loss/train': 1.8034838438034058} +03/05/2022 13:22:10 - INFO - codeparrot_training - Skipping example with length 766 (seq_length=1024) +03/05/2022 13:22:14 - INFO - codeparrot_training - Step 40746: {'lr': 0.0004201050461652702, 'samples': 20862464, 'steps': 40746, 'loss/train': 1.6870543956756592} +03/05/2022 13:22:17 - INFO - codeparrot_training - Step 40747: {'lr': 0.00042010115722689603, 'samples': 20862976, 'steps': 40747, 'loss/train': 1.9916573762893677} +03/05/2022 13:22:19 - INFO - codeparrot_training - Skipping example with length 550 (seq_length=1024) +03/05/2022 13:22:23 - INFO - codeparrot_training - Step 40748: {'lr': 0.0004200972682118769, 'samples': 20863488, 'steps': 40748, 'loss/train': 2.63049578666687} +03/05/2022 13:22:26 - INFO - codeparrot_training - Step 40749: {'lr': 0.0004200933791202146, 'samples': 20864000, 'steps': 40749, 'loss/train': 1.248824119567871} +03/05/2022 13:22:27 - INFO - codeparrot_training - Skipping example with length 249 (seq_length=1024) +03/05/2022 13:22:31 - INFO - codeparrot_training - Step 40750: {'lr': 0.0004200894899519108, 'samples': 20864512, 'steps': 40750, 'loss/train': 1.7309353351593018} +03/05/2022 13:22:34 - INFO - codeparrot_training - Step 40751: {'lr': 0.00042008560070696735, 'samples': 20865024, 'steps': 40751, 'loss/train': 2.0240423679351807} +03/05/2022 13:22:36 - INFO - codeparrot_training - Skipping example with length 574 (seq_length=1024) +03/05/2022 13:22:40 - INFO - codeparrot_training - Step 40752: {'lr': 0.000420081711385386, 'samples': 20865536, 'steps': 40752, 'loss/train': 1.5327491760253906} +03/05/2022 13:22:43 - INFO - codeparrot_training - Step 40753: {'lr': 0.00042007782198716836, 'samples': 20866048, 'steps': 40753, 'loss/train': 1.6291093826293945} +03/05/2022 13:22:44 - INFO - codeparrot_training - Skipping example with length 616 (seq_length=1024) +03/05/2022 13:22:48 - INFO - codeparrot_training - Step 40754: {'lr': 0.0004200739325123163, 'samples': 20866560, 'steps': 40754, 'loss/train': 1.6765512228012085} +03/05/2022 13:22:51 - INFO - codeparrot_training - Step 40755: {'lr': 0.0004200700429608315, 'samples': 20867072, 'steps': 40755, 'loss/train': 0.6853744983673096} +03/05/2022 13:22:53 - INFO - codeparrot_training - Skipping example with length 326 (seq_length=1024) +03/05/2022 13:22:57 - INFO - codeparrot_training - Step 40756: {'lr': 0.00042006615333271585, 'samples': 20867584, 'steps': 40756, 'loss/train': 2.385537624359131} +03/05/2022 13:23:00 - INFO - codeparrot_training - Step 40757: {'lr': 0.000420062263627971, 'samples': 20868096, 'steps': 40757, 'loss/train': 1.3453363180160522} +03/05/2022 13:23:01 - INFO - codeparrot_training - Skipping example with length 36 (seq_length=1024) +03/05/2022 13:23:05 - INFO - codeparrot_training - Step 40758: {'lr': 0.0004200583738465987, 'samples': 20868608, 'steps': 40758, 'loss/train': 2.2151548862457275} +03/05/2022 13:23:08 - INFO - codeparrot_training - Step 40759: {'lr': 0.00042005448398860077, 'samples': 20869120, 'steps': 40759, 'loss/train': 2.1138627529144287} +03/05/2022 13:23:10 - INFO - codeparrot_training - Skipping example with length 120 (seq_length=1024) +03/05/2022 13:23:13 - INFO - codeparrot_training - Step 40760: {'lr': 0.00042005059405397885, 'samples': 20869632, 'steps': 40760, 'loss/train': 3.924450159072876} +03/05/2022 13:23:17 - INFO - codeparrot_training - Step 40761: {'lr': 0.00042004670404273474, 'samples': 20870144, 'steps': 40761, 'loss/train': 4.288048267364502} +03/05/2022 13:23:18 - INFO - codeparrot_training - Skipping example with length 422 (seq_length=1024) +03/05/2022 13:23:22 - INFO - codeparrot_training - Step 40762: {'lr': 0.0004200428139548703, 'samples': 20870656, 'steps': 40762, 'loss/train': 0.6412728428840637} +03/05/2022 13:23:25 - INFO - codeparrot_training - Step 40763: {'lr': 0.0004200389237903871, 'samples': 20871168, 'steps': 40763, 'loss/train': 1.846627116203308} +03/05/2022 13:23:26 - INFO - codeparrot_training - Skipping example with length 792 (seq_length=1024) +03/05/2022 13:23:30 - INFO - codeparrot_training - Step 40764: {'lr': 0.000420035033549287, 'samples': 20871680, 'steps': 40764, 'loss/train': 1.468336820602417} +03/05/2022 13:23:34 - INFO - codeparrot_training - Step 40765: {'lr': 0.0004200311432315718, 'samples': 20872192, 'steps': 40765, 'loss/train': 1.7389856576919556} +03/05/2022 13:23:35 - INFO - codeparrot_training - Skipping example with length 8 (seq_length=1024) +03/05/2022 13:23:39 - INFO - codeparrot_training - Step 40766: {'lr': 0.0004200272528372432, 'samples': 20872704, 'steps': 40766, 'loss/train': 0.35956525802612305} +03/05/2022 13:23:42 - INFO - codeparrot_training - Step 40767: {'lr': 0.0004200233623663028, 'samples': 20873216, 'steps': 40767, 'loss/train': 1.8002536296844482} +03/05/2022 13:23:44 - INFO - codeparrot_training - Skipping example with length 485 (seq_length=1024) +03/05/2022 13:23:48 - INFO - codeparrot_training - Step 40768: {'lr': 0.0004200194718187527, 'samples': 20873728, 'steps': 40768, 'loss/train': 1.7635325193405151} +03/05/2022 13:23:51 - INFO - codeparrot_training - Step 40769: {'lr': 0.0004200155811945943, 'samples': 20874240, 'steps': 40769, 'loss/train': 1.8057714700698853} +03/05/2022 13:23:52 - INFO - codeparrot_training - Skipping example with length 624 (seq_length=1024) +03/05/2022 13:23:56 - INFO - codeparrot_training - Step 40770: {'lr': 0.0004200116904938295, 'samples': 20874752, 'steps': 40770, 'loss/train': 2.0036380290985107} +03/05/2022 13:23:59 - INFO - codeparrot_training - Step 40771: {'lr': 0.00042000779971646007, 'samples': 20875264, 'steps': 40771, 'loss/train': 1.1127572059631348} +03/05/2022 13:24:00 - INFO - codeparrot_training - Skipping example with length 353 (seq_length=1024) +03/05/2022 13:24:05 - INFO - codeparrot_training - Step 40772: {'lr': 0.00042000390886248783, 'samples': 20875776, 'steps': 40772, 'loss/train': 1.6849889755249023} +03/05/2022 13:24:08 - INFO - codeparrot_training - Step 40773: {'lr': 0.0004200000179319144, 'samples': 20876288, 'steps': 40773, 'loss/train': 0.6212167739868164} +03/05/2022 13:24:09 - INFO - codeparrot_training - Skipping example with length 408 (seq_length=1024) +03/05/2022 13:24:13 - INFO - codeparrot_training - Step 40774: {'lr': 0.0004199961269247416, 'samples': 20876800, 'steps': 40774, 'loss/train': 1.3671700954437256} +03/05/2022 13:24:16 - INFO - codeparrot_training - Step 40775: {'lr': 0.0004199922358409711, 'samples': 20877312, 'steps': 40775, 'loss/train': 1.6835066080093384} +03/05/2022 13:24:17 - INFO - codeparrot_training - Skipping example with length 941 (seq_length=1024) +03/05/2022 13:24:22 - INFO - codeparrot_training - Step 40776: {'lr': 0.0004199883446806048, 'samples': 20877824, 'steps': 40776, 'loss/train': 2.938542366027832} +03/05/2022 13:24:25 - INFO - codeparrot_training - Step 40777: {'lr': 0.0004199844534436443, 'samples': 20878336, 'steps': 40777, 'loss/train': 1.2991214990615845} +03/05/2022 13:24:26 - INFO - codeparrot_training - Skipping example with length 475 (seq_length=1024) +03/05/2022 13:24:30 - INFO - codeparrot_training - Step 40778: {'lr': 0.0004199805621300915, 'samples': 20878848, 'steps': 40778, 'loss/train': 2.276879072189331} +03/05/2022 13:24:34 - INFO - codeparrot_training - Step 40779: {'lr': 0.0004199766707399481, 'samples': 20879360, 'steps': 40779, 'loss/train': 1.5247468948364258} +03/05/2022 13:24:35 - INFO - codeparrot_training - Skipping example with length 608 (seq_length=1024) +03/05/2022 13:24:39 - INFO - codeparrot_training - Step 40780: {'lr': 0.0004199727792732158, 'samples': 20879872, 'steps': 40780, 'loss/train': 1.4557139873504639} +03/05/2022 13:24:42 - INFO - codeparrot_training - Step 40781: {'lr': 0.0004199688877298964, 'samples': 20880384, 'steps': 40781, 'loss/train': 1.4280003309249878} +03/05/2022 13:24:44 - INFO - codeparrot_training - Skipping example with length 257 (seq_length=1024) +03/05/2022 13:24:48 - INFO - codeparrot_training - Step 40782: {'lr': 0.00041996499610999163, 'samples': 20880896, 'steps': 40782, 'loss/train': 1.806489109992981} +03/05/2022 13:24:51 - INFO - codeparrot_training - Step 40783: {'lr': 0.00041996110441350323, 'samples': 20881408, 'steps': 40783, 'loss/train': 1.9531545639038086} +03/05/2022 13:24:53 - INFO - codeparrot_training - Skipping example with length 820 (seq_length=1024) +03/05/2022 13:24:56 - INFO - codeparrot_training - Step 40784: {'lr': 0.000419957212640433, 'samples': 20881920, 'steps': 40784, 'loss/train': 1.5148506164550781} +03/05/2022 13:24:59 - INFO - codeparrot_training - Step 40785: {'lr': 0.0004199533207907827, 'samples': 20882432, 'steps': 40785, 'loss/train': 1.8842509984970093} +03/05/2022 13:25:02 - INFO - codeparrot_training - Skipping example with length 348 (seq_length=1024) +03/05/2022 13:25:05 - INFO - codeparrot_training - Step 40786: {'lr': 0.00041994942886455403, 'samples': 20882944, 'steps': 40786, 'loss/train': 1.8247430324554443} +03/05/2022 13:25:08 - INFO - codeparrot_training - Step 40787: {'lr': 0.00041994553686174876, 'samples': 20883456, 'steps': 40787, 'loss/train': 2.536978244781494} +03/05/2022 13:25:10 - INFO - codeparrot_training - Skipping example with length 995 (seq_length=1024) +03/05/2022 13:25:13 - INFO - codeparrot_training - Step 40788: {'lr': 0.0004199416447823686, 'samples': 20883968, 'steps': 40788, 'loss/train': 1.6586405038833618} +03/05/2022 13:25:16 - INFO - codeparrot_training - Step 40789: {'lr': 0.0004199377526264154, 'samples': 20884480, 'steps': 40789, 'loss/train': 1.530704379081726} +03/05/2022 13:25:19 - INFO - codeparrot_training - Skipping example with length 855 (seq_length=1024) +03/05/2022 13:25:22 - INFO - codeparrot_training - Step 40790: {'lr': 0.00041993386039389095, 'samples': 20884992, 'steps': 40790, 'loss/train': 2.4262707233428955} +03/05/2022 13:25:25 - INFO - codeparrot_training - Step 40791: {'lr': 0.0004199299680847969, 'samples': 20885504, 'steps': 40791, 'loss/train': 1.5122448205947876} +03/05/2022 13:25:28 - INFO - codeparrot_training - Step 40792: {'lr': 0.000419926075699135, 'samples': 20886016, 'steps': 40792, 'loss/train': 1.9260776042938232} +03/05/2022 13:25:28 - INFO - codeparrot_training - Skipping example with length 618 (seq_length=1024) +03/05/2022 13:25:33 - INFO - codeparrot_training - Step 40793: {'lr': 0.000419922183236907, 'samples': 20886528, 'steps': 40793, 'loss/train': 3.0515530109405518} +03/05/2022 13:25:37 - INFO - codeparrot_training - Step 40794: {'lr': 0.0004199182906981147, 'samples': 20887040, 'steps': 40794, 'loss/train': 1.0696825981140137} +03/05/2022 13:25:37 - INFO - codeparrot_training - Skipping example with length 479 (seq_length=1024) +03/05/2022 13:25:42 - INFO - codeparrot_training - Step 40795: {'lr': 0.00041991439808275986, 'samples': 20887552, 'steps': 40795, 'loss/train': 1.5575226545333862} +03/05/2022 13:25:45 - INFO - codeparrot_training - Step 40796: {'lr': 0.0004199105053908442, 'samples': 20888064, 'steps': 40796, 'loss/train': 2.042384386062622} +03/05/2022 13:25:45 - INFO - codeparrot_training - Skipping example with length 597 (seq_length=1024) +03/05/2022 13:25:51 - INFO - codeparrot_training - Step 40797: {'lr': 0.0004199066126223695, 'samples': 20888576, 'steps': 40797, 'loss/train': 0.7912163734436035} +03/05/2022 13:25:54 - INFO - codeparrot_training - Step 40798: {'lr': 0.0004199027197773375, 'samples': 20889088, 'steps': 40798, 'loss/train': 1.2568210363388062} +03/05/2022 13:25:54 - INFO - codeparrot_training - Skipping example with length 245 (seq_length=1024) +03/05/2022 13:25:59 - INFO - codeparrot_training - Step 40799: {'lr': 0.00041989882685575, 'samples': 20889600, 'steps': 40799, 'loss/train': 2.3323352336883545} +03/05/2022 13:26:02 - INFO - codeparrot_training - Step 40800: {'lr': 0.0004198949338576086, 'samples': 20890112, 'steps': 40800, 'loss/train': 1.5767987966537476} +03/05/2022 13:26:03 - INFO - codeparrot_training - Skipping example with length 686 (seq_length=1024) +03/05/2022 13:26:07 - INFO - codeparrot_training - Step 40801: {'lr': 0.0004198910407829152, 'samples': 20890624, 'steps': 40801, 'loss/train': 1.5930017232894897} +03/05/2022 13:26:11 - INFO - codeparrot_training - Step 40802: {'lr': 0.00041988714763167156, 'samples': 20891136, 'steps': 40802, 'loss/train': 0.46456414461135864} +03/05/2022 13:26:11 - INFO - codeparrot_training - Skipping example with length 838 (seq_length=1024) +03/05/2022 13:26:16 - INFO - codeparrot_training - Step 40803: {'lr': 0.00041988325440387944, 'samples': 20891648, 'steps': 40803, 'loss/train': 1.6916217803955078} +03/05/2022 13:26:19 - INFO - codeparrot_training - Step 40804: {'lr': 0.00041987936109954047, 'samples': 20892160, 'steps': 40804, 'loss/train': 1.5018059015274048} +03/05/2022 13:26:20 - INFO - codeparrot_training - Skipping example with length 443 (seq_length=1024) +03/05/2022 13:26:25 - INFO - codeparrot_training - Step 40805: {'lr': 0.0004198754677186565, 'samples': 20892672, 'steps': 40805, 'loss/train': 2.2424561977386475} +03/05/2022 13:26:28 - INFO - codeparrot_training - Step 40806: {'lr': 0.0004198715742612292, 'samples': 20893184, 'steps': 40806, 'loss/train': 0.9312177300453186} +03/05/2022 13:26:29 - INFO - codeparrot_training - Skipping example with length 645 (seq_length=1024) +03/05/2022 13:26:33 - INFO - codeparrot_training - Step 40807: {'lr': 0.0004198676807272605, 'samples': 20893696, 'steps': 40807, 'loss/train': 1.2940270900726318} +03/05/2022 13:26:36 - INFO - codeparrot_training - Step 40808: {'lr': 0.000419863787116752, 'samples': 20894208, 'steps': 40808, 'loss/train': 1.3794755935668945} +03/05/2022 13:26:38 - INFO - codeparrot_training - Skipping example with length 842 (seq_length=1024) +03/05/2022 13:26:42 - INFO - codeparrot_training - Step 40809: {'lr': 0.0004198598934297055, 'samples': 20894720, 'steps': 40809, 'loss/train': 1.5394258499145508} +03/05/2022 13:26:45 - INFO - codeparrot_training - Step 40810: {'lr': 0.00041985599966612273, 'samples': 20895232, 'steps': 40810, 'loss/train': 0.7942506670951843} +03/05/2022 13:26:46 - INFO - codeparrot_training - Skipping example with length 640 (seq_length=1024) +03/05/2022 13:26:50 - INFO - codeparrot_training - Step 40811: {'lr': 0.0004198521058260055, 'samples': 20895744, 'steps': 40811, 'loss/train': 0.9397141337394714} +03/05/2022 13:26:53 - INFO - codeparrot_training - Step 40812: {'lr': 0.0004198482119093555, 'samples': 20896256, 'steps': 40812, 'loss/train': 1.5379940271377563} +03/05/2022 13:26:55 - INFO - codeparrot_training - Skipping example with length 293 (seq_length=1024) +03/05/2022 13:26:58 - INFO - codeparrot_training - Step 40813: {'lr': 0.00041984431791617456, 'samples': 20896768, 'steps': 40813, 'loss/train': 1.4108062982559204} +03/05/2022 13:27:02 - INFO - codeparrot_training - Step 40814: {'lr': 0.0004198404238464644, 'samples': 20897280, 'steps': 40814, 'loss/train': 1.377322793006897} +03/05/2022 13:27:03 - INFO - codeparrot_training - Skipping example with length 765 (seq_length=1024) +03/05/2022 13:27:07 - INFO - codeparrot_training - Step 40815: {'lr': 0.0004198365297002267, 'samples': 20897792, 'steps': 40815, 'loss/train': 1.1902726888656616} +03/05/2022 13:27:10 - INFO - codeparrot_training - Step 40816: {'lr': 0.0004198326354774633, 'samples': 20898304, 'steps': 40816, 'loss/train': 1.992532730102539} +03/05/2022 13:27:12 - INFO - codeparrot_training - Skipping example with length 978 (seq_length=1024) +03/05/2022 13:27:16 - INFO - codeparrot_training - Step 40817: {'lr': 0.00041982874117817593, 'samples': 20898816, 'steps': 40817, 'loss/train': 1.8741735219955444} +03/05/2022 13:27:19 - INFO - codeparrot_training - Step 40818: {'lr': 0.00041982484680236636, 'samples': 20899328, 'steps': 40818, 'loss/train': 1.2172486782073975} +03/05/2022 13:27:20 - INFO - codeparrot_training - Skipping example with length 508 (seq_length=1024) +03/05/2022 13:27:24 - INFO - codeparrot_training - Step 40819: {'lr': 0.00041982095235003634, 'samples': 20899840, 'steps': 40819, 'loss/train': 1.6305570602416992} +03/05/2022 13:27:27 - INFO - codeparrot_training - Step 40820: {'lr': 0.0004198170578211877, 'samples': 20900352, 'steps': 40820, 'loss/train': 2.5386273860931396} +03/05/2022 13:27:28 - INFO - codeparrot_training - Skipping example with length 12 (seq_length=1024) +03/05/2022 13:27:32 - INFO - codeparrot_training - Step 40821: {'lr': 0.000419813163215822, 'samples': 20900864, 'steps': 40821, 'loss/train': 1.9734106063842773} +03/05/2022 13:27:36 - INFO - codeparrot_training - Step 40822: {'lr': 0.0004198092685339411, 'samples': 20901376, 'steps': 40822, 'loss/train': 1.1375445127487183} +03/05/2022 13:27:36 - INFO - codeparrot_training - Skipping example with length 545 (seq_length=1024) +03/05/2022 13:27:41 - INFO - codeparrot_training - Step 40823: {'lr': 0.00041980537377554685, 'samples': 20901888, 'steps': 40823, 'loss/train': 2.106945753097534} +03/05/2022 13:27:44 - INFO - codeparrot_training - Step 40824: {'lr': 0.00041980147894064086, 'samples': 20902400, 'steps': 40824, 'loss/train': 2.4027252197265625} +03/05/2022 13:27:45 - INFO - codeparrot_training - Skipping example with length 512 (seq_length=1024) +03/05/2022 13:27:49 - INFO - codeparrot_training - Step 40825: {'lr': 0.00041979758402922496, 'samples': 20902912, 'steps': 40825, 'loss/train': 1.5176688432693481} +03/05/2022 13:27:53 - INFO - codeparrot_training - Step 40826: {'lr': 0.00041979368904130086, 'samples': 20903424, 'steps': 40826, 'loss/train': 1.5179955959320068} +03/05/2022 13:27:53 - INFO - codeparrot_training - Skipping example with length 57 (seq_length=1024) +03/05/2022 13:27:58 - INFO - codeparrot_training - Step 40827: {'lr': 0.00041978979397687047, 'samples': 20903936, 'steps': 40827, 'loss/train': 0.508154571056366} +03/05/2022 13:28:01 - INFO - codeparrot_training - Step 40828: {'lr': 0.00041978589883593525, 'samples': 20904448, 'steps': 40828, 'loss/train': 1.7374714612960815} +03/05/2022 13:28:01 - INFO - codeparrot_training - Skipping example with length 600 (seq_length=1024) +03/05/2022 13:28:06 - INFO - codeparrot_training - Step 40829: {'lr': 0.0004197820036184972, 'samples': 20904960, 'steps': 40829, 'loss/train': 0.4543156921863556} +03/05/2022 13:28:10 - INFO - codeparrot_training - Step 40830: {'lr': 0.000419778108324558, 'samples': 20905472, 'steps': 40830, 'loss/train': 1.666825532913208} +03/05/2022 13:28:10 - INFO - codeparrot_training - Skipping example with length 512 (seq_length=1024) +03/05/2022 13:28:15 - INFO - codeparrot_training - Step 40831: {'lr': 0.00041977421295411944, 'samples': 20905984, 'steps': 40831, 'loss/train': 1.6334055662155151} +03/05/2022 13:28:18 - INFO - codeparrot_training - Step 40832: {'lr': 0.00041977031750718317, 'samples': 20906496, 'steps': 40832, 'loss/train': 1.9684609174728394} +03/05/2022 13:28:18 - INFO - codeparrot_training - Skipping example with length 824 (seq_length=1024) +03/05/2022 13:28:24 - INFO - codeparrot_training - Step 40833: {'lr': 0.000419766421983751, 'samples': 20907008, 'steps': 40833, 'loss/train': 1.2559607028961182} +03/05/2022 13:28:27 - INFO - codeparrot_training - Step 40834: {'lr': 0.00041976252638382483, 'samples': 20907520, 'steps': 40834, 'loss/train': 1.8854939937591553} +03/05/2022 13:28:27 - INFO - codeparrot_training - Skipping example with length 408 (seq_length=1024) +03/05/2022 13:28:32 - INFO - codeparrot_training - Step 40835: {'lr': 0.00041975863070740617, 'samples': 20908032, 'steps': 40835, 'loss/train': 1.1095410585403442} +03/05/2022 13:28:35 - INFO - codeparrot_training - Step 40836: {'lr': 0.0004197547349544969, 'samples': 20908544, 'steps': 40836, 'loss/train': 1.7552261352539062} +03/05/2022 13:28:35 - INFO - codeparrot_training - Skipping example with length 412 (seq_length=1024) +03/05/2022 13:28:40 - INFO - codeparrot_training - Step 40837: {'lr': 0.0004197508391250988, 'samples': 20909056, 'steps': 40837, 'loss/train': 1.7714289426803589} +03/05/2022 13:28:44 - INFO - codeparrot_training - Step 40838: {'lr': 0.0004197469432192136, 'samples': 20909568, 'steps': 40838, 'loss/train': 1.4921175241470337} +03/05/2022 13:28:44 - INFO - codeparrot_training - Skipping example with length 432 (seq_length=1024) +03/05/2022 13:28:49 - INFO - codeparrot_training - Step 40839: {'lr': 0.000419743047236843, 'samples': 20910080, 'steps': 40839, 'loss/train': 2.185542583465576} +03/05/2022 13:28:52 - INFO - codeparrot_training - Step 40840: {'lr': 0.00041973915117798883, 'samples': 20910592, 'steps': 40840, 'loss/train': 0.8426290154457092} +03/05/2022 13:28:52 - INFO - codeparrot_training - Skipping example with length 835 (seq_length=1024) +03/05/2022 13:28:57 - INFO - codeparrot_training - Step 40841: {'lr': 0.0004197352550426528, 'samples': 20911104, 'steps': 40841, 'loss/train': 0.1514931321144104} +03/05/2022 13:29:00 - INFO - codeparrot_training - Skipping example with length 673 (seq_length=1024) +03/05/2022 13:29:03 - INFO - codeparrot_training - Step 40842: {'lr': 0.0004197313588308367, 'samples': 20911616, 'steps': 40842, 'loss/train': 1.7960652112960815} +03/05/2022 13:29:06 - INFO - codeparrot_training - Step 40843: {'lr': 0.0004197274625425423, 'samples': 20912128, 'steps': 40843, 'loss/train': 2.092384099960327} +03/05/2022 13:29:09 - INFO - codeparrot_training - Step 40844: {'lr': 0.0004197235661777713, 'samples': 20912640, 'steps': 40844, 'loss/train': 1.8192914724349976} +03/05/2022 13:29:09 - INFO - codeparrot_training - Skipping example with length 57 (seq_length=1024) +03/05/2022 13:29:14 - INFO - codeparrot_training - Step 40845: {'lr': 0.00041971966973652545, 'samples': 20913152, 'steps': 40845, 'loss/train': 1.556200385093689} +03/05/2022 13:29:17 - INFO - codeparrot_training - Skipping example with length 270 (seq_length=1024) +03/05/2022 13:29:20 - INFO - codeparrot_training - Step 40846: {'lr': 0.00041971577321880656, 'samples': 20913664, 'steps': 40846, 'loss/train': 1.3775132894515991} +03/05/2022 13:29:23 - INFO - codeparrot_training - Step 40847: {'lr': 0.00041971187662461634, 'samples': 20914176, 'steps': 40847, 'loss/train': 1.4390296936035156} +03/05/2022 13:29:25 - INFO - codeparrot_training - Skipping example with length 983 (seq_length=1024) +03/05/2022 13:29:28 - INFO - codeparrot_training - Step 40848: {'lr': 0.0004197079799539566, 'samples': 20914688, 'steps': 40848, 'loss/train': 1.519209861755371} +03/05/2022 13:29:31 - INFO - codeparrot_training - Step 40849: {'lr': 0.0004197040832068291, 'samples': 20915200, 'steps': 40849, 'loss/train': 2.2003796100616455} +03/05/2022 13:29:34 - INFO - codeparrot_training - Skipping example with length 488 (seq_length=1024) +03/05/2022 13:29:37 - INFO - codeparrot_training - Step 40850: {'lr': 0.00041970018638323546, 'samples': 20915712, 'steps': 40850, 'loss/train': 2.002692937850952} +03/05/2022 13:29:40 - INFO - codeparrot_training - Step 40851: {'lr': 0.00041969628948317756, 'samples': 20916224, 'steps': 40851, 'loss/train': 2.022545337677002} +03/05/2022 13:29:42 - INFO - codeparrot_training - Skipping example with length 884 (seq_length=1024) +03/05/2022 13:29:45 - INFO - codeparrot_training - Step 40852: {'lr': 0.00041969239250665716, 'samples': 20916736, 'steps': 40852, 'loss/train': 1.9416857957839966} +03/05/2022 13:29:48 - INFO - codeparrot_training - Step 40853: {'lr': 0.000419688495453676, 'samples': 20917248, 'steps': 40853, 'loss/train': 2.34427809715271} +03/05/2022 13:29:50 - INFO - codeparrot_training - Skipping example with length 175 (seq_length=1024) +03/05/2022 13:29:54 - INFO - codeparrot_training - Step 40854: {'lr': 0.0004196845983242358, 'samples': 20917760, 'steps': 40854, 'loss/train': 2.160083532333374} +03/05/2022 13:29:57 - INFO - codeparrot_training - Step 40855: {'lr': 0.0004196807011183383, 'samples': 20918272, 'steps': 40855, 'loss/train': 1.9553534984588623} +03/05/2022 13:29:59 - INFO - codeparrot_training - Skipping example with length 318 (seq_length=1024) +03/05/2022 13:30:02 - INFO - codeparrot_training - Step 40856: {'lr': 0.00041967680383598536, 'samples': 20918784, 'steps': 40856, 'loss/train': 2.1584765911102295} +03/05/2022 13:30:05 - INFO - codeparrot_training - Step 40857: {'lr': 0.00041967290647717864, 'samples': 20919296, 'steps': 40857, 'loss/train': 1.483283519744873} +03/05/2022 13:30:07 - INFO - codeparrot_training - Skipping example with length 831 (seq_length=1024) +03/05/2022 13:30:10 - INFO - codeparrot_training - Step 40858: {'lr': 0.00041966900904191995, 'samples': 20919808, 'steps': 40858, 'loss/train': 1.388810634613037} +03/05/2022 13:30:14 - INFO - codeparrot_training - Step 40859: {'lr': 0.000419665111530211, 'samples': 20920320, 'steps': 40859, 'loss/train': 2.1027305126190186} +03/05/2022 13:30:15 - INFO - codeparrot_training - Skipping example with length 363 (seq_length=1024) +03/05/2022 13:30:19 - INFO - codeparrot_training - Step 40860: {'lr': 0.00041966121394205357, 'samples': 20920832, 'steps': 40860, 'loss/train': 1.6768385171890259} +03/05/2022 13:30:22 - INFO - codeparrot_training - Step 40861: {'lr': 0.0004196573162774494, 'samples': 20921344, 'steps': 40861, 'loss/train': 2.3207848072052} +03/05/2022 13:30:24 - INFO - codeparrot_training - Skipping example with length 966 (seq_length=1024) +03/05/2022 13:30:28 - INFO - codeparrot_training - Step 40862: {'lr': 0.0004196534185364003, 'samples': 20921856, 'steps': 40862, 'loss/train': 1.8999712467193604} +03/05/2022 13:30:31 - INFO - codeparrot_training - Step 40863: {'lr': 0.00041964952071890795, 'samples': 20922368, 'steps': 40863, 'loss/train': 2.1954541206359863} +03/05/2022 13:30:32 - INFO - codeparrot_training - Skipping example with length 34 (seq_length=1024) +03/05/2022 13:30:36 - INFO - codeparrot_training - Step 40864: {'lr': 0.00041964562282497417, 'samples': 20922880, 'steps': 40864, 'loss/train': 2.052569627761841} +03/05/2022 13:30:39 - INFO - codeparrot_training - Step 40865: {'lr': 0.0004196417248546006, 'samples': 20923392, 'steps': 40865, 'loss/train': 2.286489248275757} +03/05/2022 13:30:41 - INFO - codeparrot_training - Skipping example with length 526 (seq_length=1024) +03/05/2022 13:30:45 - INFO - codeparrot_training - Step 40866: {'lr': 0.0004196378268077893, 'samples': 20923904, 'steps': 40866, 'loss/train': 1.9662140607833862} +03/05/2022 13:30:48 - INFO - codeparrot_training - Step 40867: {'lr': 0.00041963392868454163, 'samples': 20924416, 'steps': 40867, 'loss/train': 1.2052921056747437} +03/05/2022 13:30:50 - INFO - codeparrot_training - Skipping example with length 261 (seq_length=1024) +03/05/2022 13:30:53 - INFO - codeparrot_training - Step 40868: {'lr': 0.0004196300304848596, 'samples': 20924928, 'steps': 40868, 'loss/train': 1.4810441732406616} +03/05/2022 13:30:56 - INFO - codeparrot_training - Step 40869: {'lr': 0.00041962613220874486, 'samples': 20925440, 'steps': 40869, 'loss/train': 1.386450171470642} +03/05/2022 13:30:58 - INFO - codeparrot_training - Skipping example with length 588 (seq_length=1024) +03/05/2022 13:31:01 - INFO - codeparrot_training - Step 40870: {'lr': 0.0004196222338561992, 'samples': 20925952, 'steps': 40870, 'loss/train': 2.079098701477051} +03/05/2022 13:31:05 - INFO - codeparrot_training - Step 40871: {'lr': 0.0004196183354272244, 'samples': 20926464, 'steps': 40871, 'loss/train': 1.1608192920684814} +03/05/2022 13:31:07 - INFO - codeparrot_training - Skipping example with length 22 (seq_length=1024) +03/05/2022 13:31:10 - INFO - codeparrot_training - Step 40872: {'lr': 0.00041961443692182214, 'samples': 20926976, 'steps': 40872, 'loss/train': 2.019590139389038} +03/05/2022 13:31:13 - INFO - codeparrot_training - Step 40873: {'lr': 0.00041961053833999433, 'samples': 20927488, 'steps': 40873, 'loss/train': 1.748381495475769} +03/05/2022 13:31:15 - INFO - codeparrot_training - Skipping example with length 52 (seq_length=1024) +03/05/2022 13:31:18 - INFO - codeparrot_training - Step 40874: {'lr': 0.00041960663968174263, 'samples': 20928000, 'steps': 40874, 'loss/train': 1.3677968978881836} +03/05/2022 13:31:22 - INFO - codeparrot_training - Step 40875: {'lr': 0.0004196027409470687, 'samples': 20928512, 'steps': 40875, 'loss/train': 2.433924436569214} +03/05/2022 13:31:23 - INFO - codeparrot_training - Skipping example with length 635 (seq_length=1024) +03/05/2022 13:31:27 - INFO - codeparrot_training - Step 40876: {'lr': 0.00041959884213597443, 'samples': 20929024, 'steps': 40876, 'loss/train': 1.4287258386611938} +03/05/2022 13:31:30 - INFO - codeparrot_training - Step 40877: {'lr': 0.0004195949432484615, 'samples': 20929536, 'steps': 40877, 'loss/train': 1.5623226165771484} +03/05/2022 13:31:31 - INFO - codeparrot_training - Skipping example with length 210 (seq_length=1024) +03/05/2022 13:31:35 - INFO - codeparrot_training - Step 40878: {'lr': 0.00041959104428453175, 'samples': 20930048, 'steps': 40878, 'loss/train': 1.4454883337020874} +03/05/2022 13:31:38 - INFO - codeparrot_training - Step 40879: {'lr': 0.000419587145244187, 'samples': 20930560, 'steps': 40879, 'loss/train': 2.290069341659546} +03/05/2022 13:31:40 - INFO - codeparrot_training - Skipping example with length 445 (seq_length=1024) +03/05/2022 13:31:44 - INFO - codeparrot_training - Step 40880: {'lr': 0.0004195832461274288, 'samples': 20931072, 'steps': 40880, 'loss/train': 1.7995747327804565} +03/05/2022 13:31:47 - INFO - codeparrot_training - Step 40881: {'lr': 0.00041957934693425894, 'samples': 20931584, 'steps': 40881, 'loss/train': 2.2749710083007812} +03/05/2022 13:31:48 - INFO - codeparrot_training - Skipping example with length 47 (seq_length=1024) +03/05/2022 13:31:52 - INFO - codeparrot_training - Step 40882: {'lr': 0.0004195754476646793, 'samples': 20932096, 'steps': 40882, 'loss/train': 0.8822826147079468} +03/05/2022 13:31:55 - INFO - codeparrot_training - Step 40883: {'lr': 0.0004195715483186916, 'samples': 20932608, 'steps': 40883, 'loss/train': 0.6965515613555908} +03/05/2022 13:31:57 - INFO - codeparrot_training - Skipping example with length 133 (seq_length=1024) +03/05/2022 13:32:01 - INFO - codeparrot_training - Step 40884: {'lr': 0.00041956764889629756, 'samples': 20933120, 'steps': 40884, 'loss/train': 2.496389389038086} +03/05/2022 13:32:04 - INFO - codeparrot_training - Step 40885: {'lr': 0.000419563749397499, 'samples': 20933632, 'steps': 40885, 'loss/train': 1.9617961645126343} +03/05/2022 13:32:08 - INFO - codeparrot_training - Step 40886: {'lr': 0.00041955984982229756, 'samples': 20934144, 'steps': 40886, 'loss/train': 4.610908508300781} +03/05/2022 13:32:08 - INFO - codeparrot_training - Skipping example with length 426 (seq_length=1024) +03/05/2022 13:32:13 - INFO - codeparrot_training - Step 40887: {'lr': 0.0004195559501706951, 'samples': 20934656, 'steps': 40887, 'loss/train': 1.8994956016540527} +03/05/2022 13:32:16 - INFO - codeparrot_training - Step 40888: {'lr': 0.0004195520504426933, 'samples': 20935168, 'steps': 40888, 'loss/train': 1.786508560180664} +03/05/2022 13:32:17 - INFO - codeparrot_training - Skipping example with length 356 (seq_length=1024) +03/05/2022 13:32:21 - INFO - codeparrot_training - Step 40889: {'lr': 0.000419548150638294, 'samples': 20935680, 'steps': 40889, 'loss/train': 1.107291579246521} +03/05/2022 13:32:25 - INFO - codeparrot_training - Step 40890: {'lr': 0.0004195442507574989, 'samples': 20936192, 'steps': 40890, 'loss/train': 1.3861072063446045} +03/05/2022 13:32:26 - INFO - codeparrot_training - Skipping example with length 144 (seq_length=1024) +03/05/2022 13:32:30 - INFO - codeparrot_training - Step 40891: {'lr': 0.00041954035080030985, 'samples': 20936704, 'steps': 40891, 'loss/train': 2.0044682025909424} +03/05/2022 13:32:33 - INFO - codeparrot_training - Step 40892: {'lr': 0.0004195364507667284, 'samples': 20937216, 'steps': 40892, 'loss/train': 1.2277030944824219} +03/05/2022 13:32:34 - INFO - codeparrot_training - Skipping example with length 400 (seq_length=1024) +03/05/2022 13:32:38 - INFO - codeparrot_training - Step 40893: {'lr': 0.0004195325506567566, 'samples': 20937728, 'steps': 40893, 'loss/train': 2.602017641067505} +03/05/2022 13:32:41 - INFO - codeparrot_training - Step 40894: {'lr': 0.00041952865047039604, 'samples': 20938240, 'steps': 40894, 'loss/train': 1.9105859994888306} +03/05/2022 13:32:42 - INFO - codeparrot_training - Skipping example with length 482 (seq_length=1024) +03/05/2022 13:32:47 - INFO - codeparrot_training - Step 40895: {'lr': 0.00041952475020764834, 'samples': 20938752, 'steps': 40895, 'loss/train': 1.128890872001648} +03/05/2022 13:32:50 - INFO - codeparrot_training - Step 40896: {'lr': 0.00041952084986851546, 'samples': 20939264, 'steps': 40896, 'loss/train': 2.159511089324951} +03/05/2022 13:32:51 - INFO - codeparrot_training - Skipping example with length 808 (seq_length=1024) +03/05/2022 13:32:55 - INFO - codeparrot_training - Step 40897: {'lr': 0.0004195169494529991, 'samples': 20939776, 'steps': 40897, 'loss/train': 0.9826498031616211} +03/05/2022 13:32:58 - INFO - codeparrot_training - Step 40898: {'lr': 0.0004195130489611011, 'samples': 20940288, 'steps': 40898, 'loss/train': 2.002592086791992} +03/05/2022 13:32:59 - INFO - codeparrot_training - Skipping example with length 737 (seq_length=1024) +03/05/2022 13:33:04 - INFO - codeparrot_training - Step 40899: {'lr': 0.0004195091483928231, 'samples': 20940800, 'steps': 40899, 'loss/train': 1.979048490524292} +03/05/2022 13:33:07 - INFO - codeparrot_training - Step 40900: {'lr': 0.0004195052477481669, 'samples': 20941312, 'steps': 40900, 'loss/train': 1.730732798576355} +03/05/2022 13:33:07 - INFO - codeparrot_training - Skipping example with length 801 (seq_length=1024) +03/05/2022 13:33:12 - INFO - codeparrot_training - Step 40901: {'lr': 0.00041950134702713415, 'samples': 20941824, 'steps': 40901, 'loss/train': 2.1525495052337646} +03/05/2022 13:33:15 - INFO - codeparrot_training - Step 40902: {'lr': 0.0004194974462297268, 'samples': 20942336, 'steps': 40902, 'loss/train': 1.4320118427276611} +03/05/2022 13:33:16 - INFO - codeparrot_training - Skipping example with length 518 (seq_length=1024) +03/05/2022 13:33:21 - INFO - codeparrot_training - Step 40903: {'lr': 0.00041949354535594655, 'samples': 20942848, 'steps': 40903, 'loss/train': 1.8561313152313232} +03/05/2022 13:33:24 - INFO - codeparrot_training - Step 40904: {'lr': 0.000419489644405795, 'samples': 20943360, 'steps': 40904, 'loss/train': 2.1206531524658203} +03/05/2022 13:33:24 - INFO - codeparrot_training - Skipping example with length 856 (seq_length=1024) +03/05/2022 13:33:29 - INFO - codeparrot_training - Step 40905: {'lr': 0.00041948574337927414, 'samples': 20943872, 'steps': 40905, 'loss/train': 2.100924253463745} +03/05/2022 13:33:32 - INFO - codeparrot_training - Step 40906: {'lr': 0.0004194818422763856, 'samples': 20944384, 'steps': 40906, 'loss/train': 2.9402620792388916} +03/05/2022 13:33:33 - INFO - codeparrot_training - Skipping example with length 259 (seq_length=1024) +03/05/2022 13:33:38 - INFO - codeparrot_training - Step 40907: {'lr': 0.00041947794109713113, 'samples': 20944896, 'steps': 40907, 'loss/train': 2.1062545776367188} +03/05/2022 13:33:41 - INFO - codeparrot_training - Step 40908: {'lr': 0.0004194740398415125, 'samples': 20945408, 'steps': 40908, 'loss/train': 1.3431569337844849} +03/05/2022 13:33:41 - INFO - codeparrot_training - Skipping example with length 350 (seq_length=1024) +03/05/2022 13:33:46 - INFO - codeparrot_training - Step 40909: {'lr': 0.00041947013850953156, 'samples': 20945920, 'steps': 40909, 'loss/train': 2.1743621826171875} +03/05/2022 13:33:50 - INFO - codeparrot_training - Step 40910: {'lr': 0.00041946623710118993, 'samples': 20946432, 'steps': 40910, 'loss/train': 0.7801947593688965} +03/05/2022 13:33:51 - INFO - codeparrot_training - Skipping example with length 298 (seq_length=1024) +03/05/2022 13:33:55 - INFO - codeparrot_training - Step 40911: {'lr': 0.0004194623356164894, 'samples': 20946944, 'steps': 40911, 'loss/train': 1.059708833694458} +03/05/2022 13:33:58 - INFO - codeparrot_training - Step 40912: {'lr': 0.0004194584340554318, 'samples': 20947456, 'steps': 40912, 'loss/train': 1.8934693336486816} +03/05/2022 13:34:00 - INFO - codeparrot_training - Skipping example with length 691 (seq_length=1024) +03/05/2022 13:34:03 - INFO - codeparrot_training - Step 40913: {'lr': 0.0004194545324180188, 'samples': 20947968, 'steps': 40913, 'loss/train': 1.8113795518875122} +03/05/2022 13:34:07 - INFO - codeparrot_training - Step 40914: {'lr': 0.00041945063070425226, 'samples': 20948480, 'steps': 40914, 'loss/train': 1.6471275091171265} +03/05/2022 13:34:09 - INFO - codeparrot_training - Skipping example with length 368 (seq_length=1024) +03/05/2022 13:34:12 - INFO - codeparrot_training - Step 40915: {'lr': 0.0004194467289141339, 'samples': 20948992, 'steps': 40915, 'loss/train': 1.251671314239502} +03/05/2022 13:34:16 - INFO - codeparrot_training - Step 40916: {'lr': 0.00041944282704766534, 'samples': 20949504, 'steps': 40916, 'loss/train': 1.3881092071533203} +03/05/2022 13:34:17 - INFO - codeparrot_training - Skipping example with length 501 (seq_length=1024) +03/05/2022 13:34:21 - INFO - codeparrot_training - Step 40917: {'lr': 0.0004194389251048486, 'samples': 20950016, 'steps': 40917, 'loss/train': 1.9799801111221313} +03/05/2022 13:34:24 - INFO - codeparrot_training - Step 40918: {'lr': 0.00041943502308568523, 'samples': 20950528, 'steps': 40918, 'loss/train': 2.951422929763794} +03/05/2022 13:34:29 - INFO - codeparrot_training - Step 40919: {'lr': 0.000419431120990177, 'samples': 20951040, 'steps': 40919, 'loss/train': 1.9971290826797485} +03/05/2022 13:34:33 - INFO - codeparrot_training - Step 40920: {'lr': 0.0004194272188183258, 'samples': 20951552, 'steps': 40920, 'loss/train': 1.3606972694396973} +03/05/2022 13:34:34 - INFO - codeparrot_training - Skipping example with length 262 (seq_length=1024) +03/05/2022 13:34:38 - INFO - codeparrot_training - Step 40921: {'lr': 0.0004194233165701333, 'samples': 20952064, 'steps': 40921, 'loss/train': 2.123211145401001} +03/05/2022 13:34:41 - INFO - codeparrot_training - Step 40922: {'lr': 0.0004194194142456013, 'samples': 20952576, 'steps': 40922, 'loss/train': 1.9404993057250977} +03/05/2022 13:34:43 - INFO - codeparrot_training - Skipping example with length 563 (seq_length=1024) +03/05/2022 13:34:47 - INFO - codeparrot_training - Step 40923: {'lr': 0.00041941551184473144, 'samples': 20953088, 'steps': 40923, 'loss/train': 0.9128438830375671} +03/05/2022 13:34:50 - INFO - codeparrot_training - Step 40924: {'lr': 0.0004194116093675256, 'samples': 20953600, 'steps': 40924, 'loss/train': 1.061419129371643} +03/05/2022 13:34:51 - INFO - codeparrot_training - Skipping example with length 567 (seq_length=1024) +03/05/2022 13:34:55 - INFO - codeparrot_training - Step 40925: {'lr': 0.0004194077068139855, 'samples': 20954112, 'steps': 40925, 'loss/train': 2.398571252822876} +03/05/2022 13:34:58 - INFO - codeparrot_training - Step 40926: {'lr': 0.00041940380418411296, 'samples': 20954624, 'steps': 40926, 'loss/train': 0.8524813055992126} +03/05/2022 13:35:00 - INFO - codeparrot_training - Skipping example with length 771 (seq_length=1024) +03/05/2022 13:35:04 - INFO - codeparrot_training - Step 40927: {'lr': 0.00041939990147790956, 'samples': 20955136, 'steps': 40927, 'loss/train': 1.661476492881775} +03/05/2022 13:35:07 - INFO - codeparrot_training - Step 40928: {'lr': 0.00041939599869537724, 'samples': 20955648, 'steps': 40928, 'loss/train': 0.5725401043891907} +03/05/2022 13:35:09 - INFO - codeparrot_training - Skipping example with length 1020 (seq_length=1024) +03/05/2022 13:35:12 - INFO - codeparrot_training - Step 40929: {'lr': 0.00041939209583651774, 'samples': 20956160, 'steps': 40929, 'loss/train': 2.043174982070923} +03/05/2022 13:35:15 - INFO - codeparrot_training - Step 40930: {'lr': 0.0004193881929013327, 'samples': 20956672, 'steps': 40930, 'loss/train': 1.9881279468536377} +03/05/2022 13:35:18 - INFO - codeparrot_training - Skipping example with length 528 (seq_length=1024) +03/05/2022 13:35:21 - INFO - codeparrot_training - Step 40931: {'lr': 0.00041938428988982403, 'samples': 20957184, 'steps': 40931, 'loss/train': 1.9028513431549072} +03/05/2022 13:35:24 - INFO - codeparrot_training - Step 40932: {'lr': 0.00041938038680199333, 'samples': 20957696, 'steps': 40932, 'loss/train': 1.348423957824707} +03/05/2022 13:35:26 - INFO - codeparrot_training - Skipping example with length 5 (seq_length=1024) +03/05/2022 13:35:29 - INFO - codeparrot_training - Step 40933: {'lr': 0.0004193764836378425, 'samples': 20958208, 'steps': 40933, 'loss/train': 1.7691069841384888} +03/05/2022 13:35:32 - INFO - codeparrot_training - Step 40934: {'lr': 0.0004193725803973732, 'samples': 20958720, 'steps': 40934, 'loss/train': 1.549912452697754} +03/05/2022 13:35:35 - INFO - codeparrot_training - Skipping example with length 372 (seq_length=1024) +03/05/2022 13:35:38 - INFO - codeparrot_training - Step 40935: {'lr': 0.0004193686770805873, 'samples': 20959232, 'steps': 40935, 'loss/train': 1.402387261390686} +03/05/2022 13:35:41 - INFO - codeparrot_training - Step 40936: {'lr': 0.00041936477368748645, 'samples': 20959744, 'steps': 40936, 'loss/train': 1.004008173942566} +03/05/2022 13:35:43 - INFO - codeparrot_training - Skipping example with length 857 (seq_length=1024) +03/05/2022 13:35:46 - INFO - codeparrot_training - Step 40937: {'lr': 0.00041936087021807243, 'samples': 20960256, 'steps': 40937, 'loss/train': 1.6439785957336426} +03/05/2022 13:35:49 - INFO - codeparrot_training - Step 40938: {'lr': 0.000419356966672347, 'samples': 20960768, 'steps': 40938, 'loss/train': 1.5676209926605225} +03/05/2022 13:35:52 - INFO - codeparrot_training - Skipping example with length 236 (seq_length=1024) +03/05/2022 13:35:55 - INFO - codeparrot_training - Step 40939: {'lr': 0.00041935306305031195, 'samples': 20961280, 'steps': 40939, 'loss/train': 2.307340383529663} +03/05/2022 13:35:58 - INFO - codeparrot_training - Step 40940: {'lr': 0.000419349159351969, 'samples': 20961792, 'steps': 40940, 'loss/train': 2.4367105960845947} +03/05/2022 13:36:00 - INFO - codeparrot_training - Skipping example with length 602 (seq_length=1024) +03/05/2022 13:36:03 - INFO - codeparrot_training - Step 40941: {'lr': 0.00041934525557732005, 'samples': 20962304, 'steps': 40941, 'loss/train': 1.6212553977966309} +03/05/2022 13:36:06 - INFO - codeparrot_training - Step 40942: {'lr': 0.00041934135172636667, 'samples': 20962816, 'steps': 40942, 'loss/train': 1.2108005285263062} +03/05/2022 13:36:09 - INFO - codeparrot_training - Skipping example with length 861 (seq_length=1024) +03/05/2022 13:36:11 - INFO - codeparrot_training - Step 40943: {'lr': 0.00041933744779911066, 'samples': 20963328, 'steps': 40943, 'loss/train': 1.7508940696716309} +03/05/2022 13:36:15 - INFO - codeparrot_training - Step 40944: {'lr': 0.00041933354379555376, 'samples': 20963840, 'steps': 40944, 'loss/train': 1.8667831420898438} +03/05/2022 13:36:17 - INFO - codeparrot_training - Skipping example with length 11 (seq_length=1024) +03/05/2022 13:36:21 - INFO - codeparrot_training - Step 40945: {'lr': 0.00041932963971569786, 'samples': 20964352, 'steps': 40945, 'loss/train': 1.935478687286377} +03/05/2022 13:36:24 - INFO - codeparrot_training - Step 40946: {'lr': 0.0004193257355595446, 'samples': 20964864, 'steps': 40946, 'loss/train': 0.7698574662208557} +03/05/2022 13:36:27 - INFO - codeparrot_training - Step 40947: {'lr': 0.00041932183132709587, 'samples': 20965376, 'steps': 40947, 'loss/train': 2.0890626907348633} +03/05/2022 13:36:29 - INFO - codeparrot_training - Skipping example with length 35 (seq_length=1024) +03/05/2022 13:36:32 - INFO - codeparrot_training - Step 40948: {'lr': 0.00041931792701835325, 'samples': 20965888, 'steps': 40948, 'loss/train': 1.6626851558685303} +03/05/2022 13:36:35 - INFO - codeparrot_training - Step 40949: {'lr': 0.00041931402263331856, 'samples': 20966400, 'steps': 40949, 'loss/train': 0.8640086650848389} +03/05/2022 13:36:38 - INFO - codeparrot_training - Skipping example with length 591 (seq_length=1024) +03/05/2022 13:36:41 - INFO - codeparrot_training - Step 40950: {'lr': 0.0004193101181719936, 'samples': 20966912, 'steps': 40950, 'loss/train': 1.3986080884933472} +03/05/2022 13:36:44 - INFO - codeparrot_training - Step 40951: {'lr': 0.00041930621363438014, 'samples': 20967424, 'steps': 40951, 'loss/train': 0.9665238857269287} +03/05/2022 13:36:46 - INFO - codeparrot_training - Skipping example with length 153 (seq_length=1024) +03/05/2022 13:36:49 - INFO - codeparrot_training - Step 40952: {'lr': 0.0004193023090204799, 'samples': 20967936, 'steps': 40952, 'loss/train': 2.0724000930786133} +03/05/2022 13:36:52 - INFO - codeparrot_training - Step 40953: {'lr': 0.0004192984043302947, 'samples': 20968448, 'steps': 40953, 'loss/train': 2.046949863433838} +03/05/2022 13:36:55 - INFO - codeparrot_training - Skipping example with length 173 (seq_length=1024) +03/05/2022 13:36:57 - INFO - codeparrot_training - Step 40954: {'lr': 0.00041929449956382625, 'samples': 20968960, 'steps': 40954, 'loss/train': 1.2734843492507935} +03/05/2022 13:37:01 - INFO - codeparrot_training - Step 40955: {'lr': 0.0004192905947210762, 'samples': 20969472, 'steps': 40955, 'loss/train': 0.21160177886486053} +03/05/2022 13:37:03 - INFO - codeparrot_training - Skipping example with length 512 (seq_length=1024) +03/05/2022 13:37:06 - INFO - codeparrot_training - Step 40956: {'lr': 0.00041928668980204653, 'samples': 20969984, 'steps': 40956, 'loss/train': 2.0453641414642334} +03/05/2022 13:37:09 - INFO - codeparrot_training - Step 40957: {'lr': 0.00041928278480673884, 'samples': 20970496, 'steps': 40957, 'loss/train': 1.0631027221679688} +03/05/2022 13:37:12 - INFO - codeparrot_training - Skipping example with length 1022 (seq_length=1024) +03/05/2022 13:37:15 - INFO - codeparrot_training - Step 40958: {'lr': 0.00041927887973515493, 'samples': 20971008, 'steps': 40958, 'loss/train': 1.5014387369155884} +03/05/2022 13:37:18 - INFO - codeparrot_training - Step 40959: {'lr': 0.0004192749745872966, 'samples': 20971520, 'steps': 40959, 'loss/train': 1.5051966905593872} +03/05/2022 13:37:20 - INFO - codeparrot_training - Skipping example with length 968 (seq_length=1024) +03/05/2022 13:37:23 - INFO - codeparrot_training - Step 40960: {'lr': 0.00041927106936316563, 'samples': 20972032, 'steps': 40960, 'loss/train': 1.3896242380142212} +03/05/2022 13:37:26 - INFO - codeparrot_training - Step 40961: {'lr': 0.00041926716406276367, 'samples': 20972544, 'steps': 40961, 'loss/train': 1.7977592945098877} +03/05/2022 13:37:29 - INFO - codeparrot_training - Skipping example with length 369 (seq_length=1024) +03/05/2022 13:37:32 - INFO - codeparrot_training - Step 40962: {'lr': 0.00041926325868609247, 'samples': 20973056, 'steps': 40962, 'loss/train': 1.337714433670044} +03/05/2022 13:37:35 - INFO - codeparrot_training - Step 40963: {'lr': 0.0004192593532331539, 'samples': 20973568, 'steps': 40963, 'loss/train': 1.8626798391342163} +03/05/2022 13:37:37 - INFO - codeparrot_training - Skipping example with length 317 (seq_length=1024) +03/05/2022 13:37:40 - INFO - codeparrot_training - Step 40964: {'lr': 0.00041925544770394976, 'samples': 20974080, 'steps': 40964, 'loss/train': 1.9915268421173096} +03/05/2022 13:37:43 - INFO - codeparrot_training - Step 40965: {'lr': 0.0004192515420984816, 'samples': 20974592, 'steps': 40965, 'loss/train': 1.7338155508041382} +03/05/2022 13:37:45 - INFO - codeparrot_training - Skipping example with length 648 (seq_length=1024) +03/05/2022 13:37:49 - INFO - codeparrot_training - Step 40966: {'lr': 0.0004192476364167514, 'samples': 20975104, 'steps': 40966, 'loss/train': 1.7280715703964233} +03/05/2022 13:37:52 - INFO - codeparrot_training - Step 40967: {'lr': 0.0004192437306587608, 'samples': 20975616, 'steps': 40967, 'loss/train': 1.925923228263855} +03/05/2022 13:37:54 - INFO - codeparrot_training - Skipping example with length 253 (seq_length=1024) +03/05/2022 13:37:57 - INFO - codeparrot_training - Step 40968: {'lr': 0.0004192398248245116, 'samples': 20976128, 'steps': 40968, 'loss/train': 1.7177178859710693} +03/05/2022 13:38:00 - INFO - codeparrot_training - Step 40969: {'lr': 0.00041923591891400555, 'samples': 20976640, 'steps': 40969, 'loss/train': 1.7692251205444336} +03/05/2022 13:38:02 - INFO - codeparrot_training - Skipping example with length 930 (seq_length=1024) +03/05/2022 13:38:05 - INFO - codeparrot_training - Step 40970: {'lr': 0.00041923201292724436, 'samples': 20977152, 'steps': 40970, 'loss/train': 1.3442301750183105} +03/05/2022 13:38:09 - INFO - codeparrot_training - Step 40971: {'lr': 0.00041922810686422987, 'samples': 20977664, 'steps': 40971, 'loss/train': 1.791410207748413} +03/05/2022 13:38:11 - INFO - codeparrot_training - Skipping example with length 431 (seq_length=1024) +03/05/2022 13:38:14 - INFO - codeparrot_training - Step 40972: {'lr': 0.00041922420072496383, 'samples': 20978176, 'steps': 40972, 'loss/train': 1.2592841386795044} +03/05/2022 13:38:17 - INFO - codeparrot_training - Step 40973: {'lr': 0.00041922029450944785, 'samples': 20978688, 'steps': 40973, 'loss/train': 1.7606834173202515} +03/05/2022 13:38:19 - INFO - codeparrot_training - Skipping example with length 582 (seq_length=1024) +03/05/2022 13:38:22 - INFO - codeparrot_training - Step 40974: {'lr': 0.000419216388217684, 'samples': 20979200, 'steps': 40974, 'loss/train': 2.3748607635498047} +03/05/2022 13:38:26 - INFO - codeparrot_training - Step 40975: {'lr': 0.00041921248184967374, 'samples': 20979712, 'steps': 40975, 'loss/train': 1.847153663635254} +03/05/2022 13:38:27 - INFO - codeparrot_training - Skipping example with length 697 (seq_length=1024) +03/05/2022 13:38:31 - INFO - codeparrot_training - Step 40976: {'lr': 0.000419208575405419, 'samples': 20980224, 'steps': 40976, 'loss/train': 1.8363513946533203} +03/05/2022 13:38:34 - INFO - codeparrot_training - Step 40977: {'lr': 0.00041920466888492147, 'samples': 20980736, 'steps': 40977, 'loss/train': 1.5221658945083618} +03/05/2022 13:38:35 - INFO - codeparrot_training - Skipping example with length 128 (seq_length=1024) +03/05/2022 13:38:39 - INFO - codeparrot_training - Step 40978: {'lr': 0.00041920076228818293, 'samples': 20981248, 'steps': 40978, 'loss/train': 1.1469863653182983} +03/05/2022 13:38:42 - INFO - codeparrot_training - Step 40979: {'lr': 0.0004191968556152051, 'samples': 20981760, 'steps': 40979, 'loss/train': 2.035182476043701} +03/05/2022 13:38:44 - INFO - codeparrot_training - Skipping example with length 105 (seq_length=1024) +03/05/2022 13:38:48 - INFO - codeparrot_training - Step 40980: {'lr': 0.0004191929488659898, 'samples': 20982272, 'steps': 40980, 'loss/train': 2.027689218521118} +03/05/2022 13:38:51 - INFO - codeparrot_training - Step 40981: {'lr': 0.00041918904204053874, 'samples': 20982784, 'steps': 40981, 'loss/train': 1.9462558031082153} +03/05/2022 13:38:52 - INFO - codeparrot_training - Skipping example with length 872 (seq_length=1024) +03/05/2022 13:38:56 - INFO - codeparrot_training - Step 40982: {'lr': 0.0004191851351388538, 'samples': 20983296, 'steps': 40982, 'loss/train': 0.6240033507347107} +03/05/2022 13:38:59 - INFO - codeparrot_training - Step 40983: {'lr': 0.0004191812281609366, 'samples': 20983808, 'steps': 40983, 'loss/train': 1.6453315019607544} +03/05/2022 13:39:01 - INFO - codeparrot_training - Skipping example with length 894 (seq_length=1024) +03/05/2022 13:39:05 - INFO - codeparrot_training - Step 40984: {'lr': 0.00041917732110678896, 'samples': 20984320, 'steps': 40984, 'loss/train': 0.23897503316402435} +03/05/2022 13:39:08 - INFO - codeparrot_training - Step 40985: {'lr': 0.0004191734139764126, 'samples': 20984832, 'steps': 40985, 'loss/train': 3.1144537925720215} +03/05/2022 13:39:09 - INFO - codeparrot_training - Skipping example with length 176 (seq_length=1024) +03/05/2022 13:39:13 - INFO - codeparrot_training - Step 40986: {'lr': 0.00041916950676980933, 'samples': 20985344, 'steps': 40986, 'loss/train': 1.1711925268173218} +03/05/2022 13:39:16 - INFO - codeparrot_training - Step 40987: {'lr': 0.0004191655994869809, 'samples': 20985856, 'steps': 40987, 'loss/train': 1.3133169412612915} +03/05/2022 13:39:17 - INFO - codeparrot_training - Skipping example with length 310 (seq_length=1024) +03/05/2022 13:39:21 - INFO - codeparrot_training - Step 40988: {'lr': 0.000419161692127929, 'samples': 20986368, 'steps': 40988, 'loss/train': 1.712204933166504} +03/05/2022 13:39:25 - INFO - codeparrot_training - Step 40989: {'lr': 0.00041915778469265555, 'samples': 20986880, 'steps': 40989, 'loss/train': 1.3019118309020996} +03/05/2022 13:39:26 - INFO - codeparrot_training - Skipping example with length 462 (seq_length=1024) +03/05/2022 13:39:30 - INFO - codeparrot_training - Step 40990: {'lr': 0.0004191538771811621, 'samples': 20987392, 'steps': 40990, 'loss/train': 1.3221882581710815} +03/05/2022 13:39:33 - INFO - codeparrot_training - Step 40991: {'lr': 0.00041914996959345057, 'samples': 20987904, 'steps': 40991, 'loss/train': 1.387258768081665} +03/05/2022 13:39:34 - INFO - codeparrot_training - Skipping example with length 392 (seq_length=1024) +03/05/2022 13:39:38 - INFO - codeparrot_training - Step 40992: {'lr': 0.0004191460619295227, 'samples': 20988416, 'steps': 40992, 'loss/train': 1.239571213722229} +03/05/2022 13:39:42 - INFO - codeparrot_training - Step 40993: {'lr': 0.0004191421541893802, 'samples': 20988928, 'steps': 40993, 'loss/train': 1.3564966917037964} +03/05/2022 13:39:42 - INFO - codeparrot_training - Skipping example with length 989 (seq_length=1024) +03/05/2022 13:39:47 - INFO - codeparrot_training - Step 40994: {'lr': 0.0004191382463730249, 'samples': 20989440, 'steps': 40994, 'loss/train': 1.7799856662750244} +03/05/2022 13:39:50 - INFO - codeparrot_training - Step 40995: {'lr': 0.00041913433848045844, 'samples': 20989952, 'steps': 40995, 'loss/train': 1.5568857192993164} +03/05/2022 13:39:52 - INFO - codeparrot_training - Skipping example with length 343 (seq_length=1024) +03/05/2022 13:39:56 - INFO - codeparrot_training - Step 40996: {'lr': 0.00041913043051168276, 'samples': 20990464, 'steps': 40996, 'loss/train': 1.8372198343276978} +03/05/2022 13:39:59 - INFO - codeparrot_training - Step 40997: {'lr': 0.00041912652246669943, 'samples': 20990976, 'steps': 40997, 'loss/train': 1.5470447540283203} +03/05/2022 13:40:01 - INFO - codeparrot_training - Skipping example with length 526 (seq_length=1024) +03/05/2022 13:40:04 - INFO - codeparrot_training - Step 40998: {'lr': 0.0004191226143455103, 'samples': 20991488, 'steps': 40998, 'loss/train': 1.61585533618927} +03/05/2022 13:40:07 - INFO - codeparrot_training - Step 40999: {'lr': 0.00041911870614811715, 'samples': 20992000, 'steps': 40999, 'loss/train': 2.052797317504883} +03/05/2022 13:40:09 - INFO - codeparrot_training - Skipping example with length 308 (seq_length=1024) +03/05/2022 13:40:12 - INFO - codeparrot_training - Step 41000: {'lr': 0.00041911479787452177, 'samples': 20992512, 'steps': 41000, 'loss/train': 1.4795676469802856} +03/05/2022 13:40:16 - INFO - codeparrot_training - Step 41001: {'lr': 0.0004191108895247258, 'samples': 20993024, 'steps': 41001, 'loss/train': 1.6969348192214966} +03/05/2022 13:40:17 - INFO - codeparrot_training - Skipping example with length 326 (seq_length=1024) +03/05/2022 13:40:21 - INFO - codeparrot_training - Step 41002: {'lr': 0.00041910698109873116, 'samples': 20993536, 'steps': 41002, 'loss/train': 2.0577280521392822} +03/05/2022 13:40:24 - INFO - codeparrot_training - Step 41003: {'lr': 0.0004191030725965394, 'samples': 20994048, 'steps': 41003, 'loss/train': 1.6729868650436401} +03/05/2022 13:40:26 - INFO - codeparrot_training - Skipping example with length 648 (seq_length=1024) +03/05/2022 13:40:30 - INFO - codeparrot_training - Step 41004: {'lr': 0.00041909916401815245, 'samples': 20994560, 'steps': 41004, 'loss/train': 0.7721620798110962} +03/05/2022 13:40:33 - INFO - codeparrot_training - Step 41005: {'lr': 0.00041909525536357206, 'samples': 20995072, 'steps': 41005, 'loss/train': 0.7144091725349426} +03/05/2022 13:40:36 - INFO - codeparrot_training - Step 41006: {'lr': 0.0004190913466327999, 'samples': 20995584, 'steps': 41006, 'loss/train': 2.2180771827697754} +03/05/2022 13:40:38 - INFO - codeparrot_training - Skipping example with length 186 (seq_length=1024) +03/05/2022 13:40:41 - INFO - codeparrot_training - Step 41007: {'lr': 0.00041908743782583793, 'samples': 20996096, 'steps': 41007, 'loss/train': 1.0636593103408813} +03/05/2022 13:40:45 - INFO - codeparrot_training - Step 41008: {'lr': 0.00041908352894268766, 'samples': 20996608, 'steps': 41008, 'loss/train': 1.0929205417633057} +03/05/2022 13:40:46 - INFO - codeparrot_training - Skipping example with length 509 (seq_length=1024) +03/05/2022 13:40:50 - INFO - codeparrot_training - Step 41009: {'lr': 0.00041907961998335094, 'samples': 20997120, 'steps': 41009, 'loss/train': 1.8538089990615845} +03/05/2022 13:40:53 - INFO - codeparrot_training - Step 41010: {'lr': 0.0004190757109478296, 'samples': 20997632, 'steps': 41010, 'loss/train': 1.7064621448516846} +03/05/2022 13:40:54 - INFO - codeparrot_training - Skipping example with length 658 (seq_length=1024) +03/05/2022 13:40:58 - INFO - codeparrot_training - Step 41011: {'lr': 0.00041907180183612525, 'samples': 20998144, 'steps': 41011, 'loss/train': 1.7933845520019531} +03/05/2022 13:41:01 - INFO - codeparrot_training - Step 41012: {'lr': 0.00041906789264823985, 'samples': 20998656, 'steps': 41012, 'loss/train': 1.492444634437561} +03/05/2022 13:41:03 - INFO - codeparrot_training - Skipping example with length 618 (seq_length=1024) +03/05/2022 13:41:07 - INFO - codeparrot_training - Step 41013: {'lr': 0.00041906398338417504, 'samples': 20999168, 'steps': 41013, 'loss/train': 1.9002152681350708} +03/05/2022 13:41:10 - INFO - codeparrot_training - Step 41014: {'lr': 0.00041906007404393273, 'samples': 20999680, 'steps': 41014, 'loss/train': 1.4806840419769287} +03/05/2022 13:41:12 - INFO - codeparrot_training - Skipping example with length 442 (seq_length=1024) +03/05/2022 13:41:15 - INFO - codeparrot_training - Step 41015: {'lr': 0.0004190561646275144, 'samples': 21000192, 'steps': 41015, 'loss/train': 0.7887071967124939} +03/05/2022 13:41:18 - INFO - codeparrot_training - Step 41016: {'lr': 0.0004190522551349221, 'samples': 21000704, 'steps': 41016, 'loss/train': 2.177182674407959} +03/05/2022 13:41:20 - INFO - codeparrot_training - Skipping example with length 211 (seq_length=1024) +03/05/2022 13:41:24 - INFO - codeparrot_training - Step 41017: {'lr': 0.00041904834556615733, 'samples': 21001216, 'steps': 41017, 'loss/train': 1.8582791090011597} +03/05/2022 13:41:27 - INFO - codeparrot_training - Step 41018: {'lr': 0.000419044435921222, 'samples': 21001728, 'steps': 41018, 'loss/train': 1.7931479215621948} +03/05/2022 13:41:28 - INFO - codeparrot_training - Skipping example with length 662 (seq_length=1024) +03/05/2022 13:41:32 - INFO - codeparrot_training - Step 41019: {'lr': 0.0004190405262001179, 'samples': 21002240, 'steps': 41019, 'loss/train': 2.078005790710449} +03/05/2022 13:41:35 - INFO - codeparrot_training - Step 41020: {'lr': 0.00041903661640284675, 'samples': 21002752, 'steps': 41020, 'loss/train': 2.0085599422454834} +03/05/2022 13:41:36 - INFO - codeparrot_training - Skipping example with length 833 (seq_length=1024) +03/05/2022 13:41:41 - INFO - codeparrot_training - Step 41021: {'lr': 0.0004190327065294104, 'samples': 21003264, 'steps': 41021, 'loss/train': 1.4311343431472778} +03/05/2022 13:41:44 - INFO - codeparrot_training - Step 41022: {'lr': 0.00041902879657981036, 'samples': 21003776, 'steps': 41022, 'loss/train': 1.1949687004089355} +03/05/2022 13:41:45 - INFO - codeparrot_training - Skipping example with length 653 (seq_length=1024) +03/05/2022 13:41:49 - INFO - codeparrot_training - Step 41023: {'lr': 0.00041902488655404864, 'samples': 21004288, 'steps': 41023, 'loss/train': 1.5510125160217285} +03/05/2022 13:41:52 - INFO - codeparrot_training - Step 41024: {'lr': 0.0004190209764521269, 'samples': 21004800, 'steps': 41024, 'loss/train': 1.7241042852401733} +03/05/2022 13:41:53 - INFO - codeparrot_training - Skipping example with length 961 (seq_length=1024) +03/05/2022 13:41:57 - INFO - codeparrot_training - Step 41025: {'lr': 0.0004190170662740469, 'samples': 21005312, 'steps': 41025, 'loss/train': 1.844301462173462} +03/05/2022 13:42:01 - INFO - codeparrot_training - Step 41026: {'lr': 0.0004190131560198104, 'samples': 21005824, 'steps': 41026, 'loss/train': 1.3409844636917114} +03/05/2022 13:42:01 - INFO - codeparrot_training - Skipping example with length 235 (seq_length=1024) +03/05/2022 13:42:06 - INFO - codeparrot_training - Step 41027: {'lr': 0.00041900924568941925, 'samples': 21006336, 'steps': 41027, 'loss/train': 1.69524347782135} +03/05/2022 13:42:09 - INFO - codeparrot_training - Step 41028: {'lr': 0.0004190053352828751, 'samples': 21006848, 'steps': 41028, 'loss/train': 2.278975009918213} +03/05/2022 13:42:10 - INFO - codeparrot_training - Skipping example with length 1015 (seq_length=1024) +03/05/2022 13:42:14 - INFO - codeparrot_training - Step 41029: {'lr': 0.00041900142480017974, 'samples': 21007360, 'steps': 41029, 'loss/train': 0.1403326541185379} +03/05/2022 13:42:18 - INFO - codeparrot_training - Step 41030: {'lr': 0.0004189975142413349, 'samples': 21007872, 'steps': 41030, 'loss/train': 2.4323794841766357} +03/05/2022 13:42:18 - INFO - codeparrot_training - Skipping example with length 170 (seq_length=1024) +03/05/2022 13:42:23 - INFO - codeparrot_training - Step 41031: {'lr': 0.00041899360360634247, 'samples': 21008384, 'steps': 41031, 'loss/train': 0.4056245982646942} +03/05/2022 13:42:26 - INFO - codeparrot_training - Step 41032: {'lr': 0.0004189896928952041, 'samples': 21008896, 'steps': 41032, 'loss/train': 2.012449264526367} +03/05/2022 13:42:27 - INFO - codeparrot_training - Skipping example with length 928 (seq_length=1024) +03/05/2022 13:42:31 - INFO - codeparrot_training - Step 41033: {'lr': 0.0004189857821079216, 'samples': 21009408, 'steps': 41033, 'loss/train': 1.9453412294387817} +03/05/2022 13:42:34 - INFO - codeparrot_training - Step 41034: {'lr': 0.0004189818712444967, 'samples': 21009920, 'steps': 41034, 'loss/train': 1.399137258529663} +03/05/2022 13:42:36 - INFO - codeparrot_training - Skipping example with length 836 (seq_length=1024) +03/05/2022 13:42:40 - INFO - codeparrot_training - Step 41035: {'lr': 0.0004189779603049312, 'samples': 21010432, 'steps': 41035, 'loss/train': 1.1407676935195923} +03/05/2022 13:42:43 - INFO - codeparrot_training - Step 41036: {'lr': 0.0004189740492892268, 'samples': 21010944, 'steps': 41036, 'loss/train': 1.5945695638656616} +03/05/2022 13:42:44 - INFO - codeparrot_training - Skipping example with length 839 (seq_length=1024) +03/05/2022 13:42:48 - INFO - codeparrot_training - Step 41037: {'lr': 0.0004189701381973853, 'samples': 21011456, 'steps': 41037, 'loss/train': 1.605156660079956} +03/05/2022 13:42:51 - INFO - codeparrot_training - Step 41038: {'lr': 0.00041896622702940846, 'samples': 21011968, 'steps': 41038, 'loss/train': 0.2411782592535019} +03/05/2022 13:42:54 - INFO - codeparrot_training - Skipping example with length 936 (seq_length=1024) +03/05/2022 13:42:57 - INFO - codeparrot_training - Step 41039: {'lr': 0.0004189623157852981, 'samples': 21012480, 'steps': 41039, 'loss/train': 2.3039276599884033} +03/05/2022 13:43:00 - INFO - codeparrot_training - Step 41040: {'lr': 0.0004189584044650559, 'samples': 21012992, 'steps': 41040, 'loss/train': 2.154139280319214} +03/05/2022 13:43:03 - INFO - codeparrot_training - Skipping example with length 623 (seq_length=1024) +03/05/2022 13:43:05 - INFO - codeparrot_training - Step 41041: {'lr': 0.0004189544930686837, 'samples': 21013504, 'steps': 41041, 'loss/train': 1.8504512310028076} +03/05/2022 13:43:09 - INFO - codeparrot_training - Step 41042: {'lr': 0.0004189505815961831, 'samples': 21014016, 'steps': 41042, 'loss/train': 2.499389886856079} +03/05/2022 13:43:12 - INFO - codeparrot_training - Step 41043: {'lr': 0.000418946670047556, 'samples': 21014528, 'steps': 41043, 'loss/train': 2.3691928386688232} +03/05/2022 13:43:12 - INFO - codeparrot_training - Skipping example with length 879 (seq_length=1024) +03/05/2022 13:43:17 - INFO - codeparrot_training - Step 41044: {'lr': 0.0004189427584228042, 'samples': 21015040, 'steps': 41044, 'loss/train': 2.185183048248291} +03/05/2022 13:43:20 - INFO - codeparrot_training - Step 41045: {'lr': 0.0004189388467219294, 'samples': 21015552, 'steps': 41045, 'loss/train': 1.5055705308914185} +03/05/2022 13:43:20 - INFO - codeparrot_training - Skipping example with length 725 (seq_length=1024) +03/05/2022 13:43:25 - INFO - codeparrot_training - Step 41046: {'lr': 0.0004189349349449333, 'samples': 21016064, 'steps': 41046, 'loss/train': 1.6246016025543213} +03/05/2022 13:43:29 - INFO - codeparrot_training - Step 41047: {'lr': 0.00041893102309181773, 'samples': 21016576, 'steps': 41047, 'loss/train': 1.6436207294464111} +03/05/2022 13:43:29 - INFO - codeparrot_training - Skipping example with length 236 (seq_length=1024) +03/05/2022 13:43:34 - INFO - codeparrot_training - Step 41048: {'lr': 0.00041892711116258454, 'samples': 21017088, 'steps': 41048, 'loss/train': 1.7194164991378784} +03/05/2022 13:43:37 - INFO - codeparrot_training - Step 41049: {'lr': 0.00041892319915723533, 'samples': 21017600, 'steps': 41049, 'loss/train': 1.5419973134994507} +03/05/2022 13:43:37 - INFO - codeparrot_training - Skipping example with length 434 (seq_length=1024) +03/05/2022 13:43:42 - INFO - codeparrot_training - Step 41050: {'lr': 0.0004189192870757719, 'samples': 21018112, 'steps': 41050, 'loss/train': 1.261130928993225} +03/05/2022 13:43:46 - INFO - codeparrot_training - Step 41051: {'lr': 0.0004189153749181961, 'samples': 21018624, 'steps': 41051, 'loss/train': 1.9548683166503906} +03/05/2022 13:43:46 - INFO - codeparrot_training - Skipping example with length 387 (seq_length=1024) +03/05/2022 13:43:51 - INFO - codeparrot_training - Step 41052: {'lr': 0.00041891146268450963, 'samples': 21019136, 'steps': 41052, 'loss/train': 0.46127182245254517} +03/05/2022 13:43:54 - INFO - codeparrot_training - Step 41053: {'lr': 0.0004189075503747142, 'samples': 21019648, 'steps': 41053, 'loss/train': 2.3016364574432373} +03/05/2022 13:43:54 - INFO - codeparrot_training - Skipping example with length 536 (seq_length=1024) +03/05/2022 13:43:59 - INFO - codeparrot_training - Step 41054: {'lr': 0.0004189036379888117, 'samples': 21020160, 'steps': 41054, 'loss/train': 1.670820951461792} +03/05/2022 13:44:02 - INFO - codeparrot_training - Skipping example with length 912 (seq_length=1024) +03/05/2022 13:44:05 - INFO - codeparrot_training - Step 41055: {'lr': 0.00041889972552680387, 'samples': 21020672, 'steps': 41055, 'loss/train': 1.149959683418274} +03/05/2022 13:44:08 - INFO - codeparrot_training - Step 41056: {'lr': 0.0004188958129886924, 'samples': 21021184, 'steps': 41056, 'loss/train': 1.500604271888733} +03/05/2022 13:44:11 - INFO - codeparrot_training - Skipping example with length 921 (seq_length=1024) +03/05/2022 13:44:13 - INFO - codeparrot_training - Step 41057: {'lr': 0.000418891900374479, 'samples': 21021696, 'steps': 41057, 'loss/train': 1.4841886758804321} +03/05/2022 13:44:16 - INFO - codeparrot_training - Step 41058: {'lr': 0.0004188879876841656, 'samples': 21022208, 'steps': 41058, 'loss/train': 1.8293884992599487} +03/05/2022 13:44:19 - INFO - codeparrot_training - Skipping example with length 348 (seq_length=1024) +03/05/2022 13:44:21 - INFO - codeparrot_training - Step 41059: {'lr': 0.0004188840749177538, 'samples': 21022720, 'steps': 41059, 'loss/train': 1.3883912563323975} +03/05/2022 13:44:25 - INFO - codeparrot_training - Step 41060: {'lr': 0.0004188801620752455, 'samples': 21023232, 'steps': 41060, 'loss/train': 0.4288281798362732} +03/05/2022 13:44:27 - INFO - codeparrot_training - Skipping example with length 294 (seq_length=1024) +03/05/2022 13:44:30 - INFO - codeparrot_training - Step 41061: {'lr': 0.00041887624915664247, 'samples': 21023744, 'steps': 41061, 'loss/train': 1.8984296321868896} +03/05/2022 13:44:33 - INFO - codeparrot_training - Step 41062: {'lr': 0.0004188723361619463, 'samples': 21024256, 'steps': 41062, 'loss/train': 1.6621243953704834} +03/05/2022 13:44:36 - INFO - codeparrot_training - Skipping example with length 18 (seq_length=1024) +03/05/2022 13:44:38 - INFO - codeparrot_training - Step 41063: {'lr': 0.0004188684230911589, 'samples': 21024768, 'steps': 41063, 'loss/train': 0.9106316566467285} +03/05/2022 13:44:41 - INFO - codeparrot_training - Step 41064: {'lr': 0.00041886450994428197, 'samples': 21025280, 'steps': 41064, 'loss/train': 1.8130619525909424} +03/05/2022 13:44:44 - INFO - codeparrot_training - Skipping example with length 410 (seq_length=1024) +03/05/2022 13:44:47 - INFO - codeparrot_training - Step 41065: {'lr': 0.0004188605967213174, 'samples': 21025792, 'steps': 41065, 'loss/train': 0.6807711720466614} +03/05/2022 13:44:50 - INFO - codeparrot_training - Step 41066: {'lr': 0.0004188566834222667, 'samples': 21026304, 'steps': 41066, 'loss/train': 1.2132093906402588} +03/05/2022 13:44:52 - INFO - codeparrot_training - Skipping example with length 762 (seq_length=1024) +03/05/2022 13:44:55 - INFO - codeparrot_training - Step 41067: {'lr': 0.00041885277004713185, 'samples': 21026816, 'steps': 41067, 'loss/train': 1.5936646461486816} +03/05/2022 13:44:58 - INFO - codeparrot_training - Step 41068: {'lr': 0.0004188488565959146, 'samples': 21027328, 'steps': 41068, 'loss/train': 1.8115161657333374} +03/05/2022 13:45:01 - INFO - codeparrot_training - Skipping example with length 991 (seq_length=1024) +03/05/2022 13:45:04 - INFO - codeparrot_training - Step 41069: {'lr': 0.0004188449430686166, 'samples': 21027840, 'steps': 41069, 'loss/train': 1.6091536283493042} +03/05/2022 13:45:07 - INFO - codeparrot_training - Step 41070: {'lr': 0.00041884102946523964, 'samples': 21028352, 'steps': 41070, 'loss/train': 1.4992191791534424} +03/05/2022 13:45:09 - INFO - codeparrot_training - Skipping example with length 159 (seq_length=1024) +03/05/2022 13:45:12 - INFO - codeparrot_training - Step 41071: {'lr': 0.0004188371157857856, 'samples': 21028864, 'steps': 41071, 'loss/train': 1.2206181287765503} +03/05/2022 13:45:15 - INFO - codeparrot_training - Step 41072: {'lr': 0.0004188332020302561, 'samples': 21029376, 'steps': 41072, 'loss/train': 1.7516181468963623} +03/05/2022 13:45:17 - INFO - codeparrot_training - Skipping example with length 97 (seq_length=1024) +03/05/2022 13:45:20 - INFO - codeparrot_training - Step 41073: {'lr': 0.000418829288198653, 'samples': 21029888, 'steps': 41073, 'loss/train': 1.7559072971343994} +03/05/2022 13:45:24 - INFO - codeparrot_training - Step 41074: {'lr': 0.00041882537429097804, 'samples': 21030400, 'steps': 41074, 'loss/train': 1.6876798868179321} +03/05/2022 13:45:25 - INFO - codeparrot_training - Skipping example with length 883 (seq_length=1024) +03/05/2022 13:45:29 - INFO - codeparrot_training - Step 41075: {'lr': 0.00041882146030723297, 'samples': 21030912, 'steps': 41075, 'loss/train': 2.428175687789917} +03/05/2022 13:45:32 - INFO - codeparrot_training - Step 41076: {'lr': 0.0004188175462474195, 'samples': 21031424, 'steps': 41076, 'loss/train': 1.1477277278900146} +03/05/2022 13:45:34 - INFO - codeparrot_training - Skipping example with length 686 (seq_length=1024) +03/05/2022 13:45:37 - INFO - codeparrot_training - Step 41077: {'lr': 0.0004188136321115395, 'samples': 21031936, 'steps': 41077, 'loss/train': 1.3877673149108887} +03/05/2022 13:45:40 - INFO - codeparrot_training - Step 41078: {'lr': 0.00041880971789959466, 'samples': 21032448, 'steps': 41078, 'loss/train': 1.7370747327804565} +03/05/2022 13:45:43 - INFO - codeparrot_training - Skipping example with length 99 (seq_length=1024) +03/05/2022 13:45:46 - INFO - codeparrot_training - Step 41079: {'lr': 0.0004188058036115868, 'samples': 21032960, 'steps': 41079, 'loss/train': 2.0579209327697754} +03/05/2022 13:45:49 - INFO - codeparrot_training - Step 41080: {'lr': 0.0004188018892475176, 'samples': 21033472, 'steps': 41080, 'loss/train': 1.8121000528335571} +03/05/2022 13:45:51 - INFO - codeparrot_training - Skipping example with length 157 (seq_length=1024) +03/05/2022 13:45:54 - INFO - codeparrot_training - Step 41081: {'lr': 0.0004187979748073889, 'samples': 21033984, 'steps': 41081, 'loss/train': 1.2361116409301758} +03/05/2022 13:45:57 - INFO - codeparrot_training - Step 41082: {'lr': 0.0004187940602912024, 'samples': 21034496, 'steps': 41082, 'loss/train': 1.5722579956054688} +03/05/2022 13:45:59 - INFO - codeparrot_training - Skipping example with length 821 (seq_length=1024) +03/05/2022 13:46:03 - INFO - codeparrot_training - Step 41083: {'lr': 0.00041879014569895994, 'samples': 21035008, 'steps': 41083, 'loss/train': 0.3866863250732422} +03/05/2022 13:46:06 - INFO - codeparrot_training - Step 41084: {'lr': 0.0004187862310306633, 'samples': 21035520, 'steps': 41084, 'loss/train': 1.1784895658493042} +03/05/2022 13:46:08 - INFO - codeparrot_training - Skipping example with length 942 (seq_length=1024) +03/05/2022 13:46:11 - INFO - codeparrot_training - Step 41085: {'lr': 0.00041878231628631406, 'samples': 21036032, 'steps': 41085, 'loss/train': 2.5081589221954346} +03/05/2022 13:46:14 - INFO - codeparrot_training - Step 41086: {'lr': 0.0004187784014659142, 'samples': 21036544, 'steps': 41086, 'loss/train': 1.7863125801086426} +03/05/2022 13:46:16 - INFO - codeparrot_training - Skipping example with length 885 (seq_length=1024) +03/05/2022 13:46:20 - INFO - codeparrot_training - Step 41087: {'lr': 0.0004187744865694654, 'samples': 21037056, 'steps': 41087, 'loss/train': 1.8587837219238281} +03/05/2022 13:46:23 - INFO - codeparrot_training - Step 41088: {'lr': 0.0004187705715969694, 'samples': 21037568, 'steps': 41088, 'loss/train': 2.090730667114258} +03/05/2022 13:46:24 - INFO - codeparrot_training - Skipping example with length 345 (seq_length=1024) +03/05/2022 13:46:28 - INFO - codeparrot_training - Step 41089: {'lr': 0.0004187666565484279, 'samples': 21038080, 'steps': 41089, 'loss/train': 1.724141240119934} +03/05/2022 13:46:31 - INFO - codeparrot_training - Step 41090: {'lr': 0.0004187627414238428, 'samples': 21038592, 'steps': 41090, 'loss/train': 0.9857847690582275} +03/05/2022 13:46:33 - INFO - codeparrot_training - Skipping example with length 142 (seq_length=1024) +03/05/2022 13:46:36 - INFO - codeparrot_training - Step 41091: {'lr': 0.0004187588262232159, 'samples': 21039104, 'steps': 41091, 'loss/train': 1.4247395992279053} +03/05/2022 13:46:40 - INFO - codeparrot_training - Step 41092: {'lr': 0.00041875491094654885, 'samples': 21039616, 'steps': 41092, 'loss/train': 1.6186214685440063} +03/05/2022 13:46:41 - INFO - codeparrot_training - Skipping example with length 912 (seq_length=1024) +03/05/2022 13:46:45 - INFO - codeparrot_training - Step 41093: {'lr': 0.0004187509955938434, 'samples': 21040128, 'steps': 41093, 'loss/train': 2.1064372062683105} +03/05/2022 13:46:48 - INFO - codeparrot_training - Step 41094: {'lr': 0.0004187470801651013, 'samples': 21040640, 'steps': 41094, 'loss/train': 1.3506468534469604} +03/05/2022 13:46:50 - INFO - codeparrot_training - Skipping example with length 277 (seq_length=1024) +03/05/2022 13:46:54 - INFO - codeparrot_training - Step 41095: {'lr': 0.0004187431646603245, 'samples': 21041152, 'steps': 41095, 'loss/train': 2.1444263458251953} +03/05/2022 13:46:57 - INFO - codeparrot_training - Step 41096: {'lr': 0.0004187392490795146, 'samples': 21041664, 'steps': 41096, 'loss/train': 1.2249301671981812} +03/05/2022 13:46:59 - INFO - codeparrot_training - Skipping example with length 950 (seq_length=1024) +03/05/2022 13:47:02 - INFO - codeparrot_training - Step 41097: {'lr': 0.00041873533342267336, 'samples': 21042176, 'steps': 41097, 'loss/train': 1.4259041547775269} +03/05/2022 13:47:05 - INFO - codeparrot_training - Step 41098: {'lr': 0.0004187314176898026, 'samples': 21042688, 'steps': 41098, 'loss/train': 1.2251845598220825} +03/05/2022 13:47:08 - INFO - codeparrot_training - Skipping example with length 539 (seq_length=1024) +03/05/2022 13:47:11 - INFO - codeparrot_training - Step 41099: {'lr': 0.000418727501880904, 'samples': 21043200, 'steps': 41099, 'loss/train': 1.470958948135376} +03/05/2022 13:47:14 - INFO - codeparrot_training - Step 41100: {'lr': 0.00041872358599597947, 'samples': 21043712, 'steps': 41100, 'loss/train': 2.103548288345337} +03/05/2022 13:47:17 - INFO - codeparrot_training - Step 41101: {'lr': 0.00041871967003503073, 'samples': 21044224, 'steps': 41101, 'loss/train': 1.7502964735031128} +03/05/2022 13:47:17 - INFO - codeparrot_training - Skipping example with length 14 (seq_length=1024) +03/05/2022 13:47:22 - INFO - codeparrot_training - Step 41102: {'lr': 0.00041871575399805947, 'samples': 21044736, 'steps': 41102, 'loss/train': 1.6916648149490356} +03/05/2022 13:47:26 - INFO - codeparrot_training - Step 41103: {'lr': 0.0004187118378850674, 'samples': 21045248, 'steps': 41103, 'loss/train': 1.1363284587860107} +03/05/2022 13:47:26 - INFO - codeparrot_training - Skipping example with length 816 (seq_length=1024) +03/05/2022 13:47:31 - INFO - codeparrot_training - Step 41104: {'lr': 0.00041870792169605654, 'samples': 21045760, 'steps': 41104, 'loss/train': 1.6929562091827393} +03/05/2022 13:47:34 - INFO - codeparrot_training - Step 41105: {'lr': 0.0004187040054310284, 'samples': 21046272, 'steps': 41105, 'loss/train': 1.268493413925171} +03/05/2022 13:47:34 - INFO - codeparrot_training - Skipping example with length 228 (seq_length=1024) +03/05/2022 13:47:39 - INFO - codeparrot_training - Step 41106: {'lr': 0.0004187000890899848, 'samples': 21046784, 'steps': 41106, 'loss/train': 1.3727134466171265} +03/05/2022 13:47:42 - INFO - codeparrot_training - Step 41107: {'lr': 0.0004186961726729276, 'samples': 21047296, 'steps': 41107, 'loss/train': 2.0852911472320557} +03/05/2022 13:47:42 - INFO - codeparrot_training - Skipping example with length 83 (seq_length=1024) +03/05/2022 13:47:48 - INFO - codeparrot_training - Step 41108: {'lr': 0.0004186922561798585, 'samples': 21047808, 'steps': 41108, 'loss/train': 1.6850175857543945} +03/05/2022 13:47:51 - INFO - codeparrot_training - Skipping example with length 923 (seq_length=1024) +03/05/2022 13:47:53 - INFO - codeparrot_training - Step 41109: {'lr': 0.00041868833961077935, 'samples': 21048320, 'steps': 41109, 'loss/train': 1.2906277179718018} +03/05/2022 13:47:56 - INFO - codeparrot_training - Step 41110: {'lr': 0.0004186844229656917, 'samples': 21048832, 'steps': 41110, 'loss/train': 2.163666248321533} +03/05/2022 13:47:59 - INFO - codeparrot_training - Skipping example with length 672 (seq_length=1024) +03/05/2022 13:48:01 - INFO - codeparrot_training - Step 41111: {'lr': 0.0004186805062445975, 'samples': 21049344, 'steps': 41111, 'loss/train': 1.5065124034881592} +03/05/2022 13:48:05 - INFO - codeparrot_training - Step 41112: {'lr': 0.00041867658944749856, 'samples': 21049856, 'steps': 41112, 'loss/train': 2.1178276538848877} +03/05/2022 13:48:07 - INFO - codeparrot_training - Skipping example with length 698 (seq_length=1024) +03/05/2022 13:48:10 - INFO - codeparrot_training - Step 41113: {'lr': 0.00041867267257439644, 'samples': 21050368, 'steps': 41113, 'loss/train': 2.029750347137451} +03/05/2022 13:48:13 - INFO - codeparrot_training - Step 41114: {'lr': 0.00041866875562529305, 'samples': 21050880, 'steps': 41114, 'loss/train': 2.126662492752075} +03/05/2022 13:48:16 - INFO - codeparrot_training - Skipping example with length 335 (seq_length=1024) +03/05/2022 13:48:18 - INFO - codeparrot_training - Step 41115: {'lr': 0.0004186648386001901, 'samples': 21051392, 'steps': 41115, 'loss/train': 1.650421380996704} +03/05/2022 13:48:21 - INFO - codeparrot_training - Step 41116: {'lr': 0.0004186609214990894, 'samples': 21051904, 'steps': 41116, 'loss/train': 1.5452206134796143} +03/05/2022 13:48:24 - INFO - codeparrot_training - Skipping example with length 744 (seq_length=1024) +03/05/2022 13:48:27 - INFO - codeparrot_training - Step 41117: {'lr': 0.0004186570043219927, 'samples': 21052416, 'steps': 41117, 'loss/train': 1.725466012954712} +03/05/2022 13:48:30 - INFO - codeparrot_training - Step 41118: {'lr': 0.0004186530870689017, 'samples': 21052928, 'steps': 41118, 'loss/train': 0.2288942039012909} +03/05/2022 13:48:33 - INFO - codeparrot_training - Skipping example with length 57 (seq_length=1024) +03/05/2022 13:48:35 - INFO - codeparrot_training - Step 41119: {'lr': 0.00041864916973981833, 'samples': 21053440, 'steps': 41119, 'loss/train': 2.752560615539551} +03/05/2022 13:48:38 - INFO - codeparrot_training - Step 41120: {'lr': 0.0004186452523347442, 'samples': 21053952, 'steps': 41120, 'loss/train': 1.6091042757034302} +03/05/2022 13:48:41 - INFO - codeparrot_training - Skipping example with length 152 (seq_length=1024) +03/05/2022 13:48:44 - INFO - codeparrot_training - Step 41121: {'lr': 0.00041864133485368106, 'samples': 21054464, 'steps': 41121, 'loss/train': 1.6022865772247314} +03/05/2022 13:48:47 - INFO - codeparrot_training - Step 41122: {'lr': 0.0004186374172966308, 'samples': 21054976, 'steps': 41122, 'loss/train': 1.3499544858932495} +03/05/2022 13:48:49 - INFO - codeparrot_training - Skipping example with length 712 (seq_length=1024) +03/05/2022 13:48:52 - INFO - codeparrot_training - Step 41123: {'lr': 0.0004186334996635951, 'samples': 21055488, 'steps': 41123, 'loss/train': 0.8171908259391785} +03/05/2022 13:48:55 - INFO - codeparrot_training - Step 41124: {'lr': 0.00041862958195457574, 'samples': 21056000, 'steps': 41124, 'loss/train': 1.62832510471344} +03/05/2022 13:48:58 - INFO - codeparrot_training - Skipping example with length 400 (seq_length=1024) +03/05/2022 13:49:01 - INFO - codeparrot_training - Step 41125: {'lr': 0.0004186256641695745, 'samples': 21056512, 'steps': 41125, 'loss/train': 0.9367873668670654} +03/05/2022 13:49:04 - INFO - codeparrot_training - Step 41126: {'lr': 0.00041862174630859315, 'samples': 21057024, 'steps': 41126, 'loss/train': 1.959223747253418} +03/05/2022 13:49:07 - INFO - codeparrot_training - Step 41127: {'lr': 0.0004186178283716334, 'samples': 21057536, 'steps': 41127, 'loss/train': 0.9000644683837891} +03/05/2022 13:49:07 - INFO - codeparrot_training - Skipping example with length 201 (seq_length=1024) +03/05/2022 13:49:12 - INFO - codeparrot_training - Step 41128: {'lr': 0.0004186139103586971, 'samples': 21058048, 'steps': 41128, 'loss/train': 3.513206720352173} +03/05/2022 13:49:16 - INFO - codeparrot_training - Step 41129: {'lr': 0.00041860999226978605, 'samples': 21058560, 'steps': 41129, 'loss/train': 0.5529110431671143} +03/05/2022 13:49:16 - INFO - codeparrot_training - Skipping example with length 975 (seq_length=1024) +03/05/2022 13:49:21 - INFO - codeparrot_training - Step 41130: {'lr': 0.0004186060741049018, 'samples': 21059072, 'steps': 41130, 'loss/train': 2.1105990409851074} +03/05/2022 13:49:24 - INFO - codeparrot_training - Skipping example with length 423 (seq_length=1024) +03/05/2022 13:49:26 - INFO - codeparrot_training - Step 41131: {'lr': 0.00041860215586404624, 'samples': 21059584, 'steps': 41131, 'loss/train': 0.7395280003547668} +03/05/2022 13:49:29 - INFO - codeparrot_training - Step 41132: {'lr': 0.00041859823754722127, 'samples': 21060096, 'steps': 41132, 'loss/train': 2.056844711303711} +03/05/2022 13:49:32 - INFO - codeparrot_training - Skipping example with length 74 (seq_length=1024) +03/05/2022 13:49:35 - INFO - codeparrot_training - Step 41133: {'lr': 0.00041859431915442847, 'samples': 21060608, 'steps': 41133, 'loss/train': 1.7224339246749878} +03/05/2022 13:49:38 - INFO - codeparrot_training - Step 41134: {'lr': 0.0004185904006856697, 'samples': 21061120, 'steps': 41134, 'loss/train': 0.9022955894470215} +03/05/2022 13:49:40 - INFO - codeparrot_training - Skipping example with length 243 (seq_length=1024) +03/05/2022 13:49:43 - INFO - codeparrot_training - Step 41135: {'lr': 0.0004185864821409467, 'samples': 21061632, 'steps': 41135, 'loss/train': 1.5830967426300049} +03/05/2022 13:49:46 - INFO - codeparrot_training - Step 41136: {'lr': 0.00041858256352026124, 'samples': 21062144, 'steps': 41136, 'loss/train': 1.231931209564209} +03/05/2022 13:49:50 - INFO - codeparrot_training - Step 41137: {'lr': 0.0004185786448236151, 'samples': 21062656, 'steps': 41137, 'loss/train': 2.28576397895813} +03/05/2022 13:49:55 - INFO - codeparrot_training - Step 41138: {'lr': 0.0004185747260510099, 'samples': 21063168, 'steps': 41138, 'loss/train': 1.1137070655822754} +03/05/2022 13:49:58 - INFO - codeparrot_training - Step 41139: {'lr': 0.0004185708072024476, 'samples': 21063680, 'steps': 41139, 'loss/train': 1.7347255945205688} +03/05/2022 13:49:59 - INFO - codeparrot_training - Skipping example with length 1007 (seq_length=1024) +03/05/2022 13:50:03 - INFO - codeparrot_training - Step 41140: {'lr': 0.0004185668882779299, 'samples': 21064192, 'steps': 41140, 'loss/train': 1.5595333576202393} +03/05/2022 13:50:06 - INFO - codeparrot_training - Step 41141: {'lr': 0.00041856296927745857, 'samples': 21064704, 'steps': 41141, 'loss/train': 1.7757669687271118} +03/05/2022 13:50:07 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/05/2022 13:50:12 - INFO - codeparrot_training - Step 41142: {'lr': 0.00041855905020103543, 'samples': 21065216, 'steps': 41142, 'loss/train': 1.018153429031372} +03/05/2022 13:50:15 - INFO - codeparrot_training - Step 41143: {'lr': 0.00041855513104866203, 'samples': 21065728, 'steps': 41143, 'loss/train': 1.8286634683609009} +03/05/2022 13:50:15 - INFO - codeparrot_training - Skipping example with length 355 (seq_length=1024) +03/05/2022 13:50:20 - INFO - codeparrot_training - Step 41144: {'lr': 0.00041855121182034037, 'samples': 21066240, 'steps': 41144, 'loss/train': 1.716732144355774} +03/05/2022 13:50:23 - INFO - codeparrot_training - Step 41145: {'lr': 0.00041854729251607214, 'samples': 21066752, 'steps': 41145, 'loss/train': 2.0188302993774414} +03/05/2022 13:50:25 - INFO - codeparrot_training - Skipping example with length 702 (seq_length=1024) +03/05/2022 13:50:29 - INFO - codeparrot_training - Step 41146: {'lr': 0.00041854337313585913, 'samples': 21067264, 'steps': 41146, 'loss/train': 1.2001093626022339} +03/05/2022 13:50:32 - INFO - codeparrot_training - Step 41147: {'lr': 0.000418539453679703, 'samples': 21067776, 'steps': 41147, 'loss/train': 2.3629586696624756} +03/05/2022 13:50:33 - INFO - codeparrot_training - Skipping example with length 516 (seq_length=1024) +03/05/2022 13:50:37 - INFO - codeparrot_training - Step 41148: {'lr': 0.0004185355341476057, 'samples': 21068288, 'steps': 41148, 'loss/train': 1.9879628419876099} +03/05/2022 13:50:40 - INFO - codeparrot_training - Step 41149: {'lr': 0.00041853161453956885, 'samples': 21068800, 'steps': 41149, 'loss/train': 1.8345636129379272} +03/05/2022 13:50:41 - INFO - codeparrot_training - Skipping example with length 45 (seq_length=1024) +03/05/2022 13:50:46 - INFO - codeparrot_training - Step 41150: {'lr': 0.0004185276948555942, 'samples': 21069312, 'steps': 41150, 'loss/train': 1.3249355554580688} +03/05/2022 13:50:49 - INFO - codeparrot_training - Step 41151: {'lr': 0.0004185237750956836, 'samples': 21069824, 'steps': 41151, 'loss/train': 1.3783522844314575} +03/05/2022 13:50:50 - INFO - codeparrot_training - Skipping example with length 944 (seq_length=1024) +03/05/2022 13:50:54 - INFO - codeparrot_training - Step 41152: {'lr': 0.0004185198552598388, 'samples': 21070336, 'steps': 41152, 'loss/train': 1.0323559045791626} +03/05/2022 13:50:57 - INFO - codeparrot_training - Step 41153: {'lr': 0.00041851593534806154, 'samples': 21070848, 'steps': 41153, 'loss/train': 1.7826143503189087} +03/05/2022 13:50:58 - INFO - codeparrot_training - Skipping example with length 469 (seq_length=1024) +03/05/2022 13:51:03 - INFO - codeparrot_training - Step 41154: {'lr': 0.0004185120153603536, 'samples': 21071360, 'steps': 41154, 'loss/train': 2.2439186573028564} +03/05/2022 13:51:06 - INFO - codeparrot_training - Step 41155: {'lr': 0.0004185080952967168, 'samples': 21071872, 'steps': 41155, 'loss/train': 2.7682158946990967} +03/05/2022 13:51:07 - INFO - codeparrot_training - Skipping example with length 696 (seq_length=1024) +03/05/2022 13:51:11 - INFO - codeparrot_training - Step 41156: {'lr': 0.00041850417515715277, 'samples': 21072384, 'steps': 41156, 'loss/train': 2.0421481132507324} +03/05/2022 13:51:14 - INFO - codeparrot_training - Step 41157: {'lr': 0.00041850025494166346, 'samples': 21072896, 'steps': 41157, 'loss/train': 2.000418186187744} +03/05/2022 13:51:15 - INFO - codeparrot_training - Skipping example with length 701 (seq_length=1024) +03/05/2022 13:51:19 - INFO - codeparrot_training - Step 41158: {'lr': 0.0004184963346502504, 'samples': 21073408, 'steps': 41158, 'loss/train': 0.8653706908226013} +03/05/2022 13:51:23 - INFO - codeparrot_training - Step 41159: {'lr': 0.00041849241428291555, 'samples': 21073920, 'steps': 41159, 'loss/train': 1.4489761590957642} +03/05/2022 13:51:23 - INFO - codeparrot_training - Skipping example with length 884 (seq_length=1024) +03/05/2022 13:51:28 - INFO - codeparrot_training - Step 41160: {'lr': 0.00041848849383966063, 'samples': 21074432, 'steps': 41160, 'loss/train': 2.005323886871338} +03/05/2022 13:51:31 - INFO - codeparrot_training - Step 41161: {'lr': 0.0004184845733204874, 'samples': 21074944, 'steps': 41161, 'loss/train': 1.868302822113037} +03/05/2022 13:51:32 - INFO - codeparrot_training - Skipping example with length 789 (seq_length=1024) +03/05/2022 13:51:36 - INFO - codeparrot_training - Step 41162: {'lr': 0.00041848065272539765, 'samples': 21075456, 'steps': 41162, 'loss/train': 1.3440266847610474} +03/05/2022 13:51:40 - INFO - codeparrot_training - Step 41163: {'lr': 0.00041847673205439305, 'samples': 21075968, 'steps': 41163, 'loss/train': 1.754974603652954} +03/05/2022 13:51:40 - INFO - codeparrot_training - Skipping example with length 355 (seq_length=1024) +03/05/2022 13:51:45 - INFO - codeparrot_training - Step 41164: {'lr': 0.0004184728113074755, 'samples': 21076480, 'steps': 41164, 'loss/train': 1.8097138404846191} +03/05/2022 13:51:48 - INFO - codeparrot_training - Step 41165: {'lr': 0.00041846889048464665, 'samples': 21076992, 'steps': 41165, 'loss/train': 1.2375239133834839} +03/05/2022 13:51:49 - INFO - codeparrot_training - Skipping example with length 931 (seq_length=1024) +03/05/2022 13:51:53 - INFO - codeparrot_training - Step 41166: {'lr': 0.0004184649695859083, 'samples': 21077504, 'steps': 41166, 'loss/train': 1.6322208642959595} +03/05/2022 13:51:56 - INFO - codeparrot_training - Step 41167: {'lr': 0.00041846104861126233, 'samples': 21078016, 'steps': 41167, 'loss/train': 1.8964475393295288} +03/05/2022 13:51:57 - INFO - codeparrot_training - Skipping example with length 135 (seq_length=1024) +03/05/2022 13:52:02 - INFO - codeparrot_training - Step 41168: {'lr': 0.0004184571275607103, 'samples': 21078528, 'steps': 41168, 'loss/train': 1.865138053894043} +03/05/2022 13:52:05 - INFO - codeparrot_training - Step 41169: {'lr': 0.0004184532064342542, 'samples': 21079040, 'steps': 41169, 'loss/train': 2.323068857192993} +03/05/2022 13:52:06 - INFO - codeparrot_training - Skipping example with length 514 (seq_length=1024) +03/05/2022 13:52:10 - INFO - codeparrot_training - Step 41170: {'lr': 0.0004184492852318956, 'samples': 21079552, 'steps': 41170, 'loss/train': 1.8194564580917358} +03/05/2022 13:52:13 - INFO - codeparrot_training - Step 41171: {'lr': 0.00041844536395363636, 'samples': 21080064, 'steps': 41171, 'loss/train': 1.66019606590271} +03/05/2022 13:52:14 - INFO - codeparrot_training - Skipping example with length 41 (seq_length=1024) +03/05/2022 13:52:19 - INFO - codeparrot_training - Step 41172: {'lr': 0.00041844144259947825, 'samples': 21080576, 'steps': 41172, 'loss/train': 1.712084412574768} +03/05/2022 13:52:22 - INFO - codeparrot_training - Step 41173: {'lr': 0.000418437521169423, 'samples': 21081088, 'steps': 41173, 'loss/train': 1.9518848657608032} +03/05/2022 13:52:23 - INFO - codeparrot_training - Skipping example with length 926 (seq_length=1024) +03/05/2022 13:52:27 - INFO - codeparrot_training - Step 41174: {'lr': 0.0004184335996634725, 'samples': 21081600, 'steps': 41174, 'loss/train': 1.5979403257369995} +03/05/2022 13:52:30 - INFO - codeparrot_training - Step 41175: {'lr': 0.00041842967808162834, 'samples': 21082112, 'steps': 41175, 'loss/train': 1.5912139415740967} +03/05/2022 13:52:31 - INFO - codeparrot_training - Skipping example with length 186 (seq_length=1024) +03/05/2022 13:52:35 - INFO - codeparrot_training - Step 41176: {'lr': 0.0004184257564238924, 'samples': 21082624, 'steps': 41176, 'loss/train': 1.5366384983062744} +03/05/2022 13:52:39 - INFO - codeparrot_training - Step 41177: {'lr': 0.0004184218346902663, 'samples': 21083136, 'steps': 41177, 'loss/train': 2.2308242321014404} +03/05/2022 13:52:39 - INFO - codeparrot_training - Skipping example with length 712 (seq_length=1024) +03/05/2022 13:52:44 - INFO - codeparrot_training - Step 41178: {'lr': 0.00041841791288075203, 'samples': 21083648, 'steps': 41178, 'loss/train': 0.6569976806640625} +03/05/2022 13:52:47 - INFO - codeparrot_training - Step 41179: {'lr': 0.0004184139909953513, 'samples': 21084160, 'steps': 41179, 'loss/train': 2.3749375343322754} +03/05/2022 13:52:48 - INFO - codeparrot_training - Skipping example with length 974 (seq_length=1024) +03/05/2022 13:52:52 - INFO - codeparrot_training - Step 41180: {'lr': 0.0004184100690340657, 'samples': 21084672, 'steps': 41180, 'loss/train': 1.403815507888794} +03/05/2022 13:52:55 - INFO - codeparrot_training - Step 41181: {'lr': 0.00041840614699689715, 'samples': 21085184, 'steps': 41181, 'loss/train': 2.528808832168579} +03/05/2022 13:52:56 - INFO - codeparrot_training - Skipping example with length 447 (seq_length=1024) +03/05/2022 13:53:01 - INFO - codeparrot_training - Step 41182: {'lr': 0.00041840222488384745, 'samples': 21085696, 'steps': 41182, 'loss/train': 1.4024723768234253} +03/05/2022 13:53:04 - INFO - codeparrot_training - Step 41183: {'lr': 0.00041839830269491823, 'samples': 21086208, 'steps': 41183, 'loss/train': 1.2699087858200073} +03/05/2022 13:53:04 - INFO - codeparrot_training - Skipping example with length 317 (seq_length=1024) +03/05/2022 13:53:09 - INFO - codeparrot_training - Step 41184: {'lr': 0.0004183943804301114, 'samples': 21086720, 'steps': 41184, 'loss/train': 1.3773987293243408} +03/05/2022 13:53:12 - INFO - codeparrot_training - Step 41185: {'lr': 0.0004183904580894287, 'samples': 21087232, 'steps': 41185, 'loss/train': 1.5106024742126465} +03/05/2022 13:53:13 - INFO - codeparrot_training - Skipping example with length 142 (seq_length=1024) +03/05/2022 13:53:18 - INFO - codeparrot_training - Step 41186: {'lr': 0.0004183865356728717, 'samples': 21087744, 'steps': 41186, 'loss/train': 1.817452311515808} +03/05/2022 13:53:21 - INFO - codeparrot_training - Step 41187: {'lr': 0.0004183826131804424, 'samples': 21088256, 'steps': 41187, 'loss/train': 1.856398344039917} +03/05/2022 13:53:21 - INFO - codeparrot_training - Skipping example with length 845 (seq_length=1024) +03/05/2022 13:53:26 - INFO - codeparrot_training - Step 41188: {'lr': 0.0004183786906121425, 'samples': 21088768, 'steps': 41188, 'loss/train': 1.7994089126586914} +03/05/2022 13:53:29 - INFO - codeparrot_training - Step 41189: {'lr': 0.0004183747679679738, 'samples': 21089280, 'steps': 41189, 'loss/train': 1.376569390296936} +03/05/2022 13:53:30 - INFO - codeparrot_training - Skipping example with length 716 (seq_length=1024) +03/05/2022 13:53:35 - INFO - codeparrot_training - Step 41190: {'lr': 0.000418370845247938, 'samples': 21089792, 'steps': 41190, 'loss/train': 2.3518338203430176} +03/05/2022 13:53:38 - INFO - codeparrot_training - Step 41191: {'lr': 0.0004183669224520369, 'samples': 21090304, 'steps': 41191, 'loss/train': 1.971714735031128} +03/05/2022 13:53:39 - INFO - codeparrot_training - Skipping example with length 770 (seq_length=1024) +03/05/2022 13:53:43 - INFO - codeparrot_training - Step 41192: {'lr': 0.00041836299958027226, 'samples': 21090816, 'steps': 41192, 'loss/train': 2.3321354389190674} +03/05/2022 13:53:47 - INFO - codeparrot_training - Step 41193: {'lr': 0.00041835907663264585, 'samples': 21091328, 'steps': 41193, 'loss/train': 2.0686402320861816} +03/05/2022 13:53:52 - INFO - codeparrot_training - Step 41194: {'lr': 0.0004183551536091594, 'samples': 21091840, 'steps': 41194, 'loss/train': 1.8655033111572266} +03/05/2022 13:53:55 - INFO - codeparrot_training - Step 41195: {'lr': 0.00041835123050981476, 'samples': 21092352, 'steps': 41195, 'loss/train': 2.6990668773651123} +03/05/2022 13:53:56 - INFO - codeparrot_training - Skipping example with length 666 (seq_length=1024) +03/05/2022 13:54:00 - INFO - codeparrot_training - Step 41196: {'lr': 0.00041834730733461366, 'samples': 21092864, 'steps': 41196, 'loss/train': 1.1371666193008423} +03/05/2022 13:54:04 - INFO - codeparrot_training - Step 41197: {'lr': 0.0004183433840835578, 'samples': 21093376, 'steps': 41197, 'loss/train': 1.8363350629806519} +03/05/2022 13:54:05 - INFO - codeparrot_training - Skipping example with length 919 (seq_length=1024) +03/05/2022 13:54:09 - INFO - codeparrot_training - Step 41198: {'lr': 0.0004183394607566491, 'samples': 21093888, 'steps': 41198, 'loss/train': 1.2701176404953003} +03/05/2022 13:54:12 - INFO - codeparrot_training - Step 41199: {'lr': 0.0004183355373538892, 'samples': 21094400, 'steps': 41199, 'loss/train': 1.422743558883667} +03/05/2022 13:54:13 - INFO - codeparrot_training - Skipping example with length 222 (seq_length=1024) +03/05/2022 13:54:17 - INFO - codeparrot_training - Step 41200: {'lr': 0.00041833161387527985, 'samples': 21094912, 'steps': 41200, 'loss/train': 1.5286613702774048} +03/05/2022 13:54:20 - INFO - codeparrot_training - Step 41201: {'lr': 0.0004183276903208228, 'samples': 21095424, 'steps': 41201, 'loss/train': 1.5519806146621704} +03/05/2022 13:54:22 - INFO - codeparrot_training - Skipping example with length 930 (seq_length=1024) +03/05/2022 13:54:26 - INFO - codeparrot_training - Step 41202: {'lr': 0.0004183237666905201, 'samples': 21095936, 'steps': 41202, 'loss/train': 1.311574935913086} +03/05/2022 13:54:29 - INFO - codeparrot_training - Step 41203: {'lr': 0.0004183198429843732, 'samples': 21096448, 'steps': 41203, 'loss/train': 0.9975860118865967} +03/05/2022 13:54:30 - INFO - codeparrot_training - Skipping example with length 876 (seq_length=1024) +03/05/2022 13:54:34 - INFO - codeparrot_training - Step 41204: {'lr': 0.00041831591920238396, 'samples': 21096960, 'steps': 41204, 'loss/train': 0.8370639681816101} +03/05/2022 13:54:37 - INFO - codeparrot_training - Step 41205: {'lr': 0.0004183119953445542, 'samples': 21097472, 'steps': 41205, 'loss/train': 2.3062281608581543} +03/05/2022 13:54:38 - INFO - codeparrot_training - Skipping example with length 195 (seq_length=1024) +03/05/2022 13:54:43 - INFO - codeparrot_training - Step 41206: {'lr': 0.00041830807141088566, 'samples': 21097984, 'steps': 41206, 'loss/train': 1.3624444007873535} +03/05/2022 13:54:46 - INFO - codeparrot_training - Step 41207: {'lr': 0.0004183041474013801, 'samples': 21098496, 'steps': 41207, 'loss/train': 1.3393625020980835} +03/05/2022 13:54:47 - INFO - codeparrot_training - Skipping example with length 598 (seq_length=1024) +03/05/2022 13:54:51 - INFO - codeparrot_training - Step 41208: {'lr': 0.00041830022331603925, 'samples': 21099008, 'steps': 41208, 'loss/train': 2.300197124481201} +03/05/2022 13:54:54 - INFO - codeparrot_training - Step 41209: {'lr': 0.000418296299154865, 'samples': 21099520, 'steps': 41209, 'loss/train': 2.056607246398926} +03/05/2022 13:54:55 - INFO - codeparrot_training - Skipping example with length 946 (seq_length=1024) +03/05/2022 13:55:00 - INFO - codeparrot_training - Step 41210: {'lr': 0.000418292374917859, 'samples': 21100032, 'steps': 41210, 'loss/train': 1.9059653282165527} +03/05/2022 13:55:03 - INFO - codeparrot_training - Step 41211: {'lr': 0.00041828845060502297, 'samples': 21100544, 'steps': 41211, 'loss/train': 1.9192699193954468} +03/05/2022 13:55:04 - INFO - codeparrot_training - Skipping example with length 914 (seq_length=1024) +03/05/2022 13:55:08 - INFO - codeparrot_training - Step 41212: {'lr': 0.00041828452621635884, 'samples': 21101056, 'steps': 41212, 'loss/train': 1.03080415725708} +03/05/2022 13:55:11 - INFO - codeparrot_training - Step 41213: {'lr': 0.0004182806017518682, 'samples': 21101568, 'steps': 41213, 'loss/train': 0.5803110599517822} +03/05/2022 13:55:13 - INFO - codeparrot_training - Skipping example with length 192 (seq_length=1024) +03/05/2022 13:55:17 - INFO - codeparrot_training - Step 41214: {'lr': 0.00041827667721155303, 'samples': 21102080, 'steps': 41214, 'loss/train': 1.7304335832595825} +03/05/2022 13:55:20 - INFO - codeparrot_training - Step 41215: {'lr': 0.000418272752595415, 'samples': 21102592, 'steps': 41215, 'loss/train': 1.7343441247940063} +03/05/2022 13:55:21 - INFO - codeparrot_training - Skipping example with length 691 (seq_length=1024) +03/05/2022 13:55:25 - INFO - codeparrot_training - Step 41216: {'lr': 0.00041826882790345577, 'samples': 21103104, 'steps': 41216, 'loss/train': 2.00553822517395} +03/05/2022 13:55:28 - INFO - codeparrot_training - Step 41217: {'lr': 0.00041826490313567725, 'samples': 21103616, 'steps': 41217, 'loss/train': 2.670588731765747} +03/05/2022 13:55:30 - INFO - codeparrot_training - Skipping example with length 895 (seq_length=1024) +03/05/2022 13:55:34 - INFO - codeparrot_training - Step 41218: {'lr': 0.0004182609782920812, 'samples': 21104128, 'steps': 41218, 'loss/train': 0.8838427662849426} +03/05/2022 13:55:37 - INFO - codeparrot_training - Step 41219: {'lr': 0.0004182570533726693, 'samples': 21104640, 'steps': 41219, 'loss/train': 1.7907472848892212} +03/05/2022 13:55:39 - INFO - codeparrot_training - Skipping example with length 96 (seq_length=1024) +03/05/2022 13:55:42 - INFO - codeparrot_training - Step 41220: {'lr': 0.00041825312837744333, 'samples': 21105152, 'steps': 41220, 'loss/train': 1.9209403991699219} +03/05/2022 13:55:45 - INFO - codeparrot_training - Step 41221: {'lr': 0.00041824920330640517, 'samples': 21105664, 'steps': 41221, 'loss/train': 2.2037532329559326} +03/05/2022 13:55:47 - INFO - codeparrot_training - Skipping example with length 175 (seq_length=1024) +03/05/2022 13:55:51 - INFO - codeparrot_training - Step 41222: {'lr': 0.0004182452781595565, 'samples': 21106176, 'steps': 41222, 'loss/train': 2.070722818374634} +03/05/2022 13:55:54 - INFO - codeparrot_training - Step 41223: {'lr': 0.0004182413529368991, 'samples': 21106688, 'steps': 41223, 'loss/train': 1.9367988109588623} +03/05/2022 13:55:56 - INFO - codeparrot_training - Skipping example with length 830 (seq_length=1024) +03/05/2022 13:55:59 - INFO - codeparrot_training - Step 41224: {'lr': 0.0004182374276384347, 'samples': 21107200, 'steps': 41224, 'loss/train': 1.5406345129013062} +03/05/2022 13:56:02 - INFO - codeparrot_training - Step 41225: {'lr': 0.0004182335022641651, 'samples': 21107712, 'steps': 41225, 'loss/train': 0.8368693590164185} +03/05/2022 13:56:05 - INFO - codeparrot_training - Skipping example with length 524 (seq_length=1024) +03/05/2022 13:56:08 - INFO - codeparrot_training - Step 41226: {'lr': 0.00041822957681409215, 'samples': 21108224, 'steps': 41226, 'loss/train': 2.128279685974121} +03/05/2022 13:56:11 - INFO - codeparrot_training - Step 41227: {'lr': 0.00041822565128821757, 'samples': 21108736, 'steps': 41227, 'loss/train': 1.3355735540390015} +03/05/2022 13:56:13 - INFO - codeparrot_training - Skipping example with length 55 (seq_length=1024) +03/05/2022 13:56:16 - INFO - codeparrot_training - Step 41228: {'lr': 0.00041822172568654306, 'samples': 21109248, 'steps': 41228, 'loss/train': 1.3409826755523682} +03/05/2022 13:56:19 - INFO - codeparrot_training - Step 41229: {'lr': 0.0004182178000090704, 'samples': 21109760, 'steps': 41229, 'loss/train': 1.667718529701233} +03/05/2022 13:56:22 - INFO - codeparrot_training - Skipping example with length 432 (seq_length=1024) +03/05/2022 13:56:25 - INFO - codeparrot_training - Step 41230: {'lr': 0.0004182138742558015, 'samples': 21110272, 'steps': 41230, 'loss/train': 1.5114679336547852} +03/05/2022 13:56:28 - INFO - codeparrot_training - Step 41231: {'lr': 0.00041820994842673787, 'samples': 21110784, 'steps': 41231, 'loss/train': 1.5509049892425537} +03/05/2022 13:56:30 - INFO - codeparrot_training - Skipping example with length 277 (seq_length=1024) +03/05/2022 13:56:33 - INFO - codeparrot_training - Step 41232: {'lr': 0.00041820602252188156, 'samples': 21111296, 'steps': 41232, 'loss/train': 1.3173776865005493} +03/05/2022 13:56:36 - INFO - codeparrot_training - Step 41233: {'lr': 0.00041820209654123416, 'samples': 21111808, 'steps': 41233, 'loss/train': 2.1119136810302734} +03/05/2022 13:56:39 - INFO - codeparrot_training - Skipping example with length 955 (seq_length=1024) +03/05/2022 13:56:42 - INFO - codeparrot_training - Step 41234: {'lr': 0.00041819817048479745, 'samples': 21112320, 'steps': 41234, 'loss/train': 2.3715312480926514} +03/05/2022 13:56:45 - INFO - codeparrot_training - Step 41235: {'lr': 0.0004181942443525734, 'samples': 21112832, 'steps': 41235, 'loss/train': 0.9136230945587158} +03/05/2022 13:56:47 - INFO - codeparrot_training - Skipping example with length 947 (seq_length=1024) +03/05/2022 13:56:50 - INFO - codeparrot_training - Step 41236: {'lr': 0.00041819031814456346, 'samples': 21113344, 'steps': 41236, 'loss/train': 0.9060670733451843} +03/05/2022 13:56:53 - INFO - codeparrot_training - Step 41237: {'lr': 0.0004181863918607696, 'samples': 21113856, 'steps': 41237, 'loss/train': 1.602925181388855} +03/05/2022 13:56:55 - INFO - codeparrot_training - Skipping example with length 557 (seq_length=1024) +03/05/2022 13:56:59 - INFO - codeparrot_training - Step 41238: {'lr': 0.00041818246550119354, 'samples': 21114368, 'steps': 41238, 'loss/train': 1.183271050453186} +03/05/2022 13:57:02 - INFO - codeparrot_training - Step 41239: {'lr': 0.00041817853906583706, 'samples': 21114880, 'steps': 41239, 'loss/train': 1.553720474243164} +03/05/2022 13:57:03 - INFO - codeparrot_training - Skipping example with length 119 (seq_length=1024) +03/05/2022 13:57:07 - INFO - codeparrot_training - Step 41240: {'lr': 0.000418174612554702, 'samples': 21115392, 'steps': 41240, 'loss/train': 1.6230710744857788} +03/05/2022 13:57:10 - INFO - codeparrot_training - Step 41241: {'lr': 0.00041817068596778994, 'samples': 21115904, 'steps': 41241, 'loss/train': 1.4995888471603394} +03/05/2022 13:57:12 - INFO - codeparrot_training - Skipping example with length 429 (seq_length=1024) +03/05/2022 13:57:15 - INFO - codeparrot_training - Step 41242: {'lr': 0.0004181667593051028, 'samples': 21116416, 'steps': 41242, 'loss/train': 2.0025620460510254} +03/05/2022 13:57:19 - INFO - codeparrot_training - Step 41243: {'lr': 0.0004181628325666424, 'samples': 21116928, 'steps': 41243, 'loss/train': 0.7987639904022217} +03/05/2022 13:57:21 - INFO - codeparrot_training - Skipping example with length 27 (seq_length=1024) +03/05/2022 13:57:24 - INFO - codeparrot_training - Step 41244: {'lr': 0.0004181589057524103, 'samples': 21117440, 'steps': 41244, 'loss/train': 1.9542142152786255} +03/05/2022 13:57:27 - INFO - codeparrot_training - Step 41245: {'lr': 0.0004181549788624085, 'samples': 21117952, 'steps': 41245, 'loss/train': 1.5663301944732666} +03/05/2022 13:57:29 - INFO - codeparrot_training - Skipping example with length 97 (seq_length=1024) +03/05/2022 13:57:32 - INFO - codeparrot_training - Step 41246: {'lr': 0.0004181510518966386, 'samples': 21118464, 'steps': 41246, 'loss/train': 1.8118267059326172} +03/05/2022 13:57:36 - INFO - codeparrot_training - Step 41247: {'lr': 0.00041814712485510245, 'samples': 21118976, 'steps': 41247, 'loss/train': 1.9556540250778198} +03/05/2022 13:57:37 - INFO - codeparrot_training - Skipping example with length 522 (seq_length=1024) +03/05/2022 13:57:41 - INFO - codeparrot_training - Step 41248: {'lr': 0.0004181431977378017, 'samples': 21119488, 'steps': 41248, 'loss/train': 1.8735289573669434} +03/05/2022 13:57:44 - INFO - codeparrot_training - Step 41249: {'lr': 0.00041813927054473835, 'samples': 21120000, 'steps': 41249, 'loss/train': 2.5114214420318604} +03/05/2022 13:57:46 - INFO - codeparrot_training - Skipping example with length 271 (seq_length=1024) +03/05/2022 13:57:50 - INFO - codeparrot_training - Step 41250: {'lr': 0.000418135343275914, 'samples': 21120512, 'steps': 41250, 'loss/train': 1.3334943056106567} +03/05/2022 13:57:53 - INFO - codeparrot_training - Step 41251: {'lr': 0.0004181314159313305, 'samples': 21121024, 'steps': 41251, 'loss/train': 2.671445846557617} +03/05/2022 13:57:56 - INFO - codeparrot_training - Step 41252: {'lr': 0.0004181274885109895, 'samples': 21121536, 'steps': 41252, 'loss/train': 1.1606566905975342} +03/05/2022 13:57:56 - INFO - codeparrot_training - Skipping example with length 258 (seq_length=1024) +03/05/2022 13:58:01 - INFO - codeparrot_training - Step 41253: {'lr': 0.0004181235610148929, 'samples': 21122048, 'steps': 41253, 'loss/train': 1.0835375785827637} +03/05/2022 13:58:05 - INFO - codeparrot_training - Step 41254: {'lr': 0.0004181196334430424, 'samples': 21122560, 'steps': 41254, 'loss/train': 1.1294230222702026} +03/05/2022 13:58:05 - INFO - codeparrot_training - Skipping example with length 796 (seq_length=1024) +03/05/2022 13:58:10 - INFO - codeparrot_training - Step 41255: {'lr': 0.00041811570579543977, 'samples': 21123072, 'steps': 41255, 'loss/train': 1.5641123056411743} +03/05/2022 13:58:13 - INFO - codeparrot_training - Step 41256: {'lr': 0.0004181117780720868, 'samples': 21123584, 'steps': 41256, 'loss/train': 1.7094708681106567} +03/05/2022 13:58:13 - INFO - codeparrot_training - Skipping example with length 958 (seq_length=1024) +03/05/2022 13:58:18 - INFO - codeparrot_training - Step 41257: {'lr': 0.00041810785027298524, 'samples': 21124096, 'steps': 41257, 'loss/train': 1.301736831665039} +03/05/2022 13:58:22 - INFO - codeparrot_training - Step 41258: {'lr': 0.00041810392239813695, 'samples': 21124608, 'steps': 41258, 'loss/train': 1.2787635326385498} +03/05/2022 13:58:22 - INFO - codeparrot_training - Skipping example with length 449 (seq_length=1024) +03/05/2022 13:58:27 - INFO - codeparrot_training - Step 41259: {'lr': 0.00041809999444754353, 'samples': 21125120, 'steps': 41259, 'loss/train': 1.453263759613037} +03/05/2022 13:58:30 - INFO - codeparrot_training - Step 41260: {'lr': 0.0004180960664212069, 'samples': 21125632, 'steps': 41260, 'loss/train': 1.7683976888656616} +03/05/2022 13:58:30 - INFO - codeparrot_training - Skipping example with length 172 (seq_length=1024) +03/05/2022 13:58:35 - INFO - codeparrot_training - Step 41261: {'lr': 0.00041809213831912884, 'samples': 21126144, 'steps': 41261, 'loss/train': 1.874083399772644} +03/05/2022 13:58:39 - INFO - codeparrot_training - Step 41262: {'lr': 0.0004180882101413109, 'samples': 21126656, 'steps': 41262, 'loss/train': 0.9839652180671692} +03/05/2022 13:58:39 - INFO - codeparrot_training - Skipping example with length 444 (seq_length=1024) +03/05/2022 13:58:44 - INFO - codeparrot_training - Step 41263: {'lr': 0.00041808428188775515, 'samples': 21127168, 'steps': 41263, 'loss/train': 2.279764175415039} +03/05/2022 13:58:47 - INFO - codeparrot_training - Skipping example with length 682 (seq_length=1024) +03/05/2022 13:58:49 - INFO - codeparrot_training - Step 41264: {'lr': 0.0004180803535584632, 'samples': 21127680, 'steps': 41264, 'loss/train': 1.8770774602890015} +03/05/2022 13:58:52 - INFO - codeparrot_training - Step 41265: {'lr': 0.0004180764251534368, 'samples': 21128192, 'steps': 41265, 'loss/train': 1.9482192993164062} +03/05/2022 13:58:56 - INFO - codeparrot_training - Step 41266: {'lr': 0.0004180724966726778, 'samples': 21128704, 'steps': 41266, 'loss/train': 0.642632007598877} +03/05/2022 13:58:56 - INFO - codeparrot_training - Skipping example with length 875 (seq_length=1024) +03/05/2022 13:59:01 - INFO - codeparrot_training - Step 41267: {'lr': 0.00041806856811618784, 'samples': 21129216, 'steps': 41267, 'loss/train': 2.1821072101593018} +03/05/2022 13:59:04 - INFO - codeparrot_training - Step 41268: {'lr': 0.00041806463948396876, 'samples': 21129728, 'steps': 41268, 'loss/train': 0.2621918320655823} +03/05/2022 13:59:04 - INFO - codeparrot_training - Skipping example with length 971 (seq_length=1024) +03/05/2022 13:59:10 - INFO - codeparrot_training - Step 41269: {'lr': 0.0004180607107760225, 'samples': 21130240, 'steps': 41269, 'loss/train': 2.301719903945923} +03/05/2022 13:59:13 - INFO - codeparrot_training - Step 41270: {'lr': 0.0004180567819923505, 'samples': 21130752, 'steps': 41270, 'loss/train': 0.679695188999176} +03/05/2022 13:59:13 - INFO - codeparrot_training - Skipping example with length 752 (seq_length=1024) +03/05/2022 13:59:18 - INFO - codeparrot_training - Step 41271: {'lr': 0.0004180528531329548, 'samples': 21131264, 'steps': 41271, 'loss/train': 0.995402991771698} +03/05/2022 13:59:21 - INFO - codeparrot_training - Step 41272: {'lr': 0.00041804892419783715, 'samples': 21131776, 'steps': 41272, 'loss/train': 1.6979581117630005} +03/05/2022 13:59:21 - INFO - codeparrot_training - Skipping example with length 355 (seq_length=1024) +03/05/2022 13:59:27 - INFO - codeparrot_training - Step 41273: {'lr': 0.0004180449951869991, 'samples': 21132288, 'steps': 41273, 'loss/train': 1.1068729162216187} +03/05/2022 13:59:30 - INFO - codeparrot_training - Step 41274: {'lr': 0.00041804106610044263, 'samples': 21132800, 'steps': 41274, 'loss/train': 2.1111841201782227} +03/05/2022 13:59:30 - INFO - codeparrot_training - Skipping example with length 861 (seq_length=1024) +03/05/2022 13:59:35 - INFO - codeparrot_training - Step 41275: {'lr': 0.00041803713693816947, 'samples': 21133312, 'steps': 41275, 'loss/train': 1.8688689470291138} +03/05/2022 13:59:38 - INFO - codeparrot_training - Step 41276: {'lr': 0.0004180332077001814, 'samples': 21133824, 'steps': 41276, 'loss/train': 1.699403166770935} +03/05/2022 13:59:39 - INFO - codeparrot_training - Skipping example with length 881 (seq_length=1024) +03/05/2022 13:59:44 - INFO - codeparrot_training - Step 41277: {'lr': 0.0004180292783864801, 'samples': 21134336, 'steps': 41277, 'loss/train': 1.6565762758255005} +03/05/2022 13:59:47 - INFO - codeparrot_training - Step 41278: {'lr': 0.00041802534899706734, 'samples': 21134848, 'steps': 41278, 'loss/train': 1.1082160472869873} +03/05/2022 13:59:47 - INFO - codeparrot_training - Skipping example with length 68 (seq_length=1024) +03/05/2022 13:59:52 - INFO - codeparrot_training - Step 41279: {'lr': 0.0004180214195319451, 'samples': 21135360, 'steps': 41279, 'loss/train': 2.430673599243164} +03/05/2022 13:59:55 - INFO - codeparrot_training - Step 41280: {'lr': 0.00041801748999111487, 'samples': 21135872, 'steps': 41280, 'loss/train': 2.0502943992614746} +03/05/2022 13:59:56 - INFO - codeparrot_training - Skipping example with length 136 (seq_length=1024) +03/05/2022 14:00:01 - INFO - codeparrot_training - Step 41281: {'lr': 0.0004180135603745786, 'samples': 21136384, 'steps': 41281, 'loss/train': 1.9685955047607422} +03/05/2022 14:00:04 - INFO - codeparrot_training - Step 41282: {'lr': 0.000418009630682338, 'samples': 21136896, 'steps': 41282, 'loss/train': 2.05086612701416} +03/05/2022 14:00:04 - INFO - codeparrot_training - Skipping example with length 88 (seq_length=1024) +03/05/2022 14:00:09 - INFO - codeparrot_training - Step 41283: {'lr': 0.00041800570091439493, 'samples': 21137408, 'steps': 41283, 'loss/train': 1.986607551574707} +03/05/2022 14:00:12 - INFO - codeparrot_training - Skipping example with length 196 (seq_length=1024) +03/05/2022 14:00:15 - INFO - codeparrot_training - Step 41284: {'lr': 0.000418001771070751, 'samples': 21137920, 'steps': 41284, 'loss/train': 1.4903050661087036} +03/05/2022 14:00:18 - INFO - codeparrot_training - Step 41285: {'lr': 0.0004179978411514081, 'samples': 21138432, 'steps': 41285, 'loss/train': 1.240086317062378} +03/05/2022 14:00:21 - INFO - codeparrot_training - Step 41286: {'lr': 0.000417993911156368, 'samples': 21138944, 'steps': 41286, 'loss/train': 2.352900743484497} +03/05/2022 14:00:21 - INFO - codeparrot_training - Skipping example with length 330 (seq_length=1024) +03/05/2022 14:00:26 - INFO - codeparrot_training - Step 41287: {'lr': 0.00041798998108563234, 'samples': 21139456, 'steps': 41287, 'loss/train': 0.9419495463371277} +03/05/2022 14:00:29 - INFO - codeparrot_training - Step 41288: {'lr': 0.00041798605093920307, 'samples': 21139968, 'steps': 41288, 'loss/train': 1.733852744102478} +03/05/2022 14:00:30 - INFO - codeparrot_training - Skipping example with length 448 (seq_length=1024) +03/05/2022 14:00:35 - INFO - codeparrot_training - Step 41289: {'lr': 0.00041798212071708185, 'samples': 21140480, 'steps': 41289, 'loss/train': 1.440408706665039} +03/05/2022 14:00:38 - INFO - codeparrot_training - Step 41290: {'lr': 0.0004179781904192704, 'samples': 21140992, 'steps': 41290, 'loss/train': 1.816108226776123} +03/05/2022 14:00:38 - INFO - codeparrot_training - Skipping example with length 632 (seq_length=1024) +03/05/2022 14:00:43 - INFO - codeparrot_training - Step 41291: {'lr': 0.00041797426004577066, 'samples': 21141504, 'steps': 41291, 'loss/train': 1.5101360082626343} +03/05/2022 14:00:46 - INFO - codeparrot_training - Skipping example with length 565 (seq_length=1024) +03/05/2022 14:00:48 - INFO - codeparrot_training - Step 41292: {'lr': 0.00041797032959658433, 'samples': 21142016, 'steps': 41292, 'loss/train': 1.3297199010849} +03/05/2022 14:00:52 - INFO - codeparrot_training - Step 41293: {'lr': 0.0004179663990717131, 'samples': 21142528, 'steps': 41293, 'loss/train': 1.9037166833877563} +03/05/2022 14:00:55 - INFO - codeparrot_training - Step 41294: {'lr': 0.0004179624684711588, 'samples': 21143040, 'steps': 41294, 'loss/train': 1.7262598276138306} +03/05/2022 14:00:55 - INFO - codeparrot_training - Skipping example with length 477 (seq_length=1024) +03/05/2022 14:01:00 - INFO - codeparrot_training - Step 41295: {'lr': 0.0004179585377949232, 'samples': 21143552, 'steps': 41295, 'loss/train': 1.7740334272384644} +03/05/2022 14:01:03 - INFO - codeparrot_training - Step 41296: {'lr': 0.0004179546070430082, 'samples': 21144064, 'steps': 41296, 'loss/train': 1.5776933431625366} +03/05/2022 14:01:03 - INFO - codeparrot_training - Skipping example with length 554 (seq_length=1024) +03/05/2022 14:01:09 - INFO - codeparrot_training - Step 41297: {'lr': 0.0004179506762154153, 'samples': 21144576, 'steps': 41297, 'loss/train': 2.5094475746154785} +03/05/2022 14:01:12 - INFO - codeparrot_training - Step 41298: {'lr': 0.0004179467453121465, 'samples': 21145088, 'steps': 41298, 'loss/train': 0.9548091292381287} +03/05/2022 14:01:12 - INFO - codeparrot_training - Skipping example with length 412 (seq_length=1024) +03/05/2022 14:01:17 - INFO - codeparrot_training - Step 41299: {'lr': 0.0004179428143332035, 'samples': 21145600, 'steps': 41299, 'loss/train': 1.590989112854004} +03/05/2022 14:01:20 - INFO - codeparrot_training - Step 41300: {'lr': 0.000417938883278588, 'samples': 21146112, 'steps': 41300, 'loss/train': 1.2943713665008545} +03/05/2022 14:01:20 - INFO - codeparrot_training - Skipping example with length 921 (seq_length=1024) +03/05/2022 14:01:25 - INFO - codeparrot_training - Step 41301: {'lr': 0.0004179349521483018, 'samples': 21146624, 'steps': 41301, 'loss/train': 0.7039263844490051} +03/05/2022 14:01:29 - INFO - codeparrot_training - Step 41302: {'lr': 0.00041793102094234673, 'samples': 21147136, 'steps': 41302, 'loss/train': 1.7996389865875244} +03/05/2022 14:01:29 - INFO - codeparrot_training - Skipping example with length 605 (seq_length=1024) +03/05/2022 14:01:34 - INFO - codeparrot_training - Step 41303: {'lr': 0.00041792708966072455, 'samples': 21147648, 'steps': 41303, 'loss/train': 1.233795166015625} +03/05/2022 14:01:37 - INFO - codeparrot_training - Step 41304: {'lr': 0.0004179231583034371, 'samples': 21148160, 'steps': 41304, 'loss/train': 1.6007835865020752} +03/05/2022 14:01:37 - INFO - codeparrot_training - Skipping example with length 896 (seq_length=1024) +03/05/2022 14:01:42 - INFO - codeparrot_training - Step 41305: {'lr': 0.0004179192268704859, 'samples': 21148672, 'steps': 41305, 'loss/train': 1.6707309484481812} +03/05/2022 14:01:46 - INFO - codeparrot_training - Step 41306: {'lr': 0.000417915295361873, 'samples': 21149184, 'steps': 41306, 'loss/train': 1.8995290994644165} +03/05/2022 14:01:46 - INFO - codeparrot_training - Skipping example with length 12 (seq_length=1024) +03/05/2022 14:01:51 - INFO - codeparrot_training - Step 41307: {'lr': 0.0004179113637776, 'samples': 21149696, 'steps': 41307, 'loss/train': 1.7555502653121948} +03/05/2022 14:01:54 - INFO - codeparrot_training - Step 41308: {'lr': 0.0004179074321176688, 'samples': 21150208, 'steps': 41308, 'loss/train': 1.8940308094024658} +03/05/2022 14:01:54 - INFO - codeparrot_training - Skipping example with length 53 (seq_length=1024) +03/05/2022 14:01:59 - INFO - codeparrot_training - Step 41309: {'lr': 0.000417903500382081, 'samples': 21150720, 'steps': 41309, 'loss/train': 1.9318616390228271} +03/05/2022 14:02:02 - INFO - codeparrot_training - Step 41310: {'lr': 0.00041789956857083853, 'samples': 21151232, 'steps': 41310, 'loss/train': 1.3472323417663574} +03/05/2022 14:02:03 - INFO - codeparrot_training - Skipping example with length 428 (seq_length=1024) +03/05/2022 14:02:08 - INFO - codeparrot_training - Step 41311: {'lr': 0.00041789563668394314, 'samples': 21151744, 'steps': 41311, 'loss/train': 1.6799806356430054} +03/05/2022 14:02:12 - INFO - codeparrot_training - Step 41312: {'lr': 0.0004178917047213965, 'samples': 21152256, 'steps': 41312, 'loss/train': 1.6138566732406616} +03/05/2022 14:02:14 - INFO - codeparrot_training - Skipping example with length 63 (seq_length=1024) +03/05/2022 14:02:17 - INFO - codeparrot_training - Step 41313: {'lr': 0.00041788777268320055, 'samples': 21152768, 'steps': 41313, 'loss/train': 1.8129351139068604} +03/05/2022 14:02:20 - INFO - codeparrot_training - Step 41314: {'lr': 0.00041788384056935693, 'samples': 21153280, 'steps': 41314, 'loss/train': 1.854110836982727} +03/05/2022 14:02:22 - INFO - codeparrot_training - Skipping example with length 342 (seq_length=1024) +03/05/2022 14:02:25 - INFO - codeparrot_training - Step 41315: {'lr': 0.0004178799083798673, 'samples': 21153792, 'steps': 41315, 'loss/train': 1.7234530448913574} +03/05/2022 14:02:28 - INFO - codeparrot_training - Step 41316: {'lr': 0.00041787597611473375, 'samples': 21154304, 'steps': 41316, 'loss/train': 2.0414717197418213} +03/05/2022 14:02:31 - INFO - codeparrot_training - Skipping example with length 161 (seq_length=1024) +03/05/2022 14:02:34 - INFO - codeparrot_training - Step 41317: {'lr': 0.00041787204377395783, 'samples': 21154816, 'steps': 41317, 'loss/train': 1.5770747661590576} +03/05/2022 14:02:37 - INFO - codeparrot_training - Step 41318: {'lr': 0.0004178681113575413, 'samples': 21155328, 'steps': 41318, 'loss/train': 2.080266237258911} +03/05/2022 14:02:40 - INFO - codeparrot_training - Skipping example with length 609 (seq_length=1024) +03/05/2022 14:02:42 - INFO - codeparrot_training - Step 41319: {'lr': 0.00041786417886548606, 'samples': 21155840, 'steps': 41319, 'loss/train': 2.4362993240356445} +03/05/2022 14:02:45 - INFO - codeparrot_training - Step 41320: {'lr': 0.0004178602462977937, 'samples': 21156352, 'steps': 41320, 'loss/train': 1.4934478998184204} +03/05/2022 14:02:48 - INFO - codeparrot_training - Skipping example with length 1002 (seq_length=1024) +03/05/2022 14:02:51 - INFO - codeparrot_training - Step 41321: {'lr': 0.0004178563136544662, 'samples': 21156864, 'steps': 41321, 'loss/train': 1.558426856994629} +03/05/2022 14:02:54 - INFO - codeparrot_training - Step 41322: {'lr': 0.0004178523809355053, 'samples': 21157376, 'steps': 41322, 'loss/train': 1.611952304840088} +03/05/2022 14:02:57 - INFO - codeparrot_training - Skipping example with length 289 (seq_length=1024) +03/05/2022 14:02:59 - INFO - codeparrot_training - Step 41323: {'lr': 0.00041784844814091263, 'samples': 21157888, 'steps': 41323, 'loss/train': 1.8530995845794678} +03/05/2022 14:03:02 - INFO - codeparrot_training - Step 41324: {'lr': 0.00041784451527069, 'samples': 21158400, 'steps': 41324, 'loss/train': 1.7262744903564453} +03/05/2022 14:03:05 - INFO - codeparrot_training - Skipping example with length 124 (seq_length=1024) +03/05/2022 14:03:08 - INFO - codeparrot_training - Step 41325: {'lr': 0.0004178405823248392, 'samples': 21158912, 'steps': 41325, 'loss/train': 0.9384953379631042} +03/05/2022 14:03:11 - INFO - codeparrot_training - Step 41326: {'lr': 0.0004178366493033621, 'samples': 21159424, 'steps': 41326, 'loss/train': 0.8365280628204346} +03/05/2022 14:03:13 - INFO - codeparrot_training - Skipping example with length 593 (seq_length=1024) +03/05/2022 14:03:16 - INFO - codeparrot_training - Step 41327: {'lr': 0.0004178327162062604, 'samples': 21159936, 'steps': 41327, 'loss/train': 1.593304991722107} +03/05/2022 14:03:19 - INFO - codeparrot_training - Step 41328: {'lr': 0.00041782878303353577, 'samples': 21160448, 'steps': 41328, 'loss/train': 1.928503155708313} +03/05/2022 14:03:21 - INFO - codeparrot_training - Skipping example with length 481 (seq_length=1024) +03/05/2022 14:03:25 - INFO - codeparrot_training - Step 41329: {'lr': 0.0004178248497851902, 'samples': 21160960, 'steps': 41329, 'loss/train': 1.412644624710083} +03/05/2022 14:03:28 - INFO - codeparrot_training - Step 41330: {'lr': 0.00041782091646122533, 'samples': 21161472, 'steps': 41330, 'loss/train': 2.297308921813965} +03/05/2022 14:03:30 - INFO - codeparrot_training - Skipping example with length 10 (seq_length=1024) +03/05/2022 14:03:33 - INFO - codeparrot_training - Step 41331: {'lr': 0.00041781698306164283, 'samples': 21161984, 'steps': 41331, 'loss/train': 1.157702088356018} +03/05/2022 14:03:36 - INFO - codeparrot_training - Step 41332: {'lr': 0.0004178130495864447, 'samples': 21162496, 'steps': 41332, 'loss/train': 0.6593053340911865} +03/05/2022 14:03:38 - INFO - codeparrot_training - Skipping example with length 543 (seq_length=1024) +03/05/2022 14:03:42 - INFO - codeparrot_training - Step 41333: {'lr': 0.00041780911603563254, 'samples': 21163008, 'steps': 41333, 'loss/train': 1.9654645919799805} +03/05/2022 14:03:45 - INFO - codeparrot_training - Step 41334: {'lr': 0.00041780518240920817, 'samples': 21163520, 'steps': 41334, 'loss/train': 1.7016255855560303} +03/05/2022 14:03:47 - INFO - codeparrot_training - Skipping example with length 339 (seq_length=1024) +03/05/2022 14:03:50 - INFO - codeparrot_training - Step 41335: {'lr': 0.0004178012487071734, 'samples': 21164032, 'steps': 41335, 'loss/train': 1.8197054862976074} +03/05/2022 14:03:53 - INFO - codeparrot_training - Step 41336: {'lr': 0.00041779731492953, 'samples': 21164544, 'steps': 41336, 'loss/train': 1.7972651720046997} +03/05/2022 14:03:56 - INFO - codeparrot_training - Skipping example with length 626 (seq_length=1024) +03/05/2022 14:03:59 - INFO - codeparrot_training - Step 41337: {'lr': 0.0004177933810762797, 'samples': 21165056, 'steps': 41337, 'loss/train': 1.511178970336914} +03/05/2022 14:04:02 - INFO - codeparrot_training - Step 41338: {'lr': 0.00041778944714742435, 'samples': 21165568, 'steps': 41338, 'loss/train': 1.0209686756134033} +03/05/2022 14:04:04 - INFO - codeparrot_training - Skipping example with length 290 (seq_length=1024) +03/05/2022 14:04:07 - INFO - codeparrot_training - Step 41339: {'lr': 0.00041778551314296556, 'samples': 21166080, 'steps': 41339, 'loss/train': 1.4412848949432373} +03/05/2022 14:04:10 - INFO - codeparrot_training - Step 41340: {'lr': 0.00041778157906290525, 'samples': 21166592, 'steps': 41340, 'loss/train': 1.6467466354370117} +03/05/2022 14:04:13 - INFO - codeparrot_training - Skipping example with length 189 (seq_length=1024) +03/05/2022 14:04:16 - INFO - codeparrot_training - Step 41341: {'lr': 0.00041777764490724515, 'samples': 21167104, 'steps': 41341, 'loss/train': 1.8432217836380005} +03/05/2022 14:04:19 - INFO - codeparrot_training - Step 41342: {'lr': 0.00041777371067598705, 'samples': 21167616, 'steps': 41342, 'loss/train': 1.842079758644104} +03/05/2022 14:04:21 - INFO - codeparrot_training - Skipping example with length 649 (seq_length=1024) +03/05/2022 14:04:24 - INFO - codeparrot_training - Step 41343: {'lr': 0.00041776977636913274, 'samples': 21168128, 'steps': 41343, 'loss/train': 1.6550568342208862} +03/05/2022 14:04:27 - INFO - codeparrot_training - Step 41344: {'lr': 0.0004177658419866839, 'samples': 21168640, 'steps': 41344, 'loss/train': 1.43836510181427} +03/05/2022 14:04:30 - INFO - codeparrot_training - Skipping example with length 406 (seq_length=1024) +03/05/2022 14:04:32 - INFO - codeparrot_training - Step 41345: {'lr': 0.0004177619075286424, 'samples': 21169152, 'steps': 41345, 'loss/train': 2.48230242729187} +03/05/2022 14:04:36 - INFO - codeparrot_training - Step 41346: {'lr': 0.00041775797299500997, 'samples': 21169664, 'steps': 41346, 'loss/train': 1.5289934873580933} +03/05/2022 14:04:38 - INFO - codeparrot_training - Skipping example with length 182 (seq_length=1024) +03/05/2022 14:04:41 - INFO - codeparrot_training - Step 41347: {'lr': 0.0004177540383857883, 'samples': 21170176, 'steps': 41347, 'loss/train': 1.085195541381836} +03/05/2022 14:04:44 - INFO - codeparrot_training - Step 41348: {'lr': 0.0004177501037009793, 'samples': 21170688, 'steps': 41348, 'loss/train': 1.1430209875106812} +03/05/2022 14:04:47 - INFO - codeparrot_training - Skipping example with length 594 (seq_length=1024) +03/05/2022 14:04:49 - INFO - codeparrot_training - Step 41349: {'lr': 0.0004177461689405847, 'samples': 21171200, 'steps': 41349, 'loss/train': 1.6293845176696777} +03/05/2022 14:04:53 - INFO - codeparrot_training - Step 41350: {'lr': 0.00041774223410460633, 'samples': 21171712, 'steps': 41350, 'loss/train': 1.8101744651794434} +03/05/2022 14:04:55 - INFO - codeparrot_training - Skipping example with length 455 (seq_length=1024) +03/05/2022 14:04:58 - INFO - codeparrot_training - Step 41351: {'lr': 0.00041773829919304584, 'samples': 21172224, 'steps': 41351, 'loss/train': 1.8233696222305298} +03/05/2022 14:05:01 - INFO - codeparrot_training - Step 41352: {'lr': 0.000417734364205905, 'samples': 21172736, 'steps': 41352, 'loss/train': 1.3833693265914917} +03/05/2022 14:05:04 - INFO - codeparrot_training - Step 41353: {'lr': 0.0004177304291431857, 'samples': 21173248, 'steps': 41353, 'loss/train': 1.973091721534729} +03/05/2022 14:05:05 - INFO - codeparrot_training - Skipping example with length 946 (seq_length=1024) +03/05/2022 14:05:10 - INFO - codeparrot_training - Step 41354: {'lr': 0.00041772649400488967, 'samples': 21173760, 'steps': 41354, 'loss/train': 0.7227315306663513} +03/05/2022 14:05:13 - INFO - codeparrot_training - Step 41355: {'lr': 0.0004177225587910186, 'samples': 21174272, 'steps': 41355, 'loss/train': 1.5179675817489624} +03/05/2022 14:05:13 - INFO - codeparrot_training - Skipping example with length 328 (seq_length=1024) +03/05/2022 14:05:18 - INFO - codeparrot_training - Step 41356: {'lr': 0.0004177186235015744, 'samples': 21174784, 'steps': 41356, 'loss/train': 2.006666898727417} +03/05/2022 14:05:21 - INFO - codeparrot_training - Step 41357: {'lr': 0.0004177146881365588, 'samples': 21175296, 'steps': 41357, 'loss/train': 1.361484169960022} +03/05/2022 14:05:22 - INFO - codeparrot_training - Skipping example with length 364 (seq_length=1024) +03/05/2022 14:05:27 - INFO - codeparrot_training - Step 41358: {'lr': 0.00041771075269597354, 'samples': 21175808, 'steps': 41358, 'loss/train': 0.18406705558300018} +03/05/2022 14:05:30 - INFO - codeparrot_training - Step 41359: {'lr': 0.0004177068171798204, 'samples': 21176320, 'steps': 41359, 'loss/train': 1.806839942932129} +03/05/2022 14:05:30 - INFO - codeparrot_training - Skipping example with length 697 (seq_length=1024) +03/05/2022 14:05:35 - INFO - codeparrot_training - Step 41360: {'lr': 0.0004177028815881011, 'samples': 21176832, 'steps': 41360, 'loss/train': 0.7810460925102234} +03/05/2022 14:05:38 - INFO - codeparrot_training - Step 41361: {'lr': 0.00041769894592081746, 'samples': 21177344, 'steps': 41361, 'loss/train': 1.4558087587356567} +03/05/2022 14:05:39 - INFO - codeparrot_training - Skipping example with length 1015 (seq_length=1024) +03/05/2022 14:05:44 - INFO - codeparrot_training - Step 41362: {'lr': 0.0004176950101779713, 'samples': 21177856, 'steps': 41362, 'loss/train': 3.566206455230713} +03/05/2022 14:05:47 - INFO - codeparrot_training - Step 41363: {'lr': 0.00041769107435956444, 'samples': 21178368, 'steps': 41363, 'loss/train': 1.2696261405944824} +03/05/2022 14:05:48 - INFO - codeparrot_training - Skipping example with length 684 (seq_length=1024) +03/05/2022 14:05:52 - INFO - codeparrot_training - Step 41364: {'lr': 0.00041768713846559844, 'samples': 21178880, 'steps': 41364, 'loss/train': 1.3685111999511719} +03/05/2022 14:05:56 - INFO - codeparrot_training - Step 41365: {'lr': 0.00041768320249607527, 'samples': 21179392, 'steps': 41365, 'loss/train': 1.4516844749450684} +03/05/2022 14:05:57 - INFO - codeparrot_training - Skipping example with length 318 (seq_length=1024) +03/05/2022 14:06:01 - INFO - codeparrot_training - Step 41366: {'lr': 0.00041767926645099664, 'samples': 21179904, 'steps': 41366, 'loss/train': 1.7407466173171997} +03/05/2022 14:06:04 - INFO - codeparrot_training - Step 41367: {'lr': 0.00041767533033036425, 'samples': 21180416, 'steps': 41367, 'loss/train': 1.7720054388046265} +03/05/2022 14:06:05 - INFO - codeparrot_training - Skipping example with length 306 (seq_length=1024) +03/05/2022 14:06:09 - INFO - codeparrot_training - Step 41368: {'lr': 0.00041767139413418, 'samples': 21180928, 'steps': 41368, 'loss/train': 1.6418370008468628} +03/05/2022 14:06:12 - INFO - codeparrot_training - Step 41369: {'lr': 0.00041766745786244564, 'samples': 21181440, 'steps': 41369, 'loss/train': 1.5588502883911133} +03/05/2022 14:06:13 - INFO - codeparrot_training - Skipping example with length 554 (seq_length=1024) +03/05/2022 14:06:18 - INFO - codeparrot_training - Step 41370: {'lr': 0.00041766352151516284, 'samples': 21181952, 'steps': 41370, 'loss/train': 1.1523798704147339} +03/05/2022 14:06:21 - INFO - codeparrot_training - Step 41371: {'lr': 0.0004176595850923335, 'samples': 21182464, 'steps': 41371, 'loss/train': 1.9474139213562012} +03/05/2022 14:06:22 - INFO - codeparrot_training - Skipping example with length 906 (seq_length=1024) +03/05/2022 14:06:26 - INFO - codeparrot_training - Step 41372: {'lr': 0.0004176556485939593, 'samples': 21182976, 'steps': 41372, 'loss/train': 0.9940188527107239} +03/05/2022 14:06:29 - INFO - codeparrot_training - Step 41373: {'lr': 0.00041765171202004205, 'samples': 21183488, 'steps': 41373, 'loss/train': 1.9338175058364868} +03/05/2022 14:06:30 - INFO - codeparrot_training - Skipping example with length 295 (seq_length=1024) +03/05/2022 14:06:35 - INFO - codeparrot_training - Step 41374: {'lr': 0.00041764777537058354, 'samples': 21184000, 'steps': 41374, 'loss/train': 2.2829370498657227} +03/05/2022 14:06:38 - INFO - codeparrot_training - Step 41375: {'lr': 0.0004176438386455855, 'samples': 21184512, 'steps': 41375, 'loss/train': 1.968406081199646} +03/05/2022 14:06:39 - INFO - codeparrot_training - Skipping example with length 980 (seq_length=1024) +03/05/2022 14:06:43 - INFO - codeparrot_training - Step 41376: {'lr': 0.00041763990184504984, 'samples': 21185024, 'steps': 41376, 'loss/train': 1.9372787475585938} +03/05/2022 14:06:46 - INFO - codeparrot_training - Step 41377: {'lr': 0.00041763596496897817, 'samples': 21185536, 'steps': 41377, 'loss/train': 2.1158833503723145} +03/05/2022 14:06:47 - INFO - codeparrot_training - Skipping example with length 705 (seq_length=1024) +03/05/2022 14:06:52 - INFO - codeparrot_training - Step 41378: {'lr': 0.00041763202801737225, 'samples': 21186048, 'steps': 41378, 'loss/train': 1.9344704151153564} +03/05/2022 14:06:55 - INFO - codeparrot_training - Step 41379: {'lr': 0.00041762809099023403, 'samples': 21186560, 'steps': 41379, 'loss/train': 1.58267080783844} +03/05/2022 14:06:56 - INFO - codeparrot_training - Skipping example with length 679 (seq_length=1024) +03/05/2022 14:07:00 - INFO - codeparrot_training - Step 41380: {'lr': 0.00041762415388756514, 'samples': 21187072, 'steps': 41380, 'loss/train': 1.8077571392059326} +03/05/2022 14:07:03 - INFO - codeparrot_training - Step 41381: {'lr': 0.00041762021670936736, 'samples': 21187584, 'steps': 41381, 'loss/train': 1.012739658355713} +03/05/2022 14:07:05 - INFO - codeparrot_training - Skipping example with length 434 (seq_length=1024) +03/05/2022 14:07:08 - INFO - codeparrot_training - Step 41382: {'lr': 0.0004176162794556425, 'samples': 21188096, 'steps': 41382, 'loss/train': 0.18582558631896973} +03/05/2022 14:07:12 - INFO - codeparrot_training - Step 41383: {'lr': 0.0004176123421263923, 'samples': 21188608, 'steps': 41383, 'loss/train': 1.7358652353286743} +03/05/2022 14:07:13 - INFO - codeparrot_training - Skipping example with length 90 (seq_length=1024) +03/05/2022 14:07:17 - INFO - codeparrot_training - Step 41384: {'lr': 0.00041760840472161866, 'samples': 21189120, 'steps': 41384, 'loss/train': 1.191225290298462} +03/05/2022 14:07:20 - INFO - codeparrot_training - Step 41385: {'lr': 0.0004176044672413232, 'samples': 21189632, 'steps': 41385, 'loss/train': 1.1092634201049805} +03/05/2022 14:07:21 - INFO - codeparrot_training - Skipping example with length 689 (seq_length=1024) +03/05/2022 14:07:25 - INFO - codeparrot_training - Step 41386: {'lr': 0.00041760052968550776, 'samples': 21190144, 'steps': 41386, 'loss/train': 1.3499988317489624} +03/05/2022 14:07:28 - INFO - codeparrot_training - Step 41387: {'lr': 0.0004175965920541741, 'samples': 21190656, 'steps': 41387, 'loss/train': 2.5596742630004883} +03/05/2022 14:07:30 - INFO - codeparrot_training - Skipping example with length 642 (seq_length=1024) +03/05/2022 14:07:34 - INFO - codeparrot_training - Step 41388: {'lr': 0.00041759265434732404, 'samples': 21191168, 'steps': 41388, 'loss/train': 1.742496132850647} +03/05/2022 14:07:37 - INFO - codeparrot_training - Step 41389: {'lr': 0.00041758871656495927, 'samples': 21191680, 'steps': 41389, 'loss/train': 1.6967554092407227} +03/05/2022 14:07:38 - INFO - codeparrot_training - Skipping example with length 611 (seq_length=1024) +03/05/2022 14:07:42 - INFO - codeparrot_training - Step 41390: {'lr': 0.00041758477870708165, 'samples': 21192192, 'steps': 41390, 'loss/train': 0.1480623185634613} +03/05/2022 14:07:45 - INFO - codeparrot_training - Step 41391: {'lr': 0.0004175808407736929, 'samples': 21192704, 'steps': 41391, 'loss/train': 1.4847979545593262} +03/05/2022 14:07:46 - INFO - codeparrot_training - Skipping example with length 952 (seq_length=1024) +03/05/2022 14:07:51 - INFO - codeparrot_training - Step 41392: {'lr': 0.00041757690276479474, 'samples': 21193216, 'steps': 41392, 'loss/train': 1.476781964302063} +03/05/2022 14:07:54 - INFO - codeparrot_training - Step 41393: {'lr': 0.0004175729646803891, 'samples': 21193728, 'steps': 41393, 'loss/train': 2.365790367126465} +03/05/2022 14:07:55 - INFO - codeparrot_training - Skipping example with length 940 (seq_length=1024) +03/05/2022 14:07:59 - INFO - codeparrot_training - Step 41394: {'lr': 0.00041756902652047767, 'samples': 21194240, 'steps': 41394, 'loss/train': 0.8488802313804626} +03/05/2022 14:08:02 - INFO - codeparrot_training - Step 41395: {'lr': 0.0004175650882850622, 'samples': 21194752, 'steps': 41395, 'loss/train': 2.3593897819519043} +03/05/2022 14:08:03 - INFO - codeparrot_training - Skipping example with length 139 (seq_length=1024) +03/05/2022 14:08:07 - INFO - codeparrot_training - Step 41396: {'lr': 0.0004175611499741445, 'samples': 21195264, 'steps': 41396, 'loss/train': 1.9838229417800903} +03/05/2022 14:08:11 - INFO - codeparrot_training - Step 41397: {'lr': 0.00041755721158772633, 'samples': 21195776, 'steps': 41397, 'loss/train': 1.662850260734558} +03/05/2022 14:08:12 - INFO - codeparrot_training - Skipping example with length 114 (seq_length=1024) +03/05/2022 14:08:16 - INFO - codeparrot_training - Step 41398: {'lr': 0.00041755327312580944, 'samples': 21196288, 'steps': 41398, 'loss/train': 2.2923905849456787} +03/05/2022 14:08:19 - INFO - codeparrot_training - Step 41399: {'lr': 0.0004175493345883956, 'samples': 21196800, 'steps': 41399, 'loss/train': 1.8831506967544556} +03/05/2022 14:08:20 - INFO - codeparrot_training - Skipping example with length 257 (seq_length=1024) +03/05/2022 14:08:24 - INFO - codeparrot_training - Step 41400: {'lr': 0.0004175453959754867, 'samples': 21197312, 'steps': 41400, 'loss/train': 2.2388203144073486} +03/05/2022 14:08:27 - INFO - codeparrot_training - Step 41401: {'lr': 0.00041754145728708434, 'samples': 21197824, 'steps': 41401, 'loss/train': 1.5298492908477783} +03/05/2022 14:08:29 - INFO - codeparrot_training - Skipping example with length 175 (seq_length=1024) +03/05/2022 14:08:33 - INFO - codeparrot_training - Step 41402: {'lr': 0.0004175375185231904, 'samples': 21198336, 'steps': 41402, 'loss/train': 1.3990254402160645} +03/05/2022 14:08:36 - INFO - codeparrot_training - Step 41403: {'lr': 0.00041753357968380675, 'samples': 21198848, 'steps': 41403, 'loss/train': 0.2592299282550812} +03/05/2022 14:08:37 - INFO - codeparrot_training - Skipping example with length 887 (seq_length=1024) +03/05/2022 14:08:41 - INFO - codeparrot_training - Step 41404: {'lr': 0.00041752964076893496, 'samples': 21199360, 'steps': 41404, 'loss/train': 1.2508511543273926} +03/05/2022 14:08:44 - INFO - codeparrot_training - Step 41405: {'lr': 0.00041752570177857695, 'samples': 21199872, 'steps': 41405, 'loss/train': 1.6720975637435913} +03/05/2022 14:08:45 - INFO - codeparrot_training - Skipping example with length 638 (seq_length=1024) +03/05/2022 14:08:50 - INFO - codeparrot_training - Step 41406: {'lr': 0.0004175217627127344, 'samples': 21200384, 'steps': 41406, 'loss/train': 1.7520833015441895} +03/05/2022 14:08:53 - INFO - codeparrot_training - Step 41407: {'lr': 0.0004175178235714091, 'samples': 21200896, 'steps': 41407, 'loss/train': 1.8849515914916992} +03/05/2022 14:08:54 - INFO - codeparrot_training - Skipping example with length 735 (seq_length=1024) +03/05/2022 14:08:58 - INFO - codeparrot_training - Step 41408: {'lr': 0.0004175138843546029, 'samples': 21201408, 'steps': 41408, 'loss/train': 1.5459226369857788} +03/05/2022 14:09:01 - INFO - codeparrot_training - Step 41409: {'lr': 0.00041750994506231756, 'samples': 21201920, 'steps': 41409, 'loss/train': 1.860771656036377} +03/05/2022 14:09:02 - INFO - codeparrot_training - Skipping example with length 811 (seq_length=1024) +03/05/2022 14:09:06 - INFO - codeparrot_training - Step 41410: {'lr': 0.00041750600569455474, 'samples': 21202432, 'steps': 41410, 'loss/train': 1.562726616859436} +03/05/2022 14:09:10 - INFO - codeparrot_training - Step 41411: {'lr': 0.0004175020662513164, 'samples': 21202944, 'steps': 41411, 'loss/train': 0.881607711315155} +03/05/2022 14:09:10 - INFO - codeparrot_training - Skipping example with length 249 (seq_length=1024) +03/05/2022 14:09:15 - INFO - codeparrot_training - Step 41412: {'lr': 0.0004174981267326041, 'samples': 21203456, 'steps': 41412, 'loss/train': 2.0347695350646973} +03/05/2022 14:09:18 - INFO - codeparrot_training - Step 41413: {'lr': 0.0004174941871384198, 'samples': 21203968, 'steps': 41413, 'loss/train': 0.8473787307739258} +03/05/2022 14:09:19 - INFO - codeparrot_training - Skipping example with length 588 (seq_length=1024) +03/05/2022 14:09:23 - INFO - codeparrot_training - Step 41414: {'lr': 0.00041749024746876517, 'samples': 21204480, 'steps': 41414, 'loss/train': 1.2882137298583984} +03/05/2022 14:09:26 - INFO - codeparrot_training - Step 41415: {'lr': 0.00041748630772364204, 'samples': 21204992, 'steps': 41415, 'loss/train': 2.007486581802368} +03/05/2022 14:09:27 - INFO - codeparrot_training - Skipping example with length 215 (seq_length=1024) +03/05/2022 14:09:32 - INFO - codeparrot_training - Step 41416: {'lr': 0.00041748236790305215, 'samples': 21205504, 'steps': 41416, 'loss/train': 2.064770221710205} +03/05/2022 14:09:35 - INFO - codeparrot_training - Step 41417: {'lr': 0.0004174784280069973, 'samples': 21206016, 'steps': 41417, 'loss/train': 2.051490545272827} +03/05/2022 14:09:36 - INFO - codeparrot_training - Skipping example with length 420 (seq_length=1024) +03/05/2022 14:09:40 - INFO - codeparrot_training - Step 41418: {'lr': 0.00041747448803547925, 'samples': 21206528, 'steps': 41418, 'loss/train': 2.0988495349884033} +03/05/2022 14:09:43 - INFO - codeparrot_training - Step 41419: {'lr': 0.0004174705479884998, 'samples': 21207040, 'steps': 41419, 'loss/train': 1.997206211090088} +03/05/2022 14:09:44 - INFO - codeparrot_training - Skipping example with length 554 (seq_length=1024) +03/05/2022 14:09:49 - INFO - codeparrot_training - Step 41420: {'lr': 0.0004174666078660607, 'samples': 21207552, 'steps': 41420, 'loss/train': 1.4939026832580566} +03/05/2022 14:09:52 - INFO - codeparrot_training - Step 41421: {'lr': 0.00041746266766816377, 'samples': 21208064, 'steps': 41421, 'loss/train': 1.4540772438049316} +03/05/2022 14:09:53 - INFO - codeparrot_training - Skipping example with length 579 (seq_length=1024) +03/05/2022 14:09:57 - INFO - codeparrot_training - Step 41422: {'lr': 0.0004174587273948106, 'samples': 21208576, 'steps': 41422, 'loss/train': 2.216372013092041} +03/05/2022 14:10:00 - INFO - codeparrot_training - Step 41423: {'lr': 0.0004174547870460033, 'samples': 21209088, 'steps': 41423, 'loss/train': 1.4860442876815796} +03/05/2022 14:10:01 - INFO - codeparrot_training - Skipping example with length 650 (seq_length=1024) +03/05/2022 14:10:06 - INFO - codeparrot_training - Step 41424: {'lr': 0.0004174508466217434, 'samples': 21209600, 'steps': 41424, 'loss/train': 1.4776396751403809} +03/05/2022 14:10:09 - INFO - codeparrot_training - Step 41425: {'lr': 0.00041744690612203263, 'samples': 21210112, 'steps': 41425, 'loss/train': 1.2215474843978882} +03/05/2022 14:10:10 - INFO - codeparrot_training - Skipping example with length 863 (seq_length=1024) +03/05/2022 14:10:14 - INFO - codeparrot_training - Step 41426: {'lr': 0.00041744296554687294, 'samples': 21210624, 'steps': 41426, 'loss/train': 1.6561205387115479} +03/05/2022 14:10:17 - INFO - codeparrot_training - Step 41427: {'lr': 0.00041743902489626606, 'samples': 21211136, 'steps': 41427, 'loss/train': 2.2217650413513184} +03/05/2022 14:10:18 - INFO - codeparrot_training - Skipping example with length 203 (seq_length=1024) +03/05/2022 14:10:22 - INFO - codeparrot_training - Step 41428: {'lr': 0.0004174350841702137, 'samples': 21211648, 'steps': 41428, 'loss/train': 1.8328044414520264} +03/05/2022 14:10:26 - INFO - codeparrot_training - Step 41429: {'lr': 0.0004174311433687177, 'samples': 21212160, 'steps': 41429, 'loss/train': 1.4709471464157104} +03/05/2022 14:10:27 - INFO - codeparrot_training - Skipping example with length 690 (seq_length=1024) +03/05/2022 14:10:31 - INFO - codeparrot_training - Step 41430: {'lr': 0.00041742720249177975, 'samples': 21212672, 'steps': 41430, 'loss/train': 1.2375367879867554} +03/05/2022 14:10:34 - INFO - codeparrot_training - Step 41431: {'lr': 0.0004174232615394018, 'samples': 21213184, 'steps': 41431, 'loss/train': 1.8222976922988892} +03/05/2022 14:10:36 - INFO - codeparrot_training - Skipping example with length 678 (seq_length=1024) +03/05/2022 14:10:39 - INFO - codeparrot_training - Step 41432: {'lr': 0.00041741932051158535, 'samples': 21213696, 'steps': 41432, 'loss/train': 2.8174750804901123} +03/05/2022 14:10:43 - INFO - codeparrot_training - Step 41433: {'lr': 0.00041741537940833247, 'samples': 21214208, 'steps': 41433, 'loss/train': 1.983985424041748} +03/05/2022 14:10:44 - INFO - codeparrot_training - Skipping example with length 338 (seq_length=1024) +03/05/2022 14:10:48 - INFO - codeparrot_training - Step 41434: {'lr': 0.00041741143822964476, 'samples': 21214720, 'steps': 41434, 'loss/train': 1.2514272928237915} +03/05/2022 14:10:51 - INFO - codeparrot_training - Step 41435: {'lr': 0.00041740749697552406, 'samples': 21215232, 'steps': 41435, 'loss/train': 1.0364766120910645} +03/05/2022 14:10:53 - INFO - codeparrot_training - Skipping example with length 271 (seq_length=1024) +03/05/2022 14:10:56 - INFO - codeparrot_training - Step 41436: {'lr': 0.0004174035556459721, 'samples': 21215744, 'steps': 41436, 'loss/train': 1.741824984550476} +03/05/2022 14:10:59 - INFO - codeparrot_training - Step 41437: {'lr': 0.0004173996142409907, 'samples': 21216256, 'steps': 41437, 'loss/train': 2.483654737472534} +03/05/2022 14:11:01 - INFO - codeparrot_training - Skipping example with length 215 (seq_length=1024) +03/05/2022 14:11:05 - INFO - codeparrot_training - Step 41438: {'lr': 0.0004173956727605816, 'samples': 21216768, 'steps': 41438, 'loss/train': 2.31372332572937} +03/05/2022 14:11:08 - INFO - codeparrot_training - Step 41439: {'lr': 0.00041739173120474663, 'samples': 21217280, 'steps': 41439, 'loss/train': 1.8581352233886719} +03/05/2022 14:11:09 - INFO - codeparrot_training - Skipping example with length 617 (seq_length=1024) +03/05/2022 14:11:13 - INFO - codeparrot_training - Step 41440: {'lr': 0.00041738778957348745, 'samples': 21217792, 'steps': 41440, 'loss/train': 2.4800143241882324} +03/05/2022 14:11:16 - INFO - codeparrot_training - Step 41441: {'lr': 0.00041738384786680596, 'samples': 21218304, 'steps': 41441, 'loss/train': 1.3973382711410522} +03/05/2022 14:11:18 - INFO - codeparrot_training - Skipping example with length 337 (seq_length=1024) +03/05/2022 14:11:22 - INFO - codeparrot_training - Step 41442: {'lr': 0.0004173799060847039, 'samples': 21218816, 'steps': 41442, 'loss/train': 2.2493553161621094} +03/05/2022 14:11:25 - INFO - codeparrot_training - Step 41443: {'lr': 0.00041737596422718306, 'samples': 21219328, 'steps': 41443, 'loss/train': 2.1427581310272217} +03/05/2022 14:11:26 - INFO - codeparrot_training - Skipping example with length 234 (seq_length=1024) +03/05/2022 14:11:30 - INFO - codeparrot_training - Step 41444: {'lr': 0.0004173720222942452, 'samples': 21219840, 'steps': 41444, 'loss/train': 1.7052302360534668} +03/05/2022 14:11:33 - INFO - codeparrot_training - Step 41445: {'lr': 0.000417368080285892, 'samples': 21220352, 'steps': 41445, 'loss/train': 1.7942179441452026} +03/05/2022 14:11:34 - INFO - codeparrot_training - Skipping example with length 63 (seq_length=1024) +03/05/2022 14:11:38 - INFO - codeparrot_training - Step 41446: {'lr': 0.0004173641382021254, 'samples': 21220864, 'steps': 41446, 'loss/train': 2.3863537311553955} +03/05/2022 14:11:42 - INFO - codeparrot_training - Step 41447: {'lr': 0.00041736019604294704, 'samples': 21221376, 'steps': 41447, 'loss/train': 1.6136682033538818} +03/05/2022 14:11:42 - INFO - codeparrot_training - Skipping example with length 274 (seq_length=1024) +03/05/2022 14:11:47 - INFO - codeparrot_training - Step 41448: {'lr': 0.00041735625380835884, 'samples': 21221888, 'steps': 41448, 'loss/train': 1.6372950077056885} +03/05/2022 14:11:50 - INFO - codeparrot_training - Step 41449: {'lr': 0.0004173523114983624, 'samples': 21222400, 'steps': 41449, 'loss/train': 1.6027343273162842} +03/05/2022 14:11:50 - INFO - codeparrot_training - Skipping example with length 942 (seq_length=1024) +03/05/2022 14:11:56 - INFO - codeparrot_training - Step 41450: {'lr': 0.0004173483691129597, 'samples': 21222912, 'steps': 41450, 'loss/train': 1.9475584030151367} +03/05/2022 14:11:59 - INFO - codeparrot_training - Step 41451: {'lr': 0.00041734442665215235, 'samples': 21223424, 'steps': 41451, 'loss/train': 1.8609545230865479} +03/05/2022 14:11:59 - INFO - codeparrot_training - Skipping example with length 361 (seq_length=1024) +03/05/2022 14:12:04 - INFO - codeparrot_training - Step 41452: {'lr': 0.00041734048411594214, 'samples': 21223936, 'steps': 41452, 'loss/train': 2.0061404705047607} +03/05/2022 14:12:07 - INFO - codeparrot_training - Step 41453: {'lr': 0.000417336541504331, 'samples': 21224448, 'steps': 41453, 'loss/train': 1.617042064666748} +03/05/2022 14:12:08 - INFO - codeparrot_training - Skipping example with length 361 (seq_length=1024) +03/05/2022 14:12:12 - INFO - codeparrot_training - Step 41454: {'lr': 0.0004173325988173205, 'samples': 21224960, 'steps': 41454, 'loss/train': 1.690610408782959} +03/05/2022 14:12:16 - INFO - codeparrot_training - Step 41455: {'lr': 0.00041732865605491256, 'samples': 21225472, 'steps': 41455, 'loss/train': 1.5892915725708008} +03/05/2022 14:12:16 - INFO - codeparrot_training - Skipping example with length 328 (seq_length=1024) +03/05/2022 14:12:21 - INFO - codeparrot_training - Step 41456: {'lr': 0.00041732471321710886, 'samples': 21225984, 'steps': 41456, 'loss/train': 1.6462794542312622} +03/05/2022 14:12:24 - INFO - codeparrot_training - Step 41457: {'lr': 0.00041732077030391126, 'samples': 21226496, 'steps': 41457, 'loss/train': 2.1530699729919434} +03/05/2022 14:12:24 - INFO - codeparrot_training - Skipping example with length 673 (seq_length=1024) +03/05/2022 14:12:30 - INFO - codeparrot_training - Step 41458: {'lr': 0.00041731682731532154, 'samples': 21227008, 'steps': 41458, 'loss/train': 2.12203049659729} +03/05/2022 14:12:33 - INFO - codeparrot_training - Step 41459: {'lr': 0.0004173128842513414, 'samples': 21227520, 'steps': 41459, 'loss/train': 1.9235591888427734} +03/05/2022 14:12:34 - INFO - codeparrot_training - Skipping example with length 274 (seq_length=1024) +03/05/2022 14:12:38 - INFO - codeparrot_training - Step 41460: {'lr': 0.00041730894111197266, 'samples': 21228032, 'steps': 41460, 'loss/train': 2.042478084564209} +03/05/2022 14:12:41 - INFO - codeparrot_training - Step 41461: {'lr': 0.0004173049978972171, 'samples': 21228544, 'steps': 41461, 'loss/train': 1.2865082025527954} +03/05/2022 14:12:43 - INFO - codeparrot_training - Skipping example with length 572 (seq_length=1024) +03/05/2022 14:12:47 - INFO - codeparrot_training - Step 41462: {'lr': 0.0004173010546070765, 'samples': 21229056, 'steps': 41462, 'loss/train': 2.117494821548462} +03/05/2022 14:12:50 - INFO - codeparrot_training - Step 41463: {'lr': 0.00041729711124155255, 'samples': 21229568, 'steps': 41463, 'loss/train': 2.097677707672119} +03/05/2022 14:12:51 - INFO - codeparrot_training - Skipping example with length 514 (seq_length=1024) +03/05/2022 14:12:55 - INFO - codeparrot_training - Step 41464: {'lr': 0.0004172931678006472, 'samples': 21230080, 'steps': 41464, 'loss/train': 2.2862021923065186} +03/05/2022 14:12:58 - INFO - codeparrot_training - Step 41465: {'lr': 0.00041728922428436213, 'samples': 21230592, 'steps': 41465, 'loss/train': 2.2636687755584717} +03/05/2022 14:12:59 - INFO - codeparrot_training - Skipping example with length 147 (seq_length=1024) +03/05/2022 14:13:04 - INFO - codeparrot_training - Step 41466: {'lr': 0.000417285280692699, 'samples': 21231104, 'steps': 41466, 'loss/train': 1.748808741569519} +03/05/2022 14:13:07 - INFO - codeparrot_training - Step 41467: {'lr': 0.00041728133702565985, 'samples': 21231616, 'steps': 41467, 'loss/train': 0.8206184506416321} +03/05/2022 14:13:08 - INFO - codeparrot_training - Skipping example with length 812 (seq_length=1024) +03/05/2022 14:13:12 - INFO - codeparrot_training - Step 41468: {'lr': 0.0004172773932832462, 'samples': 21232128, 'steps': 41468, 'loss/train': 0.5391926169395447} +03/05/2022 14:13:15 - INFO - codeparrot_training - Step 41469: {'lr': 0.00041727344946546, 'samples': 21232640, 'steps': 41469, 'loss/train': 1.8263237476348877} +03/05/2022 14:13:16 - INFO - codeparrot_training - Skipping example with length 635 (seq_length=1024) +03/05/2022 14:13:21 - INFO - codeparrot_training - Step 41470: {'lr': 0.00041726950557230294, 'samples': 21233152, 'steps': 41470, 'loss/train': 2.835204601287842} +03/05/2022 14:13:24 - INFO - codeparrot_training - Step 41471: {'lr': 0.0004172655616037768, 'samples': 21233664, 'steps': 41471, 'loss/train': 1.359943151473999} +03/05/2022 14:13:25 - INFO - codeparrot_training - Skipping example with length 672 (seq_length=1024) +03/05/2022 14:13:29 - INFO - codeparrot_training - Step 41472: {'lr': 0.0004172616175598835, 'samples': 21234176, 'steps': 41472, 'loss/train': 1.47471284866333} +03/05/2022 14:13:32 - INFO - codeparrot_training - Step 41473: {'lr': 0.00041725767344062453, 'samples': 21234688, 'steps': 41473, 'loss/train': 1.8852826356887817} +03/05/2022 14:13:34 - INFO - codeparrot_training - Skipping example with length 843 (seq_length=1024) +03/05/2022 14:13:38 - INFO - codeparrot_training - Step 41474: {'lr': 0.00041725372924600193, 'samples': 21235200, 'steps': 41474, 'loss/train': 2.075472831726074} +03/05/2022 14:13:41 - INFO - codeparrot_training - Step 41475: {'lr': 0.00041724978497601736, 'samples': 21235712, 'steps': 41475, 'loss/train': 2.257991075515747} +03/05/2022 14:13:42 - INFO - codeparrot_training - Skipping example with length 1001 (seq_length=1024) +03/05/2022 14:13:46 - INFO - codeparrot_training - Step 41476: {'lr': 0.0004172458406306726, 'samples': 21236224, 'steps': 41476, 'loss/train': 1.9495385885238647} +03/05/2022 14:13:49 - INFO - codeparrot_training - Step 41477: {'lr': 0.00041724189620996946, 'samples': 21236736, 'steps': 41477, 'loss/train': 2.5579681396484375} +03/05/2022 14:13:51 - INFO - codeparrot_training - Skipping example with length 978 (seq_length=1024) +03/05/2022 14:13:55 - INFO - codeparrot_training - Step 41478: {'lr': 0.0004172379517139097, 'samples': 21237248, 'steps': 41478, 'loss/train': 1.9899598360061646} +03/05/2022 14:13:58 - INFO - codeparrot_training - Step 41479: {'lr': 0.0004172340071424951, 'samples': 21237760, 'steps': 41479, 'loss/train': 1.9186172485351562} +03/05/2022 14:13:59 - INFO - codeparrot_training - Skipping example with length 1016 (seq_length=1024) +03/05/2022 14:14:03 - INFO - codeparrot_training - Step 41480: {'lr': 0.00041723006249572744, 'samples': 21238272, 'steps': 41480, 'loss/train': 1.64205801486969} +03/05/2022 14:14:06 - INFO - codeparrot_training - Step 41481: {'lr': 0.00041722611777360844, 'samples': 21238784, 'steps': 41481, 'loss/train': 1.2513768672943115} +03/05/2022 14:14:07 - INFO - codeparrot_training - Skipping example with length 351 (seq_length=1024) +03/05/2022 14:14:12 - INFO - codeparrot_training - Step 41482: {'lr': 0.00041722217297614, 'samples': 21239296, 'steps': 41482, 'loss/train': 1.1852688789367676} +03/05/2022 14:14:15 - INFO - codeparrot_training - Step 41483: {'lr': 0.00041721822810332384, 'samples': 21239808, 'steps': 41483, 'loss/train': 1.959395170211792} +03/05/2022 14:14:16 - INFO - codeparrot_training - Skipping example with length 12 (seq_length=1024) +03/05/2022 14:14:20 - INFO - codeparrot_training - Step 41484: {'lr': 0.00041721428315516176, 'samples': 21240320, 'steps': 41484, 'loss/train': 1.6928373575210571} +03/05/2022 14:14:23 - INFO - codeparrot_training - Step 41485: {'lr': 0.00041721033813165543, 'samples': 21240832, 'steps': 41485, 'loss/train': 1.9689911603927612} +03/05/2022 14:14:26 - INFO - codeparrot_training - Skipping example with length 758 (seq_length=1024) +03/05/2022 14:14:29 - INFO - codeparrot_training - Step 41486: {'lr': 0.0004172063930328067, 'samples': 21241344, 'steps': 41486, 'loss/train': 1.5693457126617432} +03/05/2022 14:14:32 - INFO - codeparrot_training - Step 41487: {'lr': 0.00041720244785861736, 'samples': 21241856, 'steps': 41487, 'loss/train': 1.6972384452819824} +03/05/2022 14:14:34 - INFO - codeparrot_training - Skipping example with length 778 (seq_length=1024) +03/05/2022 14:14:37 - INFO - codeparrot_training - Step 41488: {'lr': 0.0004171985026090892, 'samples': 21242368, 'steps': 41488, 'loss/train': 0.12188681960105896} +03/05/2022 14:14:41 - INFO - codeparrot_training - Step 41489: {'lr': 0.00041719455728422394, 'samples': 21242880, 'steps': 41489, 'loss/train': 2.8541159629821777} +03/05/2022 14:14:44 - INFO - codeparrot_training - Step 41490: {'lr': 0.0004171906118840234, 'samples': 21243392, 'steps': 41490, 'loss/train': 2.2380781173706055} +03/05/2022 14:14:44 - INFO - codeparrot_training - Skipping example with length 164 (seq_length=1024) +03/05/2022 14:14:49 - INFO - codeparrot_training - Step 41491: {'lr': 0.00041718666640848937, 'samples': 21243904, 'steps': 41491, 'loss/train': 2.174309253692627} +03/05/2022 14:14:52 - INFO - codeparrot_training - Step 41492: {'lr': 0.0004171827208576236, 'samples': 21244416, 'steps': 41492, 'loss/train': 1.4239755868911743} +03/05/2022 14:14:53 - INFO - codeparrot_training - Skipping example with length 720 (seq_length=1024) +03/05/2022 14:14:58 - INFO - codeparrot_training - Step 41493: {'lr': 0.00041717877523142786, 'samples': 21244928, 'steps': 41493, 'loss/train': 1.8029905557632446} +03/05/2022 14:15:01 - INFO - codeparrot_training - Step 41494: {'lr': 0.00041717482952990394, 'samples': 21245440, 'steps': 41494, 'loss/train': 0.7462776899337769} +03/05/2022 14:15:02 - INFO - codeparrot_training - Skipping example with length 559 (seq_length=1024) +03/05/2022 14:15:06 - INFO - codeparrot_training - Step 41495: {'lr': 0.00041717088375305367, 'samples': 21245952, 'steps': 41495, 'loss/train': 1.3760778903961182} +03/05/2022 14:15:09 - INFO - codeparrot_training - Step 41496: {'lr': 0.0004171669379008787, 'samples': 21246464, 'steps': 41496, 'loss/train': 1.112423062324524} +03/05/2022 14:15:10 - INFO - codeparrot_training - Skipping example with length 873 (seq_length=1024) +03/05/2022 14:15:14 - INFO - codeparrot_training - Step 41497: {'lr': 0.00041716299197338093, 'samples': 21246976, 'steps': 41497, 'loss/train': 2.2601466178894043} +03/05/2022 14:15:18 - INFO - codeparrot_training - Step 41498: {'lr': 0.0004171590459705622, 'samples': 21247488, 'steps': 41498, 'loss/train': 1.7729830741882324} +03/05/2022 14:15:18 - INFO - codeparrot_training - Skipping example with length 89 (seq_length=1024) +03/05/2022 14:15:23 - INFO - codeparrot_training - Step 41499: {'lr': 0.0004171550998924241, 'samples': 21248000, 'steps': 41499, 'loss/train': 1.9727226495742798} +03/05/2022 14:15:26 - INFO - codeparrot_training - Step 41500: {'lr': 0.0004171511537389684, 'samples': 21248512, 'steps': 41500, 'loss/train': 1.2623207569122314} +03/05/2022 14:15:27 - INFO - codeparrot_training - Skipping example with length 15 (seq_length=1024) +03/05/2022 14:15:31 - INFO - codeparrot_training - Step 41501: {'lr': 0.0004171472075101971, 'samples': 21249024, 'steps': 41501, 'loss/train': 2.5038204193115234} +03/05/2022 14:15:35 - INFO - codeparrot_training - Step 41502: {'lr': 0.0004171432612061117, 'samples': 21249536, 'steps': 41502, 'loss/train': 2.6890692710876465} +03/05/2022 14:15:35 - INFO - codeparrot_training - Skipping example with length 129 (seq_length=1024) +03/05/2022 14:15:40 - INFO - codeparrot_training - Step 41503: {'lr': 0.00041713931482671425, 'samples': 21250048, 'steps': 41503, 'loss/train': 2.242145299911499} +03/05/2022 14:15:43 - INFO - codeparrot_training - Step 41504: {'lr': 0.0004171353683720064, 'samples': 21250560, 'steps': 41504, 'loss/train': 1.1304399967193604} +03/05/2022 14:15:44 - INFO - codeparrot_training - Skipping example with length 466 (seq_length=1024) +03/05/2022 14:15:48 - INFO - codeparrot_training - Step 41505: {'lr': 0.00041713142184198994, 'samples': 21251072, 'steps': 41505, 'loss/train': 0.5512522459030151} +03/05/2022 14:15:51 - INFO - codeparrot_training - Step 41506: {'lr': 0.0004171274752366665, 'samples': 21251584, 'steps': 41506, 'loss/train': 1.8599876165390015} +03/05/2022 14:15:52 - INFO - codeparrot_training - Skipping example with length 1016 (seq_length=1024) +03/05/2022 14:15:57 - INFO - codeparrot_training - Step 41507: {'lr': 0.00041712352855603817, 'samples': 21252096, 'steps': 41507, 'loss/train': 2.174855947494507} +03/05/2022 14:16:00 - INFO - codeparrot_training - Step 41508: {'lr': 0.00041711958180010644, 'samples': 21252608, 'steps': 41508, 'loss/train': 1.5455989837646484} +03/05/2022 14:16:00 - INFO - codeparrot_training - Skipping example with length 580 (seq_length=1024) +03/05/2022 14:16:05 - INFO - codeparrot_training - Step 41509: {'lr': 0.0004171156349688733, 'samples': 21253120, 'steps': 41509, 'loss/train': 2.3497087955474854} +03/05/2022 14:16:08 - INFO - codeparrot_training - Step 41510: {'lr': 0.0004171116880623404, 'samples': 21253632, 'steps': 41510, 'loss/train': 1.3743873834609985} +03/05/2022 14:16:09 - INFO - codeparrot_training - Skipping example with length 730 (seq_length=1024) +03/05/2022 14:16:14 - INFO - codeparrot_training - Step 41511: {'lr': 0.0004171077410805095, 'samples': 21254144, 'steps': 41511, 'loss/train': 1.4136096239089966} +03/05/2022 14:16:17 - INFO - codeparrot_training - Step 41512: {'lr': 0.0004171037940233825, 'samples': 21254656, 'steps': 41512, 'loss/train': 1.9292138814926147} +03/05/2022 14:16:18 - INFO - codeparrot_training - Skipping example with length 612 (seq_length=1024) +03/05/2022 14:16:22 - INFO - codeparrot_training - Step 41513: {'lr': 0.0004170998468909611, 'samples': 21255168, 'steps': 41513, 'loss/train': 0.7834359407424927} +03/05/2022 14:16:25 - INFO - codeparrot_training - Step 41514: {'lr': 0.00041709589968324704, 'samples': 21255680, 'steps': 41514, 'loss/train': 1.5129069089889526} +03/05/2022 14:16:27 - INFO - codeparrot_training - Skipping example with length 360 (seq_length=1024) +03/05/2022 14:16:31 - INFO - codeparrot_training - Step 41515: {'lr': 0.00041709195240024224, 'samples': 21256192, 'steps': 41515, 'loss/train': 1.92372465133667} +03/05/2022 14:16:34 - INFO - codeparrot_training - Step 41516: {'lr': 0.0004170880050419483, 'samples': 21256704, 'steps': 41516, 'loss/train': 1.8885161876678467} +03/05/2022 14:16:35 - INFO - codeparrot_training - Skipping example with length 86 (seq_length=1024) +03/05/2022 14:16:39 - INFO - codeparrot_training - Step 41517: {'lr': 0.0004170840576083671, 'samples': 21257216, 'steps': 41517, 'loss/train': 2.114734172821045} +03/05/2022 14:16:42 - INFO - codeparrot_training - Step 41518: {'lr': 0.00041708011009950044, 'samples': 21257728, 'steps': 41518, 'loss/train': 1.8671823740005493} +03/05/2022 14:16:44 - INFO - codeparrot_training - Skipping example with length 625 (seq_length=1024) +03/05/2022 14:16:48 - INFO - codeparrot_training - Step 41519: {'lr': 0.00041707616251535, 'samples': 21258240, 'steps': 41519, 'loss/train': 1.6012461185455322} +03/05/2022 14:16:51 - INFO - codeparrot_training - Step 41520: {'lr': 0.0004170722148559176, 'samples': 21258752, 'steps': 41520, 'loss/train': 1.7984524965286255} +03/05/2022 14:16:52 - INFO - codeparrot_training - Skipping example with length 132 (seq_length=1024) +03/05/2022 14:16:56 - INFO - codeparrot_training - Step 41521: {'lr': 0.0004170682671212051, 'samples': 21259264, 'steps': 41521, 'loss/train': 1.2455508708953857} +03/05/2022 14:16:59 - INFO - codeparrot_training - Step 41522: {'lr': 0.00041706431931121416, 'samples': 21259776, 'steps': 41522, 'loss/train': 1.3203041553497314} +03/05/2022 14:17:01 - INFO - codeparrot_training - Skipping example with length 438 (seq_length=1024) +03/05/2022 14:17:05 - INFO - codeparrot_training - Step 41523: {'lr': 0.00041706037142594666, 'samples': 21260288, 'steps': 41523, 'loss/train': 1.9387435913085938} +03/05/2022 14:17:08 - INFO - codeparrot_training - Step 41524: {'lr': 0.00041705642346540436, 'samples': 21260800, 'steps': 41524, 'loss/train': 2.2522120475769043} +03/05/2022 14:17:09 - INFO - codeparrot_training - Skipping example with length 872 (seq_length=1024) +03/05/2022 14:17:13 - INFO - codeparrot_training - Step 41525: {'lr': 0.00041705247542958904, 'samples': 21261312, 'steps': 41525, 'loss/train': 0.49707064032554626} +03/05/2022 14:17:16 - INFO - codeparrot_training - Step 41526: {'lr': 0.00041704852731850234, 'samples': 21261824, 'steps': 41526, 'loss/train': 2.1818668842315674} +03/05/2022 14:17:18 - INFO - codeparrot_training - Skipping example with length 199 (seq_length=1024) +03/05/2022 14:17:21 - INFO - codeparrot_training - Step 41527: {'lr': 0.0004170445791321462, 'samples': 21262336, 'steps': 41527, 'loss/train': 1.3830277919769287} +03/05/2022 14:17:25 - INFO - codeparrot_training - Step 41528: {'lr': 0.00041704063087052236, 'samples': 21262848, 'steps': 41528, 'loss/train': 1.6878575086593628} +03/05/2022 14:17:26 - INFO - codeparrot_training - Skipping example with length 44 (seq_length=1024) +03/05/2022 14:17:30 - INFO - codeparrot_training - Step 41529: {'lr': 0.0004170366825336326, 'samples': 21263360, 'steps': 41529, 'loss/train': 2.3182194232940674} +03/05/2022 14:17:33 - INFO - codeparrot_training - Step 41530: {'lr': 0.0004170327341214787, 'samples': 21263872, 'steps': 41530, 'loss/train': 1.2810611724853516} +03/05/2022 14:17:35 - INFO - codeparrot_training - Skipping example with length 544 (seq_length=1024) +03/05/2022 14:17:38 - INFO - codeparrot_training - Step 41531: {'lr': 0.00041702878563406237, 'samples': 21264384, 'steps': 41531, 'loss/train': 1.500373125076294} +03/05/2022 14:17:41 - INFO - codeparrot_training - Step 41532: {'lr': 0.0004170248370713855, 'samples': 21264896, 'steps': 41532, 'loss/train': 1.6757190227508545} +03/05/2022 14:17:43 - INFO - codeparrot_training - Skipping example with length 555 (seq_length=1024) +03/05/2022 14:17:47 - INFO - codeparrot_training - Step 41533: {'lr': 0.0004170208884334498, 'samples': 21265408, 'steps': 41533, 'loss/train': 1.5885404348373413} +03/05/2022 14:17:50 - INFO - codeparrot_training - Step 41534: {'lr': 0.000417016939720257, 'samples': 21265920, 'steps': 41534, 'loss/train': 1.7236154079437256} +03/05/2022 14:17:52 - INFO - codeparrot_training - Skipping example with length 877 (seq_length=1024) +03/05/2022 14:17:55 - INFO - codeparrot_training - Step 41535: {'lr': 0.000417012990931809, 'samples': 21266432, 'steps': 41535, 'loss/train': 1.4442216157913208} +03/05/2022 14:17:58 - INFO - codeparrot_training - Step 41536: {'lr': 0.00041700904206810755, 'samples': 21266944, 'steps': 41536, 'loss/train': 1.9169870615005493} +03/05/2022 14:18:00 - INFO - codeparrot_training - Skipping example with length 915 (seq_length=1024) +03/05/2022 14:18:04 - INFO - codeparrot_training - Step 41537: {'lr': 0.00041700509312915437, 'samples': 21267456, 'steps': 41537, 'loss/train': 1.1122479438781738} +03/05/2022 14:18:07 - INFO - codeparrot_training - Step 41538: {'lr': 0.0004170011441149513, 'samples': 21267968, 'steps': 41538, 'loss/train': 1.8567417860031128} +03/05/2022 14:18:08 - INFO - codeparrot_training - Skipping example with length 830 (seq_length=1024) +03/05/2022 14:18:12 - INFO - codeparrot_training - Step 41539: {'lr': 0.0004169971950255001, 'samples': 21268480, 'steps': 41539, 'loss/train': 1.155175805091858} +03/05/2022 14:18:15 - INFO - codeparrot_training - Step 41540: {'lr': 0.0004169932458608025, 'samples': 21268992, 'steps': 41540, 'loss/train': 1.2085416316986084} +03/05/2022 14:18:17 - INFO - codeparrot_training - Skipping example with length 134 (seq_length=1024) +03/05/2022 14:18:20 - INFO - codeparrot_training - Step 41541: {'lr': 0.00041698929662086035, 'samples': 21269504, 'steps': 41541, 'loss/train': 2.4421205520629883} +03/05/2022 14:18:24 - INFO - codeparrot_training - Step 41542: {'lr': 0.0004169853473056754, 'samples': 21270016, 'steps': 41542, 'loss/train': 2.1432294845581055} +03/05/2022 14:18:25 - INFO - codeparrot_training - Skipping example with length 116 (seq_length=1024) +03/05/2022 14:18:29 - INFO - codeparrot_training - Step 41543: {'lr': 0.0004169813979152494, 'samples': 21270528, 'steps': 41543, 'loss/train': 1.0895339250564575} +03/05/2022 14:18:32 - INFO - codeparrot_training - Step 41544: {'lr': 0.0004169774484495841, 'samples': 21271040, 'steps': 41544, 'loss/train': 1.9807188510894775} +03/05/2022 14:18:33 - INFO - codeparrot_training - Skipping example with length 454 (seq_length=1024) +03/05/2022 14:18:38 - INFO - codeparrot_training - Step 41545: {'lr': 0.00041697349890868146, 'samples': 21271552, 'steps': 41545, 'loss/train': 2.4010090827941895} +03/05/2022 14:18:41 - INFO - codeparrot_training - Step 41546: {'lr': 0.0004169695492925431, 'samples': 21272064, 'steps': 41546, 'loss/train': 1.3027721643447876} +03/05/2022 14:18:42 - INFO - codeparrot_training - Skipping example with length 803 (seq_length=1024) +03/05/2022 14:18:46 - INFO - codeparrot_training - Step 41547: {'lr': 0.0004169655996011708, 'samples': 21272576, 'steps': 41547, 'loss/train': 1.824916124343872} +03/05/2022 14:18:49 - INFO - codeparrot_training - Step 41548: {'lr': 0.0004169616498345664, 'samples': 21273088, 'steps': 41548, 'loss/train': 1.6445611715316772} +03/05/2022 14:18:50 - INFO - codeparrot_training - Skipping example with length 462 (seq_length=1024) +03/05/2022 14:18:54 - INFO - codeparrot_training - Step 41549: {'lr': 0.0004169576999927317, 'samples': 21273600, 'steps': 41549, 'loss/train': 1.4714884757995605} +03/05/2022 14:18:58 - INFO - codeparrot_training - Step 41550: {'lr': 0.00041695375007566837, 'samples': 21274112, 'steps': 41550, 'loss/train': 2.3362843990325928} +03/05/2022 14:18:59 - INFO - codeparrot_training - Skipping example with length 626 (seq_length=1024) +03/05/2022 14:19:03 - INFO - codeparrot_training - Step 41551: {'lr': 0.00041694980008337825, 'samples': 21274624, 'steps': 41551, 'loss/train': 1.9699262380599976} +03/05/2022 14:19:06 - INFO - codeparrot_training - Step 41552: {'lr': 0.0004169458500158632, 'samples': 21275136, 'steps': 41552, 'loss/train': 1.3705567121505737} +03/05/2022 14:19:07 - INFO - codeparrot_training - Skipping example with length 737 (seq_length=1024) +03/05/2022 14:19:11 - INFO - codeparrot_training - Step 41553: {'lr': 0.0004169418998731249, 'samples': 21275648, 'steps': 41553, 'loss/train': 1.479076623916626} +03/05/2022 14:19:15 - INFO - codeparrot_training - Step 41554: {'lr': 0.00041693794965516514, 'samples': 21276160, 'steps': 41554, 'loss/train': 2.1832082271575928} +03/05/2022 14:19:16 - INFO - codeparrot_training - Skipping example with length 481 (seq_length=1024) +03/05/2022 14:19:20 - INFO - codeparrot_training - Step 41555: {'lr': 0.0004169339993619857, 'samples': 21276672, 'steps': 41555, 'loss/train': 1.6583421230316162} +03/05/2022 14:19:23 - INFO - codeparrot_training - Step 41556: {'lr': 0.0004169300489935884, 'samples': 21277184, 'steps': 41556, 'loss/train': 1.3976430892944336} +03/05/2022 14:19:24 - INFO - codeparrot_training - Skipping example with length 742 (seq_length=1024) +03/05/2022 14:19:28 - INFO - codeparrot_training - Step 41557: {'lr': 0.000416926098549975, 'samples': 21277696, 'steps': 41557, 'loss/train': 1.6029492616653442} +03/05/2022 14:19:31 - INFO - codeparrot_training - Step 41558: {'lr': 0.00041692214803114725, 'samples': 21278208, 'steps': 41558, 'loss/train': 2.44087290763855} +03/05/2022 14:19:32 - INFO - codeparrot_training - Skipping example with length 607 (seq_length=1024) +03/05/2022 14:19:37 - INFO - codeparrot_training - Step 41559: {'lr': 0.00041691819743710704, 'samples': 21278720, 'steps': 41559, 'loss/train': 1.774378776550293} +03/05/2022 14:19:40 - INFO - codeparrot_training - Step 41560: {'lr': 0.00041691424676785593, 'samples': 21279232, 'steps': 41560, 'loss/train': 1.5088415145874023} +03/05/2022 14:19:41 - INFO - codeparrot_training - Skipping example with length 310 (seq_length=1024) +03/05/2022 14:19:45 - INFO - codeparrot_training - Step 41561: {'lr': 0.00041691029602339595, 'samples': 21279744, 'steps': 41561, 'loss/train': 1.9280309677124023} +03/05/2022 14:19:48 - INFO - codeparrot_training - Step 41562: {'lr': 0.00041690634520372865, 'samples': 21280256, 'steps': 41562, 'loss/train': 1.6401698589324951} +03/05/2022 14:19:49 - INFO - codeparrot_training - Skipping example with length 13 (seq_length=1024) +03/05/2022 14:19:53 - INFO - codeparrot_training - Step 41563: {'lr': 0.000416902394308856, 'samples': 21280768, 'steps': 41563, 'loss/train': 2.0186192989349365} +03/05/2022 14:19:57 - INFO - codeparrot_training - Step 41564: {'lr': 0.00041689844333877966, 'samples': 21281280, 'steps': 41564, 'loss/train': 2.293422222137451} +03/05/2022 14:19:58 - INFO - codeparrot_training - Skipping example with length 932 (seq_length=1024) +03/05/2022 14:20:02 - INFO - codeparrot_training - Step 41565: {'lr': 0.00041689449229350155, 'samples': 21281792, 'steps': 41565, 'loss/train': 2.0175814628601074} +03/05/2022 14:20:05 - INFO - codeparrot_training - Step 41566: {'lr': 0.00041689054117302333, 'samples': 21282304, 'steps': 41566, 'loss/train': 2.4216039180755615} +03/05/2022 14:20:06 - INFO - codeparrot_training - Skipping example with length 855 (seq_length=1024) +03/05/2022 14:20:10 - INFO - codeparrot_training - Step 41567: {'lr': 0.00041688658997734675, 'samples': 21282816, 'steps': 41567, 'loss/train': 1.745259404182434} +03/05/2022 14:20:13 - INFO - codeparrot_training - Step 41568: {'lr': 0.0004168826387064737, 'samples': 21283328, 'steps': 41568, 'loss/train': 0.49325740337371826} +03/05/2022 14:20:14 - INFO - codeparrot_training - Skipping example with length 571 (seq_length=1024) +03/05/2022 14:20:19 - INFO - codeparrot_training - Step 41569: {'lr': 0.00041687868736040593, 'samples': 21283840, 'steps': 41569, 'loss/train': 1.3871482610702515} +03/05/2022 14:20:22 - INFO - codeparrot_training - Step 41570: {'lr': 0.0004168747359391451, 'samples': 21284352, 'steps': 41570, 'loss/train': 1.9838333129882812} +03/05/2022 14:20:23 - INFO - codeparrot_training - Skipping example with length 726 (seq_length=1024) +03/05/2022 14:20:27 - INFO - codeparrot_training - Step 41571: {'lr': 0.00041687078444269316, 'samples': 21284864, 'steps': 41571, 'loss/train': 1.7342970371246338} +03/05/2022 14:20:30 - INFO - codeparrot_training - Step 41572: {'lr': 0.0004168668328710518, 'samples': 21285376, 'steps': 41572, 'loss/train': 1.462916374206543} +03/05/2022 14:20:31 - INFO - codeparrot_training - Skipping example with length 210 (seq_length=1024) +03/05/2022 14:20:36 - INFO - codeparrot_training - Step 41573: {'lr': 0.0004168628812242228, 'samples': 21285888, 'steps': 41573, 'loss/train': 1.8578145503997803} +03/05/2022 14:20:39 - INFO - codeparrot_training - Step 41574: {'lr': 0.00041685892950220804, 'samples': 21286400, 'steps': 41574, 'loss/train': 1.8084808588027954} +03/05/2022 14:20:40 - INFO - codeparrot_training - Skipping example with length 91 (seq_length=1024) +03/05/2022 14:20:45 - INFO - codeparrot_training - Step 41575: {'lr': 0.0004168549777050091, 'samples': 21286912, 'steps': 41575, 'loss/train': 1.7415516376495361} +03/05/2022 14:20:48 - INFO - codeparrot_training - Step 41576: {'lr': 0.000416851025832628, 'samples': 21287424, 'steps': 41576, 'loss/train': 1.44538414478302} +03/05/2022 14:20:50 - INFO - codeparrot_training - Skipping example with length 665 (seq_length=1024) +03/05/2022 14:20:53 - INFO - codeparrot_training - Step 41577: {'lr': 0.0004168470738850664, 'samples': 21287936, 'steps': 41577, 'loss/train': 1.8306907415390015} +03/05/2022 14:20:56 - INFO - codeparrot_training - Step 41578: {'lr': 0.00041684312186232597, 'samples': 21288448, 'steps': 41578, 'loss/train': 1.0058157444000244} +03/05/2022 14:20:58 - INFO - codeparrot_training - Skipping example with length 359 (seq_length=1024) +03/05/2022 14:21:01 - INFO - codeparrot_training - Step 41579: {'lr': 0.0004168391697644087, 'samples': 21288960, 'steps': 41579, 'loss/train': 1.6646203994750977} +03/05/2022 14:21:05 - INFO - codeparrot_training - Step 41580: {'lr': 0.0004168352175913163, 'samples': 21289472, 'steps': 41580, 'loss/train': 2.1760144233703613} +03/05/2022 14:21:06 - INFO - codeparrot_training - Skipping example with length 137 (seq_length=1024) +03/05/2022 14:21:10 - INFO - codeparrot_training - Step 41581: {'lr': 0.00041683126534305037, 'samples': 21289984, 'steps': 41581, 'loss/train': 1.388043761253357} +03/05/2022 14:21:13 - INFO - codeparrot_training - Step 41582: {'lr': 0.000416827313019613, 'samples': 21290496, 'steps': 41582, 'loss/train': 1.959945797920227} +03/05/2022 14:21:15 - INFO - codeparrot_training - Skipping example with length 913 (seq_length=1024) +03/05/2022 14:21:18 - INFO - codeparrot_training - Step 41583: {'lr': 0.0004168233606210058, 'samples': 21291008, 'steps': 41583, 'loss/train': 1.571984052658081} +03/05/2022 14:21:22 - INFO - codeparrot_training - Step 41584: {'lr': 0.0004168194081472305, 'samples': 21291520, 'steps': 41584, 'loss/train': 2.443671226501465} +03/05/2022 14:21:23 - INFO - codeparrot_training - Skipping example with length 289 (seq_length=1024) +03/05/2022 14:21:27 - INFO - codeparrot_training - Step 41585: {'lr': 0.000416815455598289, 'samples': 21292032, 'steps': 41585, 'loss/train': 2.1880016326904297} +03/05/2022 14:21:30 - INFO - codeparrot_training - Step 41586: {'lr': 0.000416811502974183, 'samples': 21292544, 'steps': 41586, 'loss/train': 1.8529448509216309} +03/05/2022 14:21:31 - INFO - codeparrot_training - Skipping example with length 958 (seq_length=1024) +03/05/2022 14:21:35 - INFO - codeparrot_training - Step 41587: {'lr': 0.00041680755027491433, 'samples': 21293056, 'steps': 41587, 'loss/train': 2.167682409286499} +03/05/2022 14:21:38 - INFO - codeparrot_training - Step 41588: {'lr': 0.0004168035975004847, 'samples': 21293568, 'steps': 41588, 'loss/train': 1.6651618480682373} +03/05/2022 14:21:40 - INFO - codeparrot_training - Skipping example with length 637 (seq_length=1024) +03/05/2022 14:21:44 - INFO - codeparrot_training - Step 41589: {'lr': 0.00041679964465089596, 'samples': 21294080, 'steps': 41589, 'loss/train': 5.998101234436035} +03/05/2022 14:21:47 - INFO - codeparrot_training - Step 41590: {'lr': 0.00041679569172614996, 'samples': 21294592, 'steps': 41590, 'loss/train': 0.9901602268218994} +03/05/2022 14:21:49 - INFO - codeparrot_training - Skipping example with length 236 (seq_length=1024) +03/05/2022 14:21:52 - INFO - codeparrot_training - Step 41591: {'lr': 0.0004167917387262483, 'samples': 21295104, 'steps': 41591, 'loss/train': 1.5477582216262817} +03/05/2022 14:21:55 - INFO - codeparrot_training - Step 41592: {'lr': 0.0004167877856511929, 'samples': 21295616, 'steps': 41592, 'loss/train': 1.352095127105713} +03/05/2022 14:21:58 - INFO - codeparrot_training - Skipping example with length 451 (seq_length=1024) +03/05/2022 14:22:01 - INFO - codeparrot_training - Step 41593: {'lr': 0.0004167838325009855, 'samples': 21296128, 'steps': 41593, 'loss/train': 1.580986738204956} +03/05/2022 14:22:04 - INFO - codeparrot_training - Step 41594: {'lr': 0.0004167798792756279, 'samples': 21296640, 'steps': 41594, 'loss/train': 1.9818189144134521} +03/05/2022 14:22:06 - INFO - codeparrot_training - Skipping example with length 928 (seq_length=1024) +03/05/2022 14:22:09 - INFO - codeparrot_training - Step 41595: {'lr': 0.0004167759259751218, 'samples': 21297152, 'steps': 41595, 'loss/train': 1.507184624671936} +03/05/2022 14:22:12 - INFO - codeparrot_training - Step 41596: {'lr': 0.0004167719725994691, 'samples': 21297664, 'steps': 41596, 'loss/train': 1.7664644718170166} +03/05/2022 14:22:15 - INFO - codeparrot_training - Skipping example with length 480 (seq_length=1024) +03/05/2022 14:22:18 - INFO - codeparrot_training - Step 41597: {'lr': 0.00041676801914867145, 'samples': 21298176, 'steps': 41597, 'loss/train': 1.2250535488128662} +03/05/2022 14:22:21 - INFO - codeparrot_training - Step 41598: {'lr': 0.00041676406562273074, 'samples': 21298688, 'steps': 41598, 'loss/train': 2.519519805908203} +03/05/2022 14:22:24 - INFO - codeparrot_training - Skipping example with length 73 (seq_length=1024) +03/05/2022 14:22:26 - INFO - codeparrot_training - Step 41599: {'lr': 0.00041676011202164875, 'samples': 21299200, 'steps': 41599, 'loss/train': 2.1665432453155518} +03/05/2022 14:22:29 - INFO - codeparrot_training - Step 41600: {'lr': 0.00041675615834542716, 'samples': 21299712, 'steps': 41600, 'loss/train': 1.8192920684814453} +03/05/2022 14:22:32 - INFO - codeparrot_training - Skipping example with length 671 (seq_length=1024) +03/05/2022 14:22:34 - INFO - codeparrot_training - Step 41601: {'lr': 0.0004167522045940678, 'samples': 21300224, 'steps': 41601, 'loss/train': 1.7899430990219116} +03/05/2022 14:22:38 - INFO - codeparrot_training - Step 41602: {'lr': 0.0004167482507675726, 'samples': 21300736, 'steps': 41602, 'loss/train': 0.9306067228317261} +03/05/2022 14:22:40 - INFO - codeparrot_training - Skipping example with length 452 (seq_length=1024) +03/05/2022 14:22:43 - INFO - codeparrot_training - Step 41603: {'lr': 0.0004167442968659431, 'samples': 21301248, 'steps': 41603, 'loss/train': 2.0426535606384277} +03/05/2022 14:22:46 - INFO - codeparrot_training - Step 41604: {'lr': 0.0004167403428891812, 'samples': 21301760, 'steps': 41604, 'loss/train': 0.8324275612831116} +03/05/2022 14:22:49 - INFO - codeparrot_training - Skipping example with length 743 (seq_length=1024) +03/05/2022 14:22:51 - INFO - codeparrot_training - Step 41605: {'lr': 0.00041673638883728877, 'samples': 21302272, 'steps': 41605, 'loss/train': 2.3379969596862793} +03/05/2022 14:22:55 - INFO - codeparrot_training - Step 41606: {'lr': 0.00041673243471026746, 'samples': 21302784, 'steps': 41606, 'loss/train': 1.4243279695510864} +03/05/2022 14:22:57 - INFO - codeparrot_training - Skipping example with length 259 (seq_length=1024) +03/05/2022 14:23:00 - INFO - codeparrot_training - Step 41607: {'lr': 0.000416728480508119, 'samples': 21303296, 'steps': 41607, 'loss/train': 1.5020380020141602} +03/05/2022 14:23:03 - INFO - codeparrot_training - Step 41608: {'lr': 0.00041672452623084535, 'samples': 21303808, 'steps': 41608, 'loss/train': 1.3918269872665405} +03/05/2022 14:23:06 - INFO - codeparrot_training - Skipping example with length 166 (seq_length=1024) +03/05/2022 14:23:08 - INFO - codeparrot_training - Step 41609: {'lr': 0.0004167205718784481, 'samples': 21304320, 'steps': 41609, 'loss/train': 1.3375792503356934} +03/05/2022 14:23:12 - INFO - codeparrot_training - Step 41610: {'lr': 0.0004167166174509293, 'samples': 21304832, 'steps': 41610, 'loss/train': 1.8383891582489014} +03/05/2022 14:23:14 - INFO - codeparrot_training - Skipping example with length 550 (seq_length=1024) +03/05/2022 14:23:17 - INFO - codeparrot_training - Step 41611: {'lr': 0.00041671266294829036, 'samples': 21305344, 'steps': 41611, 'loss/train': 2.3257861137390137} +03/05/2022 14:23:20 - INFO - codeparrot_training - Step 41612: {'lr': 0.0004167087083705334, 'samples': 21305856, 'steps': 41612, 'loss/train': 1.3263678550720215} +03/05/2022 14:23:23 - INFO - codeparrot_training - Skipping example with length 811 (seq_length=1024) +03/05/2022 14:23:25 - INFO - codeparrot_training - Step 41613: {'lr': 0.00041670475371766, 'samples': 21306368, 'steps': 41613, 'loss/train': 1.716586947441101} +03/05/2022 14:23:28 - INFO - codeparrot_training - Step 41614: {'lr': 0.0004167007989896721, 'samples': 21306880, 'steps': 41614, 'loss/train': 0.9858690500259399} +03/05/2022 14:23:31 - INFO - codeparrot_training - Skipping example with length 829 (seq_length=1024) +03/05/2022 14:23:34 - INFO - codeparrot_training - Step 41615: {'lr': 0.0004166968441865714, 'samples': 21307392, 'steps': 41615, 'loss/train': 2.5312905311584473} +03/05/2022 14:23:37 - INFO - codeparrot_training - Step 41616: {'lr': 0.00041669288930835957, 'samples': 21307904, 'steps': 41616, 'loss/train': 1.8661984205245972} +03/05/2022 14:23:40 - INFO - codeparrot_training - Skipping example with length 347 (seq_length=1024) +03/05/2022 14:23:42 - INFO - codeparrot_training - Step 41617: {'lr': 0.0004166889343550385, 'samples': 21308416, 'steps': 41617, 'loss/train': 2.1792097091674805} +03/05/2022 14:23:45 - INFO - codeparrot_training - Step 41618: {'lr': 0.00041668497932661005, 'samples': 21308928, 'steps': 41618, 'loss/train': 1.7360122203826904} +03/05/2022 14:23:48 - INFO - codeparrot_training - Skipping example with length 272 (seq_length=1024) +03/05/2022 14:23:51 - INFO - codeparrot_training - Step 41619: {'lr': 0.00041668102422307593, 'samples': 21309440, 'steps': 41619, 'loss/train': 1.0246167182922363} +03/05/2022 14:23:54 - INFO - codeparrot_training - Step 41620: {'lr': 0.0004166770690444378, 'samples': 21309952, 'steps': 41620, 'loss/train': 1.28842294216156} +03/05/2022 14:23:56 - INFO - codeparrot_training - Skipping example with length 194 (seq_length=1024) +03/05/2022 14:23:59 - INFO - codeparrot_training - Step 41621: {'lr': 0.0004166731137906976, 'samples': 21310464, 'steps': 41621, 'loss/train': 2.127061128616333} +03/05/2022 14:24:02 - INFO - codeparrot_training - Step 41622: {'lr': 0.0004166691584618572, 'samples': 21310976, 'steps': 41622, 'loss/train': 1.4796943664550781} +03/05/2022 14:24:04 - INFO - codeparrot_training - Skipping example with length 233 (seq_length=1024) +03/05/2022 14:24:07 - INFO - codeparrot_training - Step 41623: {'lr': 0.00041666520305791806, 'samples': 21311488, 'steps': 41623, 'loss/train': 1.7269312143325806} +03/05/2022 14:24:11 - INFO - codeparrot_training - Step 41624: {'lr': 0.00041666124757888223, 'samples': 21312000, 'steps': 41624, 'loss/train': 2.2671031951904297} +03/05/2022 14:24:13 - INFO - codeparrot_training - Skipping example with length 760 (seq_length=1024) +03/05/2022 14:24:16 - INFO - codeparrot_training - Step 41625: {'lr': 0.0004166572920247514, 'samples': 21312512, 'steps': 41625, 'loss/train': 1.7755485773086548} +03/05/2022 14:24:19 - INFO - codeparrot_training - Step 41626: {'lr': 0.0004166533363955274, 'samples': 21313024, 'steps': 41626, 'loss/train': 1.702317476272583} +03/05/2022 14:24:21 - INFO - codeparrot_training - Skipping example with length 630 (seq_length=1024) +03/05/2022 14:24:24 - INFO - codeparrot_training - Step 41627: {'lr': 0.00041664938069121195, 'samples': 21313536, 'steps': 41627, 'loss/train': 1.6657503843307495} +03/05/2022 14:24:28 - INFO - codeparrot_training - Step 41628: {'lr': 0.00041664542491180685, 'samples': 21314048, 'steps': 41628, 'loss/train': 2.1434874534606934} +03/05/2022 14:24:30 - INFO - codeparrot_training - Skipping example with length 319 (seq_length=1024) +03/05/2022 14:24:33 - INFO - codeparrot_training - Step 41629: {'lr': 0.0004166414690573139, 'samples': 21314560, 'steps': 41629, 'loss/train': 1.9638348817825317} +03/05/2022 14:24:36 - INFO - codeparrot_training - Step 41630: {'lr': 0.0004166375131277349, 'samples': 21315072, 'steps': 41630, 'loss/train': 1.6797254085540771} +03/05/2022 14:24:38 - INFO - codeparrot_training - Skipping example with length 486 (seq_length=1024) +03/05/2022 14:24:41 - INFO - codeparrot_training - Step 41631: {'lr': 0.0004166335571230716, 'samples': 21315584, 'steps': 41631, 'loss/train': 2.1227877140045166} +03/05/2022 14:24:44 - INFO - codeparrot_training - Step 41632: {'lr': 0.0004166296010433258, 'samples': 21316096, 'steps': 41632, 'loss/train': 0.8448343873023987} +03/05/2022 14:24:47 - INFO - codeparrot_training - Skipping example with length 661 (seq_length=1024) +03/05/2022 14:24:50 - INFO - codeparrot_training - Step 41633: {'lr': 0.00041662564488849927, 'samples': 21316608, 'steps': 41633, 'loss/train': 1.7420042753219604} +03/05/2022 14:24:53 - INFO - codeparrot_training - Step 41634: {'lr': 0.00041662168865859374, 'samples': 21317120, 'steps': 41634, 'loss/train': 2.223531723022461} +03/05/2022 14:24:55 - INFO - codeparrot_training - Skipping example with length 317 (seq_length=1024) +03/05/2022 14:24:58 - INFO - codeparrot_training - Step 41635: {'lr': 0.0004166177323536111, 'samples': 21317632, 'steps': 41635, 'loss/train': 1.6889363527297974} +03/05/2022 14:25:01 - INFO - codeparrot_training - Step 41636: {'lr': 0.000416613775973553, 'samples': 21318144, 'steps': 41636, 'loss/train': 1.4919371604919434} +03/05/2022 14:25:03 - INFO - codeparrot_training - Skipping example with length 916 (seq_length=1024) +03/05/2022 14:25:07 - INFO - codeparrot_training - Step 41637: {'lr': 0.0004166098195184214, 'samples': 21318656, 'steps': 41637, 'loss/train': 1.7616660594940186} +03/05/2022 14:25:10 - INFO - codeparrot_training - Step 41638: {'lr': 0.000416605862988218, 'samples': 21319168, 'steps': 41638, 'loss/train': 2.473755359649658} +03/05/2022 14:25:12 - INFO - codeparrot_training - Skipping example with length 280 (seq_length=1024) +03/05/2022 14:25:15 - INFO - codeparrot_training - Step 41639: {'lr': 0.00041660190638294456, 'samples': 21319680, 'steps': 41639, 'loss/train': 2.1468098163604736} +03/05/2022 14:25:19 - INFO - codeparrot_training - Step 41640: {'lr': 0.0004165979497026028, 'samples': 21320192, 'steps': 41640, 'loss/train': 1.4655680656433105} +03/05/2022 14:25:21 - INFO - codeparrot_training - Skipping example with length 575 (seq_length=1024) +03/05/2022 14:25:24 - INFO - codeparrot_training - Step 41641: {'lr': 0.00041659399294719456, 'samples': 21320704, 'steps': 41641, 'loss/train': 1.7440673112869263} +03/05/2022 14:25:27 - INFO - codeparrot_training - Step 41642: {'lr': 0.00041659003611672175, 'samples': 21321216, 'steps': 41642, 'loss/train': 1.5244495868682861} +03/05/2022 14:25:29 - INFO - codeparrot_training - Skipping example with length 191 (seq_length=1024) +03/05/2022 14:25:32 - INFO - codeparrot_training - Step 41643: {'lr': 0.000416586079211186, 'samples': 21321728, 'steps': 41643, 'loss/train': 1.8985596895217896} +03/05/2022 14:25:35 - INFO - codeparrot_training - Step 41644: {'lr': 0.0004165821222305891, 'samples': 21322240, 'steps': 41644, 'loss/train': 0.8429443836212158} +03/05/2022 14:25:38 - INFO - codeparrot_training - Skipping example with length 399 (seq_length=1024) +03/05/2022 14:25:41 - INFO - codeparrot_training - Step 41645: {'lr': 0.00041657816517493284, 'samples': 21322752, 'steps': 41645, 'loss/train': 2.8069746494293213} +03/05/2022 14:25:44 - INFO - codeparrot_training - Step 41646: {'lr': 0.00041657420804421907, 'samples': 21323264, 'steps': 41646, 'loss/train': 1.697685956954956} +03/05/2022 14:25:46 - INFO - codeparrot_training - Skipping example with length 636 (seq_length=1024) +03/05/2022 14:25:49 - INFO - codeparrot_training - Step 41647: {'lr': 0.00041657025083844957, 'samples': 21323776, 'steps': 41647, 'loss/train': 2.434940814971924} +03/05/2022 14:25:53 - INFO - codeparrot_training - Step 41648: {'lr': 0.00041656629355762607, 'samples': 21324288, 'steps': 41648, 'loss/train': 2.075854539871216} +03/05/2022 14:25:55 - INFO - codeparrot_training - Skipping example with length 737 (seq_length=1024) +03/05/2022 14:25:58 - INFO - codeparrot_training - Step 41649: {'lr': 0.00041656233620175035, 'samples': 21324800, 'steps': 41649, 'loss/train': 0.4990417957305908} +03/05/2022 14:26:01 - INFO - codeparrot_training - Step 41650: {'lr': 0.0004165583787708242, 'samples': 21325312, 'steps': 41650, 'loss/train': 1.6960694789886475} +03/05/2022 14:26:03 - INFO - codeparrot_training - Skipping example with length 179 (seq_length=1024) +03/05/2022 14:26:07 - INFO - codeparrot_training - Step 41651: {'lr': 0.0004165544212648494, 'samples': 21325824, 'steps': 41651, 'loss/train': 1.7673367261886597} +03/05/2022 14:26:10 - INFO - codeparrot_training - Step 41652: {'lr': 0.0004165504636838278, 'samples': 21326336, 'steps': 41652, 'loss/train': 1.4973268508911133} +03/05/2022 14:26:12 - INFO - codeparrot_training - Skipping example with length 50 (seq_length=1024) +03/05/2022 14:26:16 - INFO - codeparrot_training - Step 41653: {'lr': 0.0004165465060277611, 'samples': 21326848, 'steps': 41653, 'loss/train': 1.749884009361267} +03/05/2022 14:26:19 - INFO - codeparrot_training - Step 41654: {'lr': 0.0004165425482966512, 'samples': 21327360, 'steps': 41654, 'loss/train': 1.7984153032302856} +03/05/2022 14:26:22 - INFO - codeparrot_training - Step 41655: {'lr': 0.00041653859049049964, 'samples': 21327872, 'steps': 41655, 'loss/train': 2.2414894104003906} +03/05/2022 14:26:24 - INFO - codeparrot_training - Skipping example with length 722 (seq_length=1024) +03/05/2022 14:26:27 - INFO - codeparrot_training - Step 41656: {'lr': 0.00041653463260930845, 'samples': 21328384, 'steps': 41656, 'loss/train': 1.9328770637512207} +03/05/2022 14:26:31 - INFO - codeparrot_training - Step 41657: {'lr': 0.00041653067465307925, 'samples': 21328896, 'steps': 41657, 'loss/train': 2.1162314414978027} +03/05/2022 14:26:33 - INFO - codeparrot_training - Skipping example with length 717 (seq_length=1024) +03/05/2022 14:26:36 - INFO - codeparrot_training - Step 41658: {'lr': 0.00041652671662181394, 'samples': 21329408, 'steps': 41658, 'loss/train': 1.756325602531433} +03/05/2022 14:26:39 - INFO - codeparrot_training - Step 41659: {'lr': 0.00041652275851551435, 'samples': 21329920, 'steps': 41659, 'loss/train': 0.9732646942138672} +03/05/2022 14:26:41 - INFO - codeparrot_training - Skipping example with length 675 (seq_length=1024) +03/05/2022 14:26:44 - INFO - codeparrot_training - Step 41660: {'lr': 0.0004165188003341821, 'samples': 21330432, 'steps': 41660, 'loss/train': 2.4505391120910645} +03/05/2022 14:26:48 - INFO - codeparrot_training - Step 41661: {'lr': 0.0004165148420778191, 'samples': 21330944, 'steps': 41661, 'loss/train': 1.937347650527954} +03/05/2022 14:26:51 - INFO - codeparrot_training - Step 41662: {'lr': 0.000416510883746427, 'samples': 21331456, 'steps': 41662, 'loss/train': 2.591834306716919} +03/05/2022 14:26:51 - INFO - codeparrot_training - Skipping example with length 527 (seq_length=1024) +03/05/2022 14:26:56 - INFO - codeparrot_training - Step 41663: {'lr': 0.00041650692534000766, 'samples': 21331968, 'steps': 41663, 'loss/train': 1.4354758262634277} +03/05/2022 14:26:59 - INFO - codeparrot_training - Step 41664: {'lr': 0.0004165029668585629, 'samples': 21332480, 'steps': 41664, 'loss/train': 2.329026937484741} +03/05/2022 14:26:59 - INFO - codeparrot_training - Skipping example with length 264 (seq_length=1024) +03/05/2022 14:27:05 - INFO - codeparrot_training - Step 41665: {'lr': 0.00041649900830209455, 'samples': 21332992, 'steps': 41665, 'loss/train': 3.51607084274292} +03/05/2022 14:27:08 - INFO - codeparrot_training - Step 41666: {'lr': 0.00041649504967060423, 'samples': 21333504, 'steps': 41666, 'loss/train': 1.9585216045379639} +03/05/2022 14:27:08 - INFO - codeparrot_training - Skipping example with length 256 (seq_length=1024) +03/05/2022 14:27:13 - INFO - codeparrot_training - Step 41667: {'lr': 0.0004164910909640938, 'samples': 21334016, 'steps': 41667, 'loss/train': 1.897063970565796} +03/05/2022 14:27:16 - INFO - codeparrot_training - Step 41668: {'lr': 0.0004164871321825651, 'samples': 21334528, 'steps': 41668, 'loss/train': 1.3781602382659912} +03/05/2022 14:27:17 - INFO - codeparrot_training - Skipping example with length 845 (seq_length=1024) +03/05/2022 14:27:22 - INFO - codeparrot_training - Step 41669: {'lr': 0.0004164831733260198, 'samples': 21335040, 'steps': 41669, 'loss/train': 4.243336200714111} +03/05/2022 14:27:25 - INFO - codeparrot_training - Step 41670: {'lr': 0.0004164792143944598, 'samples': 21335552, 'steps': 41670, 'loss/train': 1.4653195142745972} +03/05/2022 14:27:26 - INFO - codeparrot_training - Skipping example with length 586 (seq_length=1024) +03/05/2022 14:27:30 - INFO - codeparrot_training - Step 41671: {'lr': 0.0004164752553878868, 'samples': 21336064, 'steps': 41671, 'loss/train': 2.3739006519317627} +03/05/2022 14:27:33 - INFO - codeparrot_training - Step 41672: {'lr': 0.00041647129630630265, 'samples': 21336576, 'steps': 41672, 'loss/train': 2.4608724117279053} +03/05/2022 14:27:34 - INFO - codeparrot_training - Skipping example with length 908 (seq_length=1024) +03/05/2022 14:27:39 - INFO - codeparrot_training - Step 41673: {'lr': 0.0004164673371497092, 'samples': 21337088, 'steps': 41673, 'loss/train': 1.587136149406433} +03/05/2022 14:27:42 - INFO - codeparrot_training - Step 41674: {'lr': 0.000416463377918108, 'samples': 21337600, 'steps': 41674, 'loss/train': 1.1188586950302124} +03/05/2022 14:27:42 - INFO - codeparrot_training - Skipping example with length 319 (seq_length=1024) +03/05/2022 14:27:47 - INFO - codeparrot_training - Step 41675: {'lr': 0.00041645941861150103, 'samples': 21338112, 'steps': 41675, 'loss/train': 1.8335367441177368} +03/05/2022 14:27:50 - INFO - codeparrot_training - Step 41676: {'lr': 0.00041645545922989, 'samples': 21338624, 'steps': 41676, 'loss/train': 1.3211820125579834} +03/05/2022 14:27:50 - INFO - codeparrot_training - Skipping example with length 173 (seq_length=1024) +03/05/2022 14:27:55 - INFO - codeparrot_training - Step 41677: {'lr': 0.00041645149977327667, 'samples': 21339136, 'steps': 41677, 'loss/train': 2.4156367778778076} +03/05/2022 14:27:59 - INFO - codeparrot_training - Step 41678: {'lr': 0.0004164475402416629, 'samples': 21339648, 'steps': 41678, 'loss/train': 2.0650765895843506} +03/05/2022 14:27:59 - INFO - codeparrot_training - Skipping example with length 687 (seq_length=1024) +03/05/2022 14:28:04 - INFO - codeparrot_training - Step 41679: {'lr': 0.0004164435806350505, 'samples': 21340160, 'steps': 41679, 'loss/train': 1.0672776699066162} +03/05/2022 14:28:07 - INFO - codeparrot_training - Step 41680: {'lr': 0.00041643962095344107, 'samples': 21340672, 'steps': 41680, 'loss/train': 1.3673419952392578} +03/05/2022 14:28:07 - INFO - codeparrot_training - Skipping example with length 233 (seq_length=1024) +03/05/2022 14:28:12 - INFO - codeparrot_training - Step 41681: {'lr': 0.0004164356611968366, 'samples': 21341184, 'steps': 41681, 'loss/train': 1.7573305368423462} +03/05/2022 14:28:15 - INFO - codeparrot_training - Step 41682: {'lr': 0.0004164317013652387, 'samples': 21341696, 'steps': 41682, 'loss/train': 2.2569353580474854} +03/05/2022 14:28:16 - INFO - codeparrot_training - Skipping example with length 875 (seq_length=1024) +03/05/2022 14:28:21 - INFO - codeparrot_training - Step 41683: {'lr': 0.00041642774145864934, 'samples': 21342208, 'steps': 41683, 'loss/train': 1.987130880355835} +03/05/2022 14:28:24 - INFO - codeparrot_training - Step 41684: {'lr': 0.00041642378147707014, 'samples': 21342720, 'steps': 41684, 'loss/train': 1.2720822095870972} +03/05/2022 14:28:24 - INFO - codeparrot_training - Skipping example with length 845 (seq_length=1024) +03/05/2022 14:28:29 - INFO - codeparrot_training - Step 41685: {'lr': 0.00041641982142050297, 'samples': 21343232, 'steps': 41685, 'loss/train': 1.2535079717636108} +03/05/2022 14:28:32 - INFO - codeparrot_training - Step 41686: {'lr': 0.00041641586128894967, 'samples': 21343744, 'steps': 41686, 'loss/train': 0.30420124530792236} +03/05/2022 14:28:33 - INFO - codeparrot_training - Skipping example with length 951 (seq_length=1024) +03/05/2022 14:28:38 - INFO - codeparrot_training - Step 41687: {'lr': 0.0004164119010824119, 'samples': 21344256, 'steps': 41687, 'loss/train': 1.808125376701355} +03/05/2022 14:28:41 - INFO - codeparrot_training - Step 41688: {'lr': 0.00041640794080089144, 'samples': 21344768, 'steps': 41688, 'loss/train': 1.4203946590423584} +03/05/2022 14:28:41 - INFO - codeparrot_training - Skipping example with length 704 (seq_length=1024) +03/05/2022 14:28:46 - INFO - codeparrot_training - Step 41689: {'lr': 0.0004164039804443902, 'samples': 21345280, 'steps': 41689, 'loss/train': 1.6546109914779663} +03/05/2022 14:28:49 - INFO - codeparrot_training - Step 41690: {'lr': 0.0004164000200129099, 'samples': 21345792, 'steps': 41690, 'loss/train': 2.436789035797119} +03/05/2022 14:28:49 - INFO - codeparrot_training - Skipping example with length 957 (seq_length=1024) +03/05/2022 14:28:55 - INFO - codeparrot_training - Step 41691: {'lr': 0.0004163960595064522, 'samples': 21346304, 'steps': 41691, 'loss/train': 1.7991865873336792} +03/05/2022 14:28:58 - INFO - codeparrot_training - Step 41692: {'lr': 0.00041639209892501913, 'samples': 21346816, 'steps': 41692, 'loss/train': 1.4937413930892944} +03/05/2022 14:28:58 - INFO - codeparrot_training - Skipping example with length 637 (seq_length=1024) +03/05/2022 14:29:03 - INFO - codeparrot_training - Step 41693: {'lr': 0.00041638813826861234, 'samples': 21347328, 'steps': 41693, 'loss/train': 2.2962238788604736} +03/05/2022 14:29:06 - INFO - codeparrot_training - Step 41694: {'lr': 0.00041638417753723356, 'samples': 21347840, 'steps': 41694, 'loss/train': 1.8071091175079346} +03/05/2022 14:29:06 - INFO - codeparrot_training - Skipping example with length 231 (seq_length=1024) +03/05/2022 14:29:11 - INFO - codeparrot_training - Step 41695: {'lr': 0.00041638021673088464, 'samples': 21348352, 'steps': 41695, 'loss/train': 1.6083688735961914} +03/05/2022 14:29:15 - INFO - codeparrot_training - Step 41696: {'lr': 0.0004163762558495674, 'samples': 21348864, 'steps': 41696, 'loss/train': 0.7448838949203491} +03/05/2022 14:29:15 - INFO - codeparrot_training - Skipping example with length 601 (seq_length=1024) +03/05/2022 14:29:20 - INFO - codeparrot_training - Step 41697: {'lr': 0.0004163722948932836, 'samples': 21349376, 'steps': 41697, 'loss/train': 1.8733819723129272} +03/05/2022 14:29:23 - INFO - codeparrot_training - Step 41698: {'lr': 0.000416368333862035, 'samples': 21349888, 'steps': 41698, 'loss/train': 0.9922410845756531} +03/05/2022 14:29:23 - INFO - codeparrot_training - Skipping example with length 454 (seq_length=1024) +03/05/2022 14:29:28 - INFO - codeparrot_training - Step 41699: {'lr': 0.00041636437275582335, 'samples': 21350400, 'steps': 41699, 'loss/train': 0.5835087299346924} +03/05/2022 14:29:31 - INFO - codeparrot_training - Skipping example with length 229 (seq_length=1024) +03/05/2022 14:29:34 - INFO - codeparrot_training - Step 41700: {'lr': 0.00041636041157465056, 'samples': 21350912, 'steps': 41700, 'loss/train': 1.3758397102355957} +03/05/2022 14:29:37 - INFO - codeparrot_training - Step 41701: {'lr': 0.00041635645031851826, 'samples': 21351424, 'steps': 41701, 'loss/train': 1.8728586435317993} +03/05/2022 14:29:39 - INFO - codeparrot_training - Skipping example with length 900 (seq_length=1024) +03/05/2022 14:29:42 - INFO - codeparrot_training - Step 41702: {'lr': 0.00041635248898742834, 'samples': 21351936, 'steps': 41702, 'loss/train': 1.651859998703003} +03/05/2022 14:29:45 - INFO - codeparrot_training - Step 41703: {'lr': 0.00041634852758138253, 'samples': 21352448, 'steps': 41703, 'loss/train': 1.826904058456421} +03/05/2022 14:29:48 - INFO - codeparrot_training - Skipping example with length 504 (seq_length=1024) +03/05/2022 14:29:51 - INFO - codeparrot_training - Step 41704: {'lr': 0.0004163445661003827, 'samples': 21352960, 'steps': 41704, 'loss/train': 1.442320466041565} +03/05/2022 14:29:54 - INFO - codeparrot_training - Step 41705: {'lr': 0.0004163406045444306, 'samples': 21353472, 'steps': 41705, 'loss/train': 1.7606229782104492} +03/05/2022 14:29:57 - INFO - codeparrot_training - Skipping example with length 174 (seq_length=1024) +03/05/2022 14:29:59 - INFO - codeparrot_training - Step 41706: {'lr': 0.0004163366429135279, 'samples': 21353984, 'steps': 41706, 'loss/train': 1.3644556999206543} +03/05/2022 14:30:02 - INFO - codeparrot_training - Step 41707: {'lr': 0.00041633268120767653, 'samples': 21354496, 'steps': 41707, 'loss/train': 0.25754961371421814} +03/05/2022 14:30:05 - INFO - codeparrot_training - Skipping example with length 815 (seq_length=1024) +03/05/2022 14:30:08 - INFO - codeparrot_training - Step 41708: {'lr': 0.00041632871942687814, 'samples': 21355008, 'steps': 41708, 'loss/train': 1.7235949039459229} +03/05/2022 14:30:11 - INFO - codeparrot_training - Step 41709: {'lr': 0.00041632475757113466, 'samples': 21355520, 'steps': 41709, 'loss/train': 1.9293450117111206} +03/05/2022 14:30:13 - INFO - codeparrot_training - Skipping example with length 995 (seq_length=1024) +03/05/2022 14:30:16 - INFO - codeparrot_training - Step 41710: {'lr': 0.00041632079564044776, 'samples': 21356032, 'steps': 41710, 'loss/train': 1.8824853897094727} +03/05/2022 14:30:19 - INFO - codeparrot_training - Step 41711: {'lr': 0.0004163168336348194, 'samples': 21356544, 'steps': 41711, 'loss/train': 1.863020896911621} +03/05/2022 14:30:22 - INFO - codeparrot_training - Skipping example with length 257 (seq_length=1024) +03/05/2022 14:30:25 - INFO - codeparrot_training - Step 41712: {'lr': 0.00041631287155425114, 'samples': 21357056, 'steps': 41712, 'loss/train': 1.8134782314300537} +03/05/2022 14:30:28 - INFO - codeparrot_training - Step 41713: {'lr': 0.0004163089093987449, 'samples': 21357568, 'steps': 41713, 'loss/train': 1.4001761674880981} +03/05/2022 14:30:31 - INFO - codeparrot_training - Skipping example with length 874 (seq_length=1024) +03/05/2022 14:30:33 - INFO - codeparrot_training - Step 41714: {'lr': 0.00041630494716830244, 'samples': 21358080, 'steps': 41714, 'loss/train': 1.7449383735656738} +03/05/2022 14:30:36 - INFO - codeparrot_training - Step 41715: {'lr': 0.00041630098486292546, 'samples': 21358592, 'steps': 41715, 'loss/train': 2.236294984817505} +03/05/2022 14:30:39 - INFO - codeparrot_training - Skipping example with length 725 (seq_length=1024) +03/05/2022 14:30:41 - INFO - codeparrot_training - Step 41716: {'lr': 0.0004162970224826159, 'samples': 21359104, 'steps': 41716, 'loss/train': 2.058333158493042} +03/05/2022 14:30:45 - INFO - codeparrot_training - Step 41717: {'lr': 0.0004162930600273754, 'samples': 21359616, 'steps': 41717, 'loss/train': 3.119856119155884} +03/05/2022 14:30:47 - INFO - codeparrot_training - Skipping example with length 556 (seq_length=1024) +03/05/2022 14:30:50 - INFO - codeparrot_training - Step 41718: {'lr': 0.0004162890974972059, 'samples': 21360128, 'steps': 41718, 'loss/train': 0.34487542510032654} +03/05/2022 14:30:53 - INFO - codeparrot_training - Step 41719: {'lr': 0.00041628513489210906, 'samples': 21360640, 'steps': 41719, 'loss/train': 0.9428035020828247} +03/05/2022 14:30:56 - INFO - codeparrot_training - Skipping example with length 724 (seq_length=1024) +03/05/2022 14:30:59 - INFO - codeparrot_training - Step 41720: {'lr': 0.0004162811722120867, 'samples': 21361152, 'steps': 41720, 'loss/train': 2.032289981842041} +03/05/2022 14:31:02 - INFO - codeparrot_training - Step 41721: {'lr': 0.00041627720945714065, 'samples': 21361664, 'steps': 41721, 'loss/train': 2.018343925476074} +03/05/2022 14:31:05 - INFO - codeparrot_training - Skipping example with length 955 (seq_length=1024) +03/05/2022 14:31:07 - INFO - codeparrot_training - Step 41722: {'lr': 0.00041627324662727263, 'samples': 21362176, 'steps': 41722, 'loss/train': 1.4492855072021484} +03/05/2022 14:31:10 - INFO - codeparrot_training - Step 41723: {'lr': 0.0004162692837224844, 'samples': 21362688, 'steps': 41723, 'loss/train': 2.0627553462982178} +03/05/2022 14:31:13 - INFO - codeparrot_training - Skipping example with length 939 (seq_length=1024) +03/05/2022 14:31:15 - INFO - codeparrot_training - Step 41724: {'lr': 0.00041626532074277785, 'samples': 21363200, 'steps': 41724, 'loss/train': 1.7866464853286743} +03/05/2022 14:31:19 - INFO - codeparrot_training - Step 41725: {'lr': 0.00041626135768815467, 'samples': 21363712, 'steps': 41725, 'loss/train': 0.8510217070579529} +03/05/2022 14:31:21 - INFO - codeparrot_training - Skipping example with length 991 (seq_length=1024) +03/05/2022 14:31:24 - INFO - codeparrot_training - Step 41726: {'lr': 0.0004162573945586168, 'samples': 21364224, 'steps': 41726, 'loss/train': 1.8329081535339355} +03/05/2022 14:31:27 - INFO - codeparrot_training - Step 41727: {'lr': 0.0004162534313541658, 'samples': 21364736, 'steps': 41727, 'loss/train': 2.198261260986328} +03/05/2022 14:31:29 - INFO - codeparrot_training - Skipping example with length 523 (seq_length=1024) +03/05/2022 14:31:32 - INFO - codeparrot_training - Step 41728: {'lr': 0.00041624946807480357, 'samples': 21365248, 'steps': 41728, 'loss/train': 1.6582375764846802} +03/05/2022 14:31:35 - INFO - codeparrot_training - Step 41729: {'lr': 0.0004162455047205319, 'samples': 21365760, 'steps': 41729, 'loss/train': 1.9028270244598389} +03/05/2022 14:31:38 - INFO - codeparrot_training - Skipping example with length 226 (seq_length=1024) +03/05/2022 14:31:41 - INFO - codeparrot_training - Step 41730: {'lr': 0.0004162415412913526, 'samples': 21366272, 'steps': 41730, 'loss/train': 1.8614505529403687} +03/05/2022 14:31:44 - INFO - codeparrot_training - Step 41731: {'lr': 0.00041623757778726743, 'samples': 21366784, 'steps': 41731, 'loss/train': 1.7509688138961792} +03/05/2022 14:31:46 - INFO - codeparrot_training - Skipping example with length 532 (seq_length=1024) +03/05/2022 14:31:49 - INFO - codeparrot_training - Step 41732: {'lr': 0.00041623361420827816, 'samples': 21367296, 'steps': 41732, 'loss/train': 1.241576910018921} +03/05/2022 14:31:52 - INFO - codeparrot_training - Step 41733: {'lr': 0.0004162296505543867, 'samples': 21367808, 'steps': 41733, 'loss/train': 1.568440318107605} +03/05/2022 14:31:55 - INFO - codeparrot_training - Skipping example with length 338 (seq_length=1024) +03/05/2022 14:31:58 - INFO - codeparrot_training - Step 41734: {'lr': 0.00041622568682559455, 'samples': 21368320, 'steps': 41734, 'loss/train': 1.812659502029419} +03/05/2022 14:32:01 - INFO - codeparrot_training - Step 41735: {'lr': 0.0004162217230219038, 'samples': 21368832, 'steps': 41735, 'loss/train': 2.452751874923706} +03/05/2022 14:32:04 - INFO - codeparrot_training - Skipping example with length 772 (seq_length=1024) +03/05/2022 14:32:06 - INFO - codeparrot_training - Step 41736: {'lr': 0.00041621775914331595, 'samples': 21369344, 'steps': 41736, 'loss/train': 0.9021335244178772} +03/05/2022 14:32:09 - INFO - codeparrot_training - Step 41737: {'lr': 0.00041621379518983306, 'samples': 21369856, 'steps': 41737, 'loss/train': 1.8786866664886475} +03/05/2022 14:32:13 - INFO - codeparrot_training - Step 41738: {'lr': 0.00041620983116145673, 'samples': 21370368, 'steps': 41738, 'loss/train': 1.1865384578704834} +03/05/2022 14:32:13 - INFO - codeparrot_training - Skipping example with length 888 (seq_length=1024) +03/05/2022 14:32:18 - INFO - codeparrot_training - Step 41739: {'lr': 0.00041620586705818887, 'samples': 21370880, 'steps': 41739, 'loss/train': 2.075021505355835} +03/05/2022 14:32:21 - INFO - codeparrot_training - Step 41740: {'lr': 0.00041620190288003126, 'samples': 21371392, 'steps': 41740, 'loss/train': 1.3971706628799438} +03/05/2022 14:32:21 - INFO - codeparrot_training - Skipping example with length 34 (seq_length=1024) +03/05/2022 14:32:27 - INFO - codeparrot_training - Step 41741: {'lr': 0.00041619793862698553, 'samples': 21371904, 'steps': 41741, 'loss/train': 1.285348892211914} +03/05/2022 14:32:29 - INFO - codeparrot_training - Skipping example with length 423 (seq_length=1024) +03/05/2022 14:32:32 - INFO - codeparrot_training - Step 41742: {'lr': 0.00041619397429905363, 'samples': 21372416, 'steps': 41742, 'loss/train': 2.2484617233276367} +03/05/2022 14:32:35 - INFO - codeparrot_training - Step 41743: {'lr': 0.0004161900098962373, 'samples': 21372928, 'steps': 41743, 'loss/train': 1.2048940658569336} +03/05/2022 14:32:38 - INFO - codeparrot_training - Step 41744: {'lr': 0.00041618604541853826, 'samples': 21373440, 'steps': 41744, 'loss/train': 1.5794901847839355} +03/05/2022 14:32:38 - INFO - codeparrot_training - Skipping example with length 641 (seq_length=1024) +03/05/2022 14:32:44 - INFO - codeparrot_training - Step 41745: {'lr': 0.00041618208086595843, 'samples': 21373952, 'steps': 41745, 'loss/train': 1.7224981784820557} +03/05/2022 14:32:47 - INFO - codeparrot_training - Step 41746: {'lr': 0.0004161781162384994, 'samples': 21374464, 'steps': 41746, 'loss/train': 1.7562841176986694} +03/05/2022 14:32:47 - INFO - codeparrot_training - Skipping example with length 498 (seq_length=1024) +03/05/2022 14:32:52 - INFO - codeparrot_training - Step 41747: {'lr': 0.00041617415153616323, 'samples': 21374976, 'steps': 41747, 'loss/train': 1.4150525331497192} +03/05/2022 14:32:55 - INFO - codeparrot_training - Step 41748: {'lr': 0.00041617018675895145, 'samples': 21375488, 'steps': 41748, 'loss/train': 1.1216872930526733} +03/05/2022 14:32:55 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/05/2022 14:33:01 - INFO - codeparrot_training - Step 41749: {'lr': 0.00041616622190686597, 'samples': 21376000, 'steps': 41749, 'loss/train': 1.593578577041626} +03/05/2022 14:33:03 - INFO - codeparrot_training - Skipping example with length 828 (seq_length=1024) +03/05/2022 14:33:06 - INFO - codeparrot_training - Step 41750: {'lr': 0.0004161622569799086, 'samples': 21376512, 'steps': 41750, 'loss/train': 1.9373071193695068} +03/05/2022 14:33:09 - INFO - codeparrot_training - Step 41751: {'lr': 0.00041615829197808095, 'samples': 21377024, 'steps': 41751, 'loss/train': 2.0588881969451904} +03/05/2022 14:33:12 - INFO - codeparrot_training - Skipping example with length 552 (seq_length=1024) +03/05/2022 14:33:14 - INFO - codeparrot_training - Step 41752: {'lr': 0.0004161543269013851, 'samples': 21377536, 'steps': 41752, 'loss/train': 2.1555511951446533} +03/05/2022 14:33:17 - INFO - codeparrot_training - Step 41753: {'lr': 0.0004161503617498226, 'samples': 21378048, 'steps': 41753, 'loss/train': 1.4979212284088135} +03/05/2022 14:33:20 - INFO - codeparrot_training - Skipping example with length 670 (seq_length=1024) +03/05/2022 14:33:23 - INFO - codeparrot_training - Step 41754: {'lr': 0.00041614639652339533, 'samples': 21378560, 'steps': 41754, 'loss/train': 1.1509913206100464} +03/05/2022 14:33:26 - INFO - codeparrot_training - Step 41755: {'lr': 0.00041614243122210505, 'samples': 21379072, 'steps': 41755, 'loss/train': 1.7587387561798096} +03/05/2022 14:33:29 - INFO - codeparrot_training - Step 41756: {'lr': 0.0004161384658459535, 'samples': 21379584, 'steps': 41756, 'loss/train': 1.6897778511047363} +03/05/2022 14:33:29 - INFO - codeparrot_training - Skipping example with length 749 (seq_length=1024) +03/05/2022 14:33:35 - INFO - codeparrot_training - Step 41757: {'lr': 0.0004161345003949426, 'samples': 21380096, 'steps': 41757, 'loss/train': 1.425044059753418} +03/05/2022 14:33:38 - INFO - codeparrot_training - Step 41758: {'lr': 0.00041613053486907396, 'samples': 21380608, 'steps': 41758, 'loss/train': 0.9519824981689453} +03/05/2022 14:33:38 - INFO - codeparrot_training - Skipping example with length 831 (seq_length=1024) +03/05/2022 14:33:43 - INFO - codeparrot_training - Step 41759: {'lr': 0.0004161265692683496, 'samples': 21381120, 'steps': 41759, 'loss/train': 1.6517250537872314} +03/05/2022 14:33:46 - INFO - codeparrot_training - Step 41760: {'lr': 0.0004161226035927711, 'samples': 21381632, 'steps': 41760, 'loss/train': 1.877320408821106} +03/05/2022 14:33:46 - INFO - codeparrot_training - Skipping example with length 684 (seq_length=1024) +03/05/2022 14:33:52 - INFO - codeparrot_training - Step 41761: {'lr': 0.0004161186378423403, 'samples': 21382144, 'steps': 41761, 'loss/train': 1.8326894044876099} +03/05/2022 14:33:55 - INFO - codeparrot_training - Step 41762: {'lr': 0.000416114672017059, 'samples': 21382656, 'steps': 41762, 'loss/train': 1.6931723356246948} +03/05/2022 14:33:55 - INFO - codeparrot_training - Skipping example with length 74 (seq_length=1024) +03/05/2022 14:34:00 - INFO - codeparrot_training - Step 41763: {'lr': 0.000416110706116929, 'samples': 21383168, 'steps': 41763, 'loss/train': 1.6347417831420898} +03/05/2022 14:34:03 - INFO - codeparrot_training - Step 41764: {'lr': 0.0004161067401419521, 'samples': 21383680, 'steps': 41764, 'loss/train': 0.7342800498008728} +03/05/2022 14:34:03 - INFO - codeparrot_training - Skipping example with length 608 (seq_length=1024) +03/05/2022 14:34:09 - INFO - codeparrot_training - Step 41765: {'lr': 0.00041610277409213003, 'samples': 21384192, 'steps': 41765, 'loss/train': 1.1642147302627563} +03/05/2022 14:34:12 - INFO - codeparrot_training - Skipping example with length 495 (seq_length=1024) +03/05/2022 14:34:14 - INFO - codeparrot_training - Step 41766: {'lr': 0.00041609880796746463, 'samples': 21384704, 'steps': 41766, 'loss/train': 0.7868087887763977} +03/05/2022 14:34:17 - INFO - codeparrot_training - Step 41767: {'lr': 0.00041609484176795774, 'samples': 21385216, 'steps': 41767, 'loss/train': 1.8411526679992676} +03/05/2022 14:34:20 - INFO - codeparrot_training - Skipping example with length 994 (seq_length=1024) +03/05/2022 14:34:23 - INFO - codeparrot_training - Step 41768: {'lr': 0.000416090875493611, 'samples': 21385728, 'steps': 41768, 'loss/train': 1.9450314044952393} +03/05/2022 14:34:26 - INFO - codeparrot_training - Step 41769: {'lr': 0.0004160869091444263, 'samples': 21386240, 'steps': 41769, 'loss/train': 2.205070972442627} +03/05/2022 14:34:28 - INFO - codeparrot_training - Skipping example with length 335 (seq_length=1024) +03/05/2022 14:34:31 - INFO - codeparrot_training - Step 41770: {'lr': 0.0004160829427204054, 'samples': 21386752, 'steps': 41770, 'loss/train': 1.0739736557006836} +03/05/2022 14:34:34 - INFO - codeparrot_training - Step 41771: {'lr': 0.00041607897622155006, 'samples': 21387264, 'steps': 41771, 'loss/train': 1.988388180732727} +03/05/2022 14:34:38 - INFO - codeparrot_training - Step 41772: {'lr': 0.00041607500964786217, 'samples': 21387776, 'steps': 41772, 'loss/train': 2.357546091079712} +03/05/2022 14:34:39 - INFO - codeparrot_training - Skipping example with length 424 (seq_length=1024) +03/05/2022 14:34:43 - INFO - codeparrot_training - Step 41773: {'lr': 0.0004160710429993434, 'samples': 21388288, 'steps': 41773, 'loss/train': 1.4070957899093628} +03/05/2022 14:34:46 - INFO - codeparrot_training - Step 41774: {'lr': 0.00041606707627599556, 'samples': 21388800, 'steps': 41774, 'loss/train': 1.8731684684753418} +03/05/2022 14:34:47 - INFO - codeparrot_training - Skipping example with length 511 (seq_length=1024) +03/05/2022 14:34:51 - INFO - codeparrot_training - Step 41775: {'lr': 0.00041606310947782046, 'samples': 21389312, 'steps': 41775, 'loss/train': 1.9431158304214478} +03/05/2022 14:34:54 - INFO - codeparrot_training - Step 41776: {'lr': 0.0004160591426048199, 'samples': 21389824, 'steps': 41776, 'loss/train': 1.3166228532791138} +03/05/2022 14:34:55 - INFO - codeparrot_training - Skipping example with length 108 (seq_length=1024) +03/05/2022 14:35:00 - INFO - codeparrot_training - Step 41777: {'lr': 0.00041605517565699565, 'samples': 21390336, 'steps': 41777, 'loss/train': 2.4415535926818848} +03/05/2022 14:35:03 - INFO - codeparrot_training - Step 41778: {'lr': 0.00041605120863434945, 'samples': 21390848, 'steps': 41778, 'loss/train': 0.7269604206085205} +03/05/2022 14:35:04 - INFO - codeparrot_training - Skipping example with length 15 (seq_length=1024) +03/05/2022 14:35:08 - INFO - codeparrot_training - Step 41779: {'lr': 0.0004160472415368832, 'samples': 21391360, 'steps': 41779, 'loss/train': 1.005304217338562} +03/05/2022 14:35:11 - INFO - codeparrot_training - Step 41780: {'lr': 0.00041604327436459864, 'samples': 21391872, 'steps': 41780, 'loss/train': 1.305239200592041} +03/05/2022 14:35:12 - INFO - codeparrot_training - Skipping example with length 51 (seq_length=1024) +03/05/2022 14:35:17 - INFO - codeparrot_training - Step 41781: {'lr': 0.0004160393071174975, 'samples': 21392384, 'steps': 41781, 'loss/train': 1.2439658641815186} +03/05/2022 14:35:20 - INFO - codeparrot_training - Step 41782: {'lr': 0.00041603533979558163, 'samples': 21392896, 'steps': 41782, 'loss/train': 2.236541271209717} +03/05/2022 14:35:23 - INFO - codeparrot_training - Step 41783: {'lr': 0.0004160313723988528, 'samples': 21393408, 'steps': 41783, 'loss/train': 2.9792258739471436} +03/05/2022 14:35:23 - INFO - codeparrot_training - Skipping example with length 653 (seq_length=1024) +03/05/2022 14:35:29 - INFO - codeparrot_training - Step 41784: {'lr': 0.00041602740492731284, 'samples': 21393920, 'steps': 41784, 'loss/train': 2.185302257537842} +03/05/2022 14:35:32 - INFO - codeparrot_training - Step 41785: {'lr': 0.0004160234373809634, 'samples': 21394432, 'steps': 41785, 'loss/train': 1.638179898262024} +03/05/2022 14:35:32 - INFO - codeparrot_training - Skipping example with length 667 (seq_length=1024) +03/05/2022 14:35:37 - INFO - codeparrot_training - Step 41786: {'lr': 0.0004160194697598064, 'samples': 21394944, 'steps': 41786, 'loss/train': 2.057363748550415} +03/05/2022 14:35:40 - INFO - codeparrot_training - Step 41787: {'lr': 0.0004160155020638436, 'samples': 21395456, 'steps': 41787, 'loss/train': 1.5573837757110596} +03/05/2022 14:35:41 - INFO - codeparrot_training - Skipping example with length 58 (seq_length=1024) +03/05/2022 14:35:46 - INFO - codeparrot_training - Step 41788: {'lr': 0.0004160115342930768, 'samples': 21395968, 'steps': 41788, 'loss/train': 2.139017343521118} +03/05/2022 14:35:49 - INFO - codeparrot_training - Step 41789: {'lr': 0.0004160075664475077, 'samples': 21396480, 'steps': 41789, 'loss/train': 1.1939339637756348} +03/05/2022 14:35:50 - INFO - codeparrot_training - Skipping example with length 237 (seq_length=1024) +03/05/2022 14:35:54 - INFO - codeparrot_training - Step 41790: {'lr': 0.0004160035985271382, 'samples': 21396992, 'steps': 41790, 'loss/train': 1.2115103006362915} +03/05/2022 14:35:58 - INFO - codeparrot_training - Step 41791: {'lr': 0.00041599963053196997, 'samples': 21397504, 'steps': 41791, 'loss/train': 1.4891074895858765} +03/05/2022 14:35:58 - INFO - codeparrot_training - Skipping example with length 484 (seq_length=1024) +03/05/2022 14:36:03 - INFO - codeparrot_training - Step 41792: {'lr': 0.0004159956624620049, 'samples': 21398016, 'steps': 41792, 'loss/train': 2.7218449115753174} +03/05/2022 14:36:06 - INFO - codeparrot_training - Step 41793: {'lr': 0.0004159916943172448, 'samples': 21398528, 'steps': 41793, 'loss/train': 1.8159451484680176} +03/05/2022 14:36:06 - INFO - codeparrot_training - Skipping example with length 328 (seq_length=1024) +03/05/2022 14:36:11 - INFO - codeparrot_training - Step 41794: {'lr': 0.0004159877260976914, 'samples': 21399040, 'steps': 41794, 'loss/train': 1.2737501859664917} +03/05/2022 14:36:15 - INFO - codeparrot_training - Step 41795: {'lr': 0.00041598375780334653, 'samples': 21399552, 'steps': 41795, 'loss/train': 2.182703971862793} +03/05/2022 14:36:15 - INFO - codeparrot_training - Skipping example with length 265 (seq_length=1024) +03/05/2022 14:36:20 - INFO - codeparrot_training - Step 41796: {'lr': 0.0004159797894342118, 'samples': 21400064, 'steps': 41796, 'loss/train': 1.7394859790802002} +03/05/2022 14:36:23 - INFO - codeparrot_training - Step 41797: {'lr': 0.0004159758209902892, 'samples': 21400576, 'steps': 41797, 'loss/train': 2.303119421005249} +03/05/2022 14:36:24 - INFO - codeparrot_training - Skipping example with length 631 (seq_length=1024) +03/05/2022 14:36:28 - INFO - codeparrot_training - Step 41798: {'lr': 0.00041597185247158053, 'samples': 21401088, 'steps': 41798, 'loss/train': 1.6201814413070679} +03/05/2022 14:36:32 - INFO - codeparrot_training - Step 41799: {'lr': 0.0004159678838780874, 'samples': 21401600, 'steps': 41799, 'loss/train': 1.4371980428695679} +03/05/2022 14:36:32 - INFO - codeparrot_training - Skipping example with length 615 (seq_length=1024) +03/05/2022 14:36:37 - INFO - codeparrot_training - Step 41800: {'lr': 0.0004159639152098118, 'samples': 21402112, 'steps': 41800, 'loss/train': 1.6071571111679077} +03/05/2022 14:36:40 - INFO - codeparrot_training - Step 41801: {'lr': 0.00041595994646675537, 'samples': 21402624, 'steps': 41801, 'loss/train': 0.42283979058265686} +03/05/2022 14:36:41 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/05/2022 14:36:45 - INFO - codeparrot_training - Step 41802: {'lr': 0.0004159559776489199, 'samples': 21403136, 'steps': 41802, 'loss/train': 1.1579097509384155} +03/05/2022 14:36:49 - INFO - codeparrot_training - Step 41803: {'lr': 0.00041595200875630734, 'samples': 21403648, 'steps': 41803, 'loss/train': 1.873628854751587} +03/05/2022 14:36:50 - INFO - codeparrot_training - Skipping example with length 962 (seq_length=1024) +03/05/2022 14:36:54 - INFO - codeparrot_training - Step 41804: {'lr': 0.00041594803978891925, 'samples': 21404160, 'steps': 41804, 'loss/train': 2.2802813053131104} +03/05/2022 14:36:57 - INFO - codeparrot_training - Step 41805: {'lr': 0.00041594407074675753, 'samples': 21404672, 'steps': 41805, 'loss/train': 1.569593906402588} +03/05/2022 14:36:58 - INFO - codeparrot_training - Skipping example with length 251 (seq_length=1024) +03/05/2022 14:37:02 - INFO - codeparrot_training - Step 41806: {'lr': 0.0004159401016298241, 'samples': 21405184, 'steps': 41806, 'loss/train': 2.0869293212890625} +03/05/2022 14:37:05 - INFO - codeparrot_training - Step 41807: {'lr': 0.0004159361324381206, 'samples': 21405696, 'steps': 41807, 'loss/train': 1.9293608665466309} +03/05/2022 14:37:06 - INFO - codeparrot_training - Skipping example with length 914 (seq_length=1024) +03/05/2022 14:37:11 - INFO - codeparrot_training - Step 41808: {'lr': 0.0004159321631716487, 'samples': 21406208, 'steps': 41808, 'loss/train': 2.2722971439361572} +03/05/2022 14:37:14 - INFO - codeparrot_training - Step 41809: {'lr': 0.00041592819383041047, 'samples': 21406720, 'steps': 41809, 'loss/train': 1.460873007774353} +03/05/2022 14:37:15 - INFO - codeparrot_training - Skipping example with length 35 (seq_length=1024) +03/05/2022 14:37:19 - INFO - codeparrot_training - Step 41810: {'lr': 0.0004159242244144075, 'samples': 21407232, 'steps': 41810, 'loss/train': 1.706587314605713} +03/05/2022 14:37:22 - INFO - codeparrot_training - Step 41811: {'lr': 0.0004159202549236416, 'samples': 21407744, 'steps': 41811, 'loss/train': 2.4048280715942383} +03/05/2022 14:37:23 - INFO - codeparrot_training - Skipping example with length 66 (seq_length=1024) +03/05/2022 14:37:28 - INFO - codeparrot_training - Step 41812: {'lr': 0.00041591628535811464, 'samples': 21408256, 'steps': 41812, 'loss/train': 0.7444491386413574} +03/05/2022 14:37:31 - INFO - codeparrot_training - Step 41813: {'lr': 0.00041591231571782834, 'samples': 21408768, 'steps': 41813, 'loss/train': 2.4078614711761475} +03/05/2022 14:37:32 - INFO - codeparrot_training - Skipping example with length 385 (seq_length=1024) +03/05/2022 14:37:36 - INFO - codeparrot_training - Step 41814: {'lr': 0.0004159083460027845, 'samples': 21409280, 'steps': 41814, 'loss/train': 1.0615136623382568} +03/05/2022 14:37:39 - INFO - codeparrot_training - Step 41815: {'lr': 0.000415904376212985, 'samples': 21409792, 'steps': 41815, 'loss/train': 2.0563478469848633} +03/05/2022 14:37:40 - INFO - codeparrot_training - Skipping example with length 18 (seq_length=1024) +03/05/2022 14:37:45 - INFO - codeparrot_training - Step 41816: {'lr': 0.00041590040634843144, 'samples': 21410304, 'steps': 41816, 'loss/train': 1.5265034437179565} +03/05/2022 14:37:48 - INFO - codeparrot_training - Step 41817: {'lr': 0.00041589643640912576, 'samples': 21410816, 'steps': 41817, 'loss/train': 1.957690715789795} +03/05/2022 14:37:48 - INFO - codeparrot_training - Skipping example with length 449 (seq_length=1024) +03/05/2022 14:37:53 - INFO - codeparrot_training - Step 41818: {'lr': 0.0004158924663950697, 'samples': 21411328, 'steps': 41818, 'loss/train': 1.9975889921188354} +03/05/2022 14:37:56 - INFO - codeparrot_training - Step 41819: {'lr': 0.00041588849630626513, 'samples': 21411840, 'steps': 41819, 'loss/train': 1.611990213394165} +03/05/2022 14:37:57 - INFO - codeparrot_training - Skipping example with length 157 (seq_length=1024) +03/05/2022 14:38:02 - INFO - codeparrot_training - Step 41820: {'lr': 0.00041588452614271364, 'samples': 21412352, 'steps': 41820, 'loss/train': 1.5102938413619995} +03/05/2022 14:38:06 - INFO - codeparrot_training - Step 41821: {'lr': 0.00041588055590441726, 'samples': 21412864, 'steps': 41821, 'loss/train': 2.0941689014434814} +03/05/2022 14:38:09 - INFO - codeparrot_training - Step 41822: {'lr': 0.0004158765855913776, 'samples': 21413376, 'steps': 41822, 'loss/train': 1.5637538433074951} +03/05/2022 14:38:10 - INFO - codeparrot_training - Skipping example with length 81 (seq_length=1024) +03/05/2022 14:38:15 - INFO - codeparrot_training - Step 41823: {'lr': 0.0004158726152035965, 'samples': 21413888, 'steps': 41823, 'loss/train': 0.7294315695762634} +03/05/2022 14:38:18 - INFO - codeparrot_training - Step 41824: {'lr': 0.00041586864474107575, 'samples': 21414400, 'steps': 41824, 'loss/train': 1.1033635139465332} +03/05/2022 14:38:20 - INFO - codeparrot_training - Skipping example with length 650 (seq_length=1024) +03/05/2022 14:38:23 - INFO - codeparrot_training - Step 41825: {'lr': 0.0004158646742038172, 'samples': 21414912, 'steps': 41825, 'loss/train': 0.9787554144859314} +03/05/2022 14:38:27 - INFO - codeparrot_training - Step 41826: {'lr': 0.00041586070359182255, 'samples': 21415424, 'steps': 41826, 'loss/train': 1.2099827527999878} +03/05/2022 14:38:29 - INFO - codeparrot_training - Skipping example with length 594 (seq_length=1024) +03/05/2022 14:38:32 - INFO - codeparrot_training - Step 41827: {'lr': 0.00041585673290509364, 'samples': 21415936, 'steps': 41827, 'loss/train': 1.6371926069259644} +03/05/2022 14:38:35 - INFO - codeparrot_training - Step 41828: {'lr': 0.0004158527621436322, 'samples': 21416448, 'steps': 41828, 'loss/train': 1.829106092453003} +03/05/2022 14:38:40 - INFO - codeparrot_training - Step 41829: {'lr': 0.0004158487913074401, 'samples': 21416960, 'steps': 41829, 'loss/train': 1.9899442195892334} +03/05/2022 14:38:44 - INFO - codeparrot_training - Step 41830: {'lr': 0.0004158448203965192, 'samples': 21417472, 'steps': 41830, 'loss/train': 1.8456447124481201} +03/05/2022 14:38:46 - INFO - codeparrot_training - Skipping example with length 53 (seq_length=1024) +03/05/2022 14:38:49 - INFO - codeparrot_training - Step 41831: {'lr': 0.000415840849410871, 'samples': 21417984, 'steps': 41831, 'loss/train': 1.6782333850860596} +03/05/2022 14:38:52 - INFO - codeparrot_training - Step 41832: {'lr': 0.0004158368783504975, 'samples': 21418496, 'steps': 41832, 'loss/train': 1.309798002243042} +03/05/2022 14:38:57 - INFO - codeparrot_training - Step 41833: {'lr': 0.00041583290721540055, 'samples': 21419008, 'steps': 41833, 'loss/train': 1.317761778831482} +03/05/2022 14:39:00 - INFO - codeparrot_training - Step 41834: {'lr': 0.0004158289360055819, 'samples': 21419520, 'steps': 41834, 'loss/train': 0.9373942017555237} +03/05/2022 14:39:02 - INFO - codeparrot_training - Skipping example with length 459 (seq_length=1024) +03/05/2022 14:39:06 - INFO - codeparrot_training - Step 41835: {'lr': 0.00041582496472104314, 'samples': 21420032, 'steps': 41835, 'loss/train': 1.9994984865188599} +03/05/2022 14:39:09 - INFO - codeparrot_training - Step 41836: {'lr': 0.0004158209933617863, 'samples': 21420544, 'steps': 41836, 'loss/train': 2.200524091720581} +03/05/2022 14:39:11 - INFO - codeparrot_training - Skipping example with length 802 (seq_length=1024) +03/05/2022 14:39:14 - INFO - codeparrot_training - Step 41837: {'lr': 0.00041581702192781305, 'samples': 21421056, 'steps': 41837, 'loss/train': 1.1478140354156494} +03/05/2022 14:39:17 - INFO - codeparrot_training - Step 41838: {'lr': 0.0004158130504191252, 'samples': 21421568, 'steps': 41838, 'loss/train': 2.030184507369995} +03/05/2022 14:39:19 - INFO - codeparrot_training - Skipping example with length 349 (seq_length=1024) +03/05/2022 14:39:23 - INFO - codeparrot_training - Step 41839: {'lr': 0.0004158090788357246, 'samples': 21422080, 'steps': 41839, 'loss/train': 1.8791109323501587} +03/05/2022 14:39:26 - INFO - codeparrot_training - Step 41840: {'lr': 0.0004158051071776129, 'samples': 21422592, 'steps': 41840, 'loss/train': 2.080824375152588} +03/05/2022 14:39:28 - INFO - codeparrot_training - Skipping example with length 334 (seq_length=1024) +03/05/2022 14:39:31 - INFO - codeparrot_training - Step 41841: {'lr': 0.00041580113544479203, 'samples': 21423104, 'steps': 41841, 'loss/train': 2.2521655559539795} +03/05/2022 14:39:34 - INFO - codeparrot_training - Step 41842: {'lr': 0.00041579716363726376, 'samples': 21423616, 'steps': 41842, 'loss/train': 1.9264005422592163} +03/05/2022 14:39:36 - INFO - codeparrot_training - Skipping example with length 989 (seq_length=1024) +03/05/2022 14:39:40 - INFO - codeparrot_training - Step 41843: {'lr': 0.00041579319175502985, 'samples': 21424128, 'steps': 41843, 'loss/train': 2.2222495079040527} +03/05/2022 14:39:43 - INFO - codeparrot_training - Step 41844: {'lr': 0.000415789219798092, 'samples': 21424640, 'steps': 41844, 'loss/train': 1.8660755157470703} +03/05/2022 14:39:45 - INFO - codeparrot_training - Skipping example with length 569 (seq_length=1024) +03/05/2022 14:39:48 - INFO - codeparrot_training - Step 41845: {'lr': 0.00041578524776645216, 'samples': 21425152, 'steps': 41845, 'loss/train': 1.2696003913879395} +03/05/2022 14:39:52 - INFO - codeparrot_training - Step 41846: {'lr': 0.00041578127566011203, 'samples': 21425664, 'steps': 41846, 'loss/train': 1.2062631845474243} +03/05/2022 14:39:53 - INFO - codeparrot_training - Skipping example with length 113 (seq_length=1024) +03/05/2022 14:39:57 - INFO - codeparrot_training - Step 41847: {'lr': 0.0004157773034790734, 'samples': 21426176, 'steps': 41847, 'loss/train': 1.3547146320343018} +03/05/2022 14:40:00 - INFO - codeparrot_training - Step 41848: {'lr': 0.00041577333122333807, 'samples': 21426688, 'steps': 41848, 'loss/train': 1.6823145151138306} +03/05/2022 14:40:02 - INFO - codeparrot_training - Skipping example with length 14 (seq_length=1024) +03/05/2022 14:40:05 - INFO - codeparrot_training - Step 41849: {'lr': 0.00041576935889290777, 'samples': 21427200, 'steps': 41849, 'loss/train': 1.9490214586257935} +03/05/2022 14:40:08 - INFO - codeparrot_training - Step 41850: {'lr': 0.0004157653864877845, 'samples': 21427712, 'steps': 41850, 'loss/train': 1.75373375415802} +03/05/2022 14:40:10 - INFO - codeparrot_training - Skipping example with length 211 (seq_length=1024) +03/05/2022 14:40:14 - INFO - codeparrot_training - Step 41851: {'lr': 0.00041576141400796984, 'samples': 21428224, 'steps': 41851, 'loss/train': 2.088066816329956} +03/05/2022 14:40:17 - INFO - codeparrot_training - Step 41852: {'lr': 0.00041575744145346563, 'samples': 21428736, 'steps': 41852, 'loss/train': 1.5561072826385498} +03/05/2022 14:40:18 - INFO - codeparrot_training - Skipping example with length 338 (seq_length=1024) +03/05/2022 14:40:22 - INFO - codeparrot_training - Step 41853: {'lr': 0.00041575346882427366, 'samples': 21429248, 'steps': 41853, 'loss/train': 2.195356845855713} +03/05/2022 14:40:25 - INFO - codeparrot_training - Step 41854: {'lr': 0.00041574949612039583, 'samples': 21429760, 'steps': 41854, 'loss/train': 1.5174028873443604} +03/05/2022 14:40:27 - INFO - codeparrot_training - Skipping example with length 913 (seq_length=1024) +03/05/2022 14:40:31 - INFO - codeparrot_training - Step 41855: {'lr': 0.0004157455233418337, 'samples': 21430272, 'steps': 41855, 'loss/train': 1.2785555124282837} +03/05/2022 14:40:34 - INFO - codeparrot_training - Step 41856: {'lr': 0.0004157415504885893, 'samples': 21430784, 'steps': 41856, 'loss/train': 1.518579363822937} +03/05/2022 14:40:35 - INFO - codeparrot_training - Skipping example with length 1006 (seq_length=1024) +03/05/2022 14:40:39 - INFO - codeparrot_training - Step 41857: {'lr': 0.00041573757756066423, 'samples': 21431296, 'steps': 41857, 'loss/train': 1.6889809370040894} +03/05/2022 14:40:42 - INFO - codeparrot_training - Step 41858: {'lr': 0.0004157336045580604, 'samples': 21431808, 'steps': 41858, 'loss/train': 1.9305601119995117} +03/05/2022 14:40:45 - INFO - codeparrot_training - Skipping example with length 113 (seq_length=1024) +03/05/2022 14:40:48 - INFO - codeparrot_training - Step 41859: {'lr': 0.0004157296314807796, 'samples': 21432320, 'steps': 41859, 'loss/train': 1.8745770454406738} +03/05/2022 14:40:51 - INFO - codeparrot_training - Step 41860: {'lr': 0.0004157256583288235, 'samples': 21432832, 'steps': 41860, 'loss/train': 2.1258575916290283} +03/05/2022 14:40:53 - INFO - codeparrot_training - Skipping example with length 367 (seq_length=1024) +03/05/2022 14:40:56 - INFO - codeparrot_training - Step 41861: {'lr': 0.0004157216851021941, 'samples': 21433344, 'steps': 41861, 'loss/train': 2.034987211227417} +03/05/2022 14:40:59 - INFO - codeparrot_training - Step 41862: {'lr': 0.00041571771180089304, 'samples': 21433856, 'steps': 41862, 'loss/train': 1.7405117750167847} +03/05/2022 14:41:01 - INFO - codeparrot_training - Skipping example with length 591 (seq_length=1024) +03/05/2022 14:41:05 - INFO - codeparrot_training - Step 41863: {'lr': 0.0004157137384249221, 'samples': 21434368, 'steps': 41863, 'loss/train': 1.317883014678955} +03/05/2022 14:41:08 - INFO - codeparrot_training - Step 41864: {'lr': 0.00041570976497428303, 'samples': 21434880, 'steps': 41864, 'loss/train': 1.5707241296768188} +03/05/2022 14:41:10 - INFO - codeparrot_training - Skipping example with length 1018 (seq_length=1024) +03/05/2022 14:41:13 - INFO - codeparrot_training - Step 41865: {'lr': 0.0004157057914489778, 'samples': 21435392, 'steps': 41865, 'loss/train': 1.3716835975646973} +03/05/2022 14:41:16 - INFO - codeparrot_training - Step 41866: {'lr': 0.00041570181784900806, 'samples': 21435904, 'steps': 41866, 'loss/train': 0.6493259072303772} +03/05/2022 14:41:18 - INFO - codeparrot_training - Skipping example with length 416 (seq_length=1024) +03/05/2022 14:41:21 - INFO - codeparrot_training - Step 41867: {'lr': 0.0004156978441743756, 'samples': 21436416, 'steps': 41867, 'loss/train': 1.5787403583526611} +03/05/2022 14:41:25 - INFO - codeparrot_training - Step 41868: {'lr': 0.00041569387042508235, 'samples': 21436928, 'steps': 41868, 'loss/train': 1.674806833267212} +03/05/2022 14:41:26 - INFO - codeparrot_training - Skipping example with length 124 (seq_length=1024) +03/05/2022 14:41:30 - INFO - codeparrot_training - Step 41869: {'lr': 0.0004156898966011299, 'samples': 21437440, 'steps': 41869, 'loss/train': 1.9101808071136475} +03/05/2022 14:41:33 - INFO - codeparrot_training - Step 41870: {'lr': 0.0004156859227025202, 'samples': 21437952, 'steps': 41870, 'loss/train': 2.0147781372070312} +03/05/2022 14:41:35 - INFO - codeparrot_training - Skipping example with length 71 (seq_length=1024) +03/05/2022 14:41:38 - INFO - codeparrot_training - Step 41871: {'lr': 0.0004156819487292549, 'samples': 21438464, 'steps': 41871, 'loss/train': 1.2760896682739258} +03/05/2022 14:41:41 - INFO - codeparrot_training - Step 41872: {'lr': 0.00041567797468133595, 'samples': 21438976, 'steps': 41872, 'loss/train': 1.737245798110962} +03/05/2022 14:41:43 - INFO - codeparrot_training - Skipping example with length 646 (seq_length=1024) +03/05/2022 14:41:47 - INFO - codeparrot_training - Step 41873: {'lr': 0.00041567400055876505, 'samples': 21439488, 'steps': 41873, 'loss/train': 1.7119011878967285} +03/05/2022 14:41:50 - INFO - codeparrot_training - Step 41874: {'lr': 0.00041567002636154406, 'samples': 21440000, 'steps': 41874, 'loss/train': 1.9952154159545898} +03/05/2022 14:41:52 - INFO - codeparrot_training - Skipping example with length 1021 (seq_length=1024) +03/05/2022 14:41:55 - INFO - codeparrot_training - Step 41875: {'lr': 0.0004156660520896746, 'samples': 21440512, 'steps': 41875, 'loss/train': 1.4254050254821777} +03/05/2022 14:41:58 - INFO - codeparrot_training - Step 41876: {'lr': 0.00041566207774315866, 'samples': 21441024, 'steps': 41876, 'loss/train': 1.753248691558838} +03/05/2022 14:42:00 - INFO - codeparrot_training - Skipping example with length 151 (seq_length=1024) +03/05/2022 14:42:04 - INFO - codeparrot_training - Step 41877: {'lr': 0.0004156581033219979, 'samples': 21441536, 'steps': 41877, 'loss/train': 0.9382883310317993} +03/05/2022 14:42:07 - INFO - codeparrot_training - Step 41878: {'lr': 0.0004156541288261941, 'samples': 21442048, 'steps': 41878, 'loss/train': 1.8855327367782593} +03/05/2022 14:42:09 - INFO - codeparrot_training - Skipping example with length 253 (seq_length=1024) +03/05/2022 14:42:12 - INFO - codeparrot_training - Step 41879: {'lr': 0.00041565015425574917, 'samples': 21442560, 'steps': 41879, 'loss/train': 1.7170093059539795} +03/05/2022 14:42:15 - INFO - codeparrot_training - Step 41880: {'lr': 0.00041564617961066487, 'samples': 21443072, 'steps': 41880, 'loss/train': 2.184828996658325} +03/05/2022 14:42:18 - INFO - codeparrot_training - Skipping example with length 501 (seq_length=1024) +03/05/2022 14:42:21 - INFO - codeparrot_training - Step 41881: {'lr': 0.00041564220489094295, 'samples': 21443584, 'steps': 41881, 'loss/train': 1.9496238231658936} +03/05/2022 14:42:24 - INFO - codeparrot_training - Step 41882: {'lr': 0.00041563823009658514, 'samples': 21444096, 'steps': 41882, 'loss/train': 1.2249605655670166} +03/05/2022 14:42:26 - INFO - codeparrot_training - Skipping example with length 231 (seq_length=1024) +03/05/2022 14:42:29 - INFO - codeparrot_training - Step 41883: {'lr': 0.00041563425522759336, 'samples': 21444608, 'steps': 41883, 'loss/train': 2.093924045562744} +03/05/2022 14:42:32 - INFO - codeparrot_training - Step 41884: {'lr': 0.0004156302802839693, 'samples': 21445120, 'steps': 41884, 'loss/train': 2.092982769012451} +03/05/2022 14:42:34 - INFO - codeparrot_training - Skipping example with length 961 (seq_length=1024) +03/05/2022 14:42:37 - INFO - codeparrot_training - Step 41885: {'lr': 0.0004156263052657148, 'samples': 21445632, 'steps': 41885, 'loss/train': 0.9450310468673706} +03/05/2022 14:42:41 - INFO - codeparrot_training - Step 41886: {'lr': 0.0004156223301728316, 'samples': 21446144, 'steps': 41886, 'loss/train': 1.9756571054458618} +03/05/2022 14:42:43 - INFO - codeparrot_training - Skipping example with length 686 (seq_length=1024) +03/05/2022 14:42:46 - INFO - codeparrot_training - Step 41887: {'lr': 0.0004156183550053216, 'samples': 21446656, 'steps': 41887, 'loss/train': 1.7126123905181885} +03/05/2022 14:42:49 - INFO - codeparrot_training - Step 41888: {'lr': 0.0004156143797631866, 'samples': 21447168, 'steps': 41888, 'loss/train': 2.2664036750793457} +03/05/2022 14:42:51 - INFO - codeparrot_training - Skipping example with length 252 (seq_length=1024) +03/05/2022 14:42:54 - INFO - codeparrot_training - Step 41889: {'lr': 0.0004156104044464282, 'samples': 21447680, 'steps': 41889, 'loss/train': 1.5993738174438477} +03/05/2022 14:42:58 - INFO - codeparrot_training - Step 41890: {'lr': 0.00041560642905504833, 'samples': 21448192, 'steps': 41890, 'loss/train': 2.0949103832244873} +03/05/2022 14:43:00 - INFO - codeparrot_training - Skipping example with length 442 (seq_length=1024) +03/05/2022 14:43:03 - INFO - codeparrot_training - Step 41891: {'lr': 0.0004156024535890487, 'samples': 21448704, 'steps': 41891, 'loss/train': 1.7356857061386108} +03/05/2022 14:43:06 - INFO - codeparrot_training - Step 41892: {'lr': 0.00041559847804843123, 'samples': 21449216, 'steps': 41892, 'loss/train': 1.9027390480041504} +03/05/2022 14:43:08 - INFO - codeparrot_training - Skipping example with length 627 (seq_length=1024) +03/05/2022 14:43:11 - INFO - codeparrot_training - Step 41893: {'lr': 0.0004155945024331976, 'samples': 21449728, 'steps': 41893, 'loss/train': 2.2333579063415527} +03/05/2022 14:43:14 - INFO - codeparrot_training - Step 41894: {'lr': 0.00041559052674334975, 'samples': 21450240, 'steps': 41894, 'loss/train': 1.4915028810501099} +03/05/2022 14:43:16 - INFO - codeparrot_training - Skipping example with length 525 (seq_length=1024) +03/05/2022 14:43:20 - INFO - codeparrot_training - Step 41895: {'lr': 0.0004155865509788893, 'samples': 21450752, 'steps': 41895, 'loss/train': 1.9134665727615356} +03/05/2022 14:43:23 - INFO - codeparrot_training - Step 41896: {'lr': 0.00041558257513981805, 'samples': 21451264, 'steps': 41896, 'loss/train': 0.9108603000640869} +03/05/2022 14:43:25 - INFO - codeparrot_training - Skipping example with length 133 (seq_length=1024) +03/05/2022 14:43:28 - INFO - codeparrot_training - Step 41897: {'lr': 0.00041557859922613795, 'samples': 21451776, 'steps': 41897, 'loss/train': 1.8455276489257812} +03/05/2022 14:43:31 - INFO - codeparrot_training - Step 41898: {'lr': 0.00041557462323785053, 'samples': 21452288, 'steps': 41898, 'loss/train': 1.6030369997024536} +03/05/2022 14:43:33 - INFO - codeparrot_training - Skipping example with length 708 (seq_length=1024) +03/05/2022 14:43:37 - INFO - codeparrot_training - Step 41899: {'lr': 0.00041557064717495786, 'samples': 21452800, 'steps': 41899, 'loss/train': 1.9511888027191162} +03/05/2022 14:43:40 - INFO - codeparrot_training - Step 41900: {'lr': 0.00041556667103746157, 'samples': 21453312, 'steps': 41900, 'loss/train': 1.9744770526885986} +03/05/2022 14:43:42 - INFO - codeparrot_training - Skipping example with length 278 (seq_length=1024) +03/05/2022 14:43:45 - INFO - codeparrot_training - Step 41901: {'lr': 0.00041556269482536355, 'samples': 21453824, 'steps': 41901, 'loss/train': 1.8551928997039795} +03/05/2022 14:43:49 - INFO - codeparrot_training - Step 41902: {'lr': 0.00041555871853866553, 'samples': 21454336, 'steps': 41902, 'loss/train': 0.4991138279438019} +03/05/2022 14:43:51 - INFO - codeparrot_training - Skipping example with length 725 (seq_length=1024) +03/05/2022 14:43:54 - INFO - codeparrot_training - Step 41903: {'lr': 0.00041555474217736926, 'samples': 21454848, 'steps': 41903, 'loss/train': 1.8484559059143066} +03/05/2022 14:43:57 - INFO - codeparrot_training - Step 41904: {'lr': 0.0004155507657414766, 'samples': 21455360, 'steps': 41904, 'loss/train': 0.9804142117500305} +03/05/2022 14:43:59 - INFO - codeparrot_training - Skipping example with length 891 (seq_length=1024) +03/05/2022 14:44:02 - INFO - codeparrot_training - Step 41905: {'lr': 0.0004155467892309893, 'samples': 21455872, 'steps': 41905, 'loss/train': 2.7782599925994873} +03/05/2022 14:44:05 - INFO - codeparrot_training - Step 41906: {'lr': 0.0004155428126459092, 'samples': 21456384, 'steps': 41906, 'loss/train': 2.0531554222106934} +03/05/2022 14:44:08 - INFO - codeparrot_training - Skipping example with length 35 (seq_length=1024) +03/05/2022 14:44:11 - INFO - codeparrot_training - Step 41907: {'lr': 0.00041553883598623804, 'samples': 21456896, 'steps': 41907, 'loss/train': 1.6811994314193726} +03/05/2022 14:44:14 - INFO - codeparrot_training - Step 41908: {'lr': 0.00041553485925197763, 'samples': 21457408, 'steps': 41908, 'loss/train': 2.1421356201171875} +03/05/2022 14:44:16 - INFO - codeparrot_training - Skipping example with length 26 (seq_length=1024) +03/05/2022 14:44:19 - INFO - codeparrot_training - Step 41909: {'lr': 0.00041553088244312975, 'samples': 21457920, 'steps': 41909, 'loss/train': 1.6446198225021362} +03/05/2022 14:44:22 - INFO - codeparrot_training - Step 41910: {'lr': 0.0004155269055596963, 'samples': 21458432, 'steps': 41910, 'loss/train': 1.1300580501556396} +03/05/2022 14:44:25 - INFO - codeparrot_training - Skipping example with length 395 (seq_length=1024) +03/05/2022 14:44:28 - INFO - codeparrot_training - Step 41911: {'lr': 0.0004155229286016789, 'samples': 21458944, 'steps': 41911, 'loss/train': 2.0153722763061523} +03/05/2022 14:44:31 - INFO - codeparrot_training - Step 41912: {'lr': 0.0004155189515690794, 'samples': 21459456, 'steps': 41912, 'loss/train': 2.7276649475097656} +03/05/2022 14:44:34 - INFO - codeparrot_training - Step 41913: {'lr': 0.0004155149744618997, 'samples': 21459968, 'steps': 41913, 'loss/train': 1.6332619190216064} +03/05/2022 14:44:35 - INFO - codeparrot_training - Skipping example with length 1022 (seq_length=1024) +03/05/2022 14:44:40 - INFO - codeparrot_training - Step 41914: {'lr': 0.0004155109972801414, 'samples': 21460480, 'steps': 41914, 'loss/train': 1.3162097930908203} +03/05/2022 14:44:43 - INFO - codeparrot_training - Step 41915: {'lr': 0.0004155070200238065, 'samples': 21460992, 'steps': 41915, 'loss/train': 2.081064224243164} +03/05/2022 14:44:44 - INFO - codeparrot_training - Skipping example with length 769 (seq_length=1024) +03/05/2022 14:44:48 - INFO - codeparrot_training - Step 41916: {'lr': 0.00041550304269289664, 'samples': 21461504, 'steps': 41916, 'loss/train': 1.428346872329712} +03/05/2022 14:44:51 - INFO - codeparrot_training - Step 41917: {'lr': 0.00041549906528741366, 'samples': 21462016, 'steps': 41917, 'loss/train': 1.9242680072784424} +03/05/2022 14:44:52 - INFO - codeparrot_training - Skipping example with length 67 (seq_length=1024) +03/05/2022 14:44:56 - INFO - codeparrot_training - Step 41918: {'lr': 0.0004154950878073594, 'samples': 21462528, 'steps': 41918, 'loss/train': 1.1793553829193115} +03/05/2022 14:45:00 - INFO - codeparrot_training - Step 41919: {'lr': 0.0004154911102527356, 'samples': 21463040, 'steps': 41919, 'loss/train': 1.7504701614379883} +03/05/2022 14:45:01 - INFO - codeparrot_training - Skipping example with length 744 (seq_length=1024) +03/05/2022 14:45:05 - INFO - codeparrot_training - Step 41920: {'lr': 0.00041548713262354396, 'samples': 21463552, 'steps': 41920, 'loss/train': 1.810105562210083} +03/05/2022 14:45:08 - INFO - codeparrot_training - Step 41921: {'lr': 0.0004154831549197865, 'samples': 21464064, 'steps': 41921, 'loss/train': 1.8996647596359253} +03/05/2022 14:45:09 - INFO - codeparrot_training - Skipping example with length 436 (seq_length=1024) +03/05/2022 14:45:13 - INFO - codeparrot_training - Step 41922: {'lr': 0.0004154791771414648, 'samples': 21464576, 'steps': 41922, 'loss/train': 1.3253934383392334} +03/05/2022 14:45:16 - INFO - codeparrot_training - Step 41923: {'lr': 0.0004154751992885808, 'samples': 21465088, 'steps': 41923, 'loss/train': 0.605379045009613} +03/05/2022 14:45:18 - INFO - codeparrot_training - Skipping example with length 426 (seq_length=1024) +03/05/2022 14:45:22 - INFO - codeparrot_training - Step 41924: {'lr': 0.0004154712213611362, 'samples': 21465600, 'steps': 41924, 'loss/train': 2.161938190460205} +03/05/2022 14:45:25 - INFO - codeparrot_training - Step 41925: {'lr': 0.0004154672433591328, 'samples': 21466112, 'steps': 41925, 'loss/train': 1.6911333799362183} +03/05/2022 14:45:26 - INFO - codeparrot_training - Skipping example with length 922 (seq_length=1024) +03/05/2022 14:45:30 - INFO - codeparrot_training - Step 41926: {'lr': 0.0004154632652825724, 'samples': 21466624, 'steps': 41926, 'loss/train': 1.5501492023468018} +03/05/2022 14:45:33 - INFO - codeparrot_training - Step 41927: {'lr': 0.00041545928713145687, 'samples': 21467136, 'steps': 41927, 'loss/train': 1.6769646406173706} +03/05/2022 14:45:34 - INFO - codeparrot_training - Skipping example with length 537 (seq_length=1024) +03/05/2022 14:45:39 - INFO - codeparrot_training - Step 41928: {'lr': 0.00041545530890578784, 'samples': 21467648, 'steps': 41928, 'loss/train': 1.3389497995376587} +03/05/2022 14:45:42 - INFO - codeparrot_training - Step 41929: {'lr': 0.00041545133060556734, 'samples': 21468160, 'steps': 41929, 'loss/train': 2.020487070083618} +03/05/2022 14:45:43 - INFO - codeparrot_training - Skipping example with length 55 (seq_length=1024) +03/05/2022 14:45:47 - INFO - codeparrot_training - Step 41930: {'lr': 0.00041544735223079693, 'samples': 21468672, 'steps': 41930, 'loss/train': 1.678605556488037} +03/05/2022 14:45:50 - INFO - codeparrot_training - Step 41931: {'lr': 0.0004154433737814786, 'samples': 21469184, 'steps': 41931, 'loss/train': 2.1452643871307373} +03/05/2022 14:45:51 - INFO - codeparrot_training - Skipping example with length 377 (seq_length=1024) +03/05/2022 14:45:56 - INFO - codeparrot_training - Step 41932: {'lr': 0.0004154393952576139, 'samples': 21469696, 'steps': 41932, 'loss/train': 0.8944704532623291} +03/05/2022 14:45:59 - INFO - codeparrot_training - Step 41933: {'lr': 0.00041543541665920483, 'samples': 21470208, 'steps': 41933, 'loss/train': 1.9454741477966309} +03/05/2022 14:45:59 - INFO - codeparrot_training - Skipping example with length 438 (seq_length=1024) +03/05/2022 14:46:04 - INFO - codeparrot_training - Step 41934: {'lr': 0.000415431437986253, 'samples': 21470720, 'steps': 41934, 'loss/train': 0.7341524362564087} +03/05/2022 14:46:07 - INFO - codeparrot_training - Step 41935: {'lr': 0.00041542745923876047, 'samples': 21471232, 'steps': 41935, 'loss/train': 2.078669548034668} +03/05/2022 14:46:08 - INFO - codeparrot_training - Skipping example with length 871 (seq_length=1024) +03/05/2022 14:46:13 - INFO - codeparrot_training - Step 41936: {'lr': 0.00041542348041672886, 'samples': 21471744, 'steps': 41936, 'loss/train': 1.673399806022644} +03/05/2022 14:46:16 - INFO - codeparrot_training - Step 41937: {'lr': 0.00041541950152015997, 'samples': 21472256, 'steps': 41937, 'loss/train': 1.8219189643859863} +03/05/2022 14:46:17 - INFO - codeparrot_training - Skipping example with length 806 (seq_length=1024) +03/05/2022 14:46:21 - INFO - codeparrot_training - Step 41938: {'lr': 0.0004154155225490555, 'samples': 21472768, 'steps': 41938, 'loss/train': 1.7975094318389893} +03/05/2022 14:46:25 - INFO - codeparrot_training - Step 41939: {'lr': 0.0004154115435034175, 'samples': 21473280, 'steps': 41939, 'loss/train': 1.9789531230926514} +03/05/2022 14:46:25 - INFO - codeparrot_training - Skipping example with length 614 (seq_length=1024) +03/05/2022 14:46:30 - INFO - codeparrot_training - Step 41940: {'lr': 0.00041540756438324746, 'samples': 21473792, 'steps': 41940, 'loss/train': 1.5022308826446533} +03/05/2022 14:46:33 - INFO - codeparrot_training - Step 41941: {'lr': 0.0004154035851885474, 'samples': 21474304, 'steps': 41941, 'loss/train': 2.125880479812622} +03/05/2022 14:46:33 - INFO - codeparrot_training - Skipping example with length 582 (seq_length=1024) +03/05/2022 14:46:38 - INFO - codeparrot_training - Step 41942: {'lr': 0.0004153996059193191, 'samples': 21474816, 'steps': 41942, 'loss/train': 0.9406452775001526} +03/05/2022 14:46:41 - INFO - codeparrot_training - Step 41943: {'lr': 0.0004153956265755642, 'samples': 21475328, 'steps': 41943, 'loss/train': 1.2609305381774902} +03/05/2022 14:46:42 - INFO - codeparrot_training - Skipping example with length 466 (seq_length=1024) +03/05/2022 14:46:47 - INFO - codeparrot_training - Step 41944: {'lr': 0.0004153916471572846, 'samples': 21475840, 'steps': 41944, 'loss/train': 1.358726978302002} +03/05/2022 14:46:50 - INFO - codeparrot_training - Step 41945: {'lr': 0.0004153876676644821, 'samples': 21476352, 'steps': 41945, 'loss/train': 1.6648014783859253} +03/05/2022 14:46:50 - INFO - codeparrot_training - Skipping example with length 312 (seq_length=1024) +03/05/2022 14:46:55 - INFO - codeparrot_training - Step 41946: {'lr': 0.0004153836880971585, 'samples': 21476864, 'steps': 41946, 'loss/train': 2.037508010864258} +03/05/2022 14:46:58 - INFO - codeparrot_training - Step 41947: {'lr': 0.00041537970845531547, 'samples': 21477376, 'steps': 41947, 'loss/train': 1.153216004371643} +03/05/2022 14:46:58 - INFO - codeparrot_training - Skipping example with length 27 (seq_length=1024) +03/05/2022 14:47:04 - INFO - codeparrot_training - Step 41948: {'lr': 0.00041537572873895503, 'samples': 21477888, 'steps': 41948, 'loss/train': 1.9245078563690186} +03/05/2022 14:47:07 - INFO - codeparrot_training - Step 41949: {'lr': 0.00041537174894807873, 'samples': 21478400, 'steps': 41949, 'loss/train': 1.961256980895996} +03/05/2022 14:47:07 - INFO - codeparrot_training - Skipping example with length 981 (seq_length=1024) +03/05/2022 14:47:12 - INFO - codeparrot_training - Step 41950: {'lr': 0.00041536776908268847, 'samples': 21478912, 'steps': 41950, 'loss/train': 1.362103819847107} +03/05/2022 14:47:15 - INFO - codeparrot_training - Step 41951: {'lr': 0.00041536378914278603, 'samples': 21479424, 'steps': 41951, 'loss/train': 1.4469531774520874} +03/05/2022 14:47:16 - INFO - codeparrot_training - Skipping example with length 322 (seq_length=1024) +03/05/2022 14:47:21 - INFO - codeparrot_training - Step 41952: {'lr': 0.00041535980912837326, 'samples': 21479936, 'steps': 41952, 'loss/train': 1.94516122341156} +03/05/2022 14:47:24 - INFO - codeparrot_training - Step 41953: {'lr': 0.00041535582903945195, 'samples': 21480448, 'steps': 41953, 'loss/train': 2.380267858505249} +03/05/2022 14:47:26 - INFO - codeparrot_training - Skipping example with length 168 (seq_length=1024) +03/05/2022 14:47:30 - INFO - codeparrot_training - Step 41954: {'lr': 0.00041535184887602384, 'samples': 21480960, 'steps': 41954, 'loss/train': 0.20756934583187103} +03/05/2022 14:47:33 - INFO - codeparrot_training - Step 41955: {'lr': 0.0004153478686380907, 'samples': 21481472, 'steps': 41955, 'loss/train': 1.5236746072769165} +03/05/2022 14:47:35 - INFO - codeparrot_training - Skipping example with length 209 (seq_length=1024) +03/05/2022 14:47:38 - INFO - codeparrot_training - Step 41956: {'lr': 0.0004153438883256544, 'samples': 21481984, 'steps': 41956, 'loss/train': 0.1346697211265564} +03/05/2022 14:47:41 - INFO - codeparrot_training - Step 41957: {'lr': 0.0004153399079387167, 'samples': 21482496, 'steps': 41957, 'loss/train': 1.7919999361038208} +03/05/2022 14:47:43 - INFO - codeparrot_training - Skipping example with length 700 (seq_length=1024) +03/05/2022 14:47:47 - INFO - codeparrot_training - Step 41958: {'lr': 0.00041533592747727935, 'samples': 21483008, 'steps': 41958, 'loss/train': 0.8220410943031311} +03/05/2022 14:47:50 - INFO - codeparrot_training - Step 41959: {'lr': 0.00041533194694134414, 'samples': 21483520, 'steps': 41959, 'loss/train': 1.867111325263977} +03/05/2022 14:47:52 - INFO - codeparrot_training - Skipping example with length 456 (seq_length=1024) +03/05/2022 14:47:55 - INFO - codeparrot_training - Step 41960: {'lr': 0.00041532796633091297, 'samples': 21484032, 'steps': 41960, 'loss/train': 1.4246114492416382} +03/05/2022 14:47:58 - INFO - codeparrot_training - Step 41961: {'lr': 0.00041532398564598757, 'samples': 21484544, 'steps': 41961, 'loss/train': 1.5380758047103882} +03/05/2022 14:48:03 - INFO - codeparrot_training - Step 41962: {'lr': 0.0004153200048865697, 'samples': 21485056, 'steps': 41962, 'loss/train': 1.9092661142349243} +03/05/2022 14:48:07 - INFO - codeparrot_training - Step 41963: {'lr': 0.0004153160240526612, 'samples': 21485568, 'steps': 41963, 'loss/train': 1.5522388219833374} +03/05/2022 14:48:08 - INFO - codeparrot_training - Skipping example with length 676 (seq_length=1024) +03/05/2022 14:48:12 - INFO - codeparrot_training - Step 41964: {'lr': 0.0004153120431442639, 'samples': 21486080, 'steps': 41964, 'loss/train': 0.9297754764556885} +03/05/2022 14:48:15 - INFO - codeparrot_training - Step 41965: {'lr': 0.00041530806216137953, 'samples': 21486592, 'steps': 41965, 'loss/train': 2.274245500564575} +03/05/2022 14:48:17 - INFO - codeparrot_training - Skipping example with length 522 (seq_length=1024) +03/05/2022 14:48:20 - INFO - codeparrot_training - Step 41966: {'lr': 0.00041530408110400987, 'samples': 21487104, 'steps': 41966, 'loss/train': 2.160557270050049} +03/05/2022 14:48:24 - INFO - codeparrot_training - Step 41967: {'lr': 0.00041530009997215665, 'samples': 21487616, 'steps': 41967, 'loss/train': 1.7717862129211426} +03/05/2022 14:48:25 - INFO - codeparrot_training - Skipping example with length 673 (seq_length=1024) +03/05/2022 14:48:29 - INFO - codeparrot_training - Step 41968: {'lr': 0.00041529611876582194, 'samples': 21488128, 'steps': 41968, 'loss/train': 0.5135242342948914} +03/05/2022 14:48:32 - INFO - codeparrot_training - Step 41969: {'lr': 0.00041529213748500726, 'samples': 21488640, 'steps': 41969, 'loss/train': 1.598252773284912} +03/05/2022 14:48:33 - INFO - codeparrot_training - Skipping example with length 503 (seq_length=1024) +03/05/2022 14:48:37 - INFO - codeparrot_training - Step 41970: {'lr': 0.0004152881561297145, 'samples': 21489152, 'steps': 41970, 'loss/train': 1.3704453706741333} +03/05/2022 14:48:40 - INFO - codeparrot_training - Step 41971: {'lr': 0.0004152841746999454, 'samples': 21489664, 'steps': 41971, 'loss/train': 1.2177729606628418} +03/05/2022 14:48:42 - INFO - codeparrot_training - Skipping example with length 687 (seq_length=1024) +03/05/2022 14:48:46 - INFO - codeparrot_training - Step 41972: {'lr': 0.00041528019319570186, 'samples': 21490176, 'steps': 41972, 'loss/train': 1.7277165651321411} +03/05/2022 14:48:49 - INFO - codeparrot_training - Step 41973: {'lr': 0.0004152762116169856, 'samples': 21490688, 'steps': 41973, 'loss/train': 1.5947171449661255} +03/05/2022 14:48:50 - INFO - codeparrot_training - Skipping example with length 133 (seq_length=1024) +03/05/2022 14:48:54 - INFO - codeparrot_training - Step 41974: {'lr': 0.00041527222996379844, 'samples': 21491200, 'steps': 41974, 'loss/train': 1.3216217756271362} +03/05/2022 14:48:57 - INFO - codeparrot_training - Step 41975: {'lr': 0.0004152682482361422, 'samples': 21491712, 'steps': 41975, 'loss/train': 0.9165134429931641} +03/05/2022 14:48:59 - INFO - codeparrot_training - Skipping example with length 30 (seq_length=1024) +03/05/2022 14:49:03 - INFO - codeparrot_training - Step 41976: {'lr': 0.0004152642664340185, 'samples': 21492224, 'steps': 41976, 'loss/train': 1.774726390838623} +03/05/2022 14:49:06 - INFO - codeparrot_training - Step 41977: {'lr': 0.00041526028455742936, 'samples': 21492736, 'steps': 41977, 'loss/train': 1.277857780456543} +03/05/2022 14:49:07 - INFO - codeparrot_training - Skipping example with length 969 (seq_length=1024) +03/05/2022 14:49:11 - INFO - codeparrot_training - Step 41978: {'lr': 0.0004152563026063765, 'samples': 21493248, 'steps': 41978, 'loss/train': 1.992783546447754} +03/05/2022 14:49:14 - INFO - codeparrot_training - Step 41979: {'lr': 0.00041525232058086173, 'samples': 21493760, 'steps': 41979, 'loss/train': 1.623078465461731} +03/05/2022 14:49:15 - INFO - codeparrot_training - Skipping example with length 418 (seq_length=1024) +03/05/2022 14:49:19 - INFO - codeparrot_training - Step 41980: {'lr': 0.0004152483384808867, 'samples': 21494272, 'steps': 41980, 'loss/train': 1.3448256254196167} +03/05/2022 14:49:23 - INFO - codeparrot_training - Step 41981: {'lr': 0.0004152443563064534, 'samples': 21494784, 'steps': 41981, 'loss/train': 2.280818223953247} +03/05/2022 14:49:24 - INFO - codeparrot_training - Skipping example with length 206 (seq_length=1024) +03/05/2022 14:49:28 - INFO - codeparrot_training - Step 41982: {'lr': 0.00041524037405756356, 'samples': 21495296, 'steps': 41982, 'loss/train': 1.3301358222961426} +03/05/2022 14:49:31 - INFO - codeparrot_training - Step 41983: {'lr': 0.0004152363917342189, 'samples': 21495808, 'steps': 41983, 'loss/train': 2.0409293174743652} +03/05/2022 14:49:32 - INFO - codeparrot_training - Skipping example with length 581 (seq_length=1024) +03/05/2022 14:49:36 - INFO - codeparrot_training - Step 41984: {'lr': 0.00041523240933642134, 'samples': 21496320, 'steps': 41984, 'loss/train': 1.4239026308059692} +03/05/2022 14:49:39 - INFO - codeparrot_training - Step 41985: {'lr': 0.00041522842686417255, 'samples': 21496832, 'steps': 41985, 'loss/train': 2.484879970550537} +03/05/2022 14:49:40 - INFO - codeparrot_training - Skipping example with length 295 (seq_length=1024) +03/05/2022 14:49:45 - INFO - codeparrot_training - Step 41986: {'lr': 0.0004152244443174744, 'samples': 21497344, 'steps': 41986, 'loss/train': 1.1105756759643555} +03/05/2022 14:49:48 - INFO - codeparrot_training - Step 41987: {'lr': 0.00041522046169632863, 'samples': 21497856, 'steps': 41987, 'loss/train': 1.0515499114990234} +03/05/2022 14:49:49 - INFO - codeparrot_training - Skipping example with length 696 (seq_length=1024) +03/05/2022 14:49:53 - INFO - codeparrot_training - Step 41988: {'lr': 0.0004152164790007371, 'samples': 21498368, 'steps': 41988, 'loss/train': 1.947210669517517} +03/05/2022 14:49:57 - INFO - codeparrot_training - Step 41989: {'lr': 0.00041521249623070164, 'samples': 21498880, 'steps': 41989, 'loss/train': 1.5896849632263184} +03/05/2022 14:49:58 - INFO - codeparrot_training - Skipping example with length 110 (seq_length=1024) +03/05/2022 14:50:02 - INFO - codeparrot_training - Step 41990: {'lr': 0.0004152085133862239, 'samples': 21499392, 'steps': 41990, 'loss/train': 1.2735093832015991} +03/05/2022 14:50:05 - INFO - codeparrot_training - Step 41991: {'lr': 0.0004152045304673058, 'samples': 21499904, 'steps': 41991, 'loss/train': 2.0329272747039795} +03/05/2022 14:50:07 - INFO - codeparrot_training - Skipping example with length 1018 (seq_length=1024) +03/05/2022 14:50:10 - INFO - codeparrot_training - Step 41992: {'lr': 0.000415200547473949, 'samples': 21500416, 'steps': 41992, 'loss/train': 1.4904472827911377} +03/05/2022 14:50:14 - INFO - codeparrot_training - Step 41993: {'lr': 0.00041519656440615544, 'samples': 21500928, 'steps': 41993, 'loss/train': 1.9958611726760864} +03/05/2022 14:50:19 - INFO - codeparrot_training - Step 41994: {'lr': 0.00041519258126392685, 'samples': 21501440, 'steps': 41994, 'loss/train': 1.702273964881897} +03/05/2022 14:50:22 - INFO - codeparrot_training - Step 41995: {'lr': 0.00041518859804726507, 'samples': 21501952, 'steps': 41995, 'loss/train': 1.5724694728851318} +03/05/2022 14:50:24 - INFO - codeparrot_training - Skipping example with length 620 (seq_length=1024) +03/05/2022 14:50:27 - INFO - codeparrot_training - Step 41996: {'lr': 0.00041518461475617183, 'samples': 21502464, 'steps': 41996, 'loss/train': 1.521433711051941} +03/05/2022 14:50:30 - INFO - codeparrot_training - Step 41997: {'lr': 0.00041518063139064893, 'samples': 21502976, 'steps': 41997, 'loss/train': 1.8104257583618164} +03/05/2022 14:50:32 - INFO - codeparrot_training - Skipping example with length 165 (seq_length=1024) +03/05/2022 14:50:36 - INFO - codeparrot_training - Step 41998: {'lr': 0.0004151766479506982, 'samples': 21503488, 'steps': 41998, 'loss/train': 1.3721625804901123} +03/05/2022 14:50:39 - INFO - codeparrot_training - Step 41999: {'lr': 0.0004151726644363214, 'samples': 21504000, 'steps': 41999, 'loss/train': 1.5022848844528198} +03/05/2022 14:50:40 - INFO - codeparrot_training - Skipping example with length 11 (seq_length=1024) +03/05/2022 14:50:44 - INFO - codeparrot_training - Step 42000: {'lr': 0.00041516868084752034, 'samples': 21504512, 'steps': 42000, 'loss/train': 0.782934308052063} +03/05/2022 14:50:47 - INFO - codeparrot_training - Step 42001: {'lr': 0.0004151646971842968, 'samples': 21505024, 'steps': 42001, 'loss/train': 1.9118019342422485} +03/05/2022 14:50:49 - INFO - codeparrot_training - Skipping example with length 410 (seq_length=1024) +03/05/2022 14:50:53 - INFO - codeparrot_training - Step 42002: {'lr': 0.00041516071344665275, 'samples': 21505536, 'steps': 42002, 'loss/train': 1.37834894657135} +03/05/2022 14:50:56 - INFO - codeparrot_training - Step 42003: {'lr': 0.00041515672963458975, 'samples': 21506048, 'steps': 42003, 'loss/train': 1.1964386701583862} +03/05/2022 14:50:57 - INFO - codeparrot_training - Skipping example with length 748 (seq_length=1024) +03/05/2022 14:51:01 - INFO - codeparrot_training - Step 42004: {'lr': 0.00041515274574810965, 'samples': 21506560, 'steps': 42004, 'loss/train': 1.7944586277008057} +03/05/2022 14:51:04 - INFO - codeparrot_training - Step 42005: {'lr': 0.00041514876178721426, 'samples': 21507072, 'steps': 42005, 'loss/train': 2.1848740577697754} +03/05/2022 14:51:05 - INFO - codeparrot_training - Skipping example with length 762 (seq_length=1024) +03/05/2022 14:51:09 - INFO - codeparrot_training - Step 42006: {'lr': 0.0004151447777519054, 'samples': 21507584, 'steps': 42006, 'loss/train': 1.1487326622009277} +03/05/2022 14:51:13 - INFO - codeparrot_training - Step 42007: {'lr': 0.00041514079364218483, 'samples': 21508096, 'steps': 42007, 'loss/train': 1.5687570571899414} +03/05/2022 14:51:14 - INFO - codeparrot_training - Skipping example with length 616 (seq_length=1024) +03/05/2022 14:51:18 - INFO - codeparrot_training - Step 42008: {'lr': 0.0004151368094580544, 'samples': 21508608, 'steps': 42008, 'loss/train': 1.2744157314300537} +03/05/2022 14:51:21 - INFO - codeparrot_training - Step 42009: {'lr': 0.0004151328251995159, 'samples': 21509120, 'steps': 42009, 'loss/train': 1.4255388975143433} +03/05/2022 14:51:22 - INFO - codeparrot_training - Skipping example with length 557 (seq_length=1024) +03/05/2022 14:51:26 - INFO - codeparrot_training - Step 42010: {'lr': 0.000415128840866571, 'samples': 21509632, 'steps': 42010, 'loss/train': 1.8352463245391846} +03/05/2022 14:51:29 - INFO - codeparrot_training - Step 42011: {'lr': 0.00041512485645922164, 'samples': 21510144, 'steps': 42011, 'loss/train': 1.9540470838546753} +03/05/2022 14:51:30 - INFO - codeparrot_training - Skipping example with length 752 (seq_length=1024) +03/05/2022 14:51:35 - INFO - codeparrot_training - Step 42012: {'lr': 0.0004151208719774696, 'samples': 21510656, 'steps': 42012, 'loss/train': 1.5386552810668945} +03/05/2022 14:51:38 - INFO - codeparrot_training - Step 42013: {'lr': 0.0004151168874213166, 'samples': 21511168, 'steps': 42013, 'loss/train': 1.5359935760498047} +03/05/2022 14:51:39 - INFO - codeparrot_training - Skipping example with length 176 (seq_length=1024) +03/05/2022 14:51:43 - INFO - codeparrot_training - Step 42014: {'lr': 0.00041511290279076454, 'samples': 21511680, 'steps': 42014, 'loss/train': 1.3552520275115967} +03/05/2022 14:51:46 - INFO - codeparrot_training - Step 42015: {'lr': 0.0004151089180858151, 'samples': 21512192, 'steps': 42015, 'loss/train': 1.8658593893051147} +03/05/2022 14:51:47 - INFO - codeparrot_training - Skipping example with length 840 (seq_length=1024) +03/05/2022 14:51:52 - INFO - codeparrot_training - Step 42016: {'lr': 0.00041510493330647015, 'samples': 21512704, 'steps': 42016, 'loss/train': 1.6373051404953003} +03/05/2022 14:51:55 - INFO - codeparrot_training - Step 42017: {'lr': 0.00041510094845273145, 'samples': 21513216, 'steps': 42017, 'loss/train': 0.26720091700553894} +03/05/2022 14:51:56 - INFO - codeparrot_training - Skipping example with length 861 (seq_length=1024) +03/05/2022 14:52:00 - INFO - codeparrot_training - Step 42018: {'lr': 0.0004150969635246008, 'samples': 21513728, 'steps': 42018, 'loss/train': 1.9831372499465942} +03/05/2022 14:52:03 - INFO - codeparrot_training - Step 42019: {'lr': 0.00041509297852208003, 'samples': 21514240, 'steps': 42019, 'loss/train': 2.0508909225463867} +03/05/2022 14:52:04 - INFO - codeparrot_training - Skipping example with length 560 (seq_length=1024) +03/05/2022 14:52:08 - INFO - codeparrot_training - Step 42020: {'lr': 0.00041508899344517094, 'samples': 21514752, 'steps': 42020, 'loss/train': 1.0435205698013306} +03/05/2022 14:52:12 - INFO - codeparrot_training - Step 42021: {'lr': 0.0004150850082938752, 'samples': 21515264, 'steps': 42021, 'loss/train': 1.9576176404953003} +03/05/2022 14:52:13 - INFO - codeparrot_training - Skipping example with length 283 (seq_length=1024) +03/05/2022 14:52:17 - INFO - codeparrot_training - Step 42022: {'lr': 0.00041508102306819485, 'samples': 21515776, 'steps': 42022, 'loss/train': 1.3505452871322632} +03/05/2022 14:52:20 - INFO - codeparrot_training - Step 42023: {'lr': 0.0004150770377681314, 'samples': 21516288, 'steps': 42023, 'loss/train': 1.9806123971939087} +03/05/2022 14:52:21 - INFO - codeparrot_training - Skipping example with length 665 (seq_length=1024) +03/05/2022 14:52:25 - INFO - codeparrot_training - Step 42024: {'lr': 0.00041507305239368684, 'samples': 21516800, 'steps': 42024, 'loss/train': 1.8587054014205933} +03/05/2022 14:52:29 - INFO - codeparrot_training - Step 42025: {'lr': 0.0004150690669448629, 'samples': 21517312, 'steps': 42025, 'loss/train': 1.7816216945648193} +03/05/2022 14:52:30 - INFO - codeparrot_training - Skipping example with length 307 (seq_length=1024) +03/05/2022 14:52:34 - INFO - codeparrot_training - Step 42026: {'lr': 0.0004150650814216614, 'samples': 21517824, 'steps': 42026, 'loss/train': 1.8048359155654907} +03/05/2022 14:52:37 - INFO - codeparrot_training - Step 42027: {'lr': 0.0004150610958240841, 'samples': 21518336, 'steps': 42027, 'loss/train': 1.8063660860061646} +03/05/2022 14:52:38 - INFO - codeparrot_training - Skipping example with length 898 (seq_length=1024) +03/05/2022 14:52:42 - INFO - codeparrot_training - Step 42028: {'lr': 0.00041505711015213284, 'samples': 21518848, 'steps': 42028, 'loss/train': 1.8965425491333008} +03/05/2022 14:52:45 - INFO - codeparrot_training - Step 42029: {'lr': 0.0004150531244058094, 'samples': 21519360, 'steps': 42029, 'loss/train': 0.8695423603057861} +03/05/2022 14:52:47 - INFO - codeparrot_training - Skipping example with length 885 (seq_length=1024) +03/05/2022 14:52:52 - INFO - codeparrot_training - Step 42030: {'lr': 0.00041504913858511557, 'samples': 21519872, 'steps': 42030, 'loss/train': 1.3530279397964478} +03/05/2022 14:52:55 - INFO - codeparrot_training - Step 42031: {'lr': 0.0004150451526900531, 'samples': 21520384, 'steps': 42031, 'loss/train': 2.1479485034942627} +03/05/2022 14:52:58 - INFO - codeparrot_training - Step 42032: {'lr': 0.00041504116672062385, 'samples': 21520896, 'steps': 42032, 'loss/train': 2.1182901859283447} +03/05/2022 14:52:59 - INFO - codeparrot_training - Skipping example with length 966 (seq_length=1024) +03/05/2022 14:53:03 - INFO - codeparrot_training - Step 42033: {'lr': 0.0004150371806768296, 'samples': 21521408, 'steps': 42033, 'loss/train': 1.4731667041778564} +03/05/2022 14:53:07 - INFO - codeparrot_training - Step 42034: {'lr': 0.00041503319455867216, 'samples': 21521920, 'steps': 42034, 'loss/train': 1.693225383758545} +03/05/2022 14:53:08 - INFO - codeparrot_training - Skipping example with length 844 (seq_length=1024) +03/05/2022 14:53:12 - INFO - codeparrot_training - Step 42035: {'lr': 0.0004150292083661533, 'samples': 21522432, 'steps': 42035, 'loss/train': 1.3553967475891113} +03/05/2022 14:53:15 - INFO - codeparrot_training - Step 42036: {'lr': 0.00041502522209927486, 'samples': 21522944, 'steps': 42036, 'loss/train': 0.8308656215667725} +03/05/2022 14:53:16 - INFO - codeparrot_training - Skipping example with length 221 (seq_length=1024) +03/05/2022 14:53:20 - INFO - codeparrot_training - Step 42037: {'lr': 0.00041502123575803854, 'samples': 21523456, 'steps': 42037, 'loss/train': 0.8688053488731384} +03/05/2022 14:53:23 - INFO - codeparrot_training - Step 42038: {'lr': 0.0004150172493424462, 'samples': 21523968, 'steps': 42038, 'loss/train': 1.7066411972045898} +03/05/2022 14:53:25 - INFO - codeparrot_training - Skipping example with length 666 (seq_length=1024) +03/05/2022 14:53:29 - INFO - codeparrot_training - Step 42039: {'lr': 0.00041501326285249963, 'samples': 21524480, 'steps': 42039, 'loss/train': 1.7727299928665161} +03/05/2022 14:53:32 - INFO - codeparrot_training - Step 42040: {'lr': 0.0004150092762882007, 'samples': 21524992, 'steps': 42040, 'loss/train': 1.7361472845077515} +03/05/2022 14:53:33 - INFO - codeparrot_training - Skipping example with length 229 (seq_length=1024) +03/05/2022 14:53:37 - INFO - codeparrot_training - Step 42041: {'lr': 0.00041500528964955106, 'samples': 21525504, 'steps': 42041, 'loss/train': 1.919515609741211} +03/05/2022 14:53:40 - INFO - codeparrot_training - Step 42042: {'lr': 0.0004150013029365527, 'samples': 21526016, 'steps': 42042, 'loss/train': 1.616940975189209} +03/05/2022 14:53:42 - INFO - codeparrot_training - Skipping example with length 958 (seq_length=1024) +03/05/2022 14:53:46 - INFO - codeparrot_training - Step 42043: {'lr': 0.0004149973161492072, 'samples': 21526528, 'steps': 42043, 'loss/train': 2.092857599258423} +03/05/2022 14:53:49 - INFO - codeparrot_training - Step 42044: {'lr': 0.0004149933292875164, 'samples': 21527040, 'steps': 42044, 'loss/train': 1.5048235654830933} +03/05/2022 14:53:50 - INFO - codeparrot_training - Skipping example with length 837 (seq_length=1024) +03/05/2022 14:53:54 - INFO - codeparrot_training - Step 42045: {'lr': 0.0004149893423514822, 'samples': 21527552, 'steps': 42045, 'loss/train': 2.0605039596557617} +03/05/2022 14:53:57 - INFO - codeparrot_training - Step 42046: {'lr': 0.0004149853553411064, 'samples': 21528064, 'steps': 42046, 'loss/train': 1.8431795835494995} +03/05/2022 14:53:59 - INFO - codeparrot_training - Skipping example with length 967 (seq_length=1024) +03/05/2022 14:54:03 - INFO - codeparrot_training - Step 42047: {'lr': 0.00041498136825639074, 'samples': 21528576, 'steps': 42047, 'loss/train': 3.7218387126922607} +03/05/2022 14:54:06 - INFO - codeparrot_training - Step 42048: {'lr': 0.000414977381097337, 'samples': 21529088, 'steps': 42048, 'loss/train': 1.8772724866867065} +03/05/2022 14:54:08 - INFO - codeparrot_training - Skipping example with length 133 (seq_length=1024) +03/05/2022 14:54:11 - INFO - codeparrot_training - Step 42049: {'lr': 0.000414973393863947, 'samples': 21529600, 'steps': 42049, 'loss/train': 0.6850914359092712} +03/05/2022 14:54:14 - INFO - codeparrot_training - Step 42050: {'lr': 0.0004149694065562225, 'samples': 21530112, 'steps': 42050, 'loss/train': 1.278337836265564} +03/05/2022 14:54:16 - INFO - codeparrot_training - Skipping example with length 420 (seq_length=1024) +03/05/2022 14:54:20 - INFO - codeparrot_training - Step 42051: {'lr': 0.0004149654191741654, 'samples': 21530624, 'steps': 42051, 'loss/train': 2.21501088142395} +03/05/2022 14:54:23 - INFO - codeparrot_training - Step 42052: {'lr': 0.0004149614317177774, 'samples': 21531136, 'steps': 42052, 'loss/train': 0.9838099479675293} +03/05/2022 14:54:25 - INFO - codeparrot_training - Skipping example with length 453 (seq_length=1024) +03/05/2022 14:54:28 - INFO - codeparrot_training - Step 42053: {'lr': 0.00041495744418706027, 'samples': 21531648, 'steps': 42053, 'loss/train': 1.9907677173614502} +03/05/2022 14:54:31 - INFO - codeparrot_training - Step 42054: {'lr': 0.00041495345658201587, 'samples': 21532160, 'steps': 42054, 'loss/train': 2.186314344406128} +03/05/2022 14:54:34 - INFO - codeparrot_training - Skipping example with length 328 (seq_length=1024) +03/05/2022 14:54:36 - INFO - codeparrot_training - Step 42055: {'lr': 0.00041494946890264606, 'samples': 21532672, 'steps': 42055, 'loss/train': 1.5653327703475952} +03/05/2022 14:54:40 - INFO - codeparrot_training - Step 42056: {'lr': 0.00041494548114895255, 'samples': 21533184, 'steps': 42056, 'loss/train': 1.6992850303649902} +03/05/2022 14:54:42 - INFO - codeparrot_training - Skipping example with length 98 (seq_length=1024) +03/05/2022 14:54:45 - INFO - codeparrot_training - Step 42057: {'lr': 0.0004149414933209371, 'samples': 21533696, 'steps': 42057, 'loss/train': 1.2012659311294556} +03/05/2022 14:54:48 - INFO - codeparrot_training - Step 42058: {'lr': 0.00041493750541860165, 'samples': 21534208, 'steps': 42058, 'loss/train': 2.228527307510376} +03/05/2022 14:54:50 - INFO - codeparrot_training - Skipping example with length 501 (seq_length=1024) +03/05/2022 14:54:53 - INFO - codeparrot_training - Step 42059: {'lr': 0.0004149335174419478, 'samples': 21534720, 'steps': 42059, 'loss/train': 2.387204647064209} +03/05/2022 14:54:57 - INFO - codeparrot_training - Step 42060: {'lr': 0.0004149295293909775, 'samples': 21535232, 'steps': 42060, 'loss/train': 0.8602176904678345} +03/05/2022 14:54:59 - INFO - codeparrot_training - Skipping example with length 402 (seq_length=1024) +03/05/2022 14:55:02 - INFO - codeparrot_training - Step 42061: {'lr': 0.0004149255412656925, 'samples': 21535744, 'steps': 42061, 'loss/train': 1.6675893068313599} +03/05/2022 14:55:05 - INFO - codeparrot_training - Step 42062: {'lr': 0.00041492155306609456, 'samples': 21536256, 'steps': 42062, 'loss/train': 1.0316429138183594} +03/05/2022 14:55:07 - INFO - codeparrot_training - Skipping example with length 601 (seq_length=1024) +03/05/2022 14:55:10 - INFO - codeparrot_training - Step 42063: {'lr': 0.00041491756479218557, 'samples': 21536768, 'steps': 42063, 'loss/train': 2.1301400661468506} +03/05/2022 14:55:13 - INFO - codeparrot_training - Step 42064: {'lr': 0.0004149135764439672, 'samples': 21537280, 'steps': 42064, 'loss/train': 1.7208446264266968} +03/05/2022 14:55:16 - INFO - codeparrot_training - Skipping example with length 1000 (seq_length=1024) +03/05/2022 14:55:19 - INFO - codeparrot_training - Step 42065: {'lr': 0.0004149095880214414, 'samples': 21537792, 'steps': 42065, 'loss/train': 2.187117099761963} +03/05/2022 14:55:22 - INFO - codeparrot_training - Step 42066: {'lr': 0.00041490559952460983, 'samples': 21538304, 'steps': 42066, 'loss/train': 1.6324983835220337} +03/05/2022 14:55:24 - INFO - codeparrot_training - Skipping example with length 56 (seq_length=1024) +03/05/2022 14:55:27 - INFO - codeparrot_training - Step 42067: {'lr': 0.00041490161095347435, 'samples': 21538816, 'steps': 42067, 'loss/train': 1.099818229675293} +03/05/2022 14:55:30 - INFO - codeparrot_training - Step 42068: {'lr': 0.00041489762230803676, 'samples': 21539328, 'steps': 42068, 'loss/train': 1.5437949895858765} +03/05/2022 14:55:32 - INFO - codeparrot_training - Skipping example with length 290 (seq_length=1024) +03/05/2022 14:55:36 - INFO - codeparrot_training - Step 42069: {'lr': 0.00041489363358829885, 'samples': 21539840, 'steps': 42069, 'loss/train': 2.0456976890563965} +03/05/2022 14:55:39 - INFO - codeparrot_training - Step 42070: {'lr': 0.0004148896447942624, 'samples': 21540352, 'steps': 42070, 'loss/train': 1.3459950685501099} +03/05/2022 14:55:41 - INFO - codeparrot_training - Skipping example with length 707 (seq_length=1024) +03/05/2022 14:55:44 - INFO - codeparrot_training - Step 42071: {'lr': 0.00041488565592592917, 'samples': 21540864, 'steps': 42071, 'loss/train': 1.543218970298767} +03/05/2022 14:55:47 - INFO - codeparrot_training - Step 42072: {'lr': 0.0004148816669833011, 'samples': 21541376, 'steps': 42072, 'loss/train': 1.985239028930664} +03/05/2022 14:55:50 - INFO - codeparrot_training - Skipping example with length 244 (seq_length=1024) +03/05/2022 14:55:53 - INFO - codeparrot_training - Step 42073: {'lr': 0.0004148776779663799, 'samples': 21541888, 'steps': 42073, 'loss/train': 1.9976308345794678} +03/05/2022 14:55:56 - INFO - codeparrot_training - Step 42074: {'lr': 0.00041487368887516726, 'samples': 21542400, 'steps': 42074, 'loss/train': 0.9212955832481384} +03/05/2022 14:55:58 - INFO - codeparrot_training - Skipping example with length 350 (seq_length=1024) +03/05/2022 14:56:01 - INFO - codeparrot_training - Step 42075: {'lr': 0.00041486969970966516, 'samples': 21542912, 'steps': 42075, 'loss/train': 2.4219183921813965} +03/05/2022 14:56:04 - INFO - codeparrot_training - Step 42076: {'lr': 0.0004148657104698753, 'samples': 21543424, 'steps': 42076, 'loss/train': 2.0075337886810303} +03/05/2022 14:56:06 - INFO - codeparrot_training - Skipping example with length 882 (seq_length=1024) +03/05/2022 14:56:09 - INFO - codeparrot_training - Step 42077: {'lr': 0.00041486172115579945, 'samples': 21543936, 'steps': 42077, 'loss/train': 2.4029383659362793} +03/05/2022 14:56:13 - INFO - codeparrot_training - Step 42078: {'lr': 0.00041485773176743953, 'samples': 21544448, 'steps': 42078, 'loss/train': 1.6762781143188477} +03/05/2022 14:56:14 - INFO - codeparrot_training - Skipping example with length 590 (seq_length=1024) +03/05/2022 14:56:18 - INFO - codeparrot_training - Step 42079: {'lr': 0.00041485374230479724, 'samples': 21544960, 'steps': 42079, 'loss/train': 1.6409190893173218} +03/05/2022 14:56:21 - INFO - codeparrot_training - Step 42080: {'lr': 0.00041484975276787436, 'samples': 21545472, 'steps': 42080, 'loss/train': 1.5761291980743408} +03/05/2022 14:56:23 - INFO - codeparrot_training - Skipping example with length 491 (seq_length=1024) +03/05/2022 14:56:26 - INFO - codeparrot_training - Step 42081: {'lr': 0.00041484576315667273, 'samples': 21545984, 'steps': 42081, 'loss/train': 2.3278465270996094} +03/05/2022 14:56:30 - INFO - codeparrot_training - Step 42082: {'lr': 0.0004148417734711941, 'samples': 21546496, 'steps': 42082, 'loss/train': 2.618222951889038} +03/05/2022 14:56:31 - INFO - codeparrot_training - Skipping example with length 155 (seq_length=1024) +03/05/2022 14:56:35 - INFO - codeparrot_training - Step 42083: {'lr': 0.00041483778371144046, 'samples': 21547008, 'steps': 42083, 'loss/train': 1.4391281604766846} +03/05/2022 14:56:38 - INFO - codeparrot_training - Step 42084: {'lr': 0.0004148337938774134, 'samples': 21547520, 'steps': 42084, 'loss/train': 1.605188012123108} +03/05/2022 14:56:40 - INFO - codeparrot_training - Skipping example with length 115 (seq_length=1024) +03/05/2022 14:56:43 - INFO - codeparrot_training - Step 42085: {'lr': 0.00041482980396911467, 'samples': 21548032, 'steps': 42085, 'loss/train': 1.8894951343536377} +03/05/2022 14:56:46 - INFO - codeparrot_training - Step 42086: {'lr': 0.0004148258139865463, 'samples': 21548544, 'steps': 42086, 'loss/train': 0.9633037447929382} +03/05/2022 14:56:48 - INFO - codeparrot_training - Skipping example with length 413 (seq_length=1024) +03/05/2022 14:56:52 - INFO - codeparrot_training - Step 42087: {'lr': 0.00041482182392970984, 'samples': 21549056, 'steps': 42087, 'loss/train': 1.5174708366394043} +03/05/2022 14:56:55 - INFO - codeparrot_training - Step 42088: {'lr': 0.00041481783379860725, 'samples': 21549568, 'steps': 42088, 'loss/train': 2.1999340057373047} +03/05/2022 14:56:56 - INFO - codeparrot_training - Skipping example with length 746 (seq_length=1024) +03/05/2022 14:57:00 - INFO - codeparrot_training - Step 42089: {'lr': 0.0004148138435932404, 'samples': 21550080, 'steps': 42089, 'loss/train': 1.4440360069274902} +03/05/2022 14:57:03 - INFO - codeparrot_training - Step 42090: {'lr': 0.0004148098533136109, 'samples': 21550592, 'steps': 42090, 'loss/train': 1.475509524345398} +03/05/2022 14:57:04 - INFO - codeparrot_training - Skipping example with length 188 (seq_length=1024) +03/05/2022 14:57:08 - INFO - codeparrot_training - Step 42091: {'lr': 0.0004148058629597206, 'samples': 21551104, 'steps': 42091, 'loss/train': 1.8951443433761597} +03/05/2022 14:57:12 - INFO - codeparrot_training - Step 42092: {'lr': 0.0004148018725315713, 'samples': 21551616, 'steps': 42092, 'loss/train': 1.9706394672393799} +03/05/2022 14:57:13 - INFO - codeparrot_training - Skipping example with length 689 (seq_length=1024) +03/05/2022 14:57:17 - INFO - codeparrot_training - Step 42093: {'lr': 0.00041479788202916483, 'samples': 21552128, 'steps': 42093, 'loss/train': 2.485860824584961} +03/05/2022 14:57:20 - INFO - codeparrot_training - Step 42094: {'lr': 0.000414793891452503, 'samples': 21552640, 'steps': 42094, 'loss/train': 1.9755384922027588} +03/05/2022 14:57:21 - INFO - codeparrot_training - Skipping example with length 179 (seq_length=1024) +03/05/2022 14:57:26 - INFO - codeparrot_training - Step 42095: {'lr': 0.0004147899008015876, 'samples': 21553152, 'steps': 42095, 'loss/train': 1.1846425533294678} +03/05/2022 14:57:29 - INFO - codeparrot_training - Step 42096: {'lr': 0.0004147859100764204, 'samples': 21553664, 'steps': 42096, 'loss/train': 0.8696861267089844} +03/05/2022 14:57:30 - INFO - codeparrot_training - Skipping example with length 518 (seq_length=1024) +03/05/2022 14:57:34 - INFO - codeparrot_training - Step 42097: {'lr': 0.0004147819192770033, 'samples': 21554176, 'steps': 42097, 'loss/train': 1.20148766040802} +03/05/2022 14:57:37 - INFO - codeparrot_training - Step 42098: {'lr': 0.00041477792840333784, 'samples': 21554688, 'steps': 42098, 'loss/train': 0.758374035358429} +03/05/2022 14:57:38 - INFO - codeparrot_training - Skipping example with length 92 (seq_length=1024) +03/05/2022 14:57:42 - INFO - codeparrot_training - Step 42099: {'lr': 0.00041477393745542607, 'samples': 21555200, 'steps': 42099, 'loss/train': 1.9731801748275757} +03/05/2022 14:57:46 - INFO - codeparrot_training - Step 42100: {'lr': 0.0004147699464332697, 'samples': 21555712, 'steps': 42100, 'loss/train': 1.5502268075942993} +03/05/2022 14:57:46 - INFO - codeparrot_training - Skipping example with length 204 (seq_length=1024) +03/05/2022 14:57:51 - INFO - codeparrot_training - Step 42101: {'lr': 0.0004147659553368706, 'samples': 21556224, 'steps': 42101, 'loss/train': 1.6663472652435303} +03/05/2022 14:57:54 - INFO - codeparrot_training - Step 42102: {'lr': 0.00041476196416623034, 'samples': 21556736, 'steps': 42102, 'loss/train': 1.4499937295913696} +03/05/2022 14:57:55 - INFO - codeparrot_training - Skipping example with length 554 (seq_length=1024) +03/05/2022 14:57:59 - INFO - codeparrot_training - Step 42103: {'lr': 0.0004147579729213511, 'samples': 21557248, 'steps': 42103, 'loss/train': 1.2467914819717407} +03/05/2022 14:58:02 - INFO - codeparrot_training - Step 42104: {'lr': 0.0004147539816022343, 'samples': 21557760, 'steps': 42104, 'loss/train': 1.895998239517212} +03/05/2022 14:58:03 - INFO - codeparrot_training - Skipping example with length 771 (seq_length=1024) +03/05/2022 14:58:08 - INFO - codeparrot_training - Step 42105: {'lr': 0.0004147499902088819, 'samples': 21558272, 'steps': 42105, 'loss/train': 2.1228394508361816} +03/05/2022 14:58:11 - INFO - codeparrot_training - Step 42106: {'lr': 0.0004147459987412958, 'samples': 21558784, 'steps': 42106, 'loss/train': 1.549649953842163} +03/05/2022 14:58:12 - INFO - codeparrot_training - Skipping example with length 246 (seq_length=1024) +03/05/2022 14:58:16 - INFO - codeparrot_training - Step 42107: {'lr': 0.0004147420071994776, 'samples': 21559296, 'steps': 42107, 'loss/train': 2.282243251800537} +03/05/2022 14:58:19 - INFO - codeparrot_training - Step 42108: {'lr': 0.0004147380155834293, 'samples': 21559808, 'steps': 42108, 'loss/train': 1.3776910305023193} +03/05/2022 14:58:21 - INFO - codeparrot_training - Skipping example with length 844 (seq_length=1024) +03/05/2022 14:58:25 - INFO - codeparrot_training - Step 42109: {'lr': 0.0004147340238931525, 'samples': 21560320, 'steps': 42109, 'loss/train': 1.4800078868865967} +03/05/2022 14:58:28 - INFO - codeparrot_training - Step 42110: {'lr': 0.0004147300321286491, 'samples': 21560832, 'steps': 42110, 'loss/train': 1.5184056758880615} +03/05/2022 14:58:29 - INFO - codeparrot_training - Skipping example with length 136 (seq_length=1024) +03/05/2022 14:58:33 - INFO - codeparrot_training - Step 42111: {'lr': 0.0004147260402899209, 'samples': 21561344, 'steps': 42111, 'loss/train': 1.6459730863571167} +03/05/2022 14:58:37 - INFO - codeparrot_training - Step 42112: {'lr': 0.0004147220483769697, 'samples': 21561856, 'steps': 42112, 'loss/train': 1.7488583326339722} +03/05/2022 14:58:38 - INFO - codeparrot_training - Skipping example with length 906 (seq_length=1024) +03/05/2022 14:58:42 - INFO - codeparrot_training - Step 42113: {'lr': 0.0004147180563897972, 'samples': 21562368, 'steps': 42113, 'loss/train': 1.497232437133789} +03/05/2022 14:58:45 - INFO - codeparrot_training - Step 42114: {'lr': 0.0004147140643284054, 'samples': 21562880, 'steps': 42114, 'loss/train': 2.525475263595581} +03/05/2022 14:58:46 - INFO - codeparrot_training - Skipping example with length 81 (seq_length=1024) +03/05/2022 14:58:50 - INFO - codeparrot_training - Step 42115: {'lr': 0.00041471007219279595, 'samples': 21563392, 'steps': 42115, 'loss/train': 1.702001929283142} +03/05/2022 14:58:53 - INFO - codeparrot_training - Step 42116: {'lr': 0.0004147060799829707, 'samples': 21563904, 'steps': 42116, 'loss/train': 1.9059182405471802} +03/05/2022 14:58:54 - INFO - codeparrot_training - Skipping example with length 459 (seq_length=1024) +03/05/2022 14:58:59 - INFO - codeparrot_training - Step 42117: {'lr': 0.00041470208769893137, 'samples': 21564416, 'steps': 42117, 'loss/train': 1.6902800798416138} +03/05/2022 14:59:02 - INFO - codeparrot_training - Step 42118: {'lr': 0.0004146980953406799, 'samples': 21564928, 'steps': 42118, 'loss/train': 1.4078137874603271} +03/05/2022 14:59:07 - INFO - codeparrot_training - Step 42119: {'lr': 0.000414694102908218, 'samples': 21565440, 'steps': 42119, 'loss/train': 1.4370009899139404} +03/05/2022 14:59:10 - INFO - codeparrot_training - Step 42120: {'lr': 0.0004146901104015474, 'samples': 21565952, 'steps': 42120, 'loss/train': 1.9531177282333374} +03/05/2022 14:59:11 - INFO - codeparrot_training - Skipping example with length 1001 (seq_length=1024) +03/05/2022 14:59:16 - INFO - codeparrot_training - Step 42121: {'lr': 0.00041468611782067, 'samples': 21566464, 'steps': 42121, 'loss/train': 1.8331111669540405} +03/05/2022 14:59:19 - INFO - codeparrot_training - Step 42122: {'lr': 0.0004146821251655877, 'samples': 21566976, 'steps': 42122, 'loss/train': 1.7453663349151611} +03/05/2022 14:59:19 - INFO - codeparrot_training - Skipping example with length 191 (seq_length=1024) +03/05/2022 14:59:24 - INFO - codeparrot_training - Step 42123: {'lr': 0.000414678132436302, 'samples': 21567488, 'steps': 42123, 'loss/train': 1.587219476699829} +03/05/2022 14:59:27 - INFO - codeparrot_training - Step 42124: {'lr': 0.000414674139632815, 'samples': 21568000, 'steps': 42124, 'loss/train': 1.8458361625671387} +03/05/2022 14:59:28 - INFO - codeparrot_training - Skipping example with length 773 (seq_length=1024) +03/05/2022 14:59:32 - INFO - codeparrot_training - Step 42125: {'lr': 0.0004146701467551283, 'samples': 21568512, 'steps': 42125, 'loss/train': 0.7398783564567566} +03/05/2022 14:59:36 - INFO - codeparrot_training - Step 42126: {'lr': 0.0004146661538032438, 'samples': 21569024, 'steps': 42126, 'loss/train': 1.2402812242507935} +03/05/2022 14:59:36 - INFO - codeparrot_training - Skipping example with length 996 (seq_length=1024) +03/05/2022 14:59:41 - INFO - codeparrot_training - Step 42127: {'lr': 0.0004146621607771633, 'samples': 21569536, 'steps': 42127, 'loss/train': 1.9953598976135254} +03/05/2022 14:59:44 - INFO - codeparrot_training - Step 42128: {'lr': 0.00041465816767688853, 'samples': 21570048, 'steps': 42128, 'loss/train': 2.205073356628418} +03/05/2022 14:59:44 - INFO - codeparrot_training - Skipping example with length 823 (seq_length=1024) +03/05/2022 14:59:49 - INFO - codeparrot_training - Step 42129: {'lr': 0.0004146541745024214, 'samples': 21570560, 'steps': 42129, 'loss/train': 1.6233755350112915} +03/05/2022 14:59:53 - INFO - codeparrot_training - Step 42130: {'lr': 0.00041465018125376354, 'samples': 21571072, 'steps': 42130, 'loss/train': 2.0043537616729736} +03/05/2022 14:59:53 - INFO - codeparrot_training - Skipping example with length 71 (seq_length=1024) +03/05/2022 14:59:58 - INFO - codeparrot_training - Step 42131: {'lr': 0.0004146461879309169, 'samples': 21571584, 'steps': 42131, 'loss/train': 1.492004632949829} +03/05/2022 15:00:01 - INFO - codeparrot_training - Step 42132: {'lr': 0.0004146421945338832, 'samples': 21572096, 'steps': 42132, 'loss/train': 1.0547499656677246} +03/05/2022 15:00:02 - INFO - codeparrot_training - Skipping example with length 801 (seq_length=1024) +03/05/2022 15:00:07 - INFO - codeparrot_training - Step 42133: {'lr': 0.0004146382010626643, 'samples': 21572608, 'steps': 42133, 'loss/train': 1.4061479568481445} +03/05/2022 15:00:10 - INFO - codeparrot_training - Step 42134: {'lr': 0.000414634207517262, 'samples': 21573120, 'steps': 42134, 'loss/train': 1.1031289100646973} +03/05/2022 15:00:11 - INFO - codeparrot_training - Skipping example with length 958 (seq_length=1024) +03/05/2022 15:00:15 - INFO - codeparrot_training - Step 42135: {'lr': 0.000414630213897678, 'samples': 21573632, 'steps': 42135, 'loss/train': 0.7073723077774048} +03/05/2022 15:00:18 - INFO - codeparrot_training - Step 42136: {'lr': 0.00041462622020391416, 'samples': 21574144, 'steps': 42136, 'loss/train': 1.6221882104873657} +03/05/2022 15:00:19 - INFO - codeparrot_training - Skipping example with length 791 (seq_length=1024) +03/05/2022 15:00:24 - INFO - codeparrot_training - Step 42137: {'lr': 0.00041462222643597236, 'samples': 21574656, 'steps': 42137, 'loss/train': 1.655709981918335} +03/05/2022 15:00:27 - INFO - codeparrot_training - Step 42138: {'lr': 0.00041461823259385423, 'samples': 21575168, 'steps': 42138, 'loss/train': 2.076871395111084} +03/05/2022 15:00:28 - INFO - codeparrot_training - Skipping example with length 112 (seq_length=1024) +03/05/2022 15:00:32 - INFO - codeparrot_training - Step 42139: {'lr': 0.00041461423867756176, 'samples': 21575680, 'steps': 42139, 'loss/train': 2.403907299041748} +03/05/2022 15:00:35 - INFO - codeparrot_training - Step 42140: {'lr': 0.00041461024468709664, 'samples': 21576192, 'steps': 42140, 'loss/train': 1.0964268445968628} +03/05/2022 15:00:36 - INFO - codeparrot_training - Skipping example with length 624 (seq_length=1024) +03/05/2022 15:00:41 - INFO - codeparrot_training - Step 42141: {'lr': 0.0004146062506224606, 'samples': 21576704, 'steps': 42141, 'loss/train': 2.1374969482421875} +03/05/2022 15:00:44 - INFO - codeparrot_training - Step 42142: {'lr': 0.0004146022564836556, 'samples': 21577216, 'steps': 42142, 'loss/train': 2.103226661682129} +03/05/2022 15:00:45 - INFO - codeparrot_training - Skipping example with length 316 (seq_length=1024) +03/05/2022 15:00:49 - INFO - codeparrot_training - Step 42143: {'lr': 0.0004145982622706833, 'samples': 21577728, 'steps': 42143, 'loss/train': 0.655133843421936} +03/05/2022 15:00:52 - INFO - codeparrot_training - Step 42144: {'lr': 0.00041459426798354563, 'samples': 21578240, 'steps': 42144, 'loss/train': 1.4235546588897705} +03/05/2022 15:00:53 - INFO - codeparrot_training - Skipping example with length 105 (seq_length=1024) +03/05/2022 15:00:57 - INFO - codeparrot_training - Step 42145: {'lr': 0.00041459027362224433, 'samples': 21578752, 'steps': 42145, 'loss/train': 1.1091488599777222} +03/05/2022 15:01:01 - INFO - codeparrot_training - Step 42146: {'lr': 0.00041458627918678116, 'samples': 21579264, 'steps': 42146, 'loss/train': 1.28324556350708} +03/05/2022 15:01:02 - INFO - codeparrot_training - Skipping example with length 847 (seq_length=1024) +03/05/2022 15:01:06 - INFO - codeparrot_training - Step 42147: {'lr': 0.00041458228467715786, 'samples': 21579776, 'steps': 42147, 'loss/train': 1.0475341081619263} +03/05/2022 15:01:09 - INFO - codeparrot_training - Step 42148: {'lr': 0.00041457829009337643, 'samples': 21580288, 'steps': 42148, 'loss/train': 1.8122377395629883} +03/05/2022 15:01:10 - INFO - codeparrot_training - Skipping example with length 839 (seq_length=1024) +03/05/2022 15:01:14 - INFO - codeparrot_training - Step 42149: {'lr': 0.00041457429543543856, 'samples': 21580800, 'steps': 42149, 'loss/train': 0.7676851153373718} +03/05/2022 15:01:18 - INFO - codeparrot_training - Step 42150: {'lr': 0.0004145703007033461, 'samples': 21581312, 'steps': 42150, 'loss/train': 1.8544318675994873} +03/05/2022 15:01:19 - INFO - codeparrot_training - Skipping example with length 768 (seq_length=1024) +03/05/2022 15:01:23 - INFO - codeparrot_training - Step 42151: {'lr': 0.00041456630589710073, 'samples': 21581824, 'steps': 42151, 'loss/train': 2.0541646480560303} +03/05/2022 15:01:26 - INFO - codeparrot_training - Step 42152: {'lr': 0.0004145623110167043, 'samples': 21582336, 'steps': 42152, 'loss/train': 0.6513358354568481} +03/05/2022 15:01:28 - INFO - codeparrot_training - Skipping example with length 127 (seq_length=1024) +03/05/2022 15:01:31 - INFO - codeparrot_training - Step 42153: {'lr': 0.00041455831606215863, 'samples': 21582848, 'steps': 42153, 'loss/train': 1.680469274520874} +03/05/2022 15:01:35 - INFO - codeparrot_training - Step 42154: {'lr': 0.0004145543210334656, 'samples': 21583360, 'steps': 42154, 'loss/train': 1.857198715209961} +03/05/2022 15:01:36 - INFO - codeparrot_training - Skipping example with length 434 (seq_length=1024) +03/05/2022 15:01:40 - INFO - codeparrot_training - Step 42155: {'lr': 0.00041455032593062685, 'samples': 21583872, 'steps': 42155, 'loss/train': 2.1395299434661865} +03/05/2022 15:01:43 - INFO - codeparrot_training - Step 42156: {'lr': 0.00041454633075364427, 'samples': 21584384, 'steps': 42156, 'loss/train': 1.8817479610443115} +03/05/2022 15:01:44 - INFO - codeparrot_training - Skipping example with length 55 (seq_length=1024) +03/05/2022 15:01:48 - INFO - codeparrot_training - Step 42157: {'lr': 0.00041454233550251976, 'samples': 21584896, 'steps': 42157, 'loss/train': 1.0926960706710815} +03/05/2022 15:01:51 - INFO - codeparrot_training - Step 42158: {'lr': 0.0004145383401772549, 'samples': 21585408, 'steps': 42158, 'loss/train': 1.6872514486312866} +03/05/2022 15:01:52 - INFO - codeparrot_training - Skipping example with length 707 (seq_length=1024) +03/05/2022 15:01:57 - INFO - codeparrot_training - Step 42159: {'lr': 0.00041453434477785165, 'samples': 21585920, 'steps': 42159, 'loss/train': 1.102889895439148} +03/05/2022 15:02:00 - INFO - codeparrot_training - Step 42160: {'lr': 0.0004145303493043118, 'samples': 21586432, 'steps': 42160, 'loss/train': 1.8617645502090454} +03/05/2022 15:02:01 - INFO - codeparrot_training - Skipping example with length 708 (seq_length=1024) +03/05/2022 15:02:05 - INFO - codeparrot_training - Step 42161: {'lr': 0.000414526353756637, 'samples': 21586944, 'steps': 42161, 'loss/train': 2.0461184978485107} +03/05/2022 15:02:08 - INFO - codeparrot_training - Step 42162: {'lr': 0.0004145223581348292, 'samples': 21587456, 'steps': 42162, 'loss/train': 0.9375805258750916} +03/05/2022 15:02:09 - INFO - codeparrot_training - Skipping example with length 500 (seq_length=1024) +03/05/2022 15:02:13 - INFO - codeparrot_training - Step 42163: {'lr': 0.00041451836243889027, 'samples': 21587968, 'steps': 42163, 'loss/train': 1.5858362913131714} +03/05/2022 15:02:17 - INFO - codeparrot_training - Step 42164: {'lr': 0.0004145143666688218, 'samples': 21588480, 'steps': 42164, 'loss/train': 1.484890341758728} +03/05/2022 15:02:17 - INFO - codeparrot_training - Skipping example with length 345 (seq_length=1024) +03/05/2022 15:02:22 - INFO - codeparrot_training - Step 42165: {'lr': 0.0004145103708246257, 'samples': 21588992, 'steps': 42165, 'loss/train': 2.3167335987091064} +03/05/2022 15:02:25 - INFO - codeparrot_training - Step 42166: {'lr': 0.0004145063749063038, 'samples': 21589504, 'steps': 42166, 'loss/train': 1.8873045444488525} +03/05/2022 15:02:26 - INFO - codeparrot_training - Skipping example with length 849 (seq_length=1024) +03/05/2022 15:02:31 - INFO - codeparrot_training - Step 42167: {'lr': 0.00041450237891385783, 'samples': 21590016, 'steps': 42167, 'loss/train': 2.0730371475219727} +03/05/2022 15:02:34 - INFO - codeparrot_training - Step 42168: {'lr': 0.00041449838284728964, 'samples': 21590528, 'steps': 42168, 'loss/train': 2.154503345489502} +03/05/2022 15:02:35 - INFO - codeparrot_training - Skipping example with length 366 (seq_length=1024) +03/05/2022 15:02:39 - INFO - codeparrot_training - Step 42169: {'lr': 0.000414494386706601, 'samples': 21591040, 'steps': 42169, 'loss/train': 1.163810133934021} +03/05/2022 15:02:42 - INFO - codeparrot_training - Step 42170: {'lr': 0.00041449039049179385, 'samples': 21591552, 'steps': 42170, 'loss/train': 2.304111957550049} +03/05/2022 15:02:44 - INFO - codeparrot_training - Skipping example with length 626 (seq_length=1024) +03/05/2022 15:02:47 - INFO - codeparrot_training - Step 42171: {'lr': 0.0004144863942028697, 'samples': 21592064, 'steps': 42171, 'loss/train': 1.1101733446121216} +03/05/2022 15:02:51 - INFO - codeparrot_training - Step 42172: {'lr': 0.0004144823978398306, 'samples': 21592576, 'steps': 42172, 'loss/train': 1.6818877458572388} +03/05/2022 15:02:52 - INFO - codeparrot_training - Skipping example with length 47 (seq_length=1024) +03/05/2022 15:02:56 - INFO - codeparrot_training - Step 42173: {'lr': 0.0004144784014026782, 'samples': 21593088, 'steps': 42173, 'loss/train': 1.3978164196014404} +03/05/2022 15:02:59 - INFO - codeparrot_training - Step 42174: {'lr': 0.0004144744048914145, 'samples': 21593600, 'steps': 42174, 'loss/train': 1.677324891090393} +03/05/2022 15:03:00 - INFO - codeparrot_training - Skipping example with length 824 (seq_length=1024) +03/05/2022 15:03:04 - INFO - codeparrot_training - Step 42175: {'lr': 0.0004144704083060411, 'samples': 21594112, 'steps': 42175, 'loss/train': 2.514826536178589} +03/05/2022 15:03:08 - INFO - codeparrot_training - Step 42176: {'lr': 0.00041446641164655983, 'samples': 21594624, 'steps': 42176, 'loss/train': 1.5027203559875488} +03/05/2022 15:03:08 - INFO - codeparrot_training - Skipping example with length 41 (seq_length=1024) +03/05/2022 15:03:13 - INFO - codeparrot_training - Step 42177: {'lr': 0.0004144624149129727, 'samples': 21595136, 'steps': 42177, 'loss/train': 1.678390622138977} +03/05/2022 15:03:16 - INFO - codeparrot_training - Step 42178: {'lr': 0.00041445841810528117, 'samples': 21595648, 'steps': 42178, 'loss/train': 1.3233743906021118} +03/05/2022 15:03:17 - INFO - codeparrot_training - Skipping example with length 384 (seq_length=1024) +03/05/2022 15:03:21 - INFO - codeparrot_training - Step 42179: {'lr': 0.00041445442122348727, 'samples': 21596160, 'steps': 42179, 'loss/train': 1.9203853607177734} +03/05/2022 15:03:24 - INFO - codeparrot_training - Step 42180: {'lr': 0.0004144504242675927, 'samples': 21596672, 'steps': 42180, 'loss/train': 1.6320512294769287} +03/05/2022 15:03:25 - INFO - codeparrot_training - Skipping example with length 420 (seq_length=1024) +03/05/2022 15:03:30 - INFO - codeparrot_training - Step 42181: {'lr': 0.0004144464272375994, 'samples': 21597184, 'steps': 42181, 'loss/train': 2.415912389755249} +03/05/2022 15:03:33 - INFO - codeparrot_training - Step 42182: {'lr': 0.000414442430133509, 'samples': 21597696, 'steps': 42182, 'loss/train': 0.9352185130119324} +03/05/2022 15:03:34 - INFO - codeparrot_training - Skipping example with length 276 (seq_length=1024) +03/05/2022 15:03:38 - INFO - codeparrot_training - Step 42183: {'lr': 0.00041443843295532333, 'samples': 21598208, 'steps': 42183, 'loss/train': 1.8543468713760376} +03/05/2022 15:03:42 - INFO - codeparrot_training - Step 42184: {'lr': 0.0004144344357030444, 'samples': 21598720, 'steps': 42184, 'loss/train': 1.773468255996704} +03/05/2022 15:03:47 - INFO - codeparrot_training - Step 42185: {'lr': 0.0004144304383766737, 'samples': 21599232, 'steps': 42185, 'loss/train': 1.5729092359542847} +03/05/2022 15:03:50 - INFO - codeparrot_training - Step 42186: {'lr': 0.0004144264409762133, 'samples': 21599744, 'steps': 42186, 'loss/train': 1.443477988243103} +03/05/2022 15:03:52 - INFO - codeparrot_training - Skipping example with length 611 (seq_length=1024) +03/05/2022 15:03:55 - INFO - codeparrot_training - Step 42187: {'lr': 0.0004144224435016648, 'samples': 21600256, 'steps': 42187, 'loss/train': 2.096717596054077} +03/05/2022 15:03:59 - INFO - codeparrot_training - Step 42188: {'lr': 0.00041441844595303015, 'samples': 21600768, 'steps': 42188, 'loss/train': 0.817893922328949} +03/05/2022 15:04:01 - INFO - codeparrot_training - Skipping example with length 572 (seq_length=1024) +03/05/2022 15:04:04 - INFO - codeparrot_training - Step 42189: {'lr': 0.0004144144483303111, 'samples': 21601280, 'steps': 42189, 'loss/train': 1.8398594856262207} +03/05/2022 15:04:07 - INFO - codeparrot_training - Step 42190: {'lr': 0.00041441045063350933, 'samples': 21601792, 'steps': 42190, 'loss/train': 1.8738045692443848} +03/05/2022 15:04:10 - INFO - codeparrot_training - Skipping example with length 701 (seq_length=1024) +03/05/2022 15:04:12 - INFO - codeparrot_training - Step 42191: {'lr': 0.00041440645286262677, 'samples': 21602304, 'steps': 42191, 'loss/train': 1.6132060289382935} +03/05/2022 15:04:16 - INFO - codeparrot_training - Step 42192: {'lr': 0.0004144024550176653, 'samples': 21602816, 'steps': 42192, 'loss/train': 2.137815475463867} +03/05/2022 15:04:19 - INFO - codeparrot_training - Step 42193: {'lr': 0.0004143984570986265, 'samples': 21603328, 'steps': 42193, 'loss/train': 2.0393776893615723} +03/05/2022 15:04:19 - INFO - codeparrot_training - Skipping example with length 666 (seq_length=1024) +03/05/2022 15:04:24 - INFO - codeparrot_training - Step 42194: {'lr': 0.00041439445910551235, 'samples': 21603840, 'steps': 42194, 'loss/train': 2.115001916885376} +03/05/2022 15:04:28 - INFO - codeparrot_training - Step 42195: {'lr': 0.00041439046103832454, 'samples': 21604352, 'steps': 42195, 'loss/train': 0.20712223649024963} +03/05/2022 15:04:28 - INFO - codeparrot_training - Skipping example with length 1018 (seq_length=1024) +03/05/2022 15:04:33 - INFO - codeparrot_training - Step 42196: {'lr': 0.000414386462897065, 'samples': 21604864, 'steps': 42196, 'loss/train': 1.6035618782043457} +03/05/2022 15:04:36 - INFO - codeparrot_training - Step 42197: {'lr': 0.00041438246468173545, 'samples': 21605376, 'steps': 42197, 'loss/train': 1.017578125} +03/05/2022 15:04:36 - INFO - codeparrot_training - Skipping example with length 900 (seq_length=1024) +03/05/2022 15:04:41 - INFO - codeparrot_training - Step 42198: {'lr': 0.0004143784663923377, 'samples': 21605888, 'steps': 42198, 'loss/train': 0.3560017943382263} +03/05/2022 15:04:44 - INFO - codeparrot_training - Step 42199: {'lr': 0.00041437446802887354, 'samples': 21606400, 'steps': 42199, 'loss/train': 1.8241307735443115} +03/05/2022 15:04:45 - INFO - codeparrot_training - Skipping example with length 130 (seq_length=1024) +03/05/2022 15:04:50 - INFO - codeparrot_training - Step 42200: {'lr': 0.0004143704695913447, 'samples': 21606912, 'steps': 42200, 'loss/train': 1.0067501068115234} +03/05/2022 15:04:53 - INFO - codeparrot_training - Step 42201: {'lr': 0.0004143664710797531, 'samples': 21607424, 'steps': 42201, 'loss/train': 2.0237298011779785} +03/05/2022 15:04:53 - INFO - codeparrot_training - Skipping example with length 497 (seq_length=1024) +03/05/2022 15:04:58 - INFO - codeparrot_training - Step 42202: {'lr': 0.0004143624724941006, 'samples': 21607936, 'steps': 42202, 'loss/train': 1.914589524269104} +03/05/2022 15:05:01 - INFO - codeparrot_training - Skipping example with length 51 (seq_length=1024) +03/05/2022 15:05:04 - INFO - codeparrot_training - Step 42203: {'lr': 0.00041435847383438886, 'samples': 21608448, 'steps': 42203, 'loss/train': 1.348642110824585} +03/05/2022 15:05:07 - INFO - codeparrot_training - Step 42204: {'lr': 0.0004143544751006197, 'samples': 21608960, 'steps': 42204, 'loss/train': 1.5692260265350342} +03/05/2022 15:05:10 - INFO - codeparrot_training - Skipping example with length 116 (seq_length=1024) +03/05/2022 15:05:12 - INFO - codeparrot_training - Step 42205: {'lr': 0.000414350476292795, 'samples': 21609472, 'steps': 42205, 'loss/train': 2.01166033744812} +03/05/2022 15:05:15 - INFO - codeparrot_training - Step 42206: {'lr': 0.0004143464774109164, 'samples': 21609984, 'steps': 42206, 'loss/train': 1.4153409004211426} +03/05/2022 15:05:21 - INFO - codeparrot_training - Step 42207: {'lr': 0.0004143424784549859, 'samples': 21610496, 'steps': 42207, 'loss/train': 0.9907054901123047} +03/05/2022 15:05:24 - INFO - codeparrot_training - Step 42208: {'lr': 0.00041433847942500516, 'samples': 21611008, 'steps': 42208, 'loss/train': 1.9480186700820923} +03/05/2022 15:05:27 - INFO - codeparrot_training - Step 42209: {'lr': 0.0004143344803209761, 'samples': 21611520, 'steps': 42209, 'loss/train': 0.7840176820755005} +03/05/2022 15:05:28 - INFO - codeparrot_training - Skipping example with length 104 (seq_length=1024) +03/05/2022 15:05:32 - INFO - codeparrot_training - Step 42210: {'lr': 0.0004143304811429005, 'samples': 21612032, 'steps': 42210, 'loss/train': 1.0992892980575562} +03/05/2022 15:05:36 - INFO - codeparrot_training - Step 42211: {'lr': 0.00041432648189078006, 'samples': 21612544, 'steps': 42211, 'loss/train': 2.0092458724975586} +03/05/2022 15:05:37 - INFO - codeparrot_training - Skipping example with length 221 (seq_length=1024) +03/05/2022 15:05:41 - INFO - codeparrot_training - Step 42212: {'lr': 0.0004143224825646166, 'samples': 21613056, 'steps': 42212, 'loss/train': 2.1416375637054443} +03/05/2022 15:05:44 - INFO - codeparrot_training - Step 42213: {'lr': 0.000414318483164412, 'samples': 21613568, 'steps': 42213, 'loss/train': 2.076869010925293} +03/05/2022 15:05:45 - INFO - codeparrot_training - Skipping example with length 90 (seq_length=1024) +03/05/2022 15:05:49 - INFO - codeparrot_training - Step 42214: {'lr': 0.000414314483690168, 'samples': 21614080, 'steps': 42214, 'loss/train': 1.0867937803268433} +03/05/2022 15:05:53 - INFO - codeparrot_training - Step 42215: {'lr': 0.00041431048414188645, 'samples': 21614592, 'steps': 42215, 'loss/train': 1.8841760158538818} +03/05/2022 15:05:54 - INFO - codeparrot_training - Skipping example with length 688 (seq_length=1024) +03/05/2022 15:05:58 - INFO - codeparrot_training - Step 42216: {'lr': 0.00041430648451956913, 'samples': 21615104, 'steps': 42216, 'loss/train': 2.4689812660217285} +03/05/2022 15:06:01 - INFO - codeparrot_training - Step 42217: {'lr': 0.00041430248482321794, 'samples': 21615616, 'steps': 42217, 'loss/train': 2.2619245052337646} +03/05/2022 15:06:03 - INFO - codeparrot_training - Skipping example with length 659 (seq_length=1024) +03/05/2022 15:06:06 - INFO - codeparrot_training - Step 42218: {'lr': 0.00041429848505283444, 'samples': 21616128, 'steps': 42218, 'loss/train': 2.1733052730560303} +03/05/2022 15:06:10 - INFO - codeparrot_training - Step 42219: {'lr': 0.00041429448520842064, 'samples': 21616640, 'steps': 42219, 'loss/train': 2.3186469078063965} +03/05/2022 15:06:12 - INFO - codeparrot_training - Skipping example with length 573 (seq_length=1024) +03/05/2022 15:06:15 - INFO - codeparrot_training - Step 42220: {'lr': 0.0004142904852899783, 'samples': 21617152, 'steps': 42220, 'loss/train': 1.6358833312988281} +03/05/2022 15:06:18 - INFO - codeparrot_training - Step 42221: {'lr': 0.0004142864852975092, 'samples': 21617664, 'steps': 42221, 'loss/train': 1.7900112867355347} +03/05/2022 15:06:20 - INFO - codeparrot_training - Skipping example with length 804 (seq_length=1024) +03/05/2022 15:06:23 - INFO - codeparrot_training - Step 42222: {'lr': 0.00041428248523101507, 'samples': 21618176, 'steps': 42222, 'loss/train': 1.5021916627883911} +03/05/2022 15:06:26 - INFO - codeparrot_training - Step 42223: {'lr': 0.0004142784850904978, 'samples': 21618688, 'steps': 42223, 'loss/train': 1.9106868505477905} +03/05/2022 15:06:28 - INFO - codeparrot_training - Skipping example with length 369 (seq_length=1024) +03/05/2022 15:06:32 - INFO - codeparrot_training - Step 42224: {'lr': 0.00041427448487595933, 'samples': 21619200, 'steps': 42224, 'loss/train': 2.2315149307250977} +03/05/2022 15:06:35 - INFO - codeparrot_training - Step 42225: {'lr': 0.0004142704845874012, 'samples': 21619712, 'steps': 42225, 'loss/train': 1.7947694063186646} +03/05/2022 15:06:36 - INFO - codeparrot_training - Skipping example with length 797 (seq_length=1024) +03/05/2022 15:06:40 - INFO - codeparrot_training - Step 42226: {'lr': 0.00041426648422482527, 'samples': 21620224, 'steps': 42226, 'loss/train': 1.484729290008545} +03/05/2022 15:06:43 - INFO - codeparrot_training - Step 42227: {'lr': 0.0004142624837882335, 'samples': 21620736, 'steps': 42227, 'loss/train': 2.035325765609741} +03/05/2022 15:06:45 - INFO - codeparrot_training - Skipping example with length 682 (seq_length=1024) +03/05/2022 15:06:49 - INFO - codeparrot_training - Step 42228: {'lr': 0.0004142584832776275, 'samples': 21621248, 'steps': 42228, 'loss/train': 1.801566481590271} +03/05/2022 15:06:52 - INFO - codeparrot_training - Step 42229: {'lr': 0.00041425448269300923, 'samples': 21621760, 'steps': 42229, 'loss/train': 1.6826670169830322} +03/05/2022 15:06:53 - INFO - codeparrot_training - Skipping example with length 510 (seq_length=1024) +03/05/2022 15:06:57 - INFO - codeparrot_training - Step 42230: {'lr': 0.00041425048203438036, 'samples': 21622272, 'steps': 42230, 'loss/train': 2.2317399978637695} +03/05/2022 15:07:00 - INFO - codeparrot_training - Step 42231: {'lr': 0.0004142464813017429, 'samples': 21622784, 'steps': 42231, 'loss/train': 1.0485529899597168} +03/05/2022 15:07:01 - INFO - codeparrot_training - Skipping example with length 987 (seq_length=1024) +03/05/2022 15:07:05 - INFO - codeparrot_training - Step 42232: {'lr': 0.0004142424804950984, 'samples': 21623296, 'steps': 42232, 'loss/train': 1.8699569702148438} +03/05/2022 15:07:09 - INFO - codeparrot_training - Step 42233: {'lr': 0.00041423847961444873, 'samples': 21623808, 'steps': 42233, 'loss/train': 1.1809583902359009} +03/05/2022 15:07:10 - INFO - codeparrot_training - Skipping example with length 658 (seq_length=1024) +03/05/2022 15:07:14 - INFO - codeparrot_training - Step 42234: {'lr': 0.0004142344786597958, 'samples': 21624320, 'steps': 42234, 'loss/train': 2.0509021282196045} +03/05/2022 15:07:17 - INFO - codeparrot_training - Step 42235: {'lr': 0.0004142304776311413, 'samples': 21624832, 'steps': 42235, 'loss/train': 1.9713292121887207} +03/05/2022 15:07:19 - INFO - codeparrot_training - Skipping example with length 31 (seq_length=1024) +03/05/2022 15:07:22 - INFO - codeparrot_training - Step 42236: {'lr': 0.0004142264765284871, 'samples': 21625344, 'steps': 42236, 'loss/train': 1.4943140745162964} +03/05/2022 15:07:26 - INFO - codeparrot_training - Step 42237: {'lr': 0.0004142224753518351, 'samples': 21625856, 'steps': 42237, 'loss/train': 2.1657516956329346} +03/05/2022 15:07:27 - INFO - codeparrot_training - Skipping example with length 964 (seq_length=1024) +03/05/2022 15:07:31 - INFO - codeparrot_training - Step 42238: {'lr': 0.00041421847410118685, 'samples': 21626368, 'steps': 42238, 'loss/train': 0.10753724724054337} +03/05/2022 15:07:34 - INFO - codeparrot_training - Step 42239: {'lr': 0.00041421447277654436, 'samples': 21626880, 'steps': 42239, 'loss/train': 1.2892000675201416} +03/05/2022 15:07:36 - INFO - codeparrot_training - Skipping example with length 1015 (seq_length=1024) +03/05/2022 15:07:39 - INFO - codeparrot_training - Step 42240: {'lr': 0.0004142104713779093, 'samples': 21627392, 'steps': 42240, 'loss/train': 1.4510924816131592} +03/05/2022 15:07:43 - INFO - codeparrot_training - Step 42241: {'lr': 0.00041420646990528355, 'samples': 21627904, 'steps': 42241, 'loss/train': 2.0203566551208496} +03/05/2022 15:07:44 - INFO - codeparrot_training - Skipping example with length 80 (seq_length=1024) +03/05/2022 15:07:48 - INFO - codeparrot_training - Step 42242: {'lr': 0.0004142024683586689, 'samples': 21628416, 'steps': 42242, 'loss/train': 1.955403447151184} +03/05/2022 15:07:51 - INFO - codeparrot_training - Step 42243: {'lr': 0.00041419846673806715, 'samples': 21628928, 'steps': 42243, 'loss/train': 2.081850528717041} +03/05/2022 15:07:52 - INFO - codeparrot_training - Skipping example with length 1000 (seq_length=1024) +03/05/2022 15:07:56 - INFO - codeparrot_training - Step 42244: {'lr': 0.0004141944650434801, 'samples': 21629440, 'steps': 42244, 'loss/train': 1.644861102104187} +03/05/2022 15:07:59 - INFO - codeparrot_training - Step 42245: {'lr': 0.00041419046327490964, 'samples': 21629952, 'steps': 42245, 'loss/train': 1.9781160354614258} +03/05/2022 15:08:01 - INFO - codeparrot_training - Skipping example with length 516 (seq_length=1024) +03/05/2022 15:08:05 - INFO - codeparrot_training - Step 42246: {'lr': 0.00041418646143235737, 'samples': 21630464, 'steps': 42246, 'loss/train': 1.7483758926391602} +03/05/2022 15:08:08 - INFO - codeparrot_training - Step 42247: {'lr': 0.0004141824595158253, 'samples': 21630976, 'steps': 42247, 'loss/train': 1.000533938407898} +03/05/2022 15:08:09 - INFO - codeparrot_training - Skipping example with length 123 (seq_length=1024) +03/05/2022 15:08:13 - INFO - codeparrot_training - Step 42248: {'lr': 0.0004141784575253151, 'samples': 21631488, 'steps': 42248, 'loss/train': 2.204744815826416} +03/05/2022 15:08:16 - INFO - codeparrot_training - Step 42249: {'lr': 0.0004141744554608287, 'samples': 21632000, 'steps': 42249, 'loss/train': 1.0809922218322754} +03/05/2022 15:08:17 - INFO - codeparrot_training - Skipping example with length 952 (seq_length=1024) +03/05/2022 15:08:22 - INFO - codeparrot_training - Step 42250: {'lr': 0.00041417045332236776, 'samples': 21632512, 'steps': 42250, 'loss/train': 2.0767133235931396} +03/05/2022 15:08:25 - INFO - codeparrot_training - Step 42251: {'lr': 0.0004141664511099341, 'samples': 21633024, 'steps': 42251, 'loss/train': 1.5652543306350708} +03/05/2022 15:08:26 - INFO - codeparrot_training - Skipping example with length 35 (seq_length=1024) +03/05/2022 15:08:30 - INFO - codeparrot_training - Step 42252: {'lr': 0.00041416244882352965, 'samples': 21633536, 'steps': 42252, 'loss/train': 1.1194431781768799} +03/05/2022 15:08:33 - INFO - codeparrot_training - Step 42253: {'lr': 0.00041415844646315613, 'samples': 21634048, 'steps': 42253, 'loss/train': 2.1713337898254395} +03/05/2022 15:08:34 - INFO - codeparrot_training - Skipping example with length 314 (seq_length=1024) +03/05/2022 15:08:38 - INFO - codeparrot_training - Step 42254: {'lr': 0.0004141544440288153, 'samples': 21634560, 'steps': 42254, 'loss/train': 0.7658924460411072} +03/05/2022 15:08:42 - INFO - codeparrot_training - Step 42255: {'lr': 0.0004141504415205091, 'samples': 21635072, 'steps': 42255, 'loss/train': 1.9556533098220825} +03/05/2022 15:08:42 - INFO - codeparrot_training - Skipping example with length 368 (seq_length=1024) +03/05/2022 15:08:47 - INFO - codeparrot_training - Step 42256: {'lr': 0.0004141464389382391, 'samples': 21635584, 'steps': 42256, 'loss/train': 6.466179847717285} +03/05/2022 15:08:50 - INFO - codeparrot_training - Step 42257: {'lr': 0.0004141424362820073, 'samples': 21636096, 'steps': 42257, 'loss/train': 1.5943946838378906} +03/05/2022 15:08:52 - INFO - codeparrot_training - Skipping example with length 887 (seq_length=1024) +03/05/2022 15:08:55 - INFO - codeparrot_training - Step 42258: {'lr': 0.0004141384335518155, 'samples': 21636608, 'steps': 42258, 'loss/train': 1.768483281135559} +03/05/2022 15:08:58 - INFO - codeparrot_training - Step 42259: {'lr': 0.00041413443074766543, 'samples': 21637120, 'steps': 42259, 'loss/train': 1.9302858114242554} +03/05/2022 15:09:00 - INFO - codeparrot_training - Skipping example with length 274 (seq_length=1024) +03/05/2022 15:09:04 - INFO - codeparrot_training - Step 42260: {'lr': 0.000414130427869559, 'samples': 21637632, 'steps': 42260, 'loss/train': 1.9095699787139893} +03/05/2022 15:09:07 - INFO - codeparrot_training - Step 42261: {'lr': 0.0004141264249174978, 'samples': 21638144, 'steps': 42261, 'loss/train': 2.270962953567505} +03/05/2022 15:09:08 - INFO - codeparrot_training - Skipping example with length 607 (seq_length=1024) +03/05/2022 15:09:12 - INFO - codeparrot_training - Step 42262: {'lr': 0.00041412242189148383, 'samples': 21638656, 'steps': 42262, 'loss/train': 1.8687002658843994} +03/05/2022 15:09:16 - INFO - codeparrot_training - Step 42263: {'lr': 0.00041411841879151877, 'samples': 21639168, 'steps': 42263, 'loss/train': 3.505892038345337} +03/05/2022 15:09:17 - INFO - codeparrot_training - Skipping example with length 583 (seq_length=1024) +03/05/2022 15:09:21 - INFO - codeparrot_training - Step 42264: {'lr': 0.00041411441561760455, 'samples': 21639680, 'steps': 42264, 'loss/train': 2.1697139739990234} +03/05/2022 15:09:24 - INFO - codeparrot_training - Step 42265: {'lr': 0.0004141104123697429, 'samples': 21640192, 'steps': 42265, 'loss/train': 2.6605231761932373} +03/05/2022 15:09:25 - INFO - codeparrot_training - Skipping example with length 965 (seq_length=1024) +03/05/2022 15:09:29 - INFO - codeparrot_training - Step 42266: {'lr': 0.00041410640904793563, 'samples': 21640704, 'steps': 42266, 'loss/train': 2.050741195678711} +03/05/2022 15:09:32 - INFO - codeparrot_training - Step 42267: {'lr': 0.0004141024056521845, 'samples': 21641216, 'steps': 42267, 'loss/train': 1.5571380853652954} +03/05/2022 15:09:33 - INFO - codeparrot_training - Skipping example with length 967 (seq_length=1024) +03/05/2022 15:09:38 - INFO - codeparrot_training - Step 42268: {'lr': 0.0004140984021824914, 'samples': 21641728, 'steps': 42268, 'loss/train': 1.9108104705810547} +03/05/2022 15:09:41 - INFO - codeparrot_training - Step 42269: {'lr': 0.0004140943986388581, 'samples': 21642240, 'steps': 42269, 'loss/train': 1.3083782196044922} +03/05/2022 15:09:44 - INFO - codeparrot_training - Skipping example with length 977 (seq_length=1024) +03/05/2022 15:09:46 - INFO - codeparrot_training - Step 42270: {'lr': 0.00041409039502128634, 'samples': 21642752, 'steps': 42270, 'loss/train': 1.51084303855896} +03/05/2022 15:09:50 - INFO - codeparrot_training - Step 42271: {'lr': 0.000414086391329778, 'samples': 21643264, 'steps': 42271, 'loss/train': 1.693543553352356} +03/05/2022 15:09:52 - INFO - codeparrot_training - Skipping example with length 760 (seq_length=1024) +03/05/2022 15:09:55 - INFO - codeparrot_training - Step 42272: {'lr': 0.0004140823875643349, 'samples': 21643776, 'steps': 42272, 'loss/train': 0.8703694939613342} +03/05/2022 15:09:58 - INFO - codeparrot_training - Step 42273: {'lr': 0.00041407838372495883, 'samples': 21644288, 'steps': 42273, 'loss/train': 1.7392849922180176} +03/05/2022 15:10:00 - INFO - codeparrot_training - Skipping example with length 888 (seq_length=1024) +03/05/2022 15:10:03 - INFO - codeparrot_training - Step 42274: {'lr': 0.00041407437981165154, 'samples': 21644800, 'steps': 42274, 'loss/train': 1.8765876293182373} +03/05/2022 15:10:07 - INFO - codeparrot_training - Step 42275: {'lr': 0.0004140703758244148, 'samples': 21645312, 'steps': 42275, 'loss/train': 1.4659923315048218} +03/05/2022 15:10:09 - INFO - codeparrot_training - Skipping example with length 126 (seq_length=1024) +03/05/2022 15:10:12 - INFO - codeparrot_training - Step 42276: {'lr': 0.00041406637176325054, 'samples': 21645824, 'steps': 42276, 'loss/train': 1.9681718349456787} +03/05/2022 15:10:15 - INFO - codeparrot_training - Step 42277: {'lr': 0.00041406236762816053, 'samples': 21646336, 'steps': 42277, 'loss/train': 0.23262548446655273} +03/05/2022 15:10:18 - INFO - codeparrot_training - Skipping example with length 208 (seq_length=1024) +03/05/2022 15:10:20 - INFO - codeparrot_training - Step 42278: {'lr': 0.0004140583634191465, 'samples': 21646848, 'steps': 42278, 'loss/train': 1.8907297849655151} +03/05/2022 15:10:24 - INFO - codeparrot_training - Step 42279: {'lr': 0.00041405435913621037, 'samples': 21647360, 'steps': 42279, 'loss/train': 2.085663080215454} +03/05/2022 15:10:26 - INFO - codeparrot_training - Skipping example with length 11 (seq_length=1024) +03/05/2022 15:10:29 - INFO - codeparrot_training - Step 42280: {'lr': 0.0004140503547793538, 'samples': 21647872, 'steps': 42280, 'loss/train': 1.946205496788025} +03/05/2022 15:10:32 - INFO - codeparrot_training - Step 42281: {'lr': 0.00041404635034857876, 'samples': 21648384, 'steps': 42281, 'loss/train': 1.590935230255127} +03/05/2022 15:10:35 - INFO - codeparrot_training - Step 42282: {'lr': 0.00041404234584388683, 'samples': 21648896, 'steps': 42282, 'loss/train': 1.3725224733352661} +03/05/2022 15:10:35 - INFO - codeparrot_training - Skipping example with length 94 (seq_length=1024) +03/05/2022 15:10:41 - INFO - codeparrot_training - Step 42283: {'lr': 0.00041403834126528007, 'samples': 21649408, 'steps': 42283, 'loss/train': 2.4166133403778076} +03/05/2022 15:10:44 - INFO - codeparrot_training - Step 42284: {'lr': 0.00041403433661276015, 'samples': 21649920, 'steps': 42284, 'loss/train': 2.694035768508911} +03/05/2022 15:10:44 - INFO - codeparrot_training - Skipping example with length 782 (seq_length=1024) +03/05/2022 15:10:49 - INFO - codeparrot_training - Step 42285: {'lr': 0.0004140303318863288, 'samples': 21650432, 'steps': 42285, 'loss/train': 1.734609842300415} +03/05/2022 15:10:52 - INFO - codeparrot_training - Step 42286: {'lr': 0.00041402632708598797, 'samples': 21650944, 'steps': 42286, 'loss/train': 2.030993938446045} +03/05/2022 15:10:53 - INFO - codeparrot_training - Skipping example with length 234 (seq_length=1024) +03/05/2022 15:10:58 - INFO - codeparrot_training - Step 42287: {'lr': 0.0004140223222117394, 'samples': 21651456, 'steps': 42287, 'loss/train': 1.8440669775009155} +03/05/2022 15:11:01 - INFO - codeparrot_training - Step 42288: {'lr': 0.00041401831726358497, 'samples': 21651968, 'steps': 42288, 'loss/train': 1.7034146785736084} +03/05/2022 15:11:01 - INFO - codeparrot_training - Skipping example with length 263 (seq_length=1024) +03/05/2022 15:11:06 - INFO - codeparrot_training - Step 42289: {'lr': 0.0004140143122415263, 'samples': 21652480, 'steps': 42289, 'loss/train': 1.4834851026535034} +03/05/2022 15:11:09 - INFO - codeparrot_training - Step 42290: {'lr': 0.0004140103071455654, 'samples': 21652992, 'steps': 42290, 'loss/train': 1.0906099081039429} +03/05/2022 15:11:09 - INFO - codeparrot_training - Skipping example with length 216 (seq_length=1024) +03/05/2022 15:11:14 - INFO - codeparrot_training - Step 42291: {'lr': 0.000414006301975704, 'samples': 21653504, 'steps': 42291, 'loss/train': 1.7261799573898315} +03/05/2022 15:11:18 - INFO - codeparrot_training - Step 42292: {'lr': 0.0004140022967319438, 'samples': 21654016, 'steps': 42292, 'loss/train': 1.9778822660446167} +03/05/2022 15:11:18 - INFO - codeparrot_training - Skipping example with length 926 (seq_length=1024) +03/05/2022 15:11:23 - INFO - codeparrot_training - Step 42293: {'lr': 0.0004139982914142868, 'samples': 21654528, 'steps': 42293, 'loss/train': 1.683379888534546} +03/05/2022 15:11:26 - INFO - codeparrot_training - Step 42294: {'lr': 0.0004139942860227346, 'samples': 21655040, 'steps': 42294, 'loss/train': 2.001723289489746} +03/05/2022 15:11:26 - INFO - codeparrot_training - Skipping example with length 612 (seq_length=1024) +03/05/2022 15:11:31 - INFO - codeparrot_training - Step 42295: {'lr': 0.00041399028055728914, 'samples': 21655552, 'steps': 42295, 'loss/train': 1.8406199216842651} +03/05/2022 15:11:34 - INFO - codeparrot_training - Step 42296: {'lr': 0.0004139862750179523, 'samples': 21656064, 'steps': 42296, 'loss/train': 1.232178807258606} +03/05/2022 15:11:35 - INFO - codeparrot_training - Skipping example with length 536 (seq_length=1024) +03/05/2022 15:11:40 - INFO - codeparrot_training - Step 42297: {'lr': 0.0004139822694047256, 'samples': 21656576, 'steps': 42297, 'loss/train': 1.1844849586486816} +03/05/2022 15:11:43 - INFO - codeparrot_training - Skipping example with length 926 (seq_length=1024) +03/05/2022 15:11:45 - INFO - codeparrot_training - Step 42298: {'lr': 0.0004139782637176112, 'samples': 21657088, 'steps': 42298, 'loss/train': 1.055175542831421} +03/05/2022 15:11:48 - INFO - codeparrot_training - Step 42299: {'lr': 0.0004139742579566106, 'samples': 21657600, 'steps': 42299, 'loss/train': 1.8662828207015991} +03/05/2022 15:11:51 - INFO - codeparrot_training - Skipping example with length 351 (seq_length=1024) +03/05/2022 15:11:53 - INFO - codeparrot_training - Step 42300: {'lr': 0.00041397025212172573, 'samples': 21658112, 'steps': 42300, 'loss/train': 1.5083459615707397} +03/05/2022 15:11:57 - INFO - codeparrot_training - Step 42301: {'lr': 0.00041396624621295843, 'samples': 21658624, 'steps': 42301, 'loss/train': 1.764953374862671} +03/05/2022 15:11:59 - INFO - codeparrot_training - Skipping example with length 383 (seq_length=1024) +03/05/2022 15:12:02 - INFO - codeparrot_training - Step 42302: {'lr': 0.00041396224023031045, 'samples': 21659136, 'steps': 42302, 'loss/train': 1.308441162109375} +03/05/2022 15:12:05 - INFO - codeparrot_training - Step 42303: {'lr': 0.0004139582341737836, 'samples': 21659648, 'steps': 42303, 'loss/train': 1.327883005142212} +03/05/2022 15:12:08 - INFO - codeparrot_training - Skipping example with length 209 (seq_length=1024) +03/05/2022 15:12:10 - INFO - codeparrot_training - Step 42304: {'lr': 0.0004139542280433797, 'samples': 21660160, 'steps': 42304, 'loss/train': 1.9545114040374756} +03/05/2022 15:12:14 - INFO - codeparrot_training - Step 42305: {'lr': 0.00041395022183910064, 'samples': 21660672, 'steps': 42305, 'loss/train': 1.517080307006836} +03/05/2022 15:12:16 - INFO - codeparrot_training - Skipping example with length 921 (seq_length=1024) +03/05/2022 15:12:19 - INFO - codeparrot_training - Step 42306: {'lr': 0.00041394621556094805, 'samples': 21661184, 'steps': 42306, 'loss/train': 1.4589508771896362} +03/05/2022 15:12:22 - INFO - codeparrot_training - Step 42307: {'lr': 0.0004139422092089239, 'samples': 21661696, 'steps': 42307, 'loss/train': 1.1395530700683594} +03/05/2022 15:12:24 - INFO - codeparrot_training - Skipping example with length 842 (seq_length=1024) +03/05/2022 15:12:28 - INFO - codeparrot_training - Step 42308: {'lr': 0.0004139382027830298, 'samples': 21662208, 'steps': 42308, 'loss/train': 1.7505488395690918} +03/05/2022 15:12:31 - INFO - codeparrot_training - Step 42309: {'lr': 0.00041393419628326777, 'samples': 21662720, 'steps': 42309, 'loss/train': 1.7447443008422852} +03/05/2022 15:12:34 - INFO - codeparrot_training - Skipping example with length 129 (seq_length=1024) +03/05/2022 15:12:36 - INFO - codeparrot_training - Step 42310: {'lr': 0.00041393018970963945, 'samples': 21663232, 'steps': 42310, 'loss/train': 2.2256128787994385} +03/05/2022 15:12:39 - INFO - codeparrot_training - Step 42311: {'lr': 0.00041392618306214683, 'samples': 21663744, 'steps': 42311, 'loss/train': 1.460722804069519} +03/05/2022 15:12:42 - INFO - codeparrot_training - Skipping example with length 927 (seq_length=1024) +03/05/2022 15:12:44 - INFO - codeparrot_training - Step 42312: {'lr': 0.0004139221763407915, 'samples': 21664256, 'steps': 42312, 'loss/train': 2.418182373046875} +03/05/2022 15:12:48 - INFO - codeparrot_training - Step 42313: {'lr': 0.00041391816954557543, 'samples': 21664768, 'steps': 42313, 'loss/train': 1.9882206916809082} +03/05/2022 15:12:50 - INFO - codeparrot_training - Skipping example with length 523 (seq_length=1024) +03/05/2022 15:12:53 - INFO - codeparrot_training - Step 42314: {'lr': 0.00041391416267650034, 'samples': 21665280, 'steps': 42314, 'loss/train': 2.168405532836914} +03/05/2022 15:12:56 - INFO - codeparrot_training - Step 42315: {'lr': 0.00041391015573356805, 'samples': 21665792, 'steps': 42315, 'loss/train': 1.2848774194717407} +03/05/2022 15:12:59 - INFO - codeparrot_training - Skipping example with length 520 (seq_length=1024) +03/05/2022 15:13:01 - INFO - codeparrot_training - Step 42316: {'lr': 0.0004139061487167804, 'samples': 21666304, 'steps': 42316, 'loss/train': 1.385947346687317} +03/05/2022 15:13:05 - INFO - codeparrot_training - Step 42317: {'lr': 0.00041390214162613916, 'samples': 21666816, 'steps': 42317, 'loss/train': 1.472153663635254} +03/05/2022 15:13:07 - INFO - codeparrot_training - Skipping example with length 483 (seq_length=1024) +03/05/2022 15:13:10 - INFO - codeparrot_training - Step 42318: {'lr': 0.00041389813446164614, 'samples': 21667328, 'steps': 42318, 'loss/train': 1.05081307888031} +03/05/2022 15:13:13 - INFO - codeparrot_training - Step 42319: {'lr': 0.0004138941272233031, 'samples': 21667840, 'steps': 42319, 'loss/train': 1.188031554222107} +03/05/2022 15:13:16 - INFO - codeparrot_training - Skipping example with length 896 (seq_length=1024) +03/05/2022 15:13:18 - INFO - codeparrot_training - Step 42320: {'lr': 0.0004138901199111119, 'samples': 21668352, 'steps': 42320, 'loss/train': 1.2935422658920288} +03/05/2022 15:13:21 - INFO - codeparrot_training - Step 42321: {'lr': 0.00041388611252507446, 'samples': 21668864, 'steps': 42321, 'loss/train': 1.2160004377365112} +03/05/2022 15:13:24 - INFO - codeparrot_training - Skipping example with length 653 (seq_length=1024) +03/05/2022 15:13:27 - INFO - codeparrot_training - Step 42322: {'lr': 0.0004138821050651923, 'samples': 21669376, 'steps': 42322, 'loss/train': 0.896248459815979} +03/05/2022 15:13:30 - INFO - codeparrot_training - Step 42323: {'lr': 0.00041387809753146756, 'samples': 21669888, 'steps': 42323, 'loss/train': 1.356410026550293} +03/05/2022 15:13:33 - INFO - codeparrot_training - Skipping example with length 22 (seq_length=1024) +03/05/2022 15:13:35 - INFO - codeparrot_training - Step 42324: {'lr': 0.00041387408992390177, 'samples': 21670400, 'steps': 42324, 'loss/train': 1.4991496801376343} +03/05/2022 15:13:39 - INFO - codeparrot_training - Step 42325: {'lr': 0.0004138700822424968, 'samples': 21670912, 'steps': 42325, 'loss/train': 0.8494857549667358} +03/05/2022 15:13:41 - INFO - codeparrot_training - Skipping example with length 334 (seq_length=1024) +03/05/2022 15:13:44 - INFO - codeparrot_training - Step 42326: {'lr': 0.0004138660744872547, 'samples': 21671424, 'steps': 42326, 'loss/train': 1.6943747997283936} +03/05/2022 15:13:47 - INFO - codeparrot_training - Step 42327: {'lr': 0.00041386206665817684, 'samples': 21671936, 'steps': 42327, 'loss/train': 2.171386241912842} +03/05/2022 15:13:50 - INFO - codeparrot_training - Step 42328: {'lr': 0.0004138580587552654, 'samples': 21672448, 'steps': 42328, 'loss/train': 1.4312162399291992} +03/05/2022 15:13:51 - INFO - codeparrot_training - Skipping example with length 718 (seq_length=1024) +03/05/2022 15:13:56 - INFO - codeparrot_training - Step 42329: {'lr': 0.000413854050778522, 'samples': 21672960, 'steps': 42329, 'loss/train': 2.0114142894744873} +03/05/2022 15:13:59 - INFO - codeparrot_training - Step 42330: {'lr': 0.00041385004272794846, 'samples': 21673472, 'steps': 42330, 'loss/train': 0.3372471332550049} +03/05/2022 15:14:00 - INFO - codeparrot_training - Skipping example with length 1007 (seq_length=1024) +03/05/2022 15:14:04 - INFO - codeparrot_training - Step 42331: {'lr': 0.0004138460346035467, 'samples': 21673984, 'steps': 42331, 'loss/train': 2.801154136657715} +03/05/2022 15:14:08 - INFO - codeparrot_training - Step 42332: {'lr': 0.0004138420264053184, 'samples': 21674496, 'steps': 42332, 'loss/train': 2.1664011478424072} +03/05/2022 15:14:09 - INFO - codeparrot_training - Skipping example with length 788 (seq_length=1024) +03/05/2022 15:14:13 - INFO - codeparrot_training - Step 42333: {'lr': 0.00041383801813326543, 'samples': 21675008, 'steps': 42333, 'loss/train': 1.3563294410705566} +03/05/2022 15:14:16 - INFO - codeparrot_training - Step 42334: {'lr': 0.00041383400978738956, 'samples': 21675520, 'steps': 42334, 'loss/train': 1.7399111986160278} +03/05/2022 15:14:18 - INFO - codeparrot_training - Skipping example with length 425 (seq_length=1024) +03/05/2022 15:14:21 - INFO - codeparrot_training - Step 42335: {'lr': 0.0004138300013676926, 'samples': 21676032, 'steps': 42335, 'loss/train': 0.2610914707183838} +03/05/2022 15:14:25 - INFO - codeparrot_training - Step 42336: {'lr': 0.0004138259928741764, 'samples': 21676544, 'steps': 42336, 'loss/train': 1.5745344161987305} +03/05/2022 15:14:26 - INFO - codeparrot_training - Skipping example with length 854 (seq_length=1024) +03/05/2022 15:14:30 - INFO - codeparrot_training - Step 42337: {'lr': 0.0004138219843068427, 'samples': 21677056, 'steps': 42337, 'loss/train': 1.6311700344085693} +03/05/2022 15:14:33 - INFO - codeparrot_training - Step 42338: {'lr': 0.00041381797566569345, 'samples': 21677568, 'steps': 42338, 'loss/train': 2.036377191543579} +03/05/2022 15:14:35 - INFO - codeparrot_training - Skipping example with length 360 (seq_length=1024) +03/05/2022 15:14:38 - INFO - codeparrot_training - Step 42339: {'lr': 0.0004138139669507303, 'samples': 21678080, 'steps': 42339, 'loss/train': 1.6639888286590576} +03/05/2022 15:14:42 - INFO - codeparrot_training - Step 42340: {'lr': 0.000413809958161955, 'samples': 21678592, 'steps': 42340, 'loss/train': 2.180830717086792} +03/05/2022 15:14:43 - INFO - codeparrot_training - Skipping example with length 717 (seq_length=1024) +03/05/2022 15:14:47 - INFO - codeparrot_training - Step 42341: {'lr': 0.0004138059492993695, 'samples': 21679104, 'steps': 42341, 'loss/train': 1.6778161525726318} +03/05/2022 15:14:50 - INFO - codeparrot_training - Step 42342: {'lr': 0.0004138019403629756, 'samples': 21679616, 'steps': 42342, 'loss/train': 1.9946653842926025} +03/05/2022 15:14:51 - INFO - codeparrot_training - Skipping example with length 693 (seq_length=1024) +03/05/2022 15:14:55 - INFO - codeparrot_training - Step 42343: {'lr': 0.0004137979313527751, 'samples': 21680128, 'steps': 42343, 'loss/train': 1.8503549098968506} +03/05/2022 15:14:58 - INFO - codeparrot_training - Step 42344: {'lr': 0.00041379392226876974, 'samples': 21680640, 'steps': 42344, 'loss/train': 2.1069529056549072} +03/05/2022 15:15:00 - INFO - codeparrot_training - Skipping example with length 706 (seq_length=1024) +03/05/2022 15:15:04 - INFO - codeparrot_training - Step 42345: {'lr': 0.0004137899131109614, 'samples': 21681152, 'steps': 42345, 'loss/train': 2.392202615737915} +03/05/2022 15:15:07 - INFO - codeparrot_training - Step 42346: {'lr': 0.0004137859038793518, 'samples': 21681664, 'steps': 42346, 'loss/train': 2.257373094558716} +03/05/2022 15:15:08 - INFO - codeparrot_training - Skipping example with length 547 (seq_length=1024) +03/05/2022 15:15:12 - INFO - codeparrot_training - Step 42347: {'lr': 0.0004137818945739428, 'samples': 21682176, 'steps': 42347, 'loss/train': 0.6692388653755188} +03/05/2022 15:15:15 - INFO - codeparrot_training - Step 42348: {'lr': 0.00041377788519473624, 'samples': 21682688, 'steps': 42348, 'loss/train': 0.8978343605995178} +03/05/2022 15:15:17 - INFO - codeparrot_training - Skipping example with length 228 (seq_length=1024) +03/05/2022 15:15:21 - INFO - codeparrot_training - Step 42349: {'lr': 0.0004137738757417339, 'samples': 21683200, 'steps': 42349, 'loss/train': 2.5353991985321045} +03/05/2022 15:15:24 - INFO - codeparrot_training - Step 42350: {'lr': 0.0004137698662149375, 'samples': 21683712, 'steps': 42350, 'loss/train': 1.7081776857376099} +03/05/2022 15:15:25 - INFO - codeparrot_training - Skipping example with length 727 (seq_length=1024) +03/05/2022 15:15:29 - INFO - codeparrot_training - Step 42351: {'lr': 0.00041376585661434903, 'samples': 21684224, 'steps': 42351, 'loss/train': 1.7706999778747559} +03/05/2022 15:15:32 - INFO - codeparrot_training - Step 42352: {'lr': 0.0004137618469399702, 'samples': 21684736, 'steps': 42352, 'loss/train': 2.1542680263519287} +03/05/2022 15:15:33 - INFO - codeparrot_training - Skipping example with length 110 (seq_length=1024) +03/05/2022 15:15:38 - INFO - codeparrot_training - Step 42353: {'lr': 0.0004137578371918027, 'samples': 21685248, 'steps': 42353, 'loss/train': 1.7423452138900757} +03/05/2022 15:15:41 - INFO - codeparrot_training - Step 42354: {'lr': 0.00041375382736984857, 'samples': 21685760, 'steps': 42354, 'loss/train': 2.076129198074341} +03/05/2022 15:15:42 - INFO - codeparrot_training - Skipping example with length 878 (seq_length=1024) +03/05/2022 15:15:46 - INFO - codeparrot_training - Step 42355: {'lr': 0.0004137498174741094, 'samples': 21686272, 'steps': 42355, 'loss/train': 1.5596511363983154} +03/05/2022 15:15:49 - INFO - codeparrot_training - Step 42356: {'lr': 0.0004137458075045871, 'samples': 21686784, 'steps': 42356, 'loss/train': 1.682293176651001} +03/05/2022 15:15:51 - INFO - codeparrot_training - Skipping example with length 469 (seq_length=1024) +03/05/2022 15:15:55 - INFO - codeparrot_training - Step 42357: {'lr': 0.0004137417974612835, 'samples': 21687296, 'steps': 42357, 'loss/train': 1.4429855346679688} +03/05/2022 15:15:58 - INFO - codeparrot_training - Step 42358: {'lr': 0.0004137377873442004, 'samples': 21687808, 'steps': 42358, 'loss/train': 1.4033256769180298} +03/05/2022 15:15:59 - INFO - codeparrot_training - Skipping example with length 239 (seq_length=1024) +03/05/2022 15:16:03 - INFO - codeparrot_training - Step 42359: {'lr': 0.00041373377715333946, 'samples': 21688320, 'steps': 42359, 'loss/train': 1.4400829076766968} +03/05/2022 15:16:06 - INFO - codeparrot_training - Step 42360: {'lr': 0.00041372976688870266, 'samples': 21688832, 'steps': 42360, 'loss/train': 1.1200079917907715} +03/05/2022 15:16:07 - INFO - codeparrot_training - Skipping example with length 512 (seq_length=1024) +03/05/2022 15:16:12 - INFO - codeparrot_training - Step 42361: {'lr': 0.0004137257565502918, 'samples': 21689344, 'steps': 42361, 'loss/train': 2.3171565532684326} +03/05/2022 15:16:15 - INFO - codeparrot_training - Step 42362: {'lr': 0.00041372174613810863, 'samples': 21689856, 'steps': 42362, 'loss/train': 1.7401020526885986} +03/05/2022 15:16:16 - INFO - codeparrot_training - Skipping example with length 62 (seq_length=1024) +03/05/2022 15:16:20 - INFO - codeparrot_training - Step 42363: {'lr': 0.00041371773565215494, 'samples': 21690368, 'steps': 42363, 'loss/train': 1.2325447797775269} +03/05/2022 15:16:23 - INFO - codeparrot_training - Step 42364: {'lr': 0.00041371372509243256, 'samples': 21690880, 'steps': 42364, 'loss/train': 1.7275514602661133} +03/05/2022 15:16:24 - INFO - codeparrot_training - Skipping example with length 214 (seq_length=1024) +03/05/2022 15:16:28 - INFO - codeparrot_training - Step 42365: {'lr': 0.00041370971445894335, 'samples': 21691392, 'steps': 42365, 'loss/train': 0.6369551420211792} +03/05/2022 15:16:32 - INFO - codeparrot_training - Step 42366: {'lr': 0.00041370570375168903, 'samples': 21691904, 'steps': 42366, 'loss/train': 1.9925694465637207} +03/05/2022 15:16:32 - INFO - codeparrot_training - Skipping example with length 858 (seq_length=1024) +03/05/2022 15:16:37 - INFO - codeparrot_training - Step 42367: {'lr': 0.00041370169297067145, 'samples': 21692416, 'steps': 42367, 'loss/train': 0.3987744152545929} +03/05/2022 15:16:40 - INFO - codeparrot_training - Step 42368: {'lr': 0.00041369768211589245, 'samples': 21692928, 'steps': 42368, 'loss/train': 1.7809486389160156} +03/05/2022 15:16:41 - INFO - codeparrot_training - Skipping example with length 898 (seq_length=1024) +03/05/2022 15:16:45 - INFO - codeparrot_training - Step 42369: {'lr': 0.0004136936711873537, 'samples': 21693440, 'steps': 42369, 'loss/train': 2.1908984184265137} +03/05/2022 15:16:49 - INFO - codeparrot_training - Step 42370: {'lr': 0.0004136896601850572, 'samples': 21693952, 'steps': 42370, 'loss/train': 2.257387161254883} +03/05/2022 15:16:49 - INFO - codeparrot_training - Skipping example with length 786 (seq_length=1024) +03/05/2022 15:16:54 - INFO - codeparrot_training - Step 42371: {'lr': 0.0004136856491090046, 'samples': 21694464, 'steps': 42371, 'loss/train': 2.691514730453491} +03/05/2022 15:16:57 - INFO - codeparrot_training - Step 42372: {'lr': 0.0004136816379591979, 'samples': 21694976, 'steps': 42372, 'loss/train': 1.0881742238998413} +03/05/2022 15:16:58 - INFO - codeparrot_training - Skipping example with length 687 (seq_length=1024) +03/05/2022 15:17:02 - INFO - codeparrot_training - Step 42373: {'lr': 0.0004136776267356387, 'samples': 21695488, 'steps': 42373, 'loss/train': 2.3424744606018066} +03/05/2022 15:17:06 - INFO - codeparrot_training - Step 42374: {'lr': 0.0004136736154383288, 'samples': 21696000, 'steps': 42374, 'loss/train': 1.1213961839675903} +03/05/2022 15:17:06 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/05/2022 15:17:11 - INFO - codeparrot_training - Step 42375: {'lr': 0.00041366960406727024, 'samples': 21696512, 'steps': 42375, 'loss/train': 1.706502079963684} +03/05/2022 15:17:14 - INFO - codeparrot_training - Step 42376: {'lr': 0.00041366559262246463, 'samples': 21697024, 'steps': 42376, 'loss/train': 1.6131337881088257} +03/05/2022 15:17:15 - INFO - codeparrot_training - Skipping example with length 375 (seq_length=1024) +03/05/2022 15:17:19 - INFO - codeparrot_training - Step 42377: {'lr': 0.00041366158110391375, 'samples': 21697536, 'steps': 42377, 'loss/train': 1.0953948497772217} +03/05/2022 15:17:22 - INFO - codeparrot_training - Step 42378: {'lr': 0.0004136575695116196, 'samples': 21698048, 'steps': 42378, 'loss/train': 1.4052002429962158} +03/05/2022 15:17:23 - INFO - codeparrot_training - Skipping example with length 70 (seq_length=1024) +03/05/2022 15:17:28 - INFO - codeparrot_training - Step 42379: {'lr': 0.0004136535578455838, 'samples': 21698560, 'steps': 42379, 'loss/train': 1.6577708721160889} +03/05/2022 15:17:31 - INFO - codeparrot_training - Step 42380: {'lr': 0.0004136495461058083, 'samples': 21699072, 'steps': 42380, 'loss/train': 1.4538023471832275} +03/05/2022 15:17:31 - INFO - codeparrot_training - Skipping example with length 595 (seq_length=1024) +03/05/2022 15:17:36 - INFO - codeparrot_training - Step 42381: {'lr': 0.0004136455342922948, 'samples': 21699584, 'steps': 42381, 'loss/train': 1.0310801267623901} +03/05/2022 15:17:39 - INFO - codeparrot_training - Step 42382: {'lr': 0.0004136415224050451, 'samples': 21700096, 'steps': 42382, 'loss/train': 1.9006279706954956} +03/05/2022 15:17:40 - INFO - codeparrot_training - Skipping example with length 88 (seq_length=1024) +03/05/2022 15:17:45 - INFO - codeparrot_training - Step 42383: {'lr': 0.0004136375104440611, 'samples': 21700608, 'steps': 42383, 'loss/train': 1.6706410646438599} +03/05/2022 15:17:48 - INFO - codeparrot_training - Step 42384: {'lr': 0.0004136334984093446, 'samples': 21701120, 'steps': 42384, 'loss/train': 1.9014859199523926} +03/05/2022 15:17:50 - INFO - codeparrot_training - Skipping example with length 817 (seq_length=1024) +03/05/2022 15:17:53 - INFO - codeparrot_training - Step 42385: {'lr': 0.0004136294863008974, 'samples': 21701632, 'steps': 42385, 'loss/train': 2.427670478820801} +03/05/2022 15:17:56 - INFO - codeparrot_training - Step 42386: {'lr': 0.00041362547411872116, 'samples': 21702144, 'steps': 42386, 'loss/train': 0.4849485754966736} +03/05/2022 15:17:58 - INFO - codeparrot_training - Skipping example with length 171 (seq_length=1024) +03/05/2022 15:18:02 - INFO - codeparrot_training - Step 42387: {'lr': 0.00041362146186281777, 'samples': 21702656, 'steps': 42387, 'loss/train': 1.632948637008667} +03/05/2022 15:18:05 - INFO - codeparrot_training - Step 42388: {'lr': 0.00041361744953318923, 'samples': 21703168, 'steps': 42388, 'loss/train': 1.691718339920044} +03/05/2022 15:18:07 - INFO - codeparrot_training - Skipping example with length 427 (seq_length=1024) +03/05/2022 15:18:10 - INFO - codeparrot_training - Step 42389: {'lr': 0.0004136134371298371, 'samples': 21703680, 'steps': 42389, 'loss/train': 2.0299110412597656} +03/05/2022 15:18:14 - INFO - codeparrot_training - Step 42390: {'lr': 0.0004136094246527633, 'samples': 21704192, 'steps': 42390, 'loss/train': 1.5378493070602417} +03/05/2022 15:18:16 - INFO - codeparrot_training - Skipping example with length 33 (seq_length=1024) +03/05/2022 15:18:19 - INFO - codeparrot_training - Step 42391: {'lr': 0.0004136054121019697, 'samples': 21704704, 'steps': 42391, 'loss/train': 2.0200769901275635} +03/05/2022 15:18:22 - INFO - codeparrot_training - Step 42392: {'lr': 0.0004136013994774579, 'samples': 21705216, 'steps': 42392, 'loss/train': 2.1085689067840576} +03/05/2022 15:18:25 - INFO - codeparrot_training - Step 42393: {'lr': 0.00041359738677922993, 'samples': 21705728, 'steps': 42393, 'loss/train': 1.471612811088562} +03/05/2022 15:18:26 - INFO - codeparrot_training - Skipping example with length 614 (seq_length=1024) +03/05/2022 15:18:30 - INFO - codeparrot_training - Step 42394: {'lr': 0.00041359337400728746, 'samples': 21706240, 'steps': 42394, 'loss/train': 1.5655121803283691} +03/05/2022 15:18:34 - INFO - codeparrot_training - Step 42395: {'lr': 0.00041358936116163224, 'samples': 21706752, 'steps': 42395, 'loss/train': 1.8408195972442627} +03/05/2022 15:18:34 - INFO - codeparrot_training - Skipping example with length 159 (seq_length=1024) +03/05/2022 15:18:39 - INFO - codeparrot_training - Step 42396: {'lr': 0.00041358534824226635, 'samples': 21707264, 'steps': 42396, 'loss/train': 1.8806945085525513} +03/05/2022 15:18:42 - INFO - codeparrot_training - Step 42397: {'lr': 0.0004135813352491913, 'samples': 21707776, 'steps': 42397, 'loss/train': 1.1682320833206177} +03/05/2022 15:18:42 - INFO - codeparrot_training - Skipping example with length 453 (seq_length=1024) +03/05/2022 15:18:47 - INFO - codeparrot_training - Step 42398: {'lr': 0.00041357732218240905, 'samples': 21708288, 'steps': 42398, 'loss/train': 0.895717978477478} +03/05/2022 15:18:50 - INFO - codeparrot_training - Step 42399: {'lr': 0.0004135733090419215, 'samples': 21708800, 'steps': 42399, 'loss/train': 1.5662328004837036} +03/05/2022 15:18:50 - INFO - codeparrot_training - Skipping example with length 803 (seq_length=1024) +03/05/2022 15:18:56 - INFO - codeparrot_training - Step 42400: {'lr': 0.00041356929582773023, 'samples': 21709312, 'steps': 42400, 'loss/train': 0.6348327398300171} +03/05/2022 15:18:59 - INFO - codeparrot_training - Step 42401: {'lr': 0.00041356528253983714, 'samples': 21709824, 'steps': 42401, 'loss/train': 1.604033350944519} +03/05/2022 15:18:59 - INFO - codeparrot_training - Skipping example with length 284 (seq_length=1024) +03/05/2022 15:19:04 - INFO - codeparrot_training - Step 42402: {'lr': 0.0004135612691782441, 'samples': 21710336, 'steps': 42402, 'loss/train': 1.4334324598312378} +03/05/2022 15:19:07 - INFO - codeparrot_training - Step 42403: {'lr': 0.0004135572557429529, 'samples': 21710848, 'steps': 42403, 'loss/train': 0.7647178769111633} +03/05/2022 15:19:08 - INFO - codeparrot_training - Skipping example with length 108 (seq_length=1024) +03/05/2022 15:19:13 - INFO - codeparrot_training - Step 42404: {'lr': 0.0004135532422339653, 'samples': 21711360, 'steps': 42404, 'loss/train': 1.9096533060073853} +03/05/2022 15:19:16 - INFO - codeparrot_training - Step 42405: {'lr': 0.00041354922865128316, 'samples': 21711872, 'steps': 42405, 'loss/train': 0.6175175905227661} +03/05/2022 15:19:16 - INFO - codeparrot_training - Skipping example with length 776 (seq_length=1024) +03/05/2022 15:19:21 - INFO - codeparrot_training - Step 42406: {'lr': 0.00041354521499490813, 'samples': 21712384, 'steps': 42406, 'loss/train': 1.987703561782837} +03/05/2022 15:19:24 - INFO - codeparrot_training - Skipping example with length 498 (seq_length=1024) +03/05/2022 15:19:26 - INFO - codeparrot_training - Step 42407: {'lr': 0.00041354120126484227, 'samples': 21712896, 'steps': 42407, 'loss/train': 1.938920259475708} +03/05/2022 15:19:30 - INFO - codeparrot_training - Step 42408: {'lr': 0.00041353718746108724, 'samples': 21713408, 'steps': 42408, 'loss/train': 1.3082078695297241} +03/05/2022 15:19:33 - INFO - codeparrot_training - Step 42409: {'lr': 0.00041353317358364496, 'samples': 21713920, 'steps': 42409, 'loss/train': 1.6471275091171265} +03/05/2022 15:19:33 - INFO - codeparrot_training - Skipping example with length 234 (seq_length=1024) +03/05/2022 15:19:38 - INFO - codeparrot_training - Step 42410: {'lr': 0.00041352915963251705, 'samples': 21714432, 'steps': 42410, 'loss/train': 2.706022024154663} +03/05/2022 15:19:41 - INFO - codeparrot_training - Step 42411: {'lr': 0.00041352514560770545, 'samples': 21714944, 'steps': 42411, 'loss/train': 1.4449020624160767} +03/05/2022 15:19:42 - INFO - codeparrot_training - Skipping example with length 991 (seq_length=1024) +03/05/2022 15:19:47 - INFO - codeparrot_training - Step 42412: {'lr': 0.000413521131509212, 'samples': 21715456, 'steps': 42412, 'loss/train': 1.7380239963531494} +03/05/2022 15:19:50 - INFO - codeparrot_training - Step 42413: {'lr': 0.0004135171173370383, 'samples': 21715968, 'steps': 42413, 'loss/train': 5.091571807861328} +03/05/2022 15:19:51 - INFO - codeparrot_training - Skipping example with length 879 (seq_length=1024) +03/05/2022 15:19:56 - INFO - codeparrot_training - Step 42414: {'lr': 0.00041351310309118653, 'samples': 21716480, 'steps': 42414, 'loss/train': 2.850825548171997} +03/05/2022 15:19:59 - INFO - codeparrot_training - Step 42415: {'lr': 0.00041350908877165805, 'samples': 21716992, 'steps': 42415, 'loss/train': 2.0891385078430176} +03/05/2022 15:20:02 - INFO - codeparrot_training - Step 42416: {'lr': 0.00041350507437845505, 'samples': 21717504, 'steps': 42416, 'loss/train': 1.8256524801254272} +03/05/2022 15:20:02 - INFO - codeparrot_training - Skipping example with length 839 (seq_length=1024) +03/05/2022 15:20:07 - INFO - codeparrot_training - Step 42417: {'lr': 0.00041350105991157915, 'samples': 21718016, 'steps': 42417, 'loss/train': 1.523796796798706} +03/05/2022 15:20:11 - INFO - codeparrot_training - Step 42418: {'lr': 0.00041349704537103216, 'samples': 21718528, 'steps': 42418, 'loss/train': 1.7898316383361816} +03/05/2022 15:20:11 - INFO - codeparrot_training - Skipping example with length 50 (seq_length=1024) +03/05/2022 15:20:16 - INFO - codeparrot_training - Step 42419: {'lr': 0.000413493030756816, 'samples': 21719040, 'steps': 42419, 'loss/train': 1.7714990377426147} +03/05/2022 15:20:19 - INFO - codeparrot_training - Step 42420: {'lr': 0.0004134890160689323, 'samples': 21719552, 'steps': 42420, 'loss/train': 1.8830819129943848} +03/05/2022 15:20:20 - INFO - codeparrot_training - Skipping example with length 876 (seq_length=1024) +03/05/2022 15:20:25 - INFO - codeparrot_training - Step 42421: {'lr': 0.000413485001307383, 'samples': 21720064, 'steps': 42421, 'loss/train': 2.099813938140869} +03/05/2022 15:20:28 - INFO - codeparrot_training - Step 42422: {'lr': 0.00041348098647216993, 'samples': 21720576, 'steps': 42422, 'loss/train': 2.2930963039398193} +03/05/2022 15:20:29 - INFO - codeparrot_training - Skipping example with length 455 (seq_length=1024) +03/05/2022 15:20:33 - INFO - codeparrot_training - Step 42423: {'lr': 0.00041347697156329485, 'samples': 21721088, 'steps': 42423, 'loss/train': 2.5516021251678467} +03/05/2022 15:20:36 - INFO - codeparrot_training - Step 42424: {'lr': 0.00041347295658075955, 'samples': 21721600, 'steps': 42424, 'loss/train': 1.4078001976013184} +03/05/2022 15:20:37 - INFO - codeparrot_training - Skipping example with length 352 (seq_length=1024) +03/05/2022 15:20:41 - INFO - codeparrot_training - Step 42425: {'lr': 0.00041346894152456584, 'samples': 21722112, 'steps': 42425, 'loss/train': 1.6297658681869507} +03/05/2022 15:20:45 - INFO - codeparrot_training - Step 42426: {'lr': 0.00041346492639471555, 'samples': 21722624, 'steps': 42426, 'loss/train': 2.191555976867676} +03/05/2022 15:20:45 - INFO - codeparrot_training - Skipping example with length 127 (seq_length=1024) +03/05/2022 15:20:50 - INFO - codeparrot_training - Step 42427: {'lr': 0.0004134609111912105, 'samples': 21723136, 'steps': 42427, 'loss/train': 2.584141731262207} +03/05/2022 15:20:53 - INFO - codeparrot_training - Step 42428: {'lr': 0.00041345689591405256, 'samples': 21723648, 'steps': 42428, 'loss/train': 1.9068603515625} +03/05/2022 15:20:54 - INFO - codeparrot_training - Skipping example with length 397 (seq_length=1024) +03/05/2022 15:20:58 - INFO - codeparrot_training - Step 42429: {'lr': 0.0004134528805632434, 'samples': 21724160, 'steps': 42429, 'loss/train': 1.6674448251724243} +03/05/2022 15:21:02 - INFO - codeparrot_training - Step 42430: {'lr': 0.00041344886513878485, 'samples': 21724672, 'steps': 42430, 'loss/train': 1.8804675340652466} +03/05/2022 15:21:02 - INFO - codeparrot_training - Skipping example with length 913 (seq_length=1024) +03/05/2022 15:21:07 - INFO - codeparrot_training - Step 42431: {'lr': 0.00041344484964067873, 'samples': 21725184, 'steps': 42431, 'loss/train': 1.7256267070770264} +03/05/2022 15:21:10 - INFO - codeparrot_training - Step 42432: {'lr': 0.00041344083406892704, 'samples': 21725696, 'steps': 42432, 'loss/train': 1.6917905807495117} +03/05/2022 15:21:11 - INFO - codeparrot_training - Skipping example with length 912 (seq_length=1024) +03/05/2022 15:21:15 - INFO - codeparrot_training - Step 42433: {'lr': 0.0004134368184235313, 'samples': 21726208, 'steps': 42433, 'loss/train': 1.7010291814804077} +03/05/2022 15:21:19 - INFO - codeparrot_training - Step 42434: {'lr': 0.0004134328027044935, 'samples': 21726720, 'steps': 42434, 'loss/train': 1.6858408451080322} +03/05/2022 15:21:19 - INFO - codeparrot_training - Skipping example with length 738 (seq_length=1024) +03/05/2022 15:21:24 - INFO - codeparrot_training - Step 42435: {'lr': 0.0004134287869118154, 'samples': 21727232, 'steps': 42435, 'loss/train': 0.7728706002235413} +03/05/2022 15:21:27 - INFO - codeparrot_training - Step 42436: {'lr': 0.0004134247710454988, 'samples': 21727744, 'steps': 42436, 'loss/train': 2.1889593601226807} +03/05/2022 15:21:28 - INFO - codeparrot_training - Skipping example with length 177 (seq_length=1024) +03/05/2022 15:21:32 - INFO - codeparrot_training - Step 42437: {'lr': 0.00041342075510554554, 'samples': 21728256, 'steps': 42437, 'loss/train': 1.4320552349090576} +03/05/2022 15:21:36 - INFO - codeparrot_training - Step 42438: {'lr': 0.0004134167390919574, 'samples': 21728768, 'steps': 42438, 'loss/train': 1.6958187818527222} +03/05/2022 15:21:37 - INFO - codeparrot_training - Skipping example with length 102 (seq_length=1024) +03/05/2022 15:21:41 - INFO - codeparrot_training - Step 42439: {'lr': 0.0004134127230047362, 'samples': 21729280, 'steps': 42439, 'loss/train': 2.293339729309082} +03/05/2022 15:21:44 - INFO - codeparrot_training - Step 42440: {'lr': 0.00041340870684388375, 'samples': 21729792, 'steps': 42440, 'loss/train': 1.4697574377059937} +03/05/2022 15:21:45 - INFO - codeparrot_training - Skipping example with length 244 (seq_length=1024) +03/05/2022 15:21:49 - INFO - codeparrot_training - Step 42441: {'lr': 0.00041340469060940183, 'samples': 21730304, 'steps': 42441, 'loss/train': 1.8995330333709717} +03/05/2022 15:21:52 - INFO - codeparrot_training - Step 42442: {'lr': 0.0004134006743012923, 'samples': 21730816, 'steps': 42442, 'loss/train': 1.3152291774749756} +03/05/2022 15:21:53 - INFO - codeparrot_training - Skipping example with length 862 (seq_length=1024) +03/05/2022 15:21:58 - INFO - codeparrot_training - Step 42443: {'lr': 0.00041339665791955695, 'samples': 21731328, 'steps': 42443, 'loss/train': 2.1756083965301514} +03/05/2022 15:22:01 - INFO - codeparrot_training - Step 42444: {'lr': 0.00041339264146419757, 'samples': 21731840, 'steps': 42444, 'loss/train': 1.6310796737670898} +03/05/2022 15:22:02 - INFO - codeparrot_training - Skipping example with length 300 (seq_length=1024) +03/05/2022 15:22:06 - INFO - codeparrot_training - Step 42445: {'lr': 0.000413388624935216, 'samples': 21732352, 'steps': 42445, 'loss/train': 0.7886888980865479} +03/05/2022 15:22:09 - INFO - codeparrot_training - Step 42446: {'lr': 0.00041338460833261403, 'samples': 21732864, 'steps': 42446, 'loss/train': 1.6947680711746216} +03/05/2022 15:22:10 - INFO - codeparrot_training - Skipping example with length 129 (seq_length=1024) +03/05/2022 15:22:14 - INFO - codeparrot_training - Step 42447: {'lr': 0.0004133805916563935, 'samples': 21733376, 'steps': 42447, 'loss/train': 0.9065048098564148} +03/05/2022 15:22:18 - INFO - codeparrot_training - Step 42448: {'lr': 0.00041337657490655625, 'samples': 21733888, 'steps': 42448, 'loss/train': 2.1771962642669678} +03/05/2022 15:22:18 - INFO - codeparrot_training - Skipping example with length 836 (seq_length=1024) +03/05/2022 15:22:23 - INFO - codeparrot_training - Step 42449: {'lr': 0.00041337255808310394, 'samples': 21734400, 'steps': 42449, 'loss/train': 1.387026309967041} +03/05/2022 15:22:26 - INFO - codeparrot_training - Step 42450: {'lr': 0.0004133685411860385, 'samples': 21734912, 'steps': 42450, 'loss/train': 2.0032689571380615} +03/05/2022 15:22:27 - INFO - codeparrot_training - Skipping example with length 427 (seq_length=1024) +03/05/2022 15:22:31 - INFO - codeparrot_training - Step 42451: {'lr': 0.0004133645242153617, 'samples': 21735424, 'steps': 42451, 'loss/train': 1.8626126050949097} +03/05/2022 15:22:35 - INFO - codeparrot_training - Step 42452: {'lr': 0.0004133605071710754, 'samples': 21735936, 'steps': 42452, 'loss/train': 2.064732313156128} +03/05/2022 15:22:35 - INFO - codeparrot_training - Skipping example with length 866 (seq_length=1024) +03/05/2022 15:22:40 - INFO - codeparrot_training - Step 42453: {'lr': 0.00041335649005318133, 'samples': 21736448, 'steps': 42453, 'loss/train': 1.7235357761383057} +03/05/2022 15:22:43 - INFO - codeparrot_training - Step 42454: {'lr': 0.0004133524728616814, 'samples': 21736960, 'steps': 42454, 'loss/train': 1.4257060289382935} +03/05/2022 15:22:43 - INFO - codeparrot_training - Skipping example with length 530 (seq_length=1024) +03/05/2022 15:22:48 - INFO - codeparrot_training - Step 42455: {'lr': 0.00041334845559657735, 'samples': 21737472, 'steps': 42455, 'loss/train': 2.0949947834014893} +03/05/2022 15:22:51 - INFO - codeparrot_training - Step 42456: {'lr': 0.00041334443825787097, 'samples': 21737984, 'steps': 42456, 'loss/train': 0.9066054821014404} +03/05/2022 15:22:52 - INFO - codeparrot_training - Skipping example with length 709 (seq_length=1024) +03/05/2022 15:22:57 - INFO - codeparrot_training - Step 42457: {'lr': 0.0004133404208455642, 'samples': 21738496, 'steps': 42457, 'loss/train': 2.039267063140869} +03/05/2022 15:23:00 - INFO - codeparrot_training - Step 42458: {'lr': 0.00041333640335965865, 'samples': 21739008, 'steps': 42458, 'loss/train': 1.6362769603729248} +03/05/2022 15:23:01 - INFO - codeparrot_training - Skipping example with length 970 (seq_length=1024) +03/05/2022 15:23:05 - INFO - codeparrot_training - Step 42459: {'lr': 0.0004133323858001563, 'samples': 21739520, 'steps': 42459, 'loss/train': 2.334679365158081} +03/05/2022 15:23:08 - INFO - codeparrot_training - Step 42460: {'lr': 0.0004133283681670589, 'samples': 21740032, 'steps': 42460, 'loss/train': 2.0391411781311035} +03/05/2022 15:23:09 - INFO - codeparrot_training - Skipping example with length 159 (seq_length=1024) +03/05/2022 15:23:14 - INFO - codeparrot_training - Step 42461: {'lr': 0.0004133243504603682, 'samples': 21740544, 'steps': 42461, 'loss/train': 1.0308313369750977} +03/05/2022 15:23:17 - INFO - codeparrot_training - Step 42462: {'lr': 0.0004133203326800861, 'samples': 21741056, 'steps': 42462, 'loss/train': 1.6099565029144287} +03/05/2022 15:23:18 - INFO - codeparrot_training - Skipping example with length 712 (seq_length=1024) +03/05/2022 15:23:22 - INFO - codeparrot_training - Step 42463: {'lr': 0.0004133163148262144, 'samples': 21741568, 'steps': 42463, 'loss/train': 1.6617032289505005} +03/05/2022 15:23:25 - INFO - codeparrot_training - Step 42464: {'lr': 0.00041331229689875487, 'samples': 21742080, 'steps': 42464, 'loss/train': 1.3817763328552246} +03/05/2022 15:23:26 - INFO - codeparrot_training - Skipping example with length 92 (seq_length=1024) +03/05/2022 15:23:31 - INFO - codeparrot_training - Step 42465: {'lr': 0.0004133082788977093, 'samples': 21742592, 'steps': 42465, 'loss/train': 1.9073724746704102} +03/05/2022 15:23:34 - INFO - codeparrot_training - Step 42466: {'lr': 0.00041330426082307963, 'samples': 21743104, 'steps': 42466, 'loss/train': 0.6224821209907532} +03/05/2022 15:23:35 - INFO - codeparrot_training - Skipping example with length 311 (seq_length=1024) +03/05/2022 15:23:39 - INFO - codeparrot_training - Step 42467: {'lr': 0.0004133002426748675, 'samples': 21743616, 'steps': 42467, 'loss/train': 1.4256278276443481} +03/05/2022 15:23:42 - INFO - codeparrot_training - Step 42468: {'lr': 0.0004132962244530749, 'samples': 21744128, 'steps': 42468, 'loss/train': 1.6559779644012451} +03/05/2022 15:23:43 - INFO - codeparrot_training - Skipping example with length 651 (seq_length=1024) +03/05/2022 15:23:48 - INFO - codeparrot_training - Step 42469: {'lr': 0.0004132922061577035, 'samples': 21744640, 'steps': 42469, 'loss/train': 0.7829142808914185} +03/05/2022 15:23:51 - INFO - codeparrot_training - Step 42470: {'lr': 0.0004132881877887551, 'samples': 21745152, 'steps': 42470, 'loss/train': 6.349141597747803} +03/05/2022 15:23:52 - INFO - codeparrot_training - Skipping example with length 911 (seq_length=1024) +03/05/2022 15:23:56 - INFO - codeparrot_training - Step 42471: {'lr': 0.0004132841693462315, 'samples': 21745664, 'steps': 42471, 'loss/train': 0.40681737661361694} +03/05/2022 15:23:59 - INFO - codeparrot_training - Step 42472: {'lr': 0.0004132801508301347, 'samples': 21746176, 'steps': 42472, 'loss/train': 0.9733253121376038} +03/05/2022 15:24:00 - INFO - codeparrot_training - Skipping example with length 833 (seq_length=1024) +03/05/2022 15:24:05 - INFO - codeparrot_training - Step 42473: {'lr': 0.0004132761322404663, 'samples': 21746688, 'steps': 42473, 'loss/train': 1.6759318113327026} +03/05/2022 15:24:08 - INFO - codeparrot_training - Step 42474: {'lr': 0.00041327211357722825, 'samples': 21747200, 'steps': 42474, 'loss/train': 1.726747751235962} +03/05/2022 15:24:09 - INFO - codeparrot_training - Skipping example with length 751 (seq_length=1024) +03/05/2022 15:24:13 - INFO - codeparrot_training - Step 42475: {'lr': 0.00041326809484042235, 'samples': 21747712, 'steps': 42475, 'loss/train': 1.0513757467269897} +03/05/2022 15:24:16 - INFO - codeparrot_training - Step 42476: {'lr': 0.0004132640760300503, 'samples': 21748224, 'steps': 42476, 'loss/train': 1.4438413381576538} +03/05/2022 15:24:18 - INFO - codeparrot_training - Skipping example with length 876 (seq_length=1024) +03/05/2022 15:24:21 - INFO - codeparrot_training - Step 42477: {'lr': 0.000413260057146114, 'samples': 21748736, 'steps': 42477, 'loss/train': 1.534231424331665} +03/05/2022 15:24:25 - INFO - codeparrot_training - Step 42478: {'lr': 0.00041325603818861517, 'samples': 21749248, 'steps': 42478, 'loss/train': 2.1679012775421143} +03/05/2022 15:24:26 - INFO - codeparrot_training - Skipping example with length 990 (seq_length=1024) +03/05/2022 15:24:30 - INFO - codeparrot_training - Step 42479: {'lr': 0.0004132520191575558, 'samples': 21749760, 'steps': 42479, 'loss/train': 2.3027191162109375} +03/05/2022 15:24:33 - INFO - codeparrot_training - Step 42480: {'lr': 0.0004132480000529375, 'samples': 21750272, 'steps': 42480, 'loss/train': 1.5098888874053955} +03/05/2022 15:24:34 - INFO - codeparrot_training - Skipping example with length 786 (seq_length=1024) +03/05/2022 15:24:39 - INFO - codeparrot_training - Step 42481: {'lr': 0.0004132439808747622, 'samples': 21750784, 'steps': 42481, 'loss/train': 2.518982410430908} +03/05/2022 15:24:42 - INFO - codeparrot_training - Step 42482: {'lr': 0.00041323996162303167, 'samples': 21751296, 'steps': 42482, 'loss/train': 1.8602962493896484} +03/05/2022 15:24:43 - INFO - codeparrot_training - Skipping example with length 50 (seq_length=1024) +03/05/2022 15:24:47 - INFO - codeparrot_training - Step 42483: {'lr': 0.0004132359422977477, 'samples': 21751808, 'steps': 42483, 'loss/train': 6.649305820465088} +03/05/2022 15:24:50 - INFO - codeparrot_training - Step 42484: {'lr': 0.0004132319228989122, 'samples': 21752320, 'steps': 42484, 'loss/train': 1.442028284072876} +03/05/2022 15:24:53 - INFO - codeparrot_training - Skipping example with length 669 (seq_length=1024) +03/05/2022 15:24:56 - INFO - codeparrot_training - Step 42485: {'lr': 0.00041322790342652695, 'samples': 21752832, 'steps': 42485, 'loss/train': 1.2186248302459717} +03/05/2022 15:24:59 - INFO - codeparrot_training - Step 42486: {'lr': 0.00041322388388059366, 'samples': 21753344, 'steps': 42486, 'loss/train': 1.7214487791061401} +03/05/2022 15:25:02 - INFO - codeparrot_training - Step 42487: {'lr': 0.0004132198642611142, 'samples': 21753856, 'steps': 42487, 'loss/train': 1.349733591079712} +03/05/2022 15:25:02 - INFO - codeparrot_training - Skipping example with length 113 (seq_length=1024) +03/05/2022 15:25:08 - INFO - codeparrot_training - Step 42488: {'lr': 0.0004132158445680904, 'samples': 21754368, 'steps': 42488, 'loss/train': 1.7935656309127808} +03/05/2022 15:25:11 - INFO - codeparrot_training - Step 42489: {'lr': 0.0004132118248015241, 'samples': 21754880, 'steps': 42489, 'loss/train': 1.5895205736160278} +03/05/2022 15:25:11 - INFO - codeparrot_training - Skipping example with length 556 (seq_length=1024) +03/05/2022 15:25:16 - INFO - codeparrot_training - Step 42490: {'lr': 0.000413207804961417, 'samples': 21755392, 'steps': 42490, 'loss/train': 1.867658019065857} +03/05/2022 15:25:19 - INFO - codeparrot_training - Skipping example with length 416 (seq_length=1024) +03/05/2022 15:25:22 - INFO - codeparrot_training - Step 42491: {'lr': 0.000413203785047771, 'samples': 21755904, 'steps': 42491, 'loss/train': 0.2641412913799286} +03/05/2022 15:25:25 - INFO - codeparrot_training - Step 42492: {'lr': 0.00041319976506058785, 'samples': 21756416, 'steps': 42492, 'loss/train': 1.2600747346878052} +03/05/2022 15:25:28 - INFO - codeparrot_training - Step 42493: {'lr': 0.00041319574499986957, 'samples': 21756928, 'steps': 42493, 'loss/train': 1.9806921482086182} +03/05/2022 15:25:28 - INFO - codeparrot_training - Skipping example with length 861 (seq_length=1024) +03/05/2022 15:25:34 - INFO - codeparrot_training - Step 42494: {'lr': 0.0004131917248656177, 'samples': 21757440, 'steps': 42494, 'loss/train': 1.6440340280532837} +03/05/2022 15:25:37 - INFO - codeparrot_training - Step 42495: {'lr': 0.0004131877046578341, 'samples': 21757952, 'steps': 42495, 'loss/train': 1.8518306016921997} +03/05/2022 15:25:37 - INFO - codeparrot_training - Skipping example with length 960 (seq_length=1024) +03/05/2022 15:25:42 - INFO - codeparrot_training - Step 42496: {'lr': 0.0004131836843765207, 'samples': 21758464, 'steps': 42496, 'loss/train': 1.8813645839691162} +03/05/2022 15:25:45 - INFO - codeparrot_training - Step 42497: {'lr': 0.00041317966402167923, 'samples': 21758976, 'steps': 42497, 'loss/train': 1.596298336982727} +03/05/2022 15:25:45 - INFO - codeparrot_training - Skipping example with length 512 (seq_length=1024) +03/05/2022 15:25:51 - INFO - codeparrot_training - Step 42498: {'lr': 0.0004131756435933115, 'samples': 21759488, 'steps': 42498, 'loss/train': 0.694696843624115} +03/05/2022 15:25:54 - INFO - codeparrot_training - Step 42499: {'lr': 0.00041317162309141944, 'samples': 21760000, 'steps': 42499, 'loss/train': 1.6641772985458374} +03/05/2022 15:25:54 - INFO - codeparrot_training - Skipping example with length 710 (seq_length=1024) +03/05/2022 15:25:59 - INFO - codeparrot_training - Step 42500: {'lr': 0.00041316760251600474, 'samples': 21760512, 'steps': 42500, 'loss/train': 1.6383837461471558} +03/05/2022 15:26:02 - INFO - codeparrot_training - Step 42501: {'lr': 0.00041316358186706915, 'samples': 21761024, 'steps': 42501, 'loss/train': 1.6520744562149048} +03/05/2022 15:26:02 - INFO - codeparrot_training - Skipping example with length 275 (seq_length=1024) +03/05/2022 15:26:08 - INFO - codeparrot_training - Step 42502: {'lr': 0.0004131595611446146, 'samples': 21761536, 'steps': 42502, 'loss/train': 1.884994626045227} +03/05/2022 15:26:11 - INFO - codeparrot_training - Step 42503: {'lr': 0.0004131555403486429, 'samples': 21762048, 'steps': 42503, 'loss/train': 7.166080951690674} +03/05/2022 15:26:11 - INFO - codeparrot_training - Skipping example with length 662 (seq_length=1024) +03/05/2022 15:26:16 - INFO - codeparrot_training - Step 42504: {'lr': 0.00041315151947915577, 'samples': 21762560, 'steps': 42504, 'loss/train': 1.6574573516845703} +03/05/2022 15:26:19 - INFO - codeparrot_training - Step 42505: {'lr': 0.0004131474985361551, 'samples': 21763072, 'steps': 42505, 'loss/train': 2.2335896492004395} +03/05/2022 15:26:20 - INFO - codeparrot_training - Skipping example with length 479 (seq_length=1024) +03/05/2022 15:26:24 - INFO - codeparrot_training - Step 42506: {'lr': 0.0004131434775196428, 'samples': 21763584, 'steps': 42506, 'loss/train': 1.9857820272445679} +03/05/2022 15:26:28 - INFO - codeparrot_training - Step 42507: {'lr': 0.0004131394564296205, 'samples': 21764096, 'steps': 42507, 'loss/train': 1.2835943698883057} +03/05/2022 15:26:28 - INFO - codeparrot_training - Skipping example with length 275 (seq_length=1024) +03/05/2022 15:26:33 - INFO - codeparrot_training - Step 42508: {'lr': 0.00041313543526609, 'samples': 21764608, 'steps': 42508, 'loss/train': 1.729771614074707} +03/05/2022 15:26:36 - INFO - codeparrot_training - Step 42509: {'lr': 0.00041313141402905324, 'samples': 21765120, 'steps': 42509, 'loss/train': 2.312286138534546} +03/05/2022 15:26:37 - INFO - codeparrot_training - Skipping example with length 897 (seq_length=1024) +03/05/2022 15:26:41 - INFO - codeparrot_training - Step 42510: {'lr': 0.00041312739271851196, 'samples': 21765632, 'steps': 42510, 'loss/train': 1.4985514879226685} +03/05/2022 15:26:45 - INFO - codeparrot_training - Step 42511: {'lr': 0.0004131233713344681, 'samples': 21766144, 'steps': 42511, 'loss/train': 1.6756319999694824} +03/05/2022 15:26:45 - INFO - codeparrot_training - Skipping example with length 274 (seq_length=1024) +03/05/2022 15:26:50 - INFO - codeparrot_training - Step 42512: {'lr': 0.0004131193498769232, 'samples': 21766656, 'steps': 42512, 'loss/train': 1.8318690061569214} +03/05/2022 15:26:53 - INFO - codeparrot_training - Step 42513: {'lr': 0.0004131153283458794, 'samples': 21767168, 'steps': 42513, 'loss/train': 1.606142282485962} +03/05/2022 15:26:54 - INFO - codeparrot_training - Skipping example with length 852 (seq_length=1024) +03/05/2022 15:26:58 - INFO - codeparrot_training - Step 42514: {'lr': 0.00041311130674133824, 'samples': 21767680, 'steps': 42514, 'loss/train': 1.9051495790481567} +03/05/2022 15:27:02 - INFO - codeparrot_training - Step 42515: {'lr': 0.0004131072850633017, 'samples': 21768192, 'steps': 42515, 'loss/train': 2.066157579421997} +03/05/2022 15:27:02 - INFO - codeparrot_training - Skipping example with length 78 (seq_length=1024) +03/05/2022 15:27:07 - INFO - codeparrot_training - Step 42516: {'lr': 0.0004131032633117715, 'samples': 21768704, 'steps': 42516, 'loss/train': 1.432417631149292} +03/05/2022 15:27:10 - INFO - codeparrot_training - Step 42517: {'lr': 0.0004130992414867495, 'samples': 21769216, 'steps': 42517, 'loss/train': 2.0037105083465576} +03/05/2022 15:27:11 - INFO - codeparrot_training - Skipping example with length 748 (seq_length=1024) +03/05/2022 15:27:15 - INFO - codeparrot_training - Step 42518: {'lr': 0.0004130952195882375, 'samples': 21769728, 'steps': 42518, 'loss/train': 0.6045821309089661} +03/05/2022 15:27:19 - INFO - codeparrot_training - Step 42519: {'lr': 0.0004130911976162373, 'samples': 21770240, 'steps': 42519, 'loss/train': 1.6308612823486328} +03/05/2022 15:27:19 - INFO - codeparrot_training - Skipping example with length 207 (seq_length=1024) +03/05/2022 15:27:24 - INFO - codeparrot_training - Step 42520: {'lr': 0.0004130871755707508, 'samples': 21770752, 'steps': 42520, 'loss/train': 2.060241460800171} +03/05/2022 15:27:27 - INFO - codeparrot_training - Step 42521: {'lr': 0.0004130831534517796, 'samples': 21771264, 'steps': 42521, 'loss/train': 2.0665388107299805} +03/05/2022 15:27:28 - INFO - codeparrot_training - Skipping example with length 652 (seq_length=1024) +03/05/2022 15:27:32 - INFO - codeparrot_training - Step 42522: {'lr': 0.00041307913125932574, 'samples': 21771776, 'steps': 42522, 'loss/train': 0.8008148670196533} +03/05/2022 15:27:36 - INFO - codeparrot_training - Step 42523: {'lr': 0.00041307510899339097, 'samples': 21772288, 'steps': 42523, 'loss/train': 1.3733952045440674} +03/05/2022 15:27:36 - INFO - codeparrot_training - Skipping example with length 330 (seq_length=1024) +03/05/2022 15:27:41 - INFO - codeparrot_training - Step 42524: {'lr': 0.00041307108665397695, 'samples': 21772800, 'steps': 42524, 'loss/train': 1.640984058380127} +03/05/2022 15:27:44 - INFO - codeparrot_training - Step 42525: {'lr': 0.00041306706424108563, 'samples': 21773312, 'steps': 42525, 'loss/train': 1.4104074239730835} +03/05/2022 15:27:46 - INFO - codeparrot_training - Skipping example with length 514 (seq_length=1024) +03/05/2022 15:27:49 - INFO - codeparrot_training - Step 42526: {'lr': 0.0004130630417547189, 'samples': 21773824, 'steps': 42526, 'loss/train': 0.3811284899711609} +03/05/2022 15:27:53 - INFO - codeparrot_training - Step 42527: {'lr': 0.00041305901919487845, 'samples': 21774336, 'steps': 42527, 'loss/train': 2.302508592605591} +03/05/2022 15:27:54 - INFO - codeparrot_training - Skipping example with length 280 (seq_length=1024) +03/05/2022 15:27:58 - INFO - codeparrot_training - Step 42528: {'lr': 0.0004130549965615661, 'samples': 21774848, 'steps': 42528, 'loss/train': 0.9531691670417786} +03/05/2022 15:28:01 - INFO - codeparrot_training - Step 42529: {'lr': 0.00041305097385478375, 'samples': 21775360, 'steps': 42529, 'loss/train': 1.5098739862442017} +03/05/2022 15:28:02 - INFO - codeparrot_training - Skipping example with length 381 (seq_length=1024) +03/05/2022 15:28:07 - INFO - codeparrot_training - Step 42530: {'lr': 0.00041304695107453307, 'samples': 21775872, 'steps': 42530, 'loss/train': 0.6601076722145081} +03/05/2022 15:28:10 - INFO - codeparrot_training - Step 42531: {'lr': 0.000413042928220816, 'samples': 21776384, 'steps': 42531, 'loss/train': 1.3867470026016235} +03/05/2022 15:28:11 - INFO - codeparrot_training - Skipping example with length 51 (seq_length=1024) +03/05/2022 15:28:15 - INFO - codeparrot_training - Step 42532: {'lr': 0.0004130389052936342, 'samples': 21776896, 'steps': 42532, 'loss/train': 0.9817762970924377} +03/05/2022 15:28:18 - INFO - codeparrot_training - Step 42533: {'lr': 0.0004130348822929897, 'samples': 21777408, 'steps': 42533, 'loss/train': 2.1084346771240234} +03/05/2022 15:28:19 - INFO - codeparrot_training - Skipping example with length 341 (seq_length=1024) +03/05/2022 15:28:24 - INFO - codeparrot_training - Step 42534: {'lr': 0.0004130308592188842, 'samples': 21777920, 'steps': 42534, 'loss/train': 2.0390067100524902} +03/05/2022 15:28:27 - INFO - codeparrot_training - Step 42535: {'lr': 0.0004130268360713194, 'samples': 21778432, 'steps': 42535, 'loss/train': 1.0492684841156006} +03/05/2022 15:28:28 - INFO - codeparrot_training - Skipping example with length 809 (seq_length=1024) +03/05/2022 15:28:32 - INFO - codeparrot_training - Step 42536: {'lr': 0.0004130228128502973, 'samples': 21778944, 'steps': 42536, 'loss/train': 2.034926176071167} +03/05/2022 15:28:35 - INFO - codeparrot_training - Step 42537: {'lr': 0.0004130187895558196, 'samples': 21779456, 'steps': 42537, 'loss/train': 1.8263274431228638} +03/05/2022 15:28:36 - INFO - codeparrot_training - Skipping example with length 515 (seq_length=1024) +03/05/2022 15:28:41 - INFO - codeparrot_training - Step 42538: {'lr': 0.00041301476618788827, 'samples': 21779968, 'steps': 42538, 'loss/train': 1.8252613544464111} +03/05/2022 15:28:44 - INFO - codeparrot_training - Step 42539: {'lr': 0.0004130107427465049, 'samples': 21780480, 'steps': 42539, 'loss/train': 1.6434887647628784} +03/05/2022 15:28:45 - INFO - codeparrot_training - Skipping example with length 20 (seq_length=1024) +03/05/2022 15:28:49 - INFO - codeparrot_training - Step 42540: {'lr': 0.00041300671923167145, 'samples': 21780992, 'steps': 42540, 'loss/train': 1.6154977083206177} +03/05/2022 15:28:52 - INFO - codeparrot_training - Step 42541: {'lr': 0.00041300269564338956, 'samples': 21781504, 'steps': 42541, 'loss/train': 1.955030918121338} +03/05/2022 15:28:53 - INFO - codeparrot_training - Skipping example with length 419 (seq_length=1024) +03/05/2022 15:28:57 - INFO - codeparrot_training - Step 42542: {'lr': 0.0004129986719816613, 'samples': 21782016, 'steps': 42542, 'loss/train': 0.5905227661132812} +03/05/2022 15:29:01 - INFO - codeparrot_training - Step 42543: {'lr': 0.0004129946482464883, 'samples': 21782528, 'steps': 42543, 'loss/train': 1.2630277872085571} +03/05/2022 15:29:01 - INFO - codeparrot_training - Skipping example with length 869 (seq_length=1024) +03/05/2022 15:29:06 - INFO - codeparrot_training - Step 42544: {'lr': 0.0004129906244378724, 'samples': 21783040, 'steps': 42544, 'loss/train': 3.113440990447998} +03/05/2022 15:29:09 - INFO - codeparrot_training - Step 42545: {'lr': 0.0004129866005558155, 'samples': 21783552, 'steps': 42545, 'loss/train': 5.0117411613464355} +03/05/2022 15:29:11 - INFO - codeparrot_training - Skipping example with length 321 (seq_length=1024) +03/05/2022 15:29:14 - INFO - codeparrot_training - Step 42546: {'lr': 0.00041298257660031935, 'samples': 21784064, 'steps': 42546, 'loss/train': 1.933379888534546} +03/05/2022 15:29:18 - INFO - codeparrot_training - Step 42547: {'lr': 0.00041297855257138577, 'samples': 21784576, 'steps': 42547, 'loss/train': 1.893028974533081} +03/05/2022 15:29:19 - INFO - codeparrot_training - Skipping example with length 427 (seq_length=1024) +03/05/2022 15:29:23 - INFO - codeparrot_training - Step 42548: {'lr': 0.0004129745284690165, 'samples': 21785088, 'steps': 42548, 'loss/train': 1.5394604206085205} +03/05/2022 15:29:26 - INFO - codeparrot_training - Step 42549: {'lr': 0.0004129705042932135, 'samples': 21785600, 'steps': 42549, 'loss/train': 1.9452452659606934} +03/05/2022 15:29:28 - INFO - codeparrot_training - Skipping example with length 912 (seq_length=1024) +03/05/2022 15:29:31 - INFO - codeparrot_training - Step 42550: {'lr': 0.0004129664800439785, 'samples': 21786112, 'steps': 42550, 'loss/train': 1.3173611164093018} +03/05/2022 15:29:35 - INFO - codeparrot_training - Step 42551: {'lr': 0.0004129624557213133, 'samples': 21786624, 'steps': 42551, 'loss/train': 1.1900880336761475} +03/05/2022 15:29:36 - INFO - codeparrot_training - Skipping example with length 752 (seq_length=1024) +03/05/2022 15:29:40 - INFO - codeparrot_training - Step 42552: {'lr': 0.00041295843132521973, 'samples': 21787136, 'steps': 42552, 'loss/train': 0.641370952129364} +03/05/2022 15:29:43 - INFO - codeparrot_training - Step 42553: {'lr': 0.0004129544068556996, 'samples': 21787648, 'steps': 42553, 'loss/train': 1.1269659996032715} +03/05/2022 15:29:45 - INFO - codeparrot_training - Skipping example with length 502 (seq_length=1024) +03/05/2022 15:29:48 - INFO - codeparrot_training - Step 42554: {'lr': 0.00041295038231275473, 'samples': 21788160, 'steps': 42554, 'loss/train': 1.3340009450912476} +03/05/2022 15:29:52 - INFO - codeparrot_training - Step 42555: {'lr': 0.0004129463576963869, 'samples': 21788672, 'steps': 42555, 'loss/train': 1.7606674432754517} +03/05/2022 15:29:53 - INFO - codeparrot_training - Skipping example with length 717 (seq_length=1024) +03/05/2022 15:29:57 - INFO - codeparrot_training - Step 42556: {'lr': 0.000412942333006598, 'samples': 21789184, 'steps': 42556, 'loss/train': 1.4675034284591675} +03/05/2022 15:30:00 - INFO - codeparrot_training - Step 42557: {'lr': 0.0004129383082433898, 'samples': 21789696, 'steps': 42557, 'loss/train': 0.7369664907455444} +03/05/2022 15:30:02 - INFO - codeparrot_training - Skipping example with length 324 (seq_length=1024) +03/05/2022 15:30:05 - INFO - codeparrot_training - Step 42558: {'lr': 0.0004129342834067641, 'samples': 21790208, 'steps': 42558, 'loss/train': 1.4568068981170654} +03/05/2022 15:30:08 - INFO - codeparrot_training - Step 42559: {'lr': 0.0004129302584967227, 'samples': 21790720, 'steps': 42559, 'loss/train': 1.5860685110092163} +03/05/2022 15:30:10 - INFO - codeparrot_training - Skipping example with length 763 (seq_length=1024) +03/05/2022 15:30:14 - INFO - codeparrot_training - Step 42560: {'lr': 0.0004129262335132675, 'samples': 21791232, 'steps': 42560, 'loss/train': 2.3329436779022217} +03/05/2022 15:30:17 - INFO - codeparrot_training - Step 42561: {'lr': 0.00041292220845640023, 'samples': 21791744, 'steps': 42561, 'loss/train': 1.9236083030700684} +03/05/2022 15:30:19 - INFO - codeparrot_training - Skipping example with length 908 (seq_length=1024) +03/05/2022 15:30:22 - INFO - codeparrot_training - Step 42562: {'lr': 0.00041291818332612275, 'samples': 21792256, 'steps': 42562, 'loss/train': 1.3149757385253906} +03/05/2022 15:30:26 - INFO - codeparrot_training - Step 42563: {'lr': 0.00041291415812243676, 'samples': 21792768, 'steps': 42563, 'loss/train': 1.8848885297775269} +03/05/2022 15:30:27 - INFO - codeparrot_training - Skipping example with length 978 (seq_length=1024) +03/05/2022 15:30:31 - INFO - codeparrot_training - Step 42564: {'lr': 0.0004129101328453442, 'samples': 21793280, 'steps': 42564, 'loss/train': 0.8728896379470825} +03/05/2022 15:30:34 - INFO - codeparrot_training - Step 42565: {'lr': 0.0004129061074948469, 'samples': 21793792, 'steps': 42565, 'loss/train': 0.6925628781318665} +03/05/2022 15:30:36 - INFO - codeparrot_training - Skipping example with length 991 (seq_length=1024) +03/05/2022 15:30:39 - INFO - codeparrot_training - Step 42566: {'lr': 0.0004129020820709466, 'samples': 21794304, 'steps': 42566, 'loss/train': 1.3255623579025269} +03/05/2022 15:30:42 - INFO - codeparrot_training - Step 42567: {'lr': 0.00041289805657364516, 'samples': 21794816, 'steps': 42567, 'loss/train': 2.0466012954711914} +03/05/2022 15:30:44 - INFO - codeparrot_training - Skipping example with length 1010 (seq_length=1024) +03/05/2022 15:30:48 - INFO - codeparrot_training - Step 42568: {'lr': 0.0004128940310029443, 'samples': 21795328, 'steps': 42568, 'loss/train': 1.6160099506378174} +03/05/2022 15:30:51 - INFO - codeparrot_training - Step 42569: {'lr': 0.0004128900053588459, 'samples': 21795840, 'steps': 42569, 'loss/train': 2.4752273559570312} +03/05/2022 15:30:53 - INFO - codeparrot_training - Skipping example with length 118 (seq_length=1024) +03/05/2022 15:30:56 - INFO - codeparrot_training - Step 42570: {'lr': 0.00041288597964135186, 'samples': 21796352, 'steps': 42570, 'loss/train': 1.463428020477295} +03/05/2022 15:30:59 - INFO - codeparrot_training - Step 42571: {'lr': 0.0004128819538504639, 'samples': 21796864, 'steps': 42571, 'loss/train': 1.576080560684204} +03/05/2022 15:31:01 - INFO - codeparrot_training - Skipping example with length 421 (seq_length=1024) +03/05/2022 15:31:05 - INFO - codeparrot_training - Step 42572: {'lr': 0.00041287792798618374, 'samples': 21797376, 'steps': 42572, 'loss/train': 1.3745331764221191} +03/05/2022 15:31:08 - INFO - codeparrot_training - Step 42573: {'lr': 0.00041287390204851343, 'samples': 21797888, 'steps': 42573, 'loss/train': 0.7311790585517883} +03/05/2022 15:31:10 - INFO - codeparrot_training - Skipping example with length 41 (seq_length=1024) +03/05/2022 15:31:13 - INFO - codeparrot_training - Step 42574: {'lr': 0.0004128698760374546, 'samples': 21798400, 'steps': 42574, 'loss/train': 1.443576693534851} +03/05/2022 15:31:17 - INFO - codeparrot_training - Step 42575: {'lr': 0.0004128658499530091, 'samples': 21798912, 'steps': 42575, 'loss/train': 1.5759015083312988} +03/05/2022 15:31:18 - INFO - codeparrot_training - Skipping example with length 688 (seq_length=1024) +03/05/2022 15:31:22 - INFO - codeparrot_training - Step 42576: {'lr': 0.00041286182379517876, 'samples': 21799424, 'steps': 42576, 'loss/train': 1.5545846223831177} +03/05/2022 15:31:25 - INFO - codeparrot_training - Step 42577: {'lr': 0.00041285779756396543, 'samples': 21799936, 'steps': 42577, 'loss/train': 1.825769305229187} +03/05/2022 15:31:27 - INFO - codeparrot_training - Skipping example with length 944 (seq_length=1024) +03/05/2022 15:31:30 - INFO - codeparrot_training - Step 42578: {'lr': 0.00041285377125937085, 'samples': 21800448, 'steps': 42578, 'loss/train': 2.285174608230591} +03/05/2022 15:31:34 - INFO - codeparrot_training - Step 42579: {'lr': 0.0004128497448813969, 'samples': 21800960, 'steps': 42579, 'loss/train': 1.9578672647476196} +03/05/2022 15:31:35 - INFO - codeparrot_training - Skipping example with length 1014 (seq_length=1024) +03/05/2022 15:31:39 - INFO - codeparrot_training - Step 42580: {'lr': 0.0004128457184300454, 'samples': 21801472, 'steps': 42580, 'loss/train': 2.442570447921753} +03/05/2022 15:31:42 - INFO - codeparrot_training - Step 42581: {'lr': 0.0004128416919053181, 'samples': 21801984, 'steps': 42581, 'loss/train': 1.6674573421478271} +03/05/2022 15:31:44 - INFO - codeparrot_training - Skipping example with length 65 (seq_length=1024) +03/05/2022 15:31:48 - INFO - codeparrot_training - Step 42582: {'lr': 0.0004128376653072168, 'samples': 21802496, 'steps': 42582, 'loss/train': 1.295309066772461} +03/05/2022 15:31:51 - INFO - codeparrot_training - Step 42583: {'lr': 0.0004128336386357434, 'samples': 21803008, 'steps': 42583, 'loss/train': 0.7511467337608337} +03/05/2022 15:31:54 - INFO - codeparrot_training - Step 42584: {'lr': 0.0004128296118908997, 'samples': 21803520, 'steps': 42584, 'loss/train': 0.8100451231002808} +03/05/2022 15:31:56 - INFO - codeparrot_training - Skipping example with length 296 (seq_length=1024) +03/05/2022 15:32:00 - INFO - codeparrot_training - Step 42585: {'lr': 0.0004128255850726874, 'samples': 21804032, 'steps': 42585, 'loss/train': 1.252792239189148} +03/05/2022 15:32:03 - INFO - codeparrot_training - Step 42586: {'lr': 0.0004128215581811085, 'samples': 21804544, 'steps': 42586, 'loss/train': 2.2300076484680176} +03/05/2022 15:32:04 - INFO - codeparrot_training - Skipping example with length 588 (seq_length=1024) +03/05/2022 15:32:08 - INFO - codeparrot_training - Step 42587: {'lr': 0.0004128175312161647, 'samples': 21805056, 'steps': 42587, 'loss/train': 2.2272799015045166} +03/05/2022 15:32:11 - INFO - codeparrot_training - Step 42588: {'lr': 0.00041281350417785777, 'samples': 21805568, 'steps': 42588, 'loss/train': 1.7098994255065918} +03/05/2022 15:32:13 - INFO - codeparrot_training - Skipping example with length 777 (seq_length=1024) +03/05/2022 15:32:17 - INFO - codeparrot_training - Step 42589: {'lr': 0.00041280947706618965, 'samples': 21806080, 'steps': 42589, 'loss/train': 2.0035574436187744} +03/05/2022 15:32:20 - INFO - codeparrot_training - Step 42590: {'lr': 0.0004128054498811621, 'samples': 21806592, 'steps': 42590, 'loss/train': 1.497901439666748} +03/05/2022 15:32:21 - INFO - codeparrot_training - Skipping example with length 492 (seq_length=1024) +03/05/2022 15:32:25 - INFO - codeparrot_training - Step 42591: {'lr': 0.0004128014226227769, 'samples': 21807104, 'steps': 42591, 'loss/train': 1.1938413381576538} +03/05/2022 15:32:28 - INFO - codeparrot_training - Step 42592: {'lr': 0.00041279739529103586, 'samples': 21807616, 'steps': 42592, 'loss/train': 1.180180549621582} +03/05/2022 15:32:30 - INFO - codeparrot_training - Skipping example with length 636 (seq_length=1024) +03/05/2022 15:32:34 - INFO - codeparrot_training - Step 42593: {'lr': 0.0004127933678859409, 'samples': 21808128, 'steps': 42593, 'loss/train': 1.5542253255844116} +03/05/2022 15:32:37 - INFO - codeparrot_training - Step 42594: {'lr': 0.00041278934040749375, 'samples': 21808640, 'steps': 42594, 'loss/train': 1.9486687183380127} +03/05/2022 15:32:38 - INFO - codeparrot_training - Skipping example with length 437 (seq_length=1024) +03/05/2022 15:32:42 - INFO - codeparrot_training - Step 42595: {'lr': 0.0004127853128556962, 'samples': 21809152, 'steps': 42595, 'loss/train': 1.6040079593658447} +03/05/2022 15:32:45 - INFO - codeparrot_training - Step 42596: {'lr': 0.00041278128523055015, 'samples': 21809664, 'steps': 42596, 'loss/train': 2.4169797897338867} +03/05/2022 15:32:47 - INFO - codeparrot_training - Skipping example with length 259 (seq_length=1024) +03/05/2022 15:32:50 - INFO - codeparrot_training - Step 42597: {'lr': 0.0004127772575320573, 'samples': 21810176, 'steps': 42597, 'loss/train': 1.780341386795044} +03/05/2022 15:32:54 - INFO - codeparrot_training - Step 42598: {'lr': 0.0004127732297602196, 'samples': 21810688, 'steps': 42598, 'loss/train': 1.6631861925125122} +03/05/2022 15:32:55 - INFO - codeparrot_training - Skipping example with length 480 (seq_length=1024) +03/05/2022 15:32:59 - INFO - codeparrot_training - Step 42599: {'lr': 0.0004127692019150387, 'samples': 21811200, 'steps': 42599, 'loss/train': 1.371807336807251} +03/05/2022 15:33:02 - INFO - codeparrot_training - Step 42600: {'lr': 0.00041276517399651657, 'samples': 21811712, 'steps': 42600, 'loss/train': 0.15567287802696228} +03/05/2022 15:33:04 - INFO - codeparrot_training - Skipping example with length 723 (seq_length=1024) +03/05/2022 15:33:07 - INFO - codeparrot_training - Step 42601: {'lr': 0.00041276114600465497, 'samples': 21812224, 'steps': 42601, 'loss/train': 2.231785297393799} +03/05/2022 15:33:10 - INFO - codeparrot_training - Step 42602: {'lr': 0.0004127571179394557, 'samples': 21812736, 'steps': 42602, 'loss/train': 1.3226207494735718} +03/05/2022 15:33:12 - INFO - codeparrot_training - Skipping example with length 848 (seq_length=1024) +03/05/2022 15:33:16 - INFO - codeparrot_training - Step 42603: {'lr': 0.0004127530898009205, 'samples': 21813248, 'steps': 42603, 'loss/train': 1.6350531578063965} +03/05/2022 15:33:19 - INFO - codeparrot_training - Step 42604: {'lr': 0.00041274906158905137, 'samples': 21813760, 'steps': 42604, 'loss/train': 1.4761563539505005} +03/05/2022 15:33:21 - INFO - codeparrot_training - Skipping example with length 597 (seq_length=1024) +03/05/2022 15:33:24 - INFO - codeparrot_training - Step 42605: {'lr': 0.00041274503330384997, 'samples': 21814272, 'steps': 42605, 'loss/train': 2.2011334896087646} +03/05/2022 15:33:27 - INFO - codeparrot_training - Step 42606: {'lr': 0.0004127410049453182, 'samples': 21814784, 'steps': 42606, 'loss/train': 1.2807420492172241} +03/05/2022 15:33:29 - INFO - codeparrot_training - Skipping example with length 592 (seq_length=1024) +03/05/2022 15:33:33 - INFO - codeparrot_training - Step 42607: {'lr': 0.00041273697651345785, 'samples': 21815296, 'steps': 42607, 'loss/train': 1.0556920766830444} +03/05/2022 15:33:36 - INFO - codeparrot_training - Step 42608: {'lr': 0.00041273294800827075, 'samples': 21815808, 'steps': 42608, 'loss/train': 1.9606720209121704} +03/05/2022 15:33:37 - INFO - codeparrot_training - Skipping example with length 194 (seq_length=1024) +03/05/2022 15:33:41 - INFO - codeparrot_training - Step 42609: {'lr': 0.00041272891942975863, 'samples': 21816320, 'steps': 42609, 'loss/train': 1.7074124813079834} +03/05/2022 15:33:44 - INFO - codeparrot_training - Step 42610: {'lr': 0.00041272489077792343, 'samples': 21816832, 'steps': 42610, 'loss/train': 2.236747980117798} +03/05/2022 15:33:46 - INFO - codeparrot_training - Skipping example with length 990 (seq_length=1024) +03/05/2022 15:33:49 - INFO - codeparrot_training - Step 42611: {'lr': 0.0004127208620527669, 'samples': 21817344, 'steps': 42611, 'loss/train': 1.8775275945663452} +03/05/2022 15:33:53 - INFO - codeparrot_training - Step 42612: {'lr': 0.00041271683325429075, 'samples': 21817856, 'steps': 42612, 'loss/train': 1.8763192892074585} +03/05/2022 15:33:54 - INFO - codeparrot_training - Skipping example with length 724 (seq_length=1024) +03/05/2022 15:33:58 - INFO - codeparrot_training - Step 42613: {'lr': 0.00041271280438249705, 'samples': 21818368, 'steps': 42613, 'loss/train': 1.4818907976150513} +03/05/2022 15:34:01 - INFO - codeparrot_training - Step 42614: {'lr': 0.00041270877543738744, 'samples': 21818880, 'steps': 42614, 'loss/train': 1.4940688610076904} +03/05/2022 15:34:03 - INFO - codeparrot_training - Skipping example with length 378 (seq_length=1024) +03/05/2022 15:34:06 - INFO - codeparrot_training - Step 42615: {'lr': 0.0004127047464189637, 'samples': 21819392, 'steps': 42615, 'loss/train': 1.1418299674987793} +03/05/2022 15:34:10 - INFO - codeparrot_training - Step 42616: {'lr': 0.0004127007173272278, 'samples': 21819904, 'steps': 42616, 'loss/train': 2.095956325531006} +03/05/2022 15:34:11 - INFO - codeparrot_training - Skipping example with length 699 (seq_length=1024) +03/05/2022 15:34:15 - INFO - codeparrot_training - Step 42617: {'lr': 0.0004126966881621814, 'samples': 21820416, 'steps': 42617, 'loss/train': 1.5094927549362183} +03/05/2022 15:34:18 - INFO - codeparrot_training - Step 42618: {'lr': 0.0004126926589238264, 'samples': 21820928, 'steps': 42618, 'loss/train': 1.07016122341156} +03/05/2022 15:34:21 - INFO - codeparrot_training - Skipping example with length 1006 (seq_length=1024) +03/05/2022 15:34:23 - INFO - codeparrot_training - Step 42619: {'lr': 0.00041268862961216457, 'samples': 21821440, 'steps': 42619, 'loss/train': 1.142103672027588} +03/05/2022 15:34:27 - INFO - codeparrot_training - Step 42620: {'lr': 0.00041268460022719783, 'samples': 21821952, 'steps': 42620, 'loss/train': 1.7631441354751587} +03/05/2022 15:34:29 - INFO - codeparrot_training - Skipping example with length 317 (seq_length=1024) +03/05/2022 15:34:32 - INFO - codeparrot_training - Step 42621: {'lr': 0.0004126805707689279, 'samples': 21822464, 'steps': 42621, 'loss/train': 2.0135200023651123} +03/05/2022 15:34:35 - INFO - codeparrot_training - Step 42622: {'lr': 0.0004126765412373566, 'samples': 21822976, 'steps': 42622, 'loss/train': 1.859331488609314} +03/05/2022 15:34:37 - INFO - codeparrot_training - Skipping example with length 747 (seq_length=1024) +03/05/2022 15:34:40 - INFO - codeparrot_training - Step 42623: {'lr': 0.0004126725116324858, 'samples': 21823488, 'steps': 42623, 'loss/train': 0.21420352160930634} +03/05/2022 15:34:44 - INFO - codeparrot_training - Step 42624: {'lr': 0.00041266848195431715, 'samples': 21824000, 'steps': 42624, 'loss/train': 2.3679468631744385} +03/05/2022 15:34:46 - INFO - codeparrot_training - Skipping example with length 95 (seq_length=1024) +03/05/2022 15:34:49 - INFO - codeparrot_training - Step 42625: {'lr': 0.00041266445220285267, 'samples': 21824512, 'steps': 42625, 'loss/train': 1.4078609943389893} +03/05/2022 15:34:52 - INFO - codeparrot_training - Step 42626: {'lr': 0.0004126604223780941, 'samples': 21825024, 'steps': 42626, 'loss/train': 1.360387921333313} +03/05/2022 15:34:54 - INFO - codeparrot_training - Skipping example with length 459 (seq_length=1024) +03/05/2022 15:34:57 - INFO - codeparrot_training - Step 42627: {'lr': 0.00041265639248004327, 'samples': 21825536, 'steps': 42627, 'loss/train': 1.6109942197799683} +03/05/2022 15:35:00 - INFO - codeparrot_training - Step 42628: {'lr': 0.000412652362508702, 'samples': 21826048, 'steps': 42628, 'loss/train': 1.788684606552124} +03/05/2022 15:35:02 - INFO - codeparrot_training - Skipping example with length 128 (seq_length=1024) +03/05/2022 15:35:06 - INFO - codeparrot_training - Step 42629: {'lr': 0.000412648332464072, 'samples': 21826560, 'steps': 42629, 'loss/train': 2.1790688037872314} +03/05/2022 15:35:09 - INFO - codeparrot_training - Step 42630: {'lr': 0.00041264430234615526, 'samples': 21827072, 'steps': 42630, 'loss/train': 1.724709153175354} +03/05/2022 15:35:11 - INFO - codeparrot_training - Skipping example with length 741 (seq_length=1024) +03/05/2022 15:35:14 - INFO - codeparrot_training - Step 42631: {'lr': 0.0004126402721549535, 'samples': 21827584, 'steps': 42631, 'loss/train': 1.519388198852539} +03/05/2022 15:35:17 - INFO - codeparrot_training - Step 42632: {'lr': 0.00041263624189046846, 'samples': 21828096, 'steps': 42632, 'loss/train': 1.9214985370635986} +03/05/2022 15:35:19 - INFO - codeparrot_training - Skipping example with length 602 (seq_length=1024) +03/05/2022 15:35:23 - INFO - codeparrot_training - Step 42633: {'lr': 0.0004126322115527021, 'samples': 21828608, 'steps': 42633, 'loss/train': 1.6461073160171509} +03/05/2022 15:35:26 - INFO - codeparrot_training - Step 42634: {'lr': 0.00041262818114165615, 'samples': 21829120, 'steps': 42634, 'loss/train': 1.1113814115524292} +03/05/2022 15:35:28 - INFO - codeparrot_training - Skipping example with length 51 (seq_length=1024) +03/05/2022 15:35:31 - INFO - codeparrot_training - Step 42635: {'lr': 0.0004126241506573325, 'samples': 21829632, 'steps': 42635, 'loss/train': 1.8905797004699707} +03/05/2022 15:35:34 - INFO - codeparrot_training - Step 42636: {'lr': 0.00041262012009973283, 'samples': 21830144, 'steps': 42636, 'loss/train': 1.8902021646499634} +03/05/2022 15:35:36 - INFO - codeparrot_training - Skipping example with length 627 (seq_length=1024) +03/05/2022 15:35:40 - INFO - codeparrot_training - Step 42637: {'lr': 0.0004126160894688591, 'samples': 21830656, 'steps': 42637, 'loss/train': 1.685051441192627} +03/05/2022 15:35:43 - INFO - codeparrot_training - Step 42638: {'lr': 0.00041261205876471307, 'samples': 21831168, 'steps': 42638, 'loss/train': 1.5529205799102783} +03/05/2022 15:35:44 - INFO - codeparrot_training - Skipping example with length 573 (seq_length=1024) +03/05/2022 15:35:48 - INFO - codeparrot_training - Step 42639: {'lr': 0.0004126080279872966, 'samples': 21831680, 'steps': 42639, 'loss/train': 1.8368233442306519} +03/05/2022 15:35:51 - INFO - codeparrot_training - Step 42640: {'lr': 0.0004126039971366114, 'samples': 21832192, 'steps': 42640, 'loss/train': 1.1299426555633545} +03/05/2022 15:35:53 - INFO - codeparrot_training - Skipping example with length 886 (seq_length=1024) +03/05/2022 15:35:56 - INFO - codeparrot_training - Step 42641: {'lr': 0.0004125999662126594, 'samples': 21832704, 'steps': 42641, 'loss/train': 1.6593204736709595} +03/05/2022 15:36:00 - INFO - codeparrot_training - Step 42642: {'lr': 0.00041259593521544223, 'samples': 21833216, 'steps': 42642, 'loss/train': 1.5966585874557495} +03/05/2022 15:36:01 - INFO - codeparrot_training - Skipping example with length 944 (seq_length=1024) +03/05/2022 15:36:05 - INFO - codeparrot_training - Step 42643: {'lr': 0.00041259190414496194, 'samples': 21833728, 'steps': 42643, 'loss/train': 1.792637825012207} +03/05/2022 15:36:08 - INFO - codeparrot_training - Step 42644: {'lr': 0.00041258787300122026, 'samples': 21834240, 'steps': 42644, 'loss/train': 1.8549559116363525} +03/05/2022 15:36:10 - INFO - codeparrot_training - Skipping example with length 36 (seq_length=1024) +03/05/2022 15:36:14 - INFO - codeparrot_training - Step 42645: {'lr': 0.000412583841784219, 'samples': 21834752, 'steps': 42645, 'loss/train': 1.924027442932129} +03/05/2022 15:36:17 - INFO - codeparrot_training - Step 42646: {'lr': 0.00041257981049395997, 'samples': 21835264, 'steps': 42646, 'loss/train': 1.9239075183868408} +03/05/2022 15:36:19 - INFO - codeparrot_training - Skipping example with length 395 (seq_length=1024) +03/05/2022 15:36:22 - INFO - codeparrot_training - Step 42647: {'lr': 0.000412575779130445, 'samples': 21835776, 'steps': 42647, 'loss/train': 1.7570056915283203} +03/05/2022 15:36:25 - INFO - codeparrot_training - Step 42648: {'lr': 0.0004125717476936758, 'samples': 21836288, 'steps': 42648, 'loss/train': 1.9527770280838013} +03/05/2022 15:36:27 - INFO - codeparrot_training - Skipping example with length 181 (seq_length=1024) +03/05/2022 15:36:30 - INFO - codeparrot_training - Step 42649: {'lr': 0.0004125677161836543, 'samples': 21836800, 'steps': 42649, 'loss/train': 0.9159680604934692} +03/05/2022 15:36:34 - INFO - codeparrot_training - Step 42650: {'lr': 0.00041256368460038237, 'samples': 21837312, 'steps': 42650, 'loss/train': 1.4534938335418701} +03/05/2022 15:36:36 - INFO - codeparrot_training - Skipping example with length 947 (seq_length=1024) +03/05/2022 15:36:39 - INFO - codeparrot_training - Step 42651: {'lr': 0.00041255965294386174, 'samples': 21837824, 'steps': 42651, 'loss/train': 1.5101351737976074} +03/05/2022 15:36:42 - INFO - codeparrot_training - Step 42652: {'lr': 0.00041255562121409416, 'samples': 21838336, 'steps': 42652, 'loss/train': 1.7283003330230713} +03/05/2022 15:36:44 - INFO - codeparrot_training - Skipping example with length 642 (seq_length=1024) +03/05/2022 15:36:47 - INFO - codeparrot_training - Step 42653: {'lr': 0.0004125515894110816, 'samples': 21838848, 'steps': 42653, 'loss/train': 2.093433141708374} +03/05/2022 15:36:50 - INFO - codeparrot_training - Step 42654: {'lr': 0.00041254755753482574, 'samples': 21839360, 'steps': 42654, 'loss/train': 1.8721247911453247} +03/05/2022 15:36:53 - INFO - codeparrot_training - Skipping example with length 545 (seq_length=1024) +03/05/2022 15:36:56 - INFO - codeparrot_training - Step 42655: {'lr': 0.00041254352558532854, 'samples': 21839872, 'steps': 42655, 'loss/train': 1.31758451461792} +03/05/2022 15:36:59 - INFO - codeparrot_training - Step 42656: {'lr': 0.0004125394935625917, 'samples': 21840384, 'steps': 42656, 'loss/train': 2.4140172004699707} +03/05/2022 15:37:01 - INFO - codeparrot_training - Skipping example with length 924 (seq_length=1024) +03/05/2022 15:37:04 - INFO - codeparrot_training - Step 42657: {'lr': 0.00041253546146661704, 'samples': 21840896, 'steps': 42657, 'loss/train': 1.1278560161590576} +03/05/2022 15:37:07 - INFO - codeparrot_training - Step 42658: {'lr': 0.00041253142929740643, 'samples': 21841408, 'steps': 42658, 'loss/train': 1.9698598384857178} +03/05/2022 15:37:10 - INFO - codeparrot_training - Skipping example with length 402 (seq_length=1024) +03/05/2022 15:37:13 - INFO - codeparrot_training - Step 42659: {'lr': 0.00041252739705496165, 'samples': 21841920, 'steps': 42659, 'loss/train': 2.04785418510437} +03/05/2022 15:37:16 - INFO - codeparrot_training - Step 42660: {'lr': 0.00041252336473928455, 'samples': 21842432, 'steps': 42660, 'loss/train': 2.1213648319244385} +03/05/2022 15:37:19 - INFO - codeparrot_training - Step 42661: {'lr': 0.00041251933235037695, 'samples': 21842944, 'steps': 42661, 'loss/train': 1.4430707693099976} +03/05/2022 15:37:20 - INFO - codeparrot_training - Skipping example with length 321 (seq_length=1024) +03/05/2022 15:37:25 - INFO - codeparrot_training - Step 42662: {'lr': 0.00041251529988824067, 'samples': 21843456, 'steps': 42662, 'loss/train': 0.15594890713691711} +03/05/2022 15:37:28 - INFO - codeparrot_training - Step 42663: {'lr': 0.0004125112673528775, 'samples': 21843968, 'steps': 42663, 'loss/train': 1.4359874725341797} +03/05/2022 15:37:28 - INFO - codeparrot_training - Skipping example with length 353 (seq_length=1024) +03/05/2022 15:37:33 - INFO - codeparrot_training - Step 42664: {'lr': 0.0004125072347442892, 'samples': 21844480, 'steps': 42664, 'loss/train': 1.8084276914596558} +03/05/2022 15:37:36 - INFO - codeparrot_training - Step 42665: {'lr': 0.0004125032020624776, 'samples': 21844992, 'steps': 42665, 'loss/train': 0.9795460104942322} +03/05/2022 15:37:36 - INFO - codeparrot_training - Skipping example with length 1012 (seq_length=1024) +03/05/2022 15:37:41 - INFO - codeparrot_training - Step 42666: {'lr': 0.0004124991693074447, 'samples': 21845504, 'steps': 42666, 'loss/train': 1.7954418659210205} +03/05/2022 15:37:45 - INFO - codeparrot_training - Step 42667: {'lr': 0.00041249513647919207, 'samples': 21846016, 'steps': 42667, 'loss/train': 1.1955927610397339} +03/05/2022 15:37:45 - INFO - codeparrot_training - Skipping example with length 539 (seq_length=1024) +03/05/2022 15:37:50 - INFO - codeparrot_training - Step 42668: {'lr': 0.00041249110357772167, 'samples': 21846528, 'steps': 42668, 'loss/train': 1.3253178596496582} +03/05/2022 15:37:53 - INFO - codeparrot_training - Step 42669: {'lr': 0.00041248707060303536, 'samples': 21847040, 'steps': 42669, 'loss/train': 1.6768676042556763} +03/05/2022 15:37:53 - INFO - codeparrot_training - Skipping example with length 218 (seq_length=1024) +03/05/2022 15:37:58 - INFO - codeparrot_training - Step 42670: {'lr': 0.00041248303755513484, 'samples': 21847552, 'steps': 42670, 'loss/train': 2.063546895980835} +03/05/2022 15:38:01 - INFO - codeparrot_training - Step 42671: {'lr': 0.00041247900443402194, 'samples': 21848064, 'steps': 42671, 'loss/train': 0.8799605965614319} +03/05/2022 15:38:02 - INFO - codeparrot_training - Skipping example with length 1014 (seq_length=1024) +03/05/2022 15:38:07 - INFO - codeparrot_training - Step 42672: {'lr': 0.00041247497123969844, 'samples': 21848576, 'steps': 42672, 'loss/train': 1.5178639888763428} +03/05/2022 15:38:10 - INFO - codeparrot_training - Step 42673: {'lr': 0.00041247093797216637, 'samples': 21849088, 'steps': 42673, 'loss/train': 1.8126683235168457} +03/05/2022 15:38:10 - INFO - codeparrot_training - Skipping example with length 273 (seq_length=1024) +03/05/2022 15:38:15 - INFO - codeparrot_training - Step 42674: {'lr': 0.00041246690463142733, 'samples': 21849600, 'steps': 42674, 'loss/train': 0.6818129420280457} +03/05/2022 15:38:18 - INFO - codeparrot_training - Skipping example with length 446 (seq_length=1024) +03/05/2022 15:38:20 - INFO - codeparrot_training - Step 42675: {'lr': 0.0004124628712174833, 'samples': 21850112, 'steps': 42675, 'loss/train': 1.732179880142212} +03/05/2022 15:38:24 - INFO - codeparrot_training - Step 42676: {'lr': 0.0004124588377303359, 'samples': 21850624, 'steps': 42676, 'loss/train': 0.4207277297973633} +03/05/2022 15:38:26 - INFO - codeparrot_training - Skipping example with length 963 (seq_length=1024) +03/05/2022 15:38:29 - INFO - codeparrot_training - Step 42677: {'lr': 0.00041245480416998704, 'samples': 21851136, 'steps': 42677, 'loss/train': 1.9485636949539185} +03/05/2022 15:38:32 - INFO - codeparrot_training - Step 42678: {'lr': 0.0004124507705364386, 'samples': 21851648, 'steps': 42678, 'loss/train': 1.9839813709259033} +03/05/2022 15:38:35 - INFO - codeparrot_training - Skipping example with length 956 (seq_length=1024) +03/05/2022 15:38:37 - INFO - codeparrot_training - Step 42679: {'lr': 0.0004124467368296924, 'samples': 21852160, 'steps': 42679, 'loss/train': 1.4094291925430298} +03/05/2022 15:38:41 - INFO - codeparrot_training - Step 42680: {'lr': 0.00041244270304975004, 'samples': 21852672, 'steps': 42680, 'loss/train': 1.4138314723968506} +03/05/2022 15:38:43 - INFO - codeparrot_training - Skipping example with length 834 (seq_length=1024) +03/05/2022 15:38:46 - INFO - codeparrot_training - Step 42681: {'lr': 0.0004124386691966137, 'samples': 21853184, 'steps': 42681, 'loss/train': 1.648011326789856} +03/05/2022 15:38:49 - INFO - codeparrot_training - Step 42682: {'lr': 0.00041243463527028493, 'samples': 21853696, 'steps': 42682, 'loss/train': 1.362849235534668} +03/05/2022 15:38:52 - INFO - codeparrot_training - Skipping example with length 695 (seq_length=1024) +03/05/2022 15:38:54 - INFO - codeparrot_training - Step 42683: {'lr': 0.0004124306012707656, 'samples': 21854208, 'steps': 42683, 'loss/train': 1.96514892578125} +03/05/2022 15:38:58 - INFO - codeparrot_training - Step 42684: {'lr': 0.00041242656719805754, 'samples': 21854720, 'steps': 42684, 'loss/train': 0.2987481653690338} +03/05/2022 15:39:00 - INFO - codeparrot_training - Skipping example with length 129 (seq_length=1024) +03/05/2022 15:39:03 - INFO - codeparrot_training - Step 42685: {'lr': 0.0004124225330521626, 'samples': 21855232, 'steps': 42685, 'loss/train': 1.2649592161178589} +03/05/2022 15:39:06 - INFO - codeparrot_training - Step 42686: {'lr': 0.0004124184988330826, 'samples': 21855744, 'steps': 42686, 'loss/train': 0.27233895659446716} +03/05/2022 15:39:09 - INFO - codeparrot_training - Skipping example with length 125 (seq_length=1024) +03/05/2022 15:39:11 - INFO - codeparrot_training - Step 42687: {'lr': 0.0004124144645408192, 'samples': 21856256, 'steps': 42687, 'loss/train': 0.8218533396720886} +03/05/2022 15:39:14 - INFO - codeparrot_training - Step 42688: {'lr': 0.0004124104301753745, 'samples': 21856768, 'steps': 42688, 'loss/train': 2.030330181121826} +03/05/2022 15:39:17 - INFO - codeparrot_training - Skipping example with length 171 (seq_length=1024) +03/05/2022 15:39:20 - INFO - codeparrot_training - Step 42689: {'lr': 0.0004124063957367501, 'samples': 21857280, 'steps': 42689, 'loss/train': 2.1226961612701416} +03/05/2022 15:39:23 - INFO - codeparrot_training - Step 42690: {'lr': 0.0004124023612249479, 'samples': 21857792, 'steps': 42690, 'loss/train': 1.7168673276901245} +03/05/2022 15:39:26 - INFO - codeparrot_training - Skipping example with length 120 (seq_length=1024) +03/05/2022 15:39:28 - INFO - codeparrot_training - Step 42691: {'lr': 0.0004123983266399697, 'samples': 21858304, 'steps': 42691, 'loss/train': 1.4981063604354858} +03/05/2022 15:39:31 - INFO - codeparrot_training - Step 42692: {'lr': 0.0004123942919818173, 'samples': 21858816, 'steps': 42692, 'loss/train': 1.6874425411224365} +03/05/2022 15:39:34 - INFO - codeparrot_training - Skipping example with length 529 (seq_length=1024) +03/05/2022 15:39:37 - INFO - codeparrot_training - Step 42693: {'lr': 0.00041239025725049256, 'samples': 21859328, 'steps': 42693, 'loss/train': 1.6723387241363525} +03/05/2022 15:39:40 - INFO - codeparrot_training - Step 42694: {'lr': 0.0004123862224459973, 'samples': 21859840, 'steps': 42694, 'loss/train': 1.9270416498184204} +03/05/2022 15:39:42 - INFO - codeparrot_training - Skipping example with length 697 (seq_length=1024) +03/05/2022 15:39:45 - INFO - codeparrot_training - Step 42695: {'lr': 0.0004123821875683333, 'samples': 21860352, 'steps': 42695, 'loss/train': 2.2760426998138428} +03/05/2022 15:39:48 - INFO - codeparrot_training - Step 42696: {'lr': 0.0004123781526175023, 'samples': 21860864, 'steps': 42696, 'loss/train': 1.8160414695739746} +03/05/2022 15:39:51 - INFO - codeparrot_training - Skipping example with length 395 (seq_length=1024) +03/05/2022 15:39:53 - INFO - codeparrot_training - Step 42697: {'lr': 0.0004123741175935063, 'samples': 21861376, 'steps': 42697, 'loss/train': 1.3830658197402954} +03/05/2022 15:39:57 - INFO - codeparrot_training - Step 42698: {'lr': 0.000412370082496347, 'samples': 21861888, 'steps': 42698, 'loss/train': 2.0445711612701416} +03/05/2022 15:39:59 - INFO - codeparrot_training - Skipping example with length 759 (seq_length=1024) +03/05/2022 15:40:02 - INFO - codeparrot_training - Step 42699: {'lr': 0.0004123660473260263, 'samples': 21862400, 'steps': 42699, 'loss/train': 1.601873517036438} +03/05/2022 15:40:05 - INFO - codeparrot_training - Step 42700: {'lr': 0.0004123620120825459, 'samples': 21862912, 'steps': 42700, 'loss/train': 2.14908504486084} +03/05/2022 15:40:08 - INFO - codeparrot_training - Skipping example with length 764 (seq_length=1024) +03/05/2022 15:40:10 - INFO - codeparrot_training - Step 42701: {'lr': 0.00041235797676590776, 'samples': 21863424, 'steps': 42701, 'loss/train': 0.5090450644493103} +03/05/2022 15:40:14 - INFO - codeparrot_training - Step 42702: {'lr': 0.0004123539413761136, 'samples': 21863936, 'steps': 42702, 'loss/train': 2.2038376331329346} +03/05/2022 15:40:16 - INFO - codeparrot_training - Skipping example with length 765 (seq_length=1024) +03/05/2022 15:40:19 - INFO - codeparrot_training - Step 42703: {'lr': 0.0004123499059131652, 'samples': 21864448, 'steps': 42703, 'loss/train': 1.02668035030365} +03/05/2022 15:40:22 - INFO - codeparrot_training - Step 42704: {'lr': 0.00041234587037706447, 'samples': 21864960, 'steps': 42704, 'loss/train': 1.4202343225479126} +03/05/2022 15:40:25 - INFO - codeparrot_training - Skipping example with length 420 (seq_length=1024) +03/05/2022 15:40:28 - INFO - codeparrot_training - Step 42705: {'lr': 0.0004123418347678132, 'samples': 21865472, 'steps': 42705, 'loss/train': 1.851541519165039} +03/05/2022 15:40:31 - INFO - codeparrot_training - Step 42706: {'lr': 0.00041233779908541316, 'samples': 21865984, 'steps': 42706, 'loss/train': 1.4034013748168945} +03/05/2022 15:40:33 - INFO - codeparrot_training - Skipping example with length 588 (seq_length=1024) +03/05/2022 15:40:36 - INFO - codeparrot_training - Step 42707: {'lr': 0.0004123337633298662, 'samples': 21866496, 'steps': 42707, 'loss/train': 0.8666094541549683} +03/05/2022 15:40:39 - INFO - codeparrot_training - Step 42708: {'lr': 0.0004123297275011743, 'samples': 21867008, 'steps': 42708, 'loss/train': 1.2090715169906616} +03/05/2022 15:40:42 - INFO - codeparrot_training - Skipping example with length 457 (seq_length=1024) +03/05/2022 15:40:44 - INFO - codeparrot_training - Step 42709: {'lr': 0.00041232569159933895, 'samples': 21867520, 'steps': 42709, 'loss/train': 1.7219743728637695} +03/05/2022 15:40:48 - INFO - codeparrot_training - Step 42710: {'lr': 0.00041232165562436225, 'samples': 21868032, 'steps': 42710, 'loss/train': 2.175124168395996} +03/05/2022 15:40:50 - INFO - codeparrot_training - Skipping example with length 728 (seq_length=1024) +03/05/2022 15:40:53 - INFO - codeparrot_training - Step 42711: {'lr': 0.00041231761957624593, 'samples': 21868544, 'steps': 42711, 'loss/train': 2.863513946533203} +03/05/2022 15:40:56 - INFO - codeparrot_training - Step 42712: {'lr': 0.0004123135834549917, 'samples': 21869056, 'steps': 42712, 'loss/train': 1.7492223978042603} +03/05/2022 15:40:59 - INFO - codeparrot_training - Skipping example with length 253 (seq_length=1024) +03/05/2022 15:41:01 - INFO - codeparrot_training - Step 42713: {'lr': 0.00041230954726060155, 'samples': 21869568, 'steps': 42713, 'loss/train': 0.17172910273075104} +03/05/2022 15:41:05 - INFO - codeparrot_training - Step 42714: {'lr': 0.00041230551099307724, 'samples': 21870080, 'steps': 42714, 'loss/train': 1.8263894319534302} +03/05/2022 15:41:07 - INFO - codeparrot_training - Skipping example with length 188 (seq_length=1024) +03/05/2022 15:41:10 - INFO - codeparrot_training - Step 42715: {'lr': 0.0004123014746524205, 'samples': 21870592, 'steps': 42715, 'loss/train': 1.252118468284607} +03/05/2022 15:41:13 - INFO - codeparrot_training - Step 42716: {'lr': 0.0004122974382386333, 'samples': 21871104, 'steps': 42716, 'loss/train': 2.1624093055725098} +03/05/2022 15:41:16 - INFO - codeparrot_training - Skipping example with length 182 (seq_length=1024) +03/05/2022 15:41:18 - INFO - codeparrot_training - Step 42717: {'lr': 0.00041229340175171733, 'samples': 21871616, 'steps': 42717, 'loss/train': 2.051084518432617} +03/05/2022 15:41:22 - INFO - codeparrot_training - Step 42718: {'lr': 0.00041228936519167446, 'samples': 21872128, 'steps': 42718, 'loss/train': 1.7689778804779053} +03/05/2022 15:41:24 - INFO - codeparrot_training - Skipping example with length 891 (seq_length=1024) +03/05/2022 15:41:27 - INFO - codeparrot_training - Step 42719: {'lr': 0.00041228532855850655, 'samples': 21872640, 'steps': 42719, 'loss/train': 1.4166237115859985} +03/05/2022 15:41:30 - INFO - codeparrot_training - Step 42720: {'lr': 0.0004122812918522153, 'samples': 21873152, 'steps': 42720, 'loss/train': 1.7485734224319458} +03/05/2022 15:41:33 - INFO - codeparrot_training - Skipping example with length 331 (seq_length=1024) +03/05/2022 15:41:35 - INFO - codeparrot_training - Step 42721: {'lr': 0.0004122772550728027, 'samples': 21873664, 'steps': 42721, 'loss/train': 1.8916828632354736} +03/05/2022 15:41:38 - INFO - codeparrot_training - Step 42722: {'lr': 0.0004122732182202703, 'samples': 21874176, 'steps': 42722, 'loss/train': 1.2014061212539673} +03/05/2022 15:41:41 - INFO - codeparrot_training - Skipping example with length 504 (seq_length=1024) +03/05/2022 15:41:44 - INFO - codeparrot_training - Step 42723: {'lr': 0.0004122691812946202, 'samples': 21874688, 'steps': 42723, 'loss/train': 1.3142404556274414} +03/05/2022 15:41:47 - INFO - codeparrot_training - Step 42724: {'lr': 0.00041226514429585417, 'samples': 21875200, 'steps': 42724, 'loss/train': 2.2698605060577393} +03/05/2022 15:41:49 - INFO - codeparrot_training - Skipping example with length 475 (seq_length=1024) +03/05/2022 15:41:52 - INFO - codeparrot_training - Step 42725: {'lr': 0.0004122611072239739, 'samples': 21875712, 'steps': 42725, 'loss/train': 1.5895013809204102} +03/05/2022 15:41:55 - INFO - codeparrot_training - Step 42726: {'lr': 0.00041225707007898127, 'samples': 21876224, 'steps': 42726, 'loss/train': 1.849104881286621} +03/05/2022 15:41:58 - INFO - codeparrot_training - Step 42727: {'lr': 0.0004122530328608781, 'samples': 21876736, 'steps': 42727, 'loss/train': 1.5859540700912476} +03/05/2022 15:41:59 - INFO - codeparrot_training - Skipping example with length 500 (seq_length=1024) +03/05/2022 15:42:04 - INFO - codeparrot_training - Step 42728: {'lr': 0.00041224899556966635, 'samples': 21877248, 'steps': 42728, 'loss/train': 1.7715801000595093} +03/05/2022 15:42:07 - INFO - codeparrot_training - Step 42729: {'lr': 0.00041224495820534757, 'samples': 21877760, 'steps': 42729, 'loss/train': 1.8469213247299194} +03/05/2022 15:42:07 - INFO - codeparrot_training - Skipping example with length 344 (seq_length=1024) +03/05/2022 15:42:13 - INFO - codeparrot_training - Step 42730: {'lr': 0.00041224092076792374, 'samples': 21878272, 'steps': 42730, 'loss/train': 1.386015772819519} +03/05/2022 15:42:16 - INFO - codeparrot_training - Step 42731: {'lr': 0.0004122368832573967, 'samples': 21878784, 'steps': 42731, 'loss/train': 1.1897765398025513} +03/05/2022 15:42:16 - INFO - codeparrot_training - Skipping example with length 127 (seq_length=1024) +03/05/2022 15:42:21 - INFO - codeparrot_training - Step 42732: {'lr': 0.00041223284567376816, 'samples': 21879296, 'steps': 42732, 'loss/train': 1.6038023233413696} +03/05/2022 15:42:24 - INFO - codeparrot_training - Step 42733: {'lr': 0.00041222880801704005, 'samples': 21879808, 'steps': 42733, 'loss/train': 1.625616431236267} +03/05/2022 15:42:25 - INFO - codeparrot_training - Skipping example with length 67 (seq_length=1024) +03/05/2022 15:42:30 - INFO - codeparrot_training - Step 42734: {'lr': 0.0004122247702872141, 'samples': 21880320, 'steps': 42734, 'loss/train': 2.000091314315796} +03/05/2022 15:42:33 - INFO - codeparrot_training - Step 42735: {'lr': 0.0004122207324842923, 'samples': 21880832, 'steps': 42735, 'loss/train': 1.4472936391830444} +03/05/2022 15:42:33 - INFO - codeparrot_training - Skipping example with length 42 (seq_length=1024) +03/05/2022 15:42:38 - INFO - codeparrot_training - Step 42736: {'lr': 0.00041221669460827614, 'samples': 21881344, 'steps': 42736, 'loss/train': 2.1857991218566895} +03/05/2022 15:42:41 - INFO - codeparrot_training - Step 42737: {'lr': 0.00041221265665916776, 'samples': 21881856, 'steps': 42737, 'loss/train': 1.8792146444320679} +03/05/2022 15:42:42 - INFO - codeparrot_training - Skipping example with length 244 (seq_length=1024) +03/05/2022 15:42:47 - INFO - codeparrot_training - Step 42738: {'lr': 0.00041220861863696886, 'samples': 21882368, 'steps': 42738, 'loss/train': 1.8833445310592651} +03/05/2022 15:42:50 - INFO - codeparrot_training - Step 42739: {'lr': 0.0004122045805416812, 'samples': 21882880, 'steps': 42739, 'loss/train': 2.2283530235290527} +03/05/2022 15:42:51 - INFO - codeparrot_training - Skipping example with length 419 (seq_length=1024) +03/05/2022 15:42:55 - INFO - codeparrot_training - Step 42740: {'lr': 0.00041220054237330674, 'samples': 21883392, 'steps': 42740, 'loss/train': 1.7523986101150513} +03/05/2022 15:42:59 - INFO - codeparrot_training - Step 42741: {'lr': 0.00041219650413184714, 'samples': 21883904, 'steps': 42741, 'loss/train': 1.3041512966156006} +03/05/2022 15:42:59 - INFO - codeparrot_training - Skipping example with length 787 (seq_length=1024) +03/05/2022 15:43:04 - INFO - codeparrot_training - Step 42742: {'lr': 0.00041219246581730435, 'samples': 21884416, 'steps': 42742, 'loss/train': 2.254211902618408} +03/05/2022 15:43:07 - INFO - codeparrot_training - Step 42743: {'lr': 0.0004121884274296801, 'samples': 21884928, 'steps': 42743, 'loss/train': 1.5085549354553223} +03/05/2022 15:43:08 - INFO - codeparrot_training - Skipping example with length 147 (seq_length=1024) +03/05/2022 15:43:13 - INFO - codeparrot_training - Step 42744: {'lr': 0.00041218438896897623, 'samples': 21885440, 'steps': 42744, 'loss/train': 1.547971487045288} +03/05/2022 15:43:16 - INFO - codeparrot_training - Step 42745: {'lr': 0.00041218035043519464, 'samples': 21885952, 'steps': 42745, 'loss/train': 2.1144461631774902} +03/05/2022 15:43:19 - INFO - codeparrot_training - Step 42746: {'lr': 0.00041217631182833707, 'samples': 21886464, 'steps': 42746, 'loss/train': 1.9241397380828857} +03/05/2022 15:43:19 - INFO - codeparrot_training - Skipping example with length 598 (seq_length=1024) +03/05/2022 15:43:25 - INFO - codeparrot_training - Step 42747: {'lr': 0.00041217227314840535, 'samples': 21886976, 'steps': 42747, 'loss/train': 0.9467313289642334} +03/05/2022 15:43:30 - INFO - codeparrot_training - Step 42748: {'lr': 0.00041216823439540134, 'samples': 21887488, 'steps': 42748, 'loss/train': 1.5583304166793823} +03/05/2022 15:43:33 - INFO - codeparrot_training - Step 42749: {'lr': 0.0004121641955693268, 'samples': 21888000, 'steps': 42749, 'loss/train': 0.7617923021316528} +03/05/2022 15:43:36 - INFO - codeparrot_training - Skipping example with length 159 (seq_length=1024) +03/05/2022 15:43:38 - INFO - codeparrot_training - Step 42750: {'lr': 0.00041216015667018357, 'samples': 21888512, 'steps': 42750, 'loss/train': 2.350048780441284} +03/05/2022 15:43:42 - INFO - codeparrot_training - Step 42751: {'lr': 0.00041215611769797344, 'samples': 21889024, 'steps': 42751, 'loss/train': 1.6236004829406738} +03/05/2022 15:43:45 - INFO - codeparrot_training - Skipping example with length 738 (seq_length=1024) +03/05/2022 15:43:47 - INFO - codeparrot_training - Step 42752: {'lr': 0.00041215207865269833, 'samples': 21889536, 'steps': 42752, 'loss/train': 1.8739979267120361} +03/05/2022 15:43:50 - INFO - codeparrot_training - Step 42753: {'lr': 0.00041214803953435993, 'samples': 21890048, 'steps': 42753, 'loss/train': 0.5973657965660095} +03/05/2022 15:43:53 - INFO - codeparrot_training - Skipping example with length 767 (seq_length=1024) +03/05/2022 15:43:55 - INFO - codeparrot_training - Step 42754: {'lr': 0.0004121440003429602, 'samples': 21890560, 'steps': 42754, 'loss/train': 2.0812768936157227} +03/05/2022 15:43:59 - INFO - codeparrot_training - Step 42755: {'lr': 0.0004121399610785008, 'samples': 21891072, 'steps': 42755, 'loss/train': 2.3285913467407227} +03/05/2022 15:44:01 - INFO - codeparrot_training - Skipping example with length 943 (seq_length=1024) +03/05/2022 15:44:04 - INFO - codeparrot_training - Step 42756: {'lr': 0.00041213592174098367, 'samples': 21891584, 'steps': 42756, 'loss/train': 1.6024234294891357} +03/05/2022 15:44:07 - INFO - codeparrot_training - Step 42757: {'lr': 0.00041213188233041065, 'samples': 21892096, 'steps': 42757, 'loss/train': 2.207092046737671} +03/05/2022 15:44:10 - INFO - codeparrot_training - Skipping example with length 926 (seq_length=1024) +03/05/2022 15:44:12 - INFO - codeparrot_training - Step 42758: {'lr': 0.00041212784284678345, 'samples': 21892608, 'steps': 42758, 'loss/train': 1.9851902723312378} +03/05/2022 15:44:15 - INFO - codeparrot_training - Step 42759: {'lr': 0.0004121238032901039, 'samples': 21893120, 'steps': 42759, 'loss/train': 1.4391775131225586} +03/05/2022 15:44:18 - INFO - codeparrot_training - Skipping example with length 172 (seq_length=1024) +03/05/2022 15:44:21 - INFO - codeparrot_training - Step 42760: {'lr': 0.00041211976366037394, 'samples': 21893632, 'steps': 42760, 'loss/train': 1.590732455253601} +03/05/2022 15:44:24 - INFO - codeparrot_training - Step 42761: {'lr': 0.0004121157239575953, 'samples': 21894144, 'steps': 42761, 'loss/train': 1.5049831867218018} +03/05/2022 15:44:26 - INFO - codeparrot_training - Skipping example with length 789 (seq_length=1024) +03/05/2022 15:44:29 - INFO - codeparrot_training - Step 42762: {'lr': 0.0004121116841817699, 'samples': 21894656, 'steps': 42762, 'loss/train': 2.074350357055664} +03/05/2022 15:44:32 - INFO - codeparrot_training - Step 42763: {'lr': 0.00041210764433289936, 'samples': 21895168, 'steps': 42763, 'loss/train': 1.2239892482757568} +03/05/2022 15:44:35 - INFO - codeparrot_training - Skipping example with length 1013 (seq_length=1024) +03/05/2022 15:44:37 - INFO - codeparrot_training - Step 42764: {'lr': 0.0004121036044109856, 'samples': 21895680, 'steps': 42764, 'loss/train': 1.9655356407165527} +03/05/2022 15:44:41 - INFO - codeparrot_training - Step 42765: {'lr': 0.00041209956441603054, 'samples': 21896192, 'steps': 42765, 'loss/train': 1.6302776336669922} +03/05/2022 15:44:43 - INFO - codeparrot_training - Skipping example with length 331 (seq_length=1024) +03/05/2022 15:44:46 - INFO - codeparrot_training - Step 42766: {'lr': 0.0004120955243480359, 'samples': 21896704, 'steps': 42766, 'loss/train': 2.19368839263916} +03/05/2022 15:44:49 - INFO - codeparrot_training - Step 42767: {'lr': 0.0004120914842070035, 'samples': 21897216, 'steps': 42767, 'loss/train': 1.770149827003479} +03/05/2022 15:44:52 - INFO - codeparrot_training - Step 42768: {'lr': 0.0004120874439929352, 'samples': 21897728, 'steps': 42768, 'loss/train': 1.7086412906646729} +03/05/2022 15:44:53 - INFO - codeparrot_training - Skipping example with length 885 (seq_length=1024) +03/05/2022 15:44:58 - INFO - codeparrot_training - Step 42769: {'lr': 0.00041208340370583275, 'samples': 21898240, 'steps': 42769, 'loss/train': 2.256742477416992} +03/05/2022 15:45:01 - INFO - codeparrot_training - Step 42770: {'lr': 0.0004120793633456981, 'samples': 21898752, 'steps': 42770, 'loss/train': 1.6561298370361328} +03/05/2022 15:45:01 - INFO - codeparrot_training - Skipping example with length 729 (seq_length=1024) +03/05/2022 15:45:06 - INFO - codeparrot_training - Step 42771: {'lr': 0.0004120753229125329, 'samples': 21899264, 'steps': 42771, 'loss/train': 1.6714845895767212} +03/05/2022 15:45:09 - INFO - codeparrot_training - Step 42772: {'lr': 0.00041207128240633906, 'samples': 21899776, 'steps': 42772, 'loss/train': 1.1540937423706055} +03/05/2022 15:45:10 - INFO - codeparrot_training - Skipping example with length 854 (seq_length=1024) +03/05/2022 15:45:15 - INFO - codeparrot_training - Step 42773: {'lr': 0.0004120672418271184, 'samples': 21900288, 'steps': 42773, 'loss/train': 1.5228469371795654} +03/05/2022 15:45:18 - INFO - codeparrot_training - Step 42774: {'lr': 0.0004120632011748728, 'samples': 21900800, 'steps': 42774, 'loss/train': 1.5609588623046875} +03/05/2022 15:45:18 - INFO - codeparrot_training - Skipping example with length 846 (seq_length=1024) +03/05/2022 15:45:23 - INFO - codeparrot_training - Step 42775: {'lr': 0.00041205916044960406, 'samples': 21901312, 'steps': 42775, 'loss/train': 1.924135684967041} +03/05/2022 15:45:26 - INFO - codeparrot_training - Step 42776: {'lr': 0.0004120551196513139, 'samples': 21901824, 'steps': 42776, 'loss/train': 1.5275659561157227} +03/05/2022 15:45:26 - INFO - codeparrot_training - Skipping example with length 768 (seq_length=1024) +03/05/2022 15:45:31 - INFO - codeparrot_training - Step 42777: {'lr': 0.0004120510787800042, 'samples': 21902336, 'steps': 42777, 'loss/train': 1.9788434505462646} +03/05/2022 15:45:35 - INFO - codeparrot_training - Step 42778: {'lr': 0.0004120470378356768, 'samples': 21902848, 'steps': 42778, 'loss/train': 1.619396448135376} +03/05/2022 15:45:35 - INFO - codeparrot_training - Skipping example with length 810 (seq_length=1024) +03/05/2022 15:45:40 - INFO - codeparrot_training - Step 42779: {'lr': 0.00041204299681833344, 'samples': 21903360, 'steps': 42779, 'loss/train': 2.596681594848633} +03/05/2022 15:45:43 - INFO - codeparrot_training - Step 42780: {'lr': 0.00041203895572797613, 'samples': 21903872, 'steps': 42780, 'loss/train': 2.020738363265991} +03/05/2022 15:45:43 - INFO - codeparrot_training - Skipping example with length 552 (seq_length=1024) +03/05/2022 15:45:48 - INFO - codeparrot_training - Step 42781: {'lr': 0.00041203491456460653, 'samples': 21904384, 'steps': 42781, 'loss/train': 1.425352931022644} +03/05/2022 15:45:52 - INFO - codeparrot_training - Step 42782: {'lr': 0.00041203087332822644, 'samples': 21904896, 'steps': 42782, 'loss/train': 1.8406902551651} +03/05/2022 15:45:52 - INFO - codeparrot_training - Skipping example with length 537 (seq_length=1024) +03/05/2022 15:45:57 - INFO - codeparrot_training - Step 42783: {'lr': 0.0004120268320188378, 'samples': 21905408, 'steps': 42783, 'loss/train': 1.4105212688446045} +03/05/2022 15:46:00 - INFO - codeparrot_training - Step 42784: {'lr': 0.00041202279063644234, 'samples': 21905920, 'steps': 42784, 'loss/train': 2.1503026485443115} +03/05/2022 15:46:00 - INFO - codeparrot_training - Skipping example with length 1017 (seq_length=1024) +03/05/2022 15:46:05 - INFO - codeparrot_training - Step 42785: {'lr': 0.00041201874918104185, 'samples': 21906432, 'steps': 42785, 'loss/train': 0.555250346660614} +03/05/2022 15:46:09 - INFO - codeparrot_training - Step 42786: {'lr': 0.0004120147076526383, 'samples': 21906944, 'steps': 42786, 'loss/train': 1.325240969657898} +03/05/2022 15:46:09 - INFO - codeparrot_training - Skipping example with length 966 (seq_length=1024) +03/05/2022 15:46:14 - INFO - codeparrot_training - Step 42787: {'lr': 0.0004120106660512334, 'samples': 21907456, 'steps': 42787, 'loss/train': 1.6578264236450195} +03/05/2022 15:46:17 - INFO - codeparrot_training - Skipping example with length 131 (seq_length=1024) +03/05/2022 15:46:19 - INFO - codeparrot_training - Step 42788: {'lr': 0.000412006624376829, 'samples': 21907968, 'steps': 42788, 'loss/train': 1.7829493284225464} +03/05/2022 15:46:22 - INFO - codeparrot_training - Step 42789: {'lr': 0.0004120025826294269, 'samples': 21908480, 'steps': 42789, 'loss/train': 1.9578344821929932} +03/05/2022 15:46:25 - INFO - codeparrot_training - Skipping example with length 100 (seq_length=1024) +03/05/2022 15:46:28 - INFO - codeparrot_training - Step 42790: {'lr': 0.00041199854080902897, 'samples': 21908992, 'steps': 42790, 'loss/train': 1.5284433364868164} +03/05/2022 15:46:31 - INFO - codeparrot_training - Step 42791: {'lr': 0.00041199449891563694, 'samples': 21909504, 'steps': 42791, 'loss/train': 1.8284574747085571} +03/05/2022 15:46:34 - INFO - codeparrot_training - Skipping example with length 655 (seq_length=1024) +03/05/2022 15:46:36 - INFO - codeparrot_training - Step 42792: {'lr': 0.00041199045694925273, 'samples': 21910016, 'steps': 42792, 'loss/train': 1.6553776264190674} +03/05/2022 15:46:39 - INFO - codeparrot_training - Step 42793: {'lr': 0.0004119864149098781, 'samples': 21910528, 'steps': 42793, 'loss/train': 2.029451608657837} +03/05/2022 15:46:42 - INFO - codeparrot_training - Skipping example with length 236 (seq_length=1024) +03/05/2022 15:46:45 - INFO - codeparrot_training - Step 42794: {'lr': 0.0004119823727975149, 'samples': 21911040, 'steps': 42794, 'loss/train': 1.702526569366455} +03/05/2022 15:46:48 - INFO - codeparrot_training - Step 42795: {'lr': 0.00041197833061216494, 'samples': 21911552, 'steps': 42795, 'loss/train': 1.8386863470077515} +03/05/2022 15:46:51 - INFO - codeparrot_training - Step 42796: {'lr': 0.00041197428835383, 'samples': 21912064, 'steps': 42796, 'loss/train': 0.16086485981941223} +03/05/2022 15:46:51 - INFO - codeparrot_training - Skipping example with length 294 (seq_length=1024) +03/05/2022 15:46:56 - INFO - codeparrot_training - Step 42797: {'lr': 0.00041197024602251204, 'samples': 21912576, 'steps': 42797, 'loss/train': 2.0274221897125244} +03/05/2022 15:46:59 - INFO - codeparrot_training - Step 42798: {'lr': 0.0004119662036182127, 'samples': 21913088, 'steps': 42798, 'loss/train': 0.7845781445503235} +03/05/2022 15:47:00 - INFO - codeparrot_training - Skipping example with length 796 (seq_length=1024) +03/05/2022 15:47:05 - INFO - codeparrot_training - Step 42799: {'lr': 0.00041196216114093397, 'samples': 21913600, 'steps': 42799, 'loss/train': 1.8740154504776} +03/05/2022 15:47:08 - INFO - codeparrot_training - Step 42800: {'lr': 0.00041195811859067756, 'samples': 21914112, 'steps': 42800, 'loss/train': 1.9704105854034424} +03/05/2022 15:47:08 - INFO - codeparrot_training - Skipping example with length 944 (seq_length=1024) +03/05/2022 15:47:13 - INFO - codeparrot_training - Step 42801: {'lr': 0.0004119540759674453, 'samples': 21914624, 'steps': 42801, 'loss/train': 1.5315043926239014} +03/05/2022 15:47:17 - INFO - codeparrot_training - Step 42802: {'lr': 0.000411950033271239, 'samples': 21915136, 'steps': 42802, 'loss/train': 1.0004606246948242} +03/05/2022 15:47:17 - INFO - codeparrot_training - Skipping example with length 231 (seq_length=1024) +03/05/2022 15:47:22 - INFO - codeparrot_training - Step 42803: {'lr': 0.0004119459905020606, 'samples': 21915648, 'steps': 42803, 'loss/train': 2.0930356979370117} +03/05/2022 15:47:25 - INFO - codeparrot_training - Step 42804: {'lr': 0.0004119419476599118, 'samples': 21916160, 'steps': 42804, 'loss/train': 1.023355484008789} +03/05/2022 15:47:26 - INFO - codeparrot_training - Skipping example with length 866 (seq_length=1024) +03/05/2022 15:47:30 - INFO - codeparrot_training - Step 42805: {'lr': 0.0004119379047447944, 'samples': 21916672, 'steps': 42805, 'loss/train': 0.625810444355011} +03/05/2022 15:47:34 - INFO - codeparrot_training - Step 42806: {'lr': 0.00041193386175671033, 'samples': 21917184, 'steps': 42806, 'loss/train': 1.6625497341156006} +03/05/2022 15:47:34 - INFO - codeparrot_training - Skipping example with length 977 (seq_length=1024) +03/05/2022 15:47:39 - INFO - codeparrot_training - Step 42807: {'lr': 0.0004119298186956613, 'samples': 21917696, 'steps': 42807, 'loss/train': 2.101712226867676} +03/05/2022 15:47:42 - INFO - codeparrot_training - Step 42808: {'lr': 0.00041192577556164924, 'samples': 21918208, 'steps': 42808, 'loss/train': 1.9134345054626465} +03/05/2022 15:47:42 - INFO - codeparrot_training - Skipping example with length 887 (seq_length=1024) +03/05/2022 15:47:47 - INFO - codeparrot_training - Step 42809: {'lr': 0.000411921732354676, 'samples': 21918720, 'steps': 42809, 'loss/train': 1.7600204944610596} +03/05/2022 15:47:50 - INFO - codeparrot_training - Skipping example with length 185 (seq_length=1024) +03/05/2022 15:47:53 - INFO - codeparrot_training - Step 42810: {'lr': 0.00041191768907474326, 'samples': 21919232, 'steps': 42810, 'loss/train': 1.8339204788208008} +03/05/2022 15:47:56 - INFO - codeparrot_training - Step 42811: {'lr': 0.00041191364572185286, 'samples': 21919744, 'steps': 42811, 'loss/train': 2.1058924198150635} +03/05/2022 15:47:59 - INFO - codeparrot_training - Skipping example with length 176 (seq_length=1024) +03/05/2022 15:48:01 - INFO - codeparrot_training - Step 42812: {'lr': 0.0004119096022960067, 'samples': 21920256, 'steps': 42812, 'loss/train': 1.8266011476516724} +03/05/2022 15:48:04 - INFO - codeparrot_training - Step 42813: {'lr': 0.0004119055587972066, 'samples': 21920768, 'steps': 42813, 'loss/train': 1.161710500717163} +03/05/2022 15:48:07 - INFO - codeparrot_training - Skipping example with length 593 (seq_length=1024) +03/05/2022 15:48:10 - INFO - codeparrot_training - Step 42814: {'lr': 0.0004119015152254543, 'samples': 21921280, 'steps': 42814, 'loss/train': 1.6253845691680908} +03/05/2022 15:48:13 - INFO - codeparrot_training - Step 42815: {'lr': 0.00041189747158075176, 'samples': 21921792, 'steps': 42815, 'loss/train': 1.743550181388855} +03/05/2022 15:48:16 - INFO - codeparrot_training - Step 42816: {'lr': 0.00041189342786310067, 'samples': 21922304, 'steps': 42816, 'loss/train': 0.4571826159954071} +03/05/2022 15:48:17 - INFO - codeparrot_training - Skipping example with length 835 (seq_length=1024) +03/05/2022 15:48:22 - INFO - codeparrot_training - Step 42817: {'lr': 0.0004118893840725029, 'samples': 21922816, 'steps': 42817, 'loss/train': 0.9150810241699219} +03/05/2022 15:48:25 - INFO - codeparrot_training - Step 42818: {'lr': 0.0004118853402089603, 'samples': 21923328, 'steps': 42818, 'loss/train': 2.244621992111206} +03/05/2022 15:48:25 - INFO - codeparrot_training - Skipping example with length 626 (seq_length=1024) +03/05/2022 15:48:30 - INFO - codeparrot_training - Step 42819: {'lr': 0.0004118812962724746, 'samples': 21923840, 'steps': 42819, 'loss/train': 1.1738590002059937} +03/05/2022 15:48:33 - INFO - codeparrot_training - Step 42820: {'lr': 0.00041187725226304775, 'samples': 21924352, 'steps': 42820, 'loss/train': 2.191650152206421} +03/05/2022 15:48:34 - INFO - codeparrot_training - Skipping example with length 753 (seq_length=1024) +03/05/2022 15:48:38 - INFO - codeparrot_training - Step 42821: {'lr': 0.0004118732081806814, 'samples': 21924864, 'steps': 42821, 'loss/train': 1.415942668914795} +03/05/2022 15:48:42 - INFO - codeparrot_training - Step 42822: {'lr': 0.0004118691640253777, 'samples': 21925376, 'steps': 42822, 'loss/train': 1.2382804155349731} +03/05/2022 15:48:42 - INFO - codeparrot_training - Skipping example with length 776 (seq_length=1024) +03/05/2022 15:48:47 - INFO - codeparrot_training - Step 42823: {'lr': 0.00041186511979713806, 'samples': 21925888, 'steps': 42823, 'loss/train': 1.3007097244262695} +03/05/2022 15:48:51 - INFO - codeparrot_training - Step 42824: {'lr': 0.00041186107549596453, 'samples': 21926400, 'steps': 42824, 'loss/train': 2.3830666542053223} +03/05/2022 15:48:53 - INFO - codeparrot_training - Skipping example with length 990 (seq_length=1024) +03/05/2022 15:48:56 - INFO - codeparrot_training - Step 42825: {'lr': 0.0004118570311218589, 'samples': 21926912, 'steps': 42825, 'loss/train': 1.7277246713638306} +03/05/2022 15:48:59 - INFO - codeparrot_training - Step 42826: {'lr': 0.00041185298667482294, 'samples': 21927424, 'steps': 42826, 'loss/train': 1.489262580871582} +03/05/2022 15:49:02 - INFO - codeparrot_training - Skipping example with length 207 (seq_length=1024) +03/05/2022 15:49:04 - INFO - codeparrot_training - Step 42827: {'lr': 0.0004118489421548586, 'samples': 21927936, 'steps': 42827, 'loss/train': 1.666790246963501} +03/05/2022 15:49:08 - INFO - codeparrot_training - Step 42828: {'lr': 0.00041184489756196764, 'samples': 21928448, 'steps': 42828, 'loss/train': 1.3520114421844482} +03/05/2022 15:49:10 - INFO - codeparrot_training - Skipping example with length 47 (seq_length=1024) +03/05/2022 15:49:13 - INFO - codeparrot_training - Step 42829: {'lr': 0.0004118408528961519, 'samples': 21928960, 'steps': 42829, 'loss/train': 1.8272122144699097} +03/05/2022 15:49:16 - INFO - codeparrot_training - Step 42830: {'lr': 0.00041183680815741307, 'samples': 21929472, 'steps': 42830, 'loss/train': 1.4889106750488281} +03/05/2022 15:49:18 - INFO - codeparrot_training - Skipping example with length 202 (seq_length=1024) +03/05/2022 15:49:21 - INFO - codeparrot_training - Step 42831: {'lr': 0.0004118327633457531, 'samples': 21929984, 'steps': 42831, 'loss/train': 2.300328016281128} +03/05/2022 15:49:24 - INFO - codeparrot_training - Step 42832: {'lr': 0.00041182871846117373, 'samples': 21930496, 'steps': 42832, 'loss/train': 1.9306714534759521} +03/05/2022 15:49:27 - INFO - codeparrot_training - Skipping example with length 753 (seq_length=1024) +03/05/2022 15:49:30 - INFO - codeparrot_training - Step 42833: {'lr': 0.0004118246735036769, 'samples': 21931008, 'steps': 42833, 'loss/train': 0.5739752054214478} +03/05/2022 15:49:33 - INFO - codeparrot_training - Step 42834: {'lr': 0.0004118206284732644, 'samples': 21931520, 'steps': 42834, 'loss/train': 1.9829003810882568} +03/05/2022 15:49:35 - INFO - codeparrot_training - Skipping example with length 831 (seq_length=1024) +03/05/2022 15:49:38 - INFO - codeparrot_training - Step 42835: {'lr': 0.000411816583369938, 'samples': 21932032, 'steps': 42835, 'loss/train': 1.2517482042312622} +03/05/2022 15:49:41 - INFO - codeparrot_training - Step 42836: {'lr': 0.0004118125381936996, 'samples': 21932544, 'steps': 42836, 'loss/train': 1.5283795595169067} +03/05/2022 15:49:44 - INFO - codeparrot_training - Skipping example with length 940 (seq_length=1024) +03/05/2022 15:49:47 - INFO - codeparrot_training - Step 42837: {'lr': 0.0004118084929445508, 'samples': 21933056, 'steps': 42837, 'loss/train': 0.88991779088974} +03/05/2022 15:49:50 - INFO - codeparrot_training - Step 42838: {'lr': 0.0004118044476224937, 'samples': 21933568, 'steps': 42838, 'loss/train': 1.7270742654800415} +03/05/2022 15:49:52 - INFO - codeparrot_training - Skipping example with length 122 (seq_length=1024) +03/05/2022 15:49:55 - INFO - codeparrot_training - Step 42839: {'lr': 0.00041180040222753, 'samples': 21934080, 'steps': 42839, 'loss/train': 1.8535641431808472} +03/05/2022 15:49:58 - INFO - codeparrot_training - Step 42840: {'lr': 0.00041179635675966155, 'samples': 21934592, 'steps': 42840, 'loss/train': 1.3298977613449097} +03/05/2022 15:50:01 - INFO - codeparrot_training - Skipping example with length 1013 (seq_length=1024) +03/05/2022 15:50:03 - INFO - codeparrot_training - Step 42841: {'lr': 0.00041179231121889014, 'samples': 21935104, 'steps': 42841, 'loss/train': 1.8178684711456299} +03/05/2022 15:50:07 - INFO - codeparrot_training - Step 42842: {'lr': 0.0004117882656052176, 'samples': 21935616, 'steps': 42842, 'loss/train': 2.0543134212493896} +03/05/2022 15:50:09 - INFO - codeparrot_training - Skipping example with length 460 (seq_length=1024) +03/05/2022 15:50:12 - INFO - codeparrot_training - Step 42843: {'lr': 0.0004117842199186458, 'samples': 21936128, 'steps': 42843, 'loss/train': 1.6085585355758667} +03/05/2022 15:50:15 - INFO - codeparrot_training - Step 42844: {'lr': 0.00041178017415917655, 'samples': 21936640, 'steps': 42844, 'loss/train': 1.1796447038650513} +03/05/2022 15:50:17 - INFO - codeparrot_training - Skipping example with length 435 (seq_length=1024) +03/05/2022 15:50:20 - INFO - codeparrot_training - Step 42845: {'lr': 0.00041177612832681156, 'samples': 21937152, 'steps': 42845, 'loss/train': 1.789637565612793} +03/05/2022 15:50:23 - INFO - codeparrot_training - Step 42846: {'lr': 0.00041177208242155285, 'samples': 21937664, 'steps': 42846, 'loss/train': 3.184438943862915} +03/05/2022 15:50:26 - INFO - codeparrot_training - Skipping example with length 734 (seq_length=1024) +03/05/2022 15:50:29 - INFO - codeparrot_training - Step 42847: {'lr': 0.000411768036443402, 'samples': 21938176, 'steps': 42847, 'loss/train': 1.4647876024246216} +03/05/2022 15:50:32 - INFO - codeparrot_training - Step 42848: {'lr': 0.0004117639903923611, 'samples': 21938688, 'steps': 42848, 'loss/train': 1.1802353858947754} +03/05/2022 15:50:34 - INFO - codeparrot_training - Skipping example with length 888 (seq_length=1024) +03/05/2022 15:50:37 - INFO - codeparrot_training - Step 42849: {'lr': 0.00041175994426843177, 'samples': 21939200, 'steps': 42849, 'loss/train': 1.9439866542816162} +03/05/2022 15:50:40 - INFO - codeparrot_training - Step 42850: {'lr': 0.00041175589807161597, 'samples': 21939712, 'steps': 42850, 'loss/train': 1.4544365406036377} +03/05/2022 15:50:43 - INFO - codeparrot_training - Skipping example with length 358 (seq_length=1024) +03/05/2022 15:50:46 - INFO - codeparrot_training - Step 42851: {'lr': 0.0004117518518019154, 'samples': 21940224, 'steps': 42851, 'loss/train': 1.6293222904205322} +03/05/2022 15:50:49 - INFO - codeparrot_training - Step 42852: {'lr': 0.00041174780545933195, 'samples': 21940736, 'steps': 42852, 'loss/train': 1.5851179361343384} +03/05/2022 15:50:52 - INFO - codeparrot_training - Step 42853: {'lr': 0.0004117437590438674, 'samples': 21941248, 'steps': 42853, 'loss/train': 0.5099883675575256} +03/05/2022 15:50:52 - INFO - codeparrot_training - Skipping example with length 851 (seq_length=1024) +03/05/2022 15:50:58 - INFO - codeparrot_training - Step 42854: {'lr': 0.0004117397125555237, 'samples': 21941760, 'steps': 42854, 'loss/train': 1.437559723854065} +03/05/2022 15:51:01 - INFO - codeparrot_training - Step 42855: {'lr': 0.00041173566599430245, 'samples': 21942272, 'steps': 42855, 'loss/train': 1.9179950952529907} +03/05/2022 15:51:01 - INFO - codeparrot_training - Skipping example with length 281 (seq_length=1024) +03/05/2022 15:51:06 - INFO - codeparrot_training - Step 42856: {'lr': 0.00041173161936020573, 'samples': 21942784, 'steps': 42856, 'loss/train': 2.434746265411377} +03/05/2022 15:51:09 - INFO - codeparrot_training - Step 42857: {'lr': 0.0004117275726532352, 'samples': 21943296, 'steps': 42857, 'loss/train': 1.7837374210357666} +03/05/2022 15:51:09 - INFO - codeparrot_training - Skipping example with length 83 (seq_length=1024) +03/05/2022 15:51:15 - INFO - codeparrot_training - Step 42858: {'lr': 0.0004117235258733927, 'samples': 21943808, 'steps': 42858, 'loss/train': 1.0763053894042969} +03/05/2022 15:51:17 - INFO - codeparrot_training - Skipping example with length 241 (seq_length=1024) +03/05/2022 15:51:20 - INFO - codeparrot_training - Step 42859: {'lr': 0.00041171947902068006, 'samples': 21944320, 'steps': 42859, 'loss/train': 1.4975147247314453} +03/05/2022 15:51:23 - INFO - codeparrot_training - Step 42860: {'lr': 0.00041171543209509923, 'samples': 21944832, 'steps': 42860, 'loss/train': 1.8779345750808716} +03/05/2022 15:51:26 - INFO - codeparrot_training - Step 42861: {'lr': 0.0004117113850966517, 'samples': 21945344, 'steps': 42861, 'loss/train': 1.1424118280410767} +03/05/2022 15:51:27 - INFO - codeparrot_training - Skipping example with length 288 (seq_length=1024) +03/05/2022 15:51:32 - INFO - codeparrot_training - Step 42862: {'lr': 0.00041170733802533974, 'samples': 21945856, 'steps': 42862, 'loss/train': 1.5835983753204346} +03/05/2022 15:51:35 - INFO - codeparrot_training - Step 42863: {'lr': 0.0004117032908811649, 'samples': 21946368, 'steps': 42863, 'loss/train': 1.9538789987564087} +03/05/2022 15:51:36 - INFO - codeparrot_training - Skipping example with length 48 (seq_length=1024) +03/05/2022 15:51:40 - INFO - codeparrot_training - Step 42864: {'lr': 0.000411699243664129, 'samples': 21946880, 'steps': 42864, 'loss/train': 1.421123743057251} +03/05/2022 15:51:43 - INFO - codeparrot_training - Step 42865: {'lr': 0.00041169519637423394, 'samples': 21947392, 'steps': 42865, 'loss/train': 1.6040817499160767} +03/05/2022 15:51:44 - INFO - codeparrot_training - Skipping example with length 58 (seq_length=1024) +03/05/2022 15:51:49 - INFO - codeparrot_training - Step 42866: {'lr': 0.0004116911490114815, 'samples': 21947904, 'steps': 42866, 'loss/train': 1.2416244745254517} +03/05/2022 15:51:52 - INFO - codeparrot_training - Step 42867: {'lr': 0.0004116871015758735, 'samples': 21948416, 'steps': 42867, 'loss/train': 1.6972624063491821} +03/05/2022 15:51:52 - INFO - codeparrot_training - Skipping example with length 541 (seq_length=1024) +03/05/2022 15:51:57 - INFO - codeparrot_training - Step 42868: {'lr': 0.0004116830540674118, 'samples': 21948928, 'steps': 42868, 'loss/train': 1.759318470954895} +03/05/2022 15:52:00 - INFO - codeparrot_training - Step 42869: {'lr': 0.00041167900648609825, 'samples': 21949440, 'steps': 42869, 'loss/train': 0.9126895070075989} +03/05/2022 15:52:01 - INFO - codeparrot_training - Skipping example with length 1012 (seq_length=1024) +03/05/2022 15:52:05 - INFO - codeparrot_training - Step 42870: {'lr': 0.00041167495883193464, 'samples': 21949952, 'steps': 42870, 'loss/train': 2.0677406787872314} +03/05/2022 15:52:09 - INFO - codeparrot_training - Step 42871: {'lr': 0.00041167091110492273, 'samples': 21950464, 'steps': 42871, 'loss/train': 2.0678718090057373} +03/05/2022 15:52:09 - INFO - codeparrot_training - Skipping example with length 554 (seq_length=1024) +03/05/2022 15:52:14 - INFO - codeparrot_training - Step 42872: {'lr': 0.0004116668633050644, 'samples': 21950976, 'steps': 42872, 'loss/train': 0.8636013865470886} +03/05/2022 15:52:17 - INFO - codeparrot_training - Step 42873: {'lr': 0.0004116628154323616, 'samples': 21951488, 'steps': 42873, 'loss/train': 1.210692048072815} +03/05/2022 15:52:18 - INFO - codeparrot_training - Skipping example with length 387 (seq_length=1024) +03/05/2022 15:52:22 - INFO - codeparrot_training - Step 42874: {'lr': 0.0004116587674868159, 'samples': 21952000, 'steps': 42874, 'loss/train': 1.8437429666519165} +03/05/2022 15:52:25 - INFO - codeparrot_training - Step 42875: {'lr': 0.00041165471946842924, 'samples': 21952512, 'steps': 42875, 'loss/train': 1.5773921012878418} +03/05/2022 15:52:31 - INFO - codeparrot_training - Step 42876: {'lr': 0.00041165067137720356, 'samples': 21953024, 'steps': 42876, 'loss/train': 2.0570290088653564} +03/05/2022 15:52:34 - INFO - codeparrot_training - Step 42877: {'lr': 0.00041164662321314054, 'samples': 21953536, 'steps': 42877, 'loss/train': 1.7495417594909668} +03/05/2022 15:52:35 - INFO - codeparrot_training - Skipping example with length 958 (seq_length=1024) +03/05/2022 15:52:39 - INFO - codeparrot_training - Step 42878: {'lr': 0.000411642574976242, 'samples': 21954048, 'steps': 42878, 'loss/train': 1.7820298671722412} +03/05/2022 15:52:42 - INFO - codeparrot_training - Step 42879: {'lr': 0.0004116385266665099, 'samples': 21954560, 'steps': 42879, 'loss/train': 1.6945050954818726} +03/05/2022 15:52:43 - INFO - codeparrot_training - Skipping example with length 85 (seq_length=1024) +03/05/2022 15:52:48 - INFO - codeparrot_training - Step 42880: {'lr': 0.0004116344782839459, 'samples': 21955072, 'steps': 42880, 'loss/train': 1.6115431785583496} +03/05/2022 15:52:51 - INFO - codeparrot_training - Step 42881: {'lr': 0.00041163042982855194, 'samples': 21955584, 'steps': 42881, 'loss/train': 2.0423243045806885} +03/05/2022 15:52:52 - INFO - codeparrot_training - Skipping example with length 841 (seq_length=1024) +03/05/2022 15:52:56 - INFO - codeparrot_training - Step 42882: {'lr': 0.00041162638130032975, 'samples': 21956096, 'steps': 42882, 'loss/train': 1.9637259244918823} +03/05/2022 15:52:59 - INFO - codeparrot_training - Step 42883: {'lr': 0.00041162233269928126, 'samples': 21956608, 'steps': 42883, 'loss/train': 1.4328540563583374} +03/05/2022 15:53:00 - INFO - codeparrot_training - Skipping example with length 668 (seq_length=1024) +03/05/2022 15:53:05 - INFO - codeparrot_training - Step 42884: {'lr': 0.0004116182840254082, 'samples': 21957120, 'steps': 42884, 'loss/train': 1.6122262477874756} +03/05/2022 15:53:08 - INFO - codeparrot_training - Step 42885: {'lr': 0.0004116142352787125, 'samples': 21957632, 'steps': 42885, 'loss/train': 1.80718994140625} +03/05/2022 15:53:09 - INFO - codeparrot_training - Skipping example with length 520 (seq_length=1024) +03/05/2022 15:53:13 - INFO - codeparrot_training - Step 42886: {'lr': 0.00041161018645919593, 'samples': 21958144, 'steps': 42886, 'loss/train': 1.8426876068115234} +03/05/2022 15:53:17 - INFO - codeparrot_training - Step 42887: {'lr': 0.00041160613756686015, 'samples': 21958656, 'steps': 42887, 'loss/train': 1.6919313669204712} +03/05/2022 15:53:17 - INFO - codeparrot_training - Skipping example with length 858 (seq_length=1024) +03/05/2022 15:53:22 - INFO - codeparrot_training - Step 42888: {'lr': 0.00041160208860170725, 'samples': 21959168, 'steps': 42888, 'loss/train': 1.940449595451355} +03/05/2022 15:53:25 - INFO - codeparrot_training - Step 42889: {'lr': 0.000411598039563739, 'samples': 21959680, 'steps': 42889, 'loss/train': 2.4017205238342285} +03/05/2022 15:53:26 - INFO - codeparrot_training - Skipping example with length 141 (seq_length=1024) +03/05/2022 15:53:30 - INFO - codeparrot_training - Step 42890: {'lr': 0.0004115939904529571, 'samples': 21960192, 'steps': 42890, 'loss/train': 1.8484723567962646} +03/05/2022 15:53:33 - INFO - codeparrot_training - Step 42891: {'lr': 0.00041158994126936347, 'samples': 21960704, 'steps': 42891, 'loss/train': 2.2017900943756104} +03/05/2022 15:53:34 - INFO - codeparrot_training - Skipping example with length 507 (seq_length=1024) +03/05/2022 15:53:39 - INFO - codeparrot_training - Step 42892: {'lr': 0.0004115858920129598, 'samples': 21961216, 'steps': 42892, 'loss/train': 1.8025238513946533} +03/05/2022 15:53:42 - INFO - codeparrot_training - Step 42893: {'lr': 0.0004115818426837481, 'samples': 21961728, 'steps': 42893, 'loss/train': 2.434748649597168} +03/05/2022 15:53:42 - INFO - codeparrot_training - Skipping example with length 44 (seq_length=1024) +03/05/2022 15:53:47 - INFO - codeparrot_training - Step 42894: {'lr': 0.0004115777932817301, 'samples': 21962240, 'steps': 42894, 'loss/train': 0.6370025873184204} +03/05/2022 15:53:50 - INFO - codeparrot_training - Step 42895: {'lr': 0.00041157374380690765, 'samples': 21962752, 'steps': 42895, 'loss/train': 0.6113659143447876} +03/05/2022 15:53:50 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/05/2022 15:53:55 - INFO - codeparrot_training - Step 42896: {'lr': 0.0004115696942592826, 'samples': 21963264, 'steps': 42896, 'loss/train': 2.404186964035034} +03/05/2022 15:53:59 - INFO - codeparrot_training - Step 42897: {'lr': 0.0004115656446388567, 'samples': 21963776, 'steps': 42897, 'loss/train': 1.7795010805130005} +03/05/2022 15:53:59 - INFO - codeparrot_training - Skipping example with length 68 (seq_length=1024) +03/05/2022 15:54:04 - INFO - codeparrot_training - Step 42898: {'lr': 0.00041156159494563183, 'samples': 21964288, 'steps': 42898, 'loss/train': 1.865553855895996} +03/05/2022 15:54:07 - INFO - codeparrot_training - Step 42899: {'lr': 0.00041155754517960974, 'samples': 21964800, 'steps': 42899, 'loss/train': 1.6442469358444214} +03/05/2022 15:54:07 - INFO - codeparrot_training - Skipping example with length 692 (seq_length=1024) +03/05/2022 15:54:12 - INFO - codeparrot_training - Step 42900: {'lr': 0.00041155349534079236, 'samples': 21965312, 'steps': 42900, 'loss/train': 1.4499289989471436} +03/05/2022 15:54:15 - INFO - codeparrot_training - Skipping example with length 187 (seq_length=1024) +03/05/2022 15:54:18 - INFO - codeparrot_training - Step 42901: {'lr': 0.0004115494454291815, 'samples': 21965824, 'steps': 42901, 'loss/train': 1.2988823652267456} +03/05/2022 15:54:21 - INFO - codeparrot_training - Step 42902: {'lr': 0.0004115453954447789, 'samples': 21966336, 'steps': 42902, 'loss/train': 0.23858216404914856} +03/05/2022 15:54:24 - INFO - codeparrot_training - Step 42903: {'lr': 0.0004115413453875865, 'samples': 21966848, 'steps': 42903, 'loss/train': 1.2502669095993042} +03/05/2022 15:54:24 - INFO - codeparrot_training - Skipping example with length 916 (seq_length=1024) +03/05/2022 15:54:30 - INFO - codeparrot_training - Step 42904: {'lr': 0.000411537295257606, 'samples': 21967360, 'steps': 42904, 'loss/train': 1.7900233268737793} +03/05/2022 15:54:32 - INFO - codeparrot_training - Skipping example with length 495 (seq_length=1024) +03/05/2022 15:54:35 - INFO - codeparrot_training - Step 42905: {'lr': 0.00041153324505483933, 'samples': 21967872, 'steps': 42905, 'loss/train': 1.5736699104309082} +03/05/2022 15:54:38 - INFO - codeparrot_training - Step 42906: {'lr': 0.0004115291947792882, 'samples': 21968384, 'steps': 42906, 'loss/train': 1.9686124324798584} +03/05/2022 15:54:41 - INFO - codeparrot_training - Skipping example with length 748 (seq_length=1024) +03/05/2022 15:54:44 - INFO - codeparrot_training - Step 42907: {'lr': 0.00041152514443095454, 'samples': 21968896, 'steps': 42907, 'loss/train': 2.004068374633789} +03/05/2022 15:54:47 - INFO - codeparrot_training - Step 42908: {'lr': 0.00041152109400984015, 'samples': 21969408, 'steps': 42908, 'loss/train': 2.269676685333252} +03/05/2022 15:54:51 - INFO - codeparrot_training - Step 42909: {'lr': 0.0004115170435159469, 'samples': 21969920, 'steps': 42909, 'loss/train': 1.0077173709869385} +03/05/2022 15:54:54 - INFO - codeparrot_training - Step 42910: {'lr': 0.00041151299294927657, 'samples': 21970432, 'steps': 42910, 'loss/train': 2.0882325172424316} +03/05/2022 15:54:54 - INFO - codeparrot_training - Skipping example with length 280 (seq_length=1024) +03/05/2022 15:54:59 - INFO - codeparrot_training - Step 42911: {'lr': 0.0004115089423098309, 'samples': 21970944, 'steps': 42911, 'loss/train': 1.1544673442840576} +03/05/2022 15:55:02 - INFO - codeparrot_training - Step 42912: {'lr': 0.00041150489159761186, 'samples': 21971456, 'steps': 42912, 'loss/train': 1.2389448881149292} +03/05/2022 15:55:03 - INFO - codeparrot_training - Skipping example with length 850 (seq_length=1024) +03/05/2022 15:55:07 - INFO - codeparrot_training - Step 42913: {'lr': 0.00041150084081262105, 'samples': 21971968, 'steps': 42913, 'loss/train': 1.8110857009887695} +03/05/2022 15:55:11 - INFO - codeparrot_training - Step 42914: {'lr': 0.0004114967899548606, 'samples': 21972480, 'steps': 42914, 'loss/train': 2.040325403213501} +03/05/2022 15:55:11 - INFO - codeparrot_training - Skipping example with length 651 (seq_length=1024) +03/05/2022 15:55:16 - INFO - codeparrot_training - Step 42915: {'lr': 0.0004114927390243322, 'samples': 21972992, 'steps': 42915, 'loss/train': 1.530531406402588} +03/05/2022 15:55:19 - INFO - codeparrot_training - Step 42916: {'lr': 0.00041148868802103766, 'samples': 21973504, 'steps': 42916, 'loss/train': 1.8204318284988403} +03/05/2022 15:55:20 - INFO - codeparrot_training - Skipping example with length 669 (seq_length=1024) +03/05/2022 15:55:24 - INFO - codeparrot_training - Step 42917: {'lr': 0.00041148463694497874, 'samples': 21974016, 'steps': 42917, 'loss/train': 1.5080429315567017} +03/05/2022 15:55:28 - INFO - codeparrot_training - Step 42918: {'lr': 0.00041148058579615733, 'samples': 21974528, 'steps': 42918, 'loss/train': 1.2990598678588867} +03/05/2022 15:55:28 - INFO - codeparrot_training - Skipping example with length 900 (seq_length=1024) +03/05/2022 15:55:33 - INFO - codeparrot_training - Step 42919: {'lr': 0.00041147653457457534, 'samples': 21975040, 'steps': 42919, 'loss/train': 1.4375979900360107} +03/05/2022 15:55:36 - INFO - codeparrot_training - Step 42920: {'lr': 0.0004114724832802345, 'samples': 21975552, 'steps': 42920, 'loss/train': 0.32137230038642883} +03/05/2022 15:55:38 - INFO - codeparrot_training - Skipping example with length 10 (seq_length=1024) +03/05/2022 15:55:41 - INFO - codeparrot_training - Step 42921: {'lr': 0.0004114684319131366, 'samples': 21976064, 'steps': 42921, 'loss/train': 1.8669859170913696} +03/05/2022 15:55:45 - INFO - codeparrot_training - Step 42922: {'lr': 0.00041146438047328347, 'samples': 21976576, 'steps': 42922, 'loss/train': 1.2115890979766846} +03/05/2022 15:55:46 - INFO - codeparrot_training - Skipping example with length 845 (seq_length=1024) +03/05/2022 15:55:50 - INFO - codeparrot_training - Step 42923: {'lr': 0.0004114603289606771, 'samples': 21977088, 'steps': 42923, 'loss/train': 1.4692821502685547} +03/05/2022 15:55:53 - INFO - codeparrot_training - Step 42924: {'lr': 0.00041145627737531915, 'samples': 21977600, 'steps': 42924, 'loss/train': 1.4174588918685913} +03/05/2022 15:55:55 - INFO - codeparrot_training - Skipping example with length 686 (seq_length=1024) +03/05/2022 15:55:58 - INFO - codeparrot_training - Step 42925: {'lr': 0.0004114522257172115, 'samples': 21978112, 'steps': 42925, 'loss/train': 2.3464009761810303} +03/05/2022 15:56:01 - INFO - codeparrot_training - Step 42926: {'lr': 0.000411448173986356, 'samples': 21978624, 'steps': 42926, 'loss/train': 1.4296643733978271} +03/05/2022 15:56:03 - INFO - codeparrot_training - Skipping example with length 129 (seq_length=1024) +03/05/2022 15:56:07 - INFO - codeparrot_training - Step 42927: {'lr': 0.0004114441221827544, 'samples': 21979136, 'steps': 42927, 'loss/train': 2.573824644088745} +03/05/2022 15:56:10 - INFO - codeparrot_training - Step 42928: {'lr': 0.0004114400703064085, 'samples': 21979648, 'steps': 42928, 'loss/train': 1.8425463438034058} +03/05/2022 15:56:11 - INFO - codeparrot_training - Skipping example with length 68 (seq_length=1024) +03/05/2022 15:56:15 - INFO - codeparrot_training - Step 42929: {'lr': 0.0004114360183573203, 'samples': 21980160, 'steps': 42929, 'loss/train': 2.2028112411499023} +03/05/2022 15:56:18 - INFO - codeparrot_training - Step 42930: {'lr': 0.0004114319663354915, 'samples': 21980672, 'steps': 42930, 'loss/train': 1.9389923810958862} +03/05/2022 15:56:19 - INFO - codeparrot_training - Skipping example with length 20 (seq_length=1024) +03/05/2022 15:56:24 - INFO - codeparrot_training - Step 42931: {'lr': 0.000411427914240924, 'samples': 21981184, 'steps': 42931, 'loss/train': 1.643153429031372} +03/05/2022 15:56:27 - INFO - codeparrot_training - Step 42932: {'lr': 0.0004114238620736195, 'samples': 21981696, 'steps': 42932, 'loss/train': 1.4941760301589966} +03/05/2022 15:56:28 - INFO - codeparrot_training - Skipping example with length 888 (seq_length=1024) +03/05/2022 15:56:32 - INFO - codeparrot_training - Step 42933: {'lr': 0.00041141980983357986, 'samples': 21982208, 'steps': 42933, 'loss/train': 1.493929147720337} +03/05/2022 15:56:35 - INFO - codeparrot_training - Step 42934: {'lr': 0.000411415757520807, 'samples': 21982720, 'steps': 42934, 'loss/train': 2.0451931953430176} +03/05/2022 15:56:36 - INFO - codeparrot_training - Skipping example with length 118 (seq_length=1024) +03/05/2022 15:56:40 - INFO - codeparrot_training - Step 42935: {'lr': 0.00041141170513530267, 'samples': 21983232, 'steps': 42935, 'loss/train': 1.8647053241729736} +03/05/2022 15:56:44 - INFO - codeparrot_training - Step 42936: {'lr': 0.0004114076526770688, 'samples': 21983744, 'steps': 42936, 'loss/train': 2.3523550033569336} +03/05/2022 15:56:45 - INFO - codeparrot_training - Skipping example with length 63 (seq_length=1024) +03/05/2022 15:56:49 - INFO - codeparrot_training - Step 42937: {'lr': 0.000411403600146107, 'samples': 21984256, 'steps': 42937, 'loss/train': 2.0750224590301514} +03/05/2022 15:56:52 - INFO - codeparrot_training - Step 42938: {'lr': 0.0004113995475424193, 'samples': 21984768, 'steps': 42938, 'loss/train': 1.3506728410720825} +03/05/2022 15:56:53 - INFO - codeparrot_training - Skipping example with length 887 (seq_length=1024) +03/05/2022 15:56:57 - INFO - codeparrot_training - Step 42939: {'lr': 0.0004113954948660075, 'samples': 21985280, 'steps': 42939, 'loss/train': 2.0904605388641357} +03/05/2022 15:57:00 - INFO - codeparrot_training - Step 42940: {'lr': 0.00041139144211687327, 'samples': 21985792, 'steps': 42940, 'loss/train': 1.0733686685562134} +03/05/2022 15:57:01 - INFO - codeparrot_training - Skipping example with length 423 (seq_length=1024) +03/05/2022 15:57:06 - INFO - codeparrot_training - Step 42941: {'lr': 0.0004113873892950186, 'samples': 21986304, 'steps': 42941, 'loss/train': 1.2822695970535278} +03/05/2022 15:57:09 - INFO - codeparrot_training - Step 42942: {'lr': 0.00041138333640044523, 'samples': 21986816, 'steps': 42942, 'loss/train': 2.0626914501190186} +03/05/2022 15:57:10 - INFO - codeparrot_training - Skipping example with length 779 (seq_length=1024) +03/05/2022 15:57:15 - INFO - codeparrot_training - Step 42943: {'lr': 0.0004113792834331551, 'samples': 21987328, 'steps': 42943, 'loss/train': 2.151146650314331} +03/05/2022 15:57:18 - INFO - codeparrot_training - Step 42944: {'lr': 0.00041137523039314994, 'samples': 21987840, 'steps': 42944, 'loss/train': 1.973894476890564} +03/05/2022 15:57:20 - INFO - codeparrot_training - Skipping example with length 15 (seq_length=1024) +03/05/2022 15:57:23 - INFO - codeparrot_training - Step 42945: {'lr': 0.0004113711772804315, 'samples': 21988352, 'steps': 42945, 'loss/train': 1.8973215818405151} +03/05/2022 15:57:26 - INFO - codeparrot_training - Step 42946: {'lr': 0.0004113671240950018, 'samples': 21988864, 'steps': 42946, 'loss/train': 1.390800952911377} +03/05/2022 15:57:29 - INFO - codeparrot_training - Skipping example with length 527 (seq_length=1024) +03/05/2022 15:57:31 - INFO - codeparrot_training - Step 42947: {'lr': 0.0004113630708368625, 'samples': 21989376, 'steps': 42947, 'loss/train': 1.7099182605743408} +03/05/2022 15:57:35 - INFO - codeparrot_training - Step 42948: {'lr': 0.0004113590175060155, 'samples': 21989888, 'steps': 42948, 'loss/train': 1.5995155572891235} +03/05/2022 15:57:37 - INFO - codeparrot_training - Skipping example with length 224 (seq_length=1024) +03/05/2022 15:57:40 - INFO - codeparrot_training - Step 42949: {'lr': 0.00041135496410246264, 'samples': 21990400, 'steps': 42949, 'loss/train': 1.1366403102874756} +03/05/2022 15:57:43 - INFO - codeparrot_training - Step 42950: {'lr': 0.0004113509106262058, 'samples': 21990912, 'steps': 42950, 'loss/train': 1.5743422508239746} +03/05/2022 15:57:46 - INFO - codeparrot_training - Skipping example with length 398 (seq_length=1024) +03/05/2022 15:57:48 - INFO - codeparrot_training - Step 42951: {'lr': 0.00041134685707724656, 'samples': 21991424, 'steps': 42951, 'loss/train': 1.8833248615264893} +03/05/2022 15:57:51 - INFO - codeparrot_training - Step 42952: {'lr': 0.000411342803455587, 'samples': 21991936, 'steps': 42952, 'loss/train': 1.6218280792236328} +03/05/2022 15:57:54 - INFO - codeparrot_training - Skipping example with length 393 (seq_length=1024) +03/05/2022 15:57:57 - INFO - codeparrot_training - Step 42953: {'lr': 0.0004113387497612289, 'samples': 21992448, 'steps': 42953, 'loss/train': 1.8175263404846191} +03/05/2022 15:58:00 - INFO - codeparrot_training - Step 42954: {'lr': 0.00041133469599417393, 'samples': 21992960, 'steps': 42954, 'loss/train': 2.064639091491699} +03/05/2022 15:58:02 - INFO - codeparrot_training - Skipping example with length 547 (seq_length=1024) +03/05/2022 15:58:05 - INFO - codeparrot_training - Step 42955: {'lr': 0.00041133064215442415, 'samples': 21993472, 'steps': 42955, 'loss/train': 1.1247656345367432} +03/05/2022 15:58:08 - INFO - codeparrot_training - Step 42956: {'lr': 0.0004113265882419812, 'samples': 21993984, 'steps': 42956, 'loss/train': 1.4436148405075073} +03/05/2022 15:58:11 - INFO - codeparrot_training - Skipping example with length 137 (seq_length=1024) +03/05/2022 15:58:14 - INFO - codeparrot_training - Step 42957: {'lr': 0.0004113225342568471, 'samples': 21994496, 'steps': 42957, 'loss/train': 1.492915153503418} +03/05/2022 15:58:17 - INFO - codeparrot_training - Step 42958: {'lr': 0.00041131848019902343, 'samples': 21995008, 'steps': 42958, 'loss/train': 1.7489091157913208} +03/05/2022 15:58:19 - INFO - codeparrot_training - Skipping example with length 80 (seq_length=1024) +03/05/2022 15:58:22 - INFO - codeparrot_training - Step 42959: {'lr': 0.0004113144260685122, 'samples': 21995520, 'steps': 42959, 'loss/train': 1.3100528717041016} +03/05/2022 15:58:25 - INFO - codeparrot_training - Step 42960: {'lr': 0.00041131037186531514, 'samples': 21996032, 'steps': 42960, 'loss/train': 1.251444935798645} +03/05/2022 15:58:28 - INFO - codeparrot_training - Skipping example with length 82 (seq_length=1024) +03/05/2022 15:58:30 - INFO - codeparrot_training - Step 42961: {'lr': 0.00041130631758943414, 'samples': 21996544, 'steps': 42961, 'loss/train': 1.9012699127197266} +03/05/2022 15:58:33 - INFO - codeparrot_training - Step 42962: {'lr': 0.00041130226324087094, 'samples': 21997056, 'steps': 42962, 'loss/train': 1.7917362451553345} +03/05/2022 15:58:36 - INFO - codeparrot_training - Skipping example with length 134 (seq_length=1024) +03/05/2022 15:58:39 - INFO - codeparrot_training - Step 42963: {'lr': 0.00041129820881962754, 'samples': 21997568, 'steps': 42963, 'loss/train': 2.2283413410186768} +03/05/2022 15:58:42 - INFO - codeparrot_training - Step 42964: {'lr': 0.0004112941543257056, 'samples': 21998080, 'steps': 42964, 'loss/train': 1.889428734779358} +03/05/2022 15:58:46 - INFO - codeparrot_training - Step 42965: {'lr': 0.00041129009975910704, 'samples': 21998592, 'steps': 42965, 'loss/train': 0.26018401980400085} +03/05/2022 15:58:46 - INFO - codeparrot_training - Skipping example with length 793 (seq_length=1024) +03/05/2022 15:58:51 - INFO - codeparrot_training - Step 42966: {'lr': 0.00041128604511983356, 'samples': 21999104, 'steps': 42966, 'loss/train': 2.5363352298736572} +03/05/2022 15:58:54 - INFO - codeparrot_training - Step 42967: {'lr': 0.00041128199040788715, 'samples': 21999616, 'steps': 42967, 'loss/train': 1.6047049760818481} +03/05/2022 15:58:54 - INFO - codeparrot_training - Skipping example with length 367 (seq_length=1024) +03/05/2022 15:59:00 - INFO - codeparrot_training - Step 42968: {'lr': 0.00041127793562326955, 'samples': 22000128, 'steps': 42968, 'loss/train': 1.830857515335083} +03/05/2022 15:59:04 - INFO - codeparrot_training - Step 42969: {'lr': 0.0004112738807659826, 'samples': 22000640, 'steps': 42969, 'loss/train': 1.3830406665802002} +03/05/2022 15:59:07 - INFO - codeparrot_training - Step 42970: {'lr': 0.00041126982583602817, 'samples': 22001152, 'steps': 42970, 'loss/train': 2.480656147003174} +03/05/2022 15:59:08 - INFO - codeparrot_training - Skipping example with length 561 (seq_length=1024) +03/05/2022 15:59:12 - INFO - codeparrot_training - Step 42971: {'lr': 0.00041126577083340797, 'samples': 22001664, 'steps': 42971, 'loss/train': 0.6048572659492493} +03/05/2022 15:59:15 - INFO - codeparrot_training - Step 42972: {'lr': 0.000411261715758124, 'samples': 22002176, 'steps': 42972, 'loss/train': 1.6765623092651367} +03/05/2022 15:59:16 - INFO - codeparrot_training - Skipping example with length 81 (seq_length=1024) +03/05/2022 15:59:20 - INFO - codeparrot_training - Step 42973: {'lr': 0.0004112576606101779, 'samples': 22002688, 'steps': 42973, 'loss/train': 2.0490708351135254} +03/05/2022 15:59:23 - INFO - codeparrot_training - Step 42974: {'lr': 0.0004112536053895716, 'samples': 22003200, 'steps': 42974, 'loss/train': 2.15755033493042} +03/05/2022 15:59:25 - INFO - codeparrot_training - Skipping example with length 929 (seq_length=1024) +03/05/2022 15:59:29 - INFO - codeparrot_training - Step 42975: {'lr': 0.0004112495500963069, 'samples': 22003712, 'steps': 42975, 'loss/train': 1.8064204454421997} +03/05/2022 15:59:32 - INFO - codeparrot_training - Step 42976: {'lr': 0.00041124549473038564, 'samples': 22004224, 'steps': 42976, 'loss/train': 2.201092481613159} +03/05/2022 15:59:34 - INFO - codeparrot_training - Skipping example with length 914 (seq_length=1024) +03/05/2022 15:59:37 - INFO - codeparrot_training - Step 42977: {'lr': 0.0004112414392918097, 'samples': 22004736, 'steps': 42977, 'loss/train': 0.9857216477394104} +03/05/2022 15:59:41 - INFO - codeparrot_training - Step 42978: {'lr': 0.00041123738378058083, 'samples': 22005248, 'steps': 42978, 'loss/train': 1.519026756286621} +03/05/2022 15:59:43 - INFO - codeparrot_training - Skipping example with length 244 (seq_length=1024) +03/05/2022 15:59:46 - INFO - codeparrot_training - Step 42979: {'lr': 0.0004112333281967009, 'samples': 22005760, 'steps': 42979, 'loss/train': 1.5576121807098389} +03/05/2022 15:59:49 - INFO - codeparrot_training - Step 42980: {'lr': 0.00041122927254017173, 'samples': 22006272, 'steps': 42980, 'loss/train': 2.104839563369751} +03/05/2022 15:59:51 - INFO - codeparrot_training - Skipping example with length 347 (seq_length=1024) +03/05/2022 15:59:54 - INFO - codeparrot_training - Step 42981: {'lr': 0.0004112252168109951, 'samples': 22006784, 'steps': 42981, 'loss/train': 2.055675506591797} +03/05/2022 15:59:58 - INFO - codeparrot_training - Step 42982: {'lr': 0.0004112211610091728, 'samples': 22007296, 'steps': 42982, 'loss/train': 1.4533791542053223} +03/05/2022 16:00:00 - INFO - codeparrot_training - Skipping example with length 1000 (seq_length=1024) +03/05/2022 16:00:03 - INFO - codeparrot_training - Step 42983: {'lr': 0.0004112171051347069, 'samples': 22007808, 'steps': 42983, 'loss/train': 1.8485826253890991} +03/05/2022 16:00:06 - INFO - codeparrot_training - Step 42984: {'lr': 0.00041121304918759893, 'samples': 22008320, 'steps': 42984, 'loss/train': 2.1012234687805176} +03/05/2022 16:00:08 - INFO - codeparrot_training - Skipping example with length 75 (seq_length=1024) +03/05/2022 16:00:11 - INFO - codeparrot_training - Step 42985: {'lr': 0.00041120899316785095, 'samples': 22008832, 'steps': 42985, 'loss/train': 2.299145460128784} +03/05/2022 16:00:14 - INFO - codeparrot_training - Step 42986: {'lr': 0.00041120493707546456, 'samples': 22009344, 'steps': 42986, 'loss/train': 1.8893461227416992} +03/05/2022 16:00:16 - INFO - codeparrot_training - Skipping example with length 388 (seq_length=1024) +03/05/2022 16:00:20 - INFO - codeparrot_training - Step 42987: {'lr': 0.00041120088091044183, 'samples': 22009856, 'steps': 42987, 'loss/train': 2.12126088142395} +03/05/2022 16:00:23 - INFO - codeparrot_training - Step 42988: {'lr': 0.0004111968246727844, 'samples': 22010368, 'steps': 42988, 'loss/train': 1.972978949546814} +03/05/2022 16:00:25 - INFO - codeparrot_training - Skipping example with length 582 (seq_length=1024) +03/05/2022 16:00:28 - INFO - codeparrot_training - Step 42989: {'lr': 0.0004111927683624942, 'samples': 22010880, 'steps': 42989, 'loss/train': 1.6756961345672607} +03/05/2022 16:00:31 - INFO - codeparrot_training - Step 42990: {'lr': 0.00041118871197957306, 'samples': 22011392, 'steps': 42990, 'loss/train': 1.7594828605651855} +03/05/2022 16:00:33 - INFO - codeparrot_training - Skipping example with length 360 (seq_length=1024) +03/05/2022 16:00:37 - INFO - codeparrot_training - Step 42991: {'lr': 0.00041118465552402274, 'samples': 22011904, 'steps': 42991, 'loss/train': 2.122166395187378} +03/05/2022 16:00:40 - INFO - codeparrot_training - Step 42992: {'lr': 0.00041118059899584503, 'samples': 22012416, 'steps': 42992, 'loss/train': 1.690854549407959} +03/05/2022 16:00:42 - INFO - codeparrot_training - Skipping example with length 829 (seq_length=1024) +03/05/2022 16:00:45 - INFO - codeparrot_training - Step 42993: {'lr': 0.00041117654239504193, 'samples': 22012928, 'steps': 42993, 'loss/train': 1.6571182012557983} +03/05/2022 16:00:48 - INFO - codeparrot_training - Step 42994: {'lr': 0.0004111724857216151, 'samples': 22013440, 'steps': 42994, 'loss/train': 1.6810352802276611} +03/05/2022 16:00:50 - INFO - codeparrot_training - Skipping example with length 831 (seq_length=1024) +03/05/2022 16:00:54 - INFO - codeparrot_training - Step 42995: {'lr': 0.0004111684289755665, 'samples': 22013952, 'steps': 42995, 'loss/train': 1.724513292312622} +03/05/2022 16:00:57 - INFO - codeparrot_training - Step 42996: {'lr': 0.00041116437215689785, 'samples': 22014464, 'steps': 42996, 'loss/train': 1.050251841545105} +03/05/2022 16:00:58 - INFO - codeparrot_training - Skipping example with length 316 (seq_length=1024) +03/05/2022 16:01:02 - INFO - codeparrot_training - Step 42997: {'lr': 0.000411160315265611, 'samples': 22014976, 'steps': 42997, 'loss/train': 1.862662672996521} +03/05/2022 16:01:05 - INFO - codeparrot_training - Step 42998: {'lr': 0.0004111562583017079, 'samples': 22015488, 'steps': 42998, 'loss/train': 1.9470300674438477} +03/05/2022 16:01:07 - INFO - codeparrot_training - Skipping example with length 53 (seq_length=1024) +03/05/2022 16:01:11 - INFO - codeparrot_training - Step 42999: {'lr': 0.00041115220126519014, 'samples': 22016000, 'steps': 42999, 'loss/train': 1.9588584899902344} +03/05/2022 16:01:14 - INFO - codeparrot_training - Step 43000: {'lr': 0.00041114814415605977, 'samples': 22016512, 'steps': 43000, 'loss/train': 1.526210069656372} +03/05/2022 16:01:15 - INFO - codeparrot_training - Skipping example with length 507 (seq_length=1024) +03/05/2022 16:01:19 - INFO - codeparrot_training - Step 43001: {'lr': 0.0004111440869743185, 'samples': 22017024, 'steps': 43001, 'loss/train': 1.1328762769699097} +03/05/2022 16:01:22 - INFO - codeparrot_training - Step 43002: {'lr': 0.00041114002971996824, 'samples': 22017536, 'steps': 43002, 'loss/train': 1.8909417390823364} +03/05/2022 16:01:23 - INFO - codeparrot_training - Skipping example with length 642 (seq_length=1024) +03/05/2022 16:01:27 - INFO - codeparrot_training - Step 43003: {'lr': 0.0004111359723930107, 'samples': 22018048, 'steps': 43003, 'loss/train': 1.271471619606018} +03/05/2022 16:01:30 - INFO - codeparrot_training - Step 43004: {'lr': 0.00041113191499344784, 'samples': 22018560, 'steps': 43004, 'loss/train': 1.451917290687561} +03/05/2022 16:01:32 - INFO - codeparrot_training - Skipping example with length 446 (seq_length=1024) +03/05/2022 16:01:36 - INFO - codeparrot_training - Step 43005: {'lr': 0.0004111278575212814, 'samples': 22019072, 'steps': 43005, 'loss/train': 2.382460355758667} +03/05/2022 16:01:39 - INFO - codeparrot_training - Step 43006: {'lr': 0.0004111237999765132, 'samples': 22019584, 'steps': 43006, 'loss/train': 1.7496036291122437} +03/05/2022 16:01:40 - INFO - codeparrot_training - Skipping example with length 875 (seq_length=1024) +03/05/2022 16:01:44 - INFO - codeparrot_training - Step 43007: {'lr': 0.0004111197423591452, 'samples': 22020096, 'steps': 43007, 'loss/train': 1.74411141872406} +03/05/2022 16:01:47 - INFO - codeparrot_training - Step 43008: {'lr': 0.000411115684669179, 'samples': 22020608, 'steps': 43008, 'loss/train': 1.7294312715530396} +03/05/2022 16:01:48 - INFO - codeparrot_training - Skipping example with length 254 (seq_length=1024) +03/05/2022 16:01:53 - INFO - codeparrot_training - Step 43009: {'lr': 0.00041111162690661665, 'samples': 22021120, 'steps': 43009, 'loss/train': 0.9306980967521667} +03/05/2022 16:01:56 - INFO - codeparrot_training - Step 43010: {'lr': 0.00041110756907145984, 'samples': 22021632, 'steps': 43010, 'loss/train': 0.7423564195632935} +03/05/2022 16:01:57 - INFO - codeparrot_training - Skipping example with length 565 (seq_length=1024) +03/05/2022 16:02:01 - INFO - codeparrot_training - Step 43011: {'lr': 0.0004111035111637105, 'samples': 22022144, 'steps': 43011, 'loss/train': 1.8358217477798462} +03/05/2022 16:02:04 - INFO - codeparrot_training - Step 43012: {'lr': 0.00041109945318337034, 'samples': 22022656, 'steps': 43012, 'loss/train': 1.329107403755188} +03/05/2022 16:02:05 - INFO - codeparrot_training - Skipping example with length 848 (seq_length=1024) +03/05/2022 16:02:09 - INFO - codeparrot_training - Step 43013: {'lr': 0.00041109539513044127, 'samples': 22023168, 'steps': 43013, 'loss/train': 1.3814101219177246} +03/05/2022 16:02:13 - INFO - codeparrot_training - Step 43014: {'lr': 0.0004110913370049251, 'samples': 22023680, 'steps': 43014, 'loss/train': 1.1635452508926392} +03/05/2022 16:02:14 - INFO - codeparrot_training - Skipping example with length 842 (seq_length=1024) +03/05/2022 16:02:18 - INFO - codeparrot_training - Step 43015: {'lr': 0.00041108727880682363, 'samples': 22024192, 'steps': 43015, 'loss/train': 1.109616994857788} +03/05/2022 16:02:21 - INFO - codeparrot_training - Step 43016: {'lr': 0.0004110832205361388, 'samples': 22024704, 'steps': 43016, 'loss/train': 2.077890634536743} +03/05/2022 16:02:22 - INFO - codeparrot_training - Skipping example with length 991 (seq_length=1024) +03/05/2022 16:02:26 - INFO - codeparrot_training - Step 43017: {'lr': 0.0004110791621928723, 'samples': 22025216, 'steps': 43017, 'loss/train': 1.6168617010116577} +03/05/2022 16:02:30 - INFO - codeparrot_training - Step 43018: {'lr': 0.00041107510377702604, 'samples': 22025728, 'steps': 43018, 'loss/train': 1.6295809745788574} +03/05/2022 16:02:30 - INFO - codeparrot_training - Skipping example with length 357 (seq_length=1024) +03/05/2022 16:02:35 - INFO - codeparrot_training - Step 43019: {'lr': 0.00041107104528860186, 'samples': 22026240, 'steps': 43019, 'loss/train': 2.069366216659546} +03/05/2022 16:02:38 - INFO - codeparrot_training - Step 43020: {'lr': 0.00041106698672760145, 'samples': 22026752, 'steps': 43020, 'loss/train': 2.3758347034454346} +03/05/2022 16:02:40 - INFO - codeparrot_training - Skipping example with length 283 (seq_length=1024) +03/05/2022 16:02:43 - INFO - codeparrot_training - Step 43021: {'lr': 0.0004110629280940268, 'samples': 22027264, 'steps': 43021, 'loss/train': 1.8350683450698853} +03/05/2022 16:02:47 - INFO - codeparrot_training - Step 43022: {'lr': 0.0004110588693878796, 'samples': 22027776, 'steps': 43022, 'loss/train': 1.502535343170166} +03/05/2022 16:02:48 - INFO - codeparrot_training - Skipping example with length 320 (seq_length=1024) +03/05/2022 16:02:52 - INFO - codeparrot_training - Step 43023: {'lr': 0.0004110548106091619, 'samples': 22028288, 'steps': 43023, 'loss/train': 2.1090140342712402} +03/05/2022 16:02:55 - INFO - codeparrot_training - Step 43024: {'lr': 0.00041105075175787534, 'samples': 22028800, 'steps': 43024, 'loss/train': 0.7174452543258667} +03/05/2022 16:02:56 - INFO - codeparrot_training - Skipping example with length 503 (seq_length=1024) +03/05/2022 16:03:00 - INFO - codeparrot_training - Step 43025: {'lr': 0.00041104669283402174, 'samples': 22029312, 'steps': 43025, 'loss/train': 1.9419543743133545} +03/05/2022 16:03:04 - INFO - codeparrot_training - Step 43026: {'lr': 0.00041104263383760304, 'samples': 22029824, 'steps': 43026, 'loss/train': 1.4905227422714233} +03/05/2022 16:03:05 - INFO - codeparrot_training - Skipping example with length 838 (seq_length=1024) +03/05/2022 16:03:09 - INFO - codeparrot_training - Step 43027: {'lr': 0.000411038574768621, 'samples': 22030336, 'steps': 43027, 'loss/train': 1.3694018125534058} +03/05/2022 16:03:12 - INFO - codeparrot_training - Step 43028: {'lr': 0.00041103451562707745, 'samples': 22030848, 'steps': 43028, 'loss/train': 0.2598229944705963} +03/05/2022 16:03:13 - INFO - codeparrot_training - Skipping example with length 868 (seq_length=1024) +03/05/2022 16:03:17 - INFO - codeparrot_training - Step 43029: {'lr': 0.0004110304564129742, 'samples': 22031360, 'steps': 43029, 'loss/train': 1.549552083015442} +03/05/2022 16:03:21 - INFO - codeparrot_training - Step 43030: {'lr': 0.00041102639712631316, 'samples': 22031872, 'steps': 43030, 'loss/train': 1.8566228151321411} +03/05/2022 16:03:22 - INFO - codeparrot_training - Skipping example with length 739 (seq_length=1024) +03/05/2022 16:03:26 - INFO - codeparrot_training - Step 43031: {'lr': 0.0004110223377670962, 'samples': 22032384, 'steps': 43031, 'loss/train': 1.228371500968933} +03/05/2022 16:03:29 - INFO - codeparrot_training - Step 43032: {'lr': 0.0004110182783353249, 'samples': 22032896, 'steps': 43032, 'loss/train': 1.9909286499023438} +03/05/2022 16:03:30 - INFO - codeparrot_training - Skipping example with length 606 (seq_length=1024) +03/05/2022 16:03:34 - INFO - codeparrot_training - Step 43033: {'lr': 0.0004110142188310013, 'samples': 22033408, 'steps': 43033, 'loss/train': 1.7093251943588257} +03/05/2022 16:03:37 - INFO - codeparrot_training - Step 43034: {'lr': 0.0004110101592541272, 'samples': 22033920, 'steps': 43034, 'loss/train': 1.8380711078643799} +03/05/2022 16:03:38 - INFO - codeparrot_training - Skipping example with length 542 (seq_length=1024) +03/05/2022 16:03:43 - INFO - codeparrot_training - Step 43035: {'lr': 0.0004110060996047044, 'samples': 22034432, 'steps': 43035, 'loss/train': 1.180138349533081} +03/05/2022 16:03:46 - INFO - codeparrot_training - Step 43036: {'lr': 0.00041100203988273475, 'samples': 22034944, 'steps': 43036, 'loss/train': 2.098604679107666} +03/05/2022 16:03:47 - INFO - codeparrot_training - Skipping example with length 543 (seq_length=1024) +03/05/2022 16:03:51 - INFO - codeparrot_training - Step 43037: {'lr': 0.0004109979800882201, 'samples': 22035456, 'steps': 43037, 'loss/train': 2.079730272293091} +03/05/2022 16:03:54 - INFO - codeparrot_training - Step 43038: {'lr': 0.00041099392022116214, 'samples': 22035968, 'steps': 43038, 'loss/train': 1.6361215114593506} +03/05/2022 16:03:55 - INFO - codeparrot_training - Skipping example with length 895 (seq_length=1024) +03/05/2022 16:04:00 - INFO - codeparrot_training - Step 43039: {'lr': 0.0004109898602815629, 'samples': 22036480, 'steps': 43039, 'loss/train': 1.7473645210266113} +03/05/2022 16:04:03 - INFO - codeparrot_training - Step 43040: {'lr': 0.000410985800269424, 'samples': 22036992, 'steps': 43040, 'loss/train': 1.9737156629562378} +03/05/2022 16:04:04 - INFO - codeparrot_training - Skipping example with length 616 (seq_length=1024) +03/05/2022 16:04:08 - INFO - codeparrot_training - Step 43041: {'lr': 0.00041098174018474747, 'samples': 22037504, 'steps': 43041, 'loss/train': 1.8876824378967285} +03/05/2022 16:04:11 - INFO - codeparrot_training - Step 43042: {'lr': 0.000410977680027535, 'samples': 22038016, 'steps': 43042, 'loss/train': 1.7406147718429565} +03/05/2022 16:04:13 - INFO - codeparrot_training - Skipping example with length 436 (seq_length=1024) +03/05/2022 16:04:17 - INFO - codeparrot_training - Step 43043: {'lr': 0.00041097361979778853, 'samples': 22038528, 'steps': 43043, 'loss/train': 2.1136491298675537} +03/05/2022 16:04:20 - INFO - codeparrot_training - Step 43044: {'lr': 0.00041096955949550983, 'samples': 22039040, 'steps': 43044, 'loss/train': 1.7802170515060425} +03/05/2022 16:04:21 - INFO - codeparrot_training - Skipping example with length 251 (seq_length=1024) +03/05/2022 16:04:25 - INFO - codeparrot_training - Step 43045: {'lr': 0.00041096549912070067, 'samples': 22039552, 'steps': 43045, 'loss/train': 1.694657564163208} +03/05/2022 16:04:28 - INFO - codeparrot_training - Step 43046: {'lr': 0.000410961438673363, 'samples': 22040064, 'steps': 43046, 'loss/train': 1.7808825969696045} +03/05/2022 16:04:29 - INFO - codeparrot_training - Skipping example with length 761 (seq_length=1024) +03/05/2022 16:04:34 - INFO - codeparrot_training - Step 43047: {'lr': 0.0004109573781534985, 'samples': 22040576, 'steps': 43047, 'loss/train': 1.1797266006469727} +03/05/2022 16:04:37 - INFO - codeparrot_training - Step 43048: {'lr': 0.0004109533175611092, 'samples': 22041088, 'steps': 43048, 'loss/train': 1.654572606086731} +03/05/2022 16:04:37 - INFO - codeparrot_training - Skipping example with length 87 (seq_length=1024) +03/05/2022 16:04:42 - INFO - codeparrot_training - Step 43049: {'lr': 0.0004109492568961968, 'samples': 22041600, 'steps': 43049, 'loss/train': 2.0845680236816406} +03/05/2022 16:04:45 - INFO - codeparrot_training - Step 43050: {'lr': 0.00041094519615876313, 'samples': 22042112, 'steps': 43050, 'loss/train': 1.8338444232940674} +03/05/2022 16:04:46 - INFO - codeparrot_training - Skipping example with length 126 (seq_length=1024) +03/05/2022 16:04:50 - INFO - codeparrot_training - Step 43051: {'lr': 0.0004109411353488101, 'samples': 22042624, 'steps': 43051, 'loss/train': 1.6461910009384155} +03/05/2022 16:04:54 - INFO - codeparrot_training - Step 43052: {'lr': 0.00041093707446633934, 'samples': 22043136, 'steps': 43052, 'loss/train': 1.8856909275054932} +03/05/2022 16:04:54 - INFO - codeparrot_training - Skipping example with length 567 (seq_length=1024) +03/05/2022 16:04:59 - INFO - codeparrot_training - Step 43053: {'lr': 0.00041093301351135294, 'samples': 22043648, 'steps': 43053, 'loss/train': 2.0125715732574463} +03/05/2022 16:05:02 - INFO - codeparrot_training - Step 43054: {'lr': 0.00041092895248385255, 'samples': 22044160, 'steps': 43054, 'loss/train': 1.6833592653274536} +03/05/2022 16:05:03 - INFO - codeparrot_training - Skipping example with length 322 (seq_length=1024) +03/05/2022 16:05:07 - INFO - codeparrot_training - Step 43055: {'lr': 0.00041092489138384, 'samples': 22044672, 'steps': 43055, 'loss/train': 1.3324934244155884} +03/05/2022 16:05:10 - INFO - codeparrot_training - Step 43056: {'lr': 0.0004109208302113173, 'samples': 22045184, 'steps': 43056, 'loss/train': 1.9577224254608154} +03/05/2022 16:05:11 - INFO - codeparrot_training - Skipping example with length 130 (seq_length=1024) +03/05/2022 16:05:16 - INFO - codeparrot_training - Step 43057: {'lr': 0.00041091676896628604, 'samples': 22045696, 'steps': 43057, 'loss/train': 0.4841378331184387} +03/05/2022 16:05:19 - INFO - codeparrot_training - Step 43058: {'lr': 0.00041091270764874823, 'samples': 22046208, 'steps': 43058, 'loss/train': 0.9508277773857117} +03/05/2022 16:05:19 - INFO - codeparrot_training - Skipping example with length 1021 (seq_length=1024) +03/05/2022 16:05:24 - INFO - codeparrot_training - Step 43059: {'lr': 0.0004109086462587056, 'samples': 22046720, 'steps': 43059, 'loss/train': 1.6298943758010864} +03/05/2022 16:05:27 - INFO - codeparrot_training - Step 43060: {'lr': 0.0004109045847961601, 'samples': 22047232, 'steps': 43060, 'loss/train': 1.6206494569778442} +03/05/2022 16:05:28 - INFO - codeparrot_training - Skipping example with length 219 (seq_length=1024) +03/05/2022 16:05:33 - INFO - codeparrot_training - Step 43061: {'lr': 0.0004109005232611134, 'samples': 22047744, 'steps': 43061, 'loss/train': 1.2157680988311768} +03/05/2022 16:05:36 - INFO - codeparrot_training - Step 43062: {'lr': 0.00041089646165356743, 'samples': 22048256, 'steps': 43062, 'loss/train': 1.5649973154067993} +03/05/2022 16:05:36 - INFO - codeparrot_training - Skipping example with length 369 (seq_length=1024) +03/05/2022 16:05:41 - INFO - codeparrot_training - Step 43063: {'lr': 0.000410892399973524, 'samples': 22048768, 'steps': 43063, 'loss/train': 1.342923641204834} +03/05/2022 16:05:44 - INFO - codeparrot_training - Step 43064: {'lr': 0.00041088833822098495, 'samples': 22049280, 'steps': 43064, 'loss/train': 2.1289045810699463} +03/05/2022 16:05:44 - INFO - codeparrot_training - Skipping example with length 187 (seq_length=1024) +03/05/2022 16:05:50 - INFO - codeparrot_training - Step 43065: {'lr': 0.00041088427639595206, 'samples': 22049792, 'steps': 43065, 'loss/train': 2.1584956645965576} +03/05/2022 16:05:53 - INFO - codeparrot_training - Step 43066: {'lr': 0.0004108802144984273, 'samples': 22050304, 'steps': 43066, 'loss/train': 1.505391001701355} +03/05/2022 16:05:53 - INFO - codeparrot_training - Skipping example with length 629 (seq_length=1024) +03/05/2022 16:05:58 - INFO - codeparrot_training - Step 43067: {'lr': 0.0004108761525284123, 'samples': 22050816, 'steps': 43067, 'loss/train': 1.086782693862915} +03/05/2022 16:06:01 - INFO - codeparrot_training - Step 43068: {'lr': 0.000410872090485909, 'samples': 22051328, 'steps': 43068, 'loss/train': 1.4483222961425781} +03/05/2022 16:06:02 - INFO - codeparrot_training - Skipping example with length 730 (seq_length=1024) +03/05/2022 16:06:07 - INFO - codeparrot_training - Step 43069: {'lr': 0.00041086802837091916, 'samples': 22051840, 'steps': 43069, 'loss/train': 1.960574746131897} +03/05/2022 16:06:10 - INFO - codeparrot_training - Step 43070: {'lr': 0.00041086396618344475, 'samples': 22052352, 'steps': 43070, 'loss/train': 1.7231091260910034} +03/05/2022 16:06:13 - INFO - codeparrot_training - Step 43071: {'lr': 0.0004108599039234875, 'samples': 22052864, 'steps': 43071, 'loss/train': 2.0861830711364746} +03/05/2022 16:06:13 - INFO - codeparrot_training - Skipping example with length 172 (seq_length=1024) +03/05/2022 16:06:19 - INFO - codeparrot_training - Step 43072: {'lr': 0.00041085584159104925, 'samples': 22053376, 'steps': 43072, 'loss/train': 2.222378969192505} +03/05/2022 16:06:22 - INFO - codeparrot_training - Step 43073: {'lr': 0.00041085177918613185, 'samples': 22053888, 'steps': 43073, 'loss/train': 2.0434725284576416} +03/05/2022 16:06:23 - INFO - codeparrot_training - Skipping example with length 511 (seq_length=1024) +03/05/2022 16:06:27 - INFO - codeparrot_training - Step 43074: {'lr': 0.0004108477167087371, 'samples': 22054400, 'steps': 43074, 'loss/train': 1.8143647909164429} +03/05/2022 16:06:31 - INFO - codeparrot_training - Step 43075: {'lr': 0.0004108436541588669, 'samples': 22054912, 'steps': 43075, 'loss/train': 1.689709186553955} +03/05/2022 16:06:31 - INFO - codeparrot_training - Skipping example with length 549 (seq_length=1024) +03/05/2022 16:06:36 - INFO - codeparrot_training - Step 43076: {'lr': 0.000410839591536523, 'samples': 22055424, 'steps': 43076, 'loss/train': 2.117950916290283} +03/05/2022 16:06:39 - INFO - codeparrot_training - Step 43077: {'lr': 0.00041083552884170726, 'samples': 22055936, 'steps': 43077, 'loss/train': 2.0496063232421875} +03/05/2022 16:06:40 - INFO - codeparrot_training - Skipping example with length 232 (seq_length=1024) +03/05/2022 16:06:44 - INFO - codeparrot_training - Step 43078: {'lr': 0.0004108314660744216, 'samples': 22056448, 'steps': 43078, 'loss/train': 2.437413454055786} +03/05/2022 16:06:47 - INFO - codeparrot_training - Step 43079: {'lr': 0.0004108274032346676, 'samples': 22056960, 'steps': 43079, 'loss/train': 1.9964866638183594} +03/05/2022 16:06:48 - INFO - codeparrot_training - Skipping example with length 157 (seq_length=1024) +03/05/2022 16:06:53 - INFO - codeparrot_training - Step 43080: {'lr': 0.0004108233403224474, 'samples': 22057472, 'steps': 43080, 'loss/train': 2.443490743637085} +03/05/2022 16:06:56 - INFO - codeparrot_training - Step 43081: {'lr': 0.0004108192773377626, 'samples': 22057984, 'steps': 43081, 'loss/train': 1.9517830610275269} +03/05/2022 16:06:57 - INFO - codeparrot_training - Skipping example with length 872 (seq_length=1024) +03/05/2022 16:07:01 - INFO - codeparrot_training - Step 43082: {'lr': 0.0004108152142806151, 'samples': 22058496, 'steps': 43082, 'loss/train': 1.970909595489502} +03/05/2022 16:07:04 - INFO - codeparrot_training - Step 43083: {'lr': 0.00041081115115100677, 'samples': 22059008, 'steps': 43083, 'loss/train': 2.4584081172943115} +03/05/2022 16:07:05 - INFO - codeparrot_training - Skipping example with length 241 (seq_length=1024) +03/05/2022 16:07:10 - INFO - codeparrot_training - Step 43084: {'lr': 0.0004108070879489395, 'samples': 22059520, 'steps': 43084, 'loss/train': 1.994110107421875} +03/05/2022 16:07:13 - INFO - codeparrot_training - Step 43085: {'lr': 0.0004108030246744149, 'samples': 22060032, 'steps': 43085, 'loss/train': 1.2431331872940063} +03/05/2022 16:07:13 - INFO - codeparrot_training - Skipping example with length 734 (seq_length=1024) +03/05/2022 16:07:18 - INFO - codeparrot_training - Step 43086: {'lr': 0.00041079896132743506, 'samples': 22060544, 'steps': 43086, 'loss/train': 1.2463271617889404} +03/05/2022 16:07:21 - INFO - codeparrot_training - Step 43087: {'lr': 0.0004107948979080016, 'samples': 22061056, 'steps': 43087, 'loss/train': 0.5642788410186768} +03/05/2022 16:07:22 - INFO - codeparrot_training - Skipping example with length 739 (seq_length=1024) +03/05/2022 16:07:26 - INFO - codeparrot_training - Step 43088: {'lr': 0.00041079083441611646, 'samples': 22061568, 'steps': 43088, 'loss/train': 1.488057255744934} +03/05/2022 16:07:30 - INFO - codeparrot_training - Step 43089: {'lr': 0.0004107867708517815, 'samples': 22062080, 'steps': 43089, 'loss/train': 1.6993271112442017} +03/05/2022 16:07:31 - INFO - codeparrot_training - Skipping example with length 439 (seq_length=1024) +03/05/2022 16:07:35 - INFO - codeparrot_training - Step 43090: {'lr': 0.0004107827072149984, 'samples': 22062592, 'steps': 43090, 'loss/train': 1.4550457000732422} +03/05/2022 16:07:38 - INFO - codeparrot_training - Step 43091: {'lr': 0.0004107786435057692, 'samples': 22063104, 'steps': 43091, 'loss/train': 2.6793904304504395} +03/05/2022 16:07:39 - INFO - codeparrot_training - Skipping example with length 592 (seq_length=1024) +03/05/2022 16:07:43 - INFO - codeparrot_training - Step 43092: {'lr': 0.0004107745797240956, 'samples': 22063616, 'steps': 43092, 'loss/train': 1.2877134084701538} +03/05/2022 16:07:47 - INFO - codeparrot_training - Step 43093: {'lr': 0.0004107705158699794, 'samples': 22064128, 'steps': 43093, 'loss/train': 0.7764769196510315} +03/05/2022 16:07:47 - INFO - codeparrot_training - Skipping example with length 658 (seq_length=1024) +03/05/2022 16:07:52 - INFO - codeparrot_training - Step 43094: {'lr': 0.00041076645194342254, 'samples': 22064640, 'steps': 43094, 'loss/train': 2.4741721153259277} +03/05/2022 16:07:55 - INFO - codeparrot_training - Step 43095: {'lr': 0.00041076238794442675, 'samples': 22065152, 'steps': 43095, 'loss/train': 1.2902302742004395} +03/05/2022 16:07:56 - INFO - codeparrot_training - Skipping example with length 155 (seq_length=1024) +03/05/2022 16:08:00 - INFO - codeparrot_training - Step 43096: {'lr': 0.00041075832387299396, 'samples': 22065664, 'steps': 43096, 'loss/train': 1.503089189529419} +03/05/2022 16:08:04 - INFO - codeparrot_training - Step 43097: {'lr': 0.00041075425972912595, 'samples': 22066176, 'steps': 43097, 'loss/train': 1.8542871475219727} +03/05/2022 16:08:04 - INFO - codeparrot_training - Skipping example with length 31 (seq_length=1024) +03/05/2022 16:08:09 - INFO - codeparrot_training - Step 43098: {'lr': 0.00041075019551282455, 'samples': 22066688, 'steps': 43098, 'loss/train': 2.089121103286743} +03/05/2022 16:08:12 - INFO - codeparrot_training - Step 43099: {'lr': 0.00041074613122409157, 'samples': 22067200, 'steps': 43099, 'loss/train': 2.1537768840789795} +03/05/2022 16:08:12 - INFO - codeparrot_training - Skipping example with length 840 (seq_length=1024) +03/05/2022 16:08:17 - INFO - codeparrot_training - Step 43100: {'lr': 0.0004107420668629289, 'samples': 22067712, 'steps': 43100, 'loss/train': 1.368057131767273} +03/05/2022 16:08:20 - INFO - codeparrot_training - Step 43101: {'lr': 0.00041073800242933826, 'samples': 22068224, 'steps': 43101, 'loss/train': 1.905125379562378} +03/05/2022 16:08:21 - INFO - codeparrot_training - Skipping example with length 244 (seq_length=1024) +03/05/2022 16:08:26 - INFO - codeparrot_training - Step 43102: {'lr': 0.00041073393792332157, 'samples': 22068736, 'steps': 43102, 'loss/train': 1.185733675956726} +03/05/2022 16:08:29 - INFO - codeparrot_training - Step 43103: {'lr': 0.0004107298733448807, 'samples': 22069248, 'steps': 43103, 'loss/train': 2.022883415222168} +03/05/2022 16:08:29 - INFO - codeparrot_training - Skipping example with length 274 (seq_length=1024) +03/05/2022 16:08:34 - INFO - codeparrot_training - Step 43104: {'lr': 0.0004107258086940174, 'samples': 22069760, 'steps': 43104, 'loss/train': 1.3742536306381226} +03/05/2022 16:08:37 - INFO - codeparrot_training - Step 43105: {'lr': 0.0004107217439707336, 'samples': 22070272, 'steps': 43105, 'loss/train': 1.2686127424240112} +03/05/2022 16:08:38 - INFO - codeparrot_training - Skipping example with length 474 (seq_length=1024) +03/05/2022 16:08:42 - INFO - codeparrot_training - Step 43106: {'lr': 0.000410717679175031, 'samples': 22070784, 'steps': 43106, 'loss/train': 2.06044602394104} +03/05/2022 16:08:46 - INFO - codeparrot_training - Step 43107: {'lr': 0.00041071361430691143, 'samples': 22071296, 'steps': 43107, 'loss/train': 2.3357412815093994} +03/05/2022 16:08:46 - INFO - codeparrot_training - Skipping example with length 288 (seq_length=1024) +03/05/2022 16:08:51 - INFO - codeparrot_training - Step 43108: {'lr': 0.00041070954936637687, 'samples': 22071808, 'steps': 43108, 'loss/train': 0.7956318259239197} +03/05/2022 16:08:54 - INFO - codeparrot_training - Step 43109: {'lr': 0.00041070548435342903, 'samples': 22072320, 'steps': 43109, 'loss/train': 1.8337548971176147} +03/05/2022 16:08:54 - INFO - codeparrot_training - Skipping example with length 790 (seq_length=1024) +03/05/2022 16:08:59 - INFO - codeparrot_training - Step 43110: {'lr': 0.00041070141926806983, 'samples': 22072832, 'steps': 43110, 'loss/train': 0.719842255115509} +03/05/2022 16:09:02 - INFO - codeparrot_training - Step 43111: {'lr': 0.00041069735411030105, 'samples': 22073344, 'steps': 43111, 'loss/train': 1.3006435632705688} +03/05/2022 16:09:02 - INFO - codeparrot_training - Skipping example with length 872 (seq_length=1024) +03/05/2022 16:09:08 - INFO - codeparrot_training - Step 43112: {'lr': 0.00041069328888012447, 'samples': 22073856, 'steps': 43112, 'loss/train': 1.6250613927841187} +03/05/2022 16:09:11 - INFO - codeparrot_training - Step 43113: {'lr': 0.000410689223577542, 'samples': 22074368, 'steps': 43113, 'loss/train': 1.3310869932174683} +03/05/2022 16:09:11 - INFO - codeparrot_training - Skipping example with length 209 (seq_length=1024) +03/05/2022 16:09:16 - INFO - codeparrot_training - Step 43114: {'lr': 0.00041068515820255543, 'samples': 22074880, 'steps': 43114, 'loss/train': 1.6656626462936401} +03/05/2022 16:09:19 - INFO - codeparrot_training - Skipping example with length 122 (seq_length=1024) +03/05/2022 16:09:22 - INFO - codeparrot_training - Step 43115: {'lr': 0.00041068109275516665, 'samples': 22075392, 'steps': 43115, 'loss/train': 1.007974624633789} +03/05/2022 16:09:25 - INFO - codeparrot_training - Step 43116: {'lr': 0.0004106770272353774, 'samples': 22075904, 'steps': 43116, 'loss/train': 2.0368382930755615} +03/05/2022 16:09:28 - INFO - codeparrot_training - Skipping example with length 1020 (seq_length=1024) +03/05/2022 16:09:30 - INFO - codeparrot_training - Step 43117: {'lr': 0.00041067296164318956, 'samples': 22076416, 'steps': 43117, 'loss/train': 2.2699427604675293} +03/05/2022 16:09:33 - INFO - codeparrot_training - Step 43118: {'lr': 0.000410668895978605, 'samples': 22076928, 'steps': 43118, 'loss/train': 0.47049444913864136} +03/05/2022 16:09:36 - INFO - codeparrot_training - Skipping example with length 76 (seq_length=1024) +03/05/2022 16:09:39 - INFO - codeparrot_training - Step 43119: {'lr': 0.0004106648302416255, 'samples': 22077440, 'steps': 43119, 'loss/train': 1.2024662494659424} +03/05/2022 16:09:42 - INFO - codeparrot_training - Step 43120: {'lr': 0.0004106607644322529, 'samples': 22077952, 'steps': 43120, 'loss/train': 1.618593692779541} +03/05/2022 16:09:45 - INFO - codeparrot_training - Skipping example with length 828 (seq_length=1024) +03/05/2022 16:09:47 - INFO - codeparrot_training - Step 43121: {'lr': 0.00041065669855048896, 'samples': 22078464, 'steps': 43121, 'loss/train': 1.4772942066192627} +03/05/2022 16:09:50 - INFO - codeparrot_training - Step 43122: {'lr': 0.0004106526325963357, 'samples': 22078976, 'steps': 43122, 'loss/train': 1.7195594310760498} +03/05/2022 16:09:53 - INFO - codeparrot_training - Step 43123: {'lr': 0.0004106485665697948, 'samples': 22079488, 'steps': 43123, 'loss/train': 1.0221295356750488} +03/05/2022 16:09:53 - INFO - codeparrot_training - Skipping example with length 755 (seq_length=1024) +03/05/2022 16:09:59 - INFO - codeparrot_training - Step 43124: {'lr': 0.00041064450047086814, 'samples': 22080000, 'steps': 43124, 'loss/train': 1.7321712970733643} +03/05/2022 16:10:02 - INFO - codeparrot_training - Step 43125: {'lr': 0.00041064043429955756, 'samples': 22080512, 'steps': 43125, 'loss/train': 1.5884205102920532} +03/05/2022 16:10:02 - INFO - codeparrot_training - Skipping example with length 931 (seq_length=1024) +03/05/2022 16:10:07 - INFO - codeparrot_training - Step 43126: {'lr': 0.0004106363680558649, 'samples': 22081024, 'steps': 43126, 'loss/train': 1.4176160097122192} +03/05/2022 16:10:11 - INFO - codeparrot_training - Step 43127: {'lr': 0.0004106323017397919, 'samples': 22081536, 'steps': 43127, 'loss/train': 2.1790034770965576} +03/05/2022 16:10:12 - INFO - codeparrot_training - Skipping example with length 122 (seq_length=1024) +03/05/2022 16:10:16 - INFO - codeparrot_training - Step 43128: {'lr': 0.00041062823535134053, 'samples': 22082048, 'steps': 43128, 'loss/train': 1.397014856338501} +03/05/2022 16:10:19 - INFO - codeparrot_training - Step 43129: {'lr': 0.0004106241688905126, 'samples': 22082560, 'steps': 43129, 'loss/train': 1.8907394409179688} +03/05/2022 16:10:20 - INFO - codeparrot_training - Skipping example with length 307 (seq_length=1024) +03/05/2022 16:10:24 - INFO - codeparrot_training - Step 43130: {'lr': 0.00041062010235730974, 'samples': 22083072, 'steps': 43130, 'loss/train': 2.2713260650634766} +03/05/2022 16:10:27 - INFO - codeparrot_training - Step 43131: {'lr': 0.0004106160357517341, 'samples': 22083584, 'steps': 43131, 'loss/train': 1.0937063694000244} +03/05/2022 16:10:28 - INFO - codeparrot_training - Skipping example with length 17 (seq_length=1024) +03/05/2022 16:10:33 - INFO - codeparrot_training - Step 43132: {'lr': 0.00041061196907378727, 'samples': 22084096, 'steps': 43132, 'loss/train': 3.005648374557495} +03/05/2022 16:10:36 - INFO - codeparrot_training - Step 43133: {'lr': 0.00041060790232347116, 'samples': 22084608, 'steps': 43133, 'loss/train': 2.260462999343872} +03/05/2022 16:10:37 - INFO - codeparrot_training - Skipping example with length 510 (seq_length=1024) +03/05/2022 16:10:41 - INFO - codeparrot_training - Step 43134: {'lr': 0.00041060383550078764, 'samples': 22085120, 'steps': 43134, 'loss/train': 1.8415838479995728} +03/05/2022 16:10:44 - INFO - codeparrot_training - Step 43135: {'lr': 0.00041059976860573845, 'samples': 22085632, 'steps': 43135, 'loss/train': 2.1916239261627197} +03/05/2022 16:10:45 - INFO - codeparrot_training - Skipping example with length 146 (seq_length=1024) +03/05/2022 16:10:50 - INFO - codeparrot_training - Step 43136: {'lr': 0.00041059570163832555, 'samples': 22086144, 'steps': 43136, 'loss/train': 1.5917460918426514} +03/05/2022 16:10:53 - INFO - codeparrot_training - Step 43137: {'lr': 0.00041059163459855066, 'samples': 22086656, 'steps': 43137, 'loss/train': 0.9320889115333557} +03/05/2022 16:10:54 - INFO - codeparrot_training - Skipping example with length 212 (seq_length=1024) +03/05/2022 16:10:58 - INFO - codeparrot_training - Step 43138: {'lr': 0.00041058756748641573, 'samples': 22087168, 'steps': 43138, 'loss/train': 1.4843759536743164} +03/05/2022 16:11:01 - INFO - codeparrot_training - Step 43139: {'lr': 0.0004105835003019225, 'samples': 22087680, 'steps': 43139, 'loss/train': 1.759590983390808} +03/05/2022 16:11:02 - INFO - codeparrot_training - Skipping example with length 494 (seq_length=1024) +03/05/2022 16:11:06 - INFO - codeparrot_training - Step 43140: {'lr': 0.00041057943304507273, 'samples': 22088192, 'steps': 43140, 'loss/train': 0.31049492955207825} +03/05/2022 16:11:10 - INFO - codeparrot_training - Step 43141: {'lr': 0.0004105753657158684, 'samples': 22088704, 'steps': 43141, 'loss/train': 1.4876688718795776} +03/05/2022 16:11:10 - INFO - codeparrot_training - Skipping example with length 693 (seq_length=1024) +03/05/2022 16:11:15 - INFO - codeparrot_training - Step 43142: {'lr': 0.00041057129831431133, 'samples': 22089216, 'steps': 43142, 'loss/train': 2.0918872356414795} +03/05/2022 16:11:19 - INFO - codeparrot_training - Step 43143: {'lr': 0.00041056723084040324, 'samples': 22089728, 'steps': 43143, 'loss/train': 2.170022487640381} +03/05/2022 16:11:22 - INFO - codeparrot_training - Step 43144: {'lr': 0.00041056316329414613, 'samples': 22090240, 'steps': 43144, 'loss/train': 0.778674304485321} +03/05/2022 16:11:22 - INFO - codeparrot_training - Skipping example with length 197 (seq_length=1024) +03/05/2022 16:11:27 - INFO - codeparrot_training - Step 43145: {'lr': 0.00041055909567554166, 'samples': 22090752, 'steps': 43145, 'loss/train': 1.5231820344924927} +03/05/2022 16:11:30 - INFO - codeparrot_training - Step 43146: {'lr': 0.00041055502798459175, 'samples': 22091264, 'steps': 43146, 'loss/train': 2.150735855102539} +03/05/2022 16:11:30 - INFO - codeparrot_training - Skipping example with length 533 (seq_length=1024) +03/05/2022 16:11:35 - INFO - codeparrot_training - Step 43147: {'lr': 0.00041055096022129823, 'samples': 22091776, 'steps': 43147, 'loss/train': 1.4834394454956055} +03/05/2022 16:11:39 - INFO - codeparrot_training - Step 43148: {'lr': 0.0004105468923856629, 'samples': 22092288, 'steps': 43148, 'loss/train': 2.0487375259399414} +03/05/2022 16:11:39 - INFO - codeparrot_training - Skipping example with length 942 (seq_length=1024) +03/05/2022 16:11:44 - INFO - codeparrot_training - Step 43149: {'lr': 0.00041054282447768763, 'samples': 22092800, 'steps': 43149, 'loss/train': 0.9862779974937439} +03/05/2022 16:11:47 - INFO - codeparrot_training - Step 43150: {'lr': 0.00041053875649737424, 'samples': 22093312, 'steps': 43150, 'loss/train': 0.07504279911518097} +03/05/2022 16:11:47 - INFO - codeparrot_training - Skipping example with length 518 (seq_length=1024) +03/05/2022 16:11:52 - INFO - codeparrot_training - Step 43151: {'lr': 0.0004105346884447246, 'samples': 22093824, 'steps': 43151, 'loss/train': 0.6164417266845703} +03/05/2022 16:11:56 - INFO - codeparrot_training - Step 43152: {'lr': 0.00041053062031974055, 'samples': 22094336, 'steps': 43152, 'loss/train': 1.6074323654174805} +03/05/2022 16:11:56 - INFO - codeparrot_training - Skipping example with length 379 (seq_length=1024) +03/05/2022 16:12:01 - INFO - codeparrot_training - Step 43153: {'lr': 0.00041052655212242377, 'samples': 22094848, 'steps': 43153, 'loss/train': 1.0334174633026123} +03/05/2022 16:12:04 - INFO - codeparrot_training - Step 43154: {'lr': 0.00041052248385277623, 'samples': 22095360, 'steps': 43154, 'loss/train': 1.5558253526687622} +03/05/2022 16:12:04 - INFO - codeparrot_training - Skipping example with length 962 (seq_length=1024) +03/05/2022 16:12:09 - INFO - codeparrot_training - Step 43155: {'lr': 0.0004105184155107998, 'samples': 22095872, 'steps': 43155, 'loss/train': 2.41328763961792} +03/05/2022 16:12:12 - INFO - codeparrot_training - Step 43156: {'lr': 0.00041051434709649614, 'samples': 22096384, 'steps': 43156, 'loss/train': 1.7105889320373535} +03/05/2022 16:12:12 - INFO - codeparrot_training - Skipping example with length 472 (seq_length=1024) +03/05/2022 16:12:18 - INFO - codeparrot_training - Step 43157: {'lr': 0.0004105102786098672, 'samples': 22096896, 'steps': 43157, 'loss/train': 2.016397476196289} +03/05/2022 16:12:21 - INFO - codeparrot_training - Skipping example with length 392 (seq_length=1024) +03/05/2022 16:12:23 - INFO - codeparrot_training - Step 43158: {'lr': 0.0004105062100509149, 'samples': 22097408, 'steps': 43158, 'loss/train': 1.8025619983673096} +03/05/2022 16:12:27 - INFO - codeparrot_training - Step 43159: {'lr': 0.000410502141419641, 'samples': 22097920, 'steps': 43159, 'loss/train': 1.3668476343154907} +03/05/2022 16:12:30 - INFO - codeparrot_training - Step 43160: {'lr': 0.00041049807271604724, 'samples': 22098432, 'steps': 43160, 'loss/train': 3.878596544265747} +03/05/2022 16:12:31 - INFO - codeparrot_training - Skipping example with length 200 (seq_length=1024) +03/05/2022 16:12:35 - INFO - codeparrot_training - Step 43161: {'lr': 0.00041049400394013545, 'samples': 22098944, 'steps': 43161, 'loss/train': 1.3743218183517456} +03/05/2022 16:12:38 - INFO - codeparrot_training - Step 43162: {'lr': 0.0004104899350919077, 'samples': 22099456, 'steps': 43162, 'loss/train': 1.2171283960342407} +03/05/2022 16:12:39 - INFO - codeparrot_training - Skipping example with length 399 (seq_length=1024) +03/05/2022 16:12:44 - INFO - codeparrot_training - Step 43163: {'lr': 0.0004104858661713655, 'samples': 22099968, 'steps': 43163, 'loss/train': 1.9445648193359375} +03/05/2022 16:12:47 - INFO - codeparrot_training - Step 43164: {'lr': 0.00041048179717851095, 'samples': 22100480, 'steps': 43164, 'loss/train': 2.04170298576355} +03/05/2022 16:12:48 - INFO - codeparrot_training - Skipping example with length 591 (seq_length=1024) +03/05/2022 16:12:52 - INFO - codeparrot_training - Step 43165: {'lr': 0.00041047772811334584, 'samples': 22100992, 'steps': 43165, 'loss/train': 2.1508636474609375} +03/05/2022 16:12:55 - INFO - codeparrot_training - Step 43166: {'lr': 0.0004104736589758719, 'samples': 22101504, 'steps': 43166, 'loss/train': 2.0004007816314697} +03/05/2022 16:12:56 - INFO - codeparrot_training - Skipping example with length 736 (seq_length=1024) +03/05/2022 16:13:01 - INFO - codeparrot_training - Step 43167: {'lr': 0.0004104695897660909, 'samples': 22102016, 'steps': 43167, 'loss/train': 1.569664478302002} +03/05/2022 16:13:04 - INFO - codeparrot_training - Step 43168: {'lr': 0.0004104655204840048, 'samples': 22102528, 'steps': 43168, 'loss/train': 1.326568365097046} +03/05/2022 16:13:05 - INFO - codeparrot_training - Skipping example with length 963 (seq_length=1024) +03/05/2022 16:13:09 - INFO - codeparrot_training - Step 43169: {'lr': 0.0004104614511296155, 'samples': 22103040, 'steps': 43169, 'loss/train': 1.8442031145095825} +03/05/2022 16:13:12 - INFO - codeparrot_training - Step 43170: {'lr': 0.00041045738170292467, 'samples': 22103552, 'steps': 43170, 'loss/train': 2.9598910808563232} +03/05/2022 16:13:13 - INFO - codeparrot_training - Skipping example with length 556 (seq_length=1024) +03/05/2022 16:13:17 - INFO - codeparrot_training - Step 43171: {'lr': 0.0004104533122039342, 'samples': 22104064, 'steps': 43171, 'loss/train': 1.3943384885787964} +03/05/2022 16:13:21 - INFO - codeparrot_training - Step 43172: {'lr': 0.00041044924263264603, 'samples': 22104576, 'steps': 43172, 'loss/train': 1.8599486351013184} +03/05/2022 16:13:21 - INFO - codeparrot_training - Skipping example with length 397 (seq_length=1024) +03/05/2022 16:13:26 - INFO - codeparrot_training - Step 43173: {'lr': 0.00041044517298906194, 'samples': 22105088, 'steps': 43173, 'loss/train': 1.2869185209274292} +03/05/2022 16:13:29 - INFO - codeparrot_training - Step 43174: {'lr': 0.0004104411032731836, 'samples': 22105600, 'steps': 43174, 'loss/train': 2.4233741760253906} +03/05/2022 16:13:30 - INFO - codeparrot_training - Skipping example with length 222 (seq_length=1024) +03/05/2022 16:13:34 - INFO - codeparrot_training - Step 43175: {'lr': 0.00041043703348501304, 'samples': 22106112, 'steps': 43175, 'loss/train': 1.8125391006469727} +03/05/2022 16:13:38 - INFO - codeparrot_training - Step 43176: {'lr': 0.0004104329636245521, 'samples': 22106624, 'steps': 43176, 'loss/train': 1.748113989830017} +03/05/2022 16:13:38 - INFO - codeparrot_training - Skipping example with length 582 (seq_length=1024) +03/05/2022 16:13:43 - INFO - codeparrot_training - Step 43177: {'lr': 0.0004104288936918024, 'samples': 22107136, 'steps': 43177, 'loss/train': 1.557985782623291} +03/05/2022 16:13:46 - INFO - codeparrot_training - Step 43178: {'lr': 0.00041042482368676604, 'samples': 22107648, 'steps': 43178, 'loss/train': 0.6941673159599304} +03/05/2022 16:13:47 - INFO - codeparrot_training - Skipping example with length 490 (seq_length=1024) +03/05/2022 16:13:51 - INFO - codeparrot_training - Step 43179: {'lr': 0.00041042075360944464, 'samples': 22108160, 'steps': 43179, 'loss/train': 1.6014760732650757} +03/05/2022 16:13:55 - INFO - codeparrot_training - Step 43180: {'lr': 0.0004104166834598402, 'samples': 22108672, 'steps': 43180, 'loss/train': 3.426985502243042} +03/05/2022 16:13:55 - INFO - codeparrot_training - Skipping example with length 372 (seq_length=1024) +03/05/2022 16:14:00 - INFO - codeparrot_training - Step 43181: {'lr': 0.00041041261323795437, 'samples': 22109184, 'steps': 43181, 'loss/train': 1.8814622163772583} +03/05/2022 16:14:03 - INFO - codeparrot_training - Step 43182: {'lr': 0.0004104085429437892, 'samples': 22109696, 'steps': 43182, 'loss/train': 1.997185230255127} +03/05/2022 16:14:03 - INFO - codeparrot_training - Skipping example with length 741 (seq_length=1024) +03/05/2022 16:14:08 - INFO - codeparrot_training - Step 43183: {'lr': 0.00041040447257734635, 'samples': 22110208, 'steps': 43183, 'loss/train': 1.8427027463912964} +03/05/2022 16:14:11 - INFO - codeparrot_training - Step 43184: {'lr': 0.00041040040213862774, 'samples': 22110720, 'steps': 43184, 'loss/train': 1.0233168601989746} +03/05/2022 16:14:11 - INFO - codeparrot_training - Skipping example with length 819 (seq_length=1024) +03/05/2022 16:14:17 - INFO - codeparrot_training - Step 43185: {'lr': 0.00041039633162763523, 'samples': 22111232, 'steps': 43185, 'loss/train': 1.1088801622390747} +03/05/2022 16:14:20 - INFO - codeparrot_training - Step 43186: {'lr': 0.00041039226104437056, 'samples': 22111744, 'steps': 43186, 'loss/train': 1.1567984819412231} +03/05/2022 16:14:20 - INFO - codeparrot_training - Skipping example with length 183 (seq_length=1024) +03/05/2022 16:14:25 - INFO - codeparrot_training - Step 43187: {'lr': 0.0004103881903888356, 'samples': 22112256, 'steps': 43187, 'loss/train': 2.084285259246826} +03/05/2022 16:14:28 - INFO - codeparrot_training - Step 43188: {'lr': 0.0004103841196610322, 'samples': 22112768, 'steps': 43188, 'loss/train': 1.1020394563674927} +03/05/2022 16:14:29 - INFO - codeparrot_training - Skipping example with length 400 (seq_length=1024) +03/05/2022 16:14:34 - INFO - codeparrot_training - Step 43189: {'lr': 0.0004103800488609622, 'samples': 22113280, 'steps': 43189, 'loss/train': 1.9646868705749512} +03/05/2022 16:14:37 - INFO - codeparrot_training - Step 43190: {'lr': 0.0004103759779886274, 'samples': 22113792, 'steps': 43190, 'loss/train': 0.8072062134742737} +03/05/2022 16:14:37 - INFO - codeparrot_training - Skipping example with length 202 (seq_length=1024) +03/05/2022 16:14:42 - INFO - codeparrot_training - Step 43191: {'lr': 0.0004103719070440297, 'samples': 22114304, 'steps': 43191, 'loss/train': 1.7499758005142212} +03/05/2022 16:14:46 - INFO - codeparrot_training - Step 43192: {'lr': 0.00041036783602717086, 'samples': 22114816, 'steps': 43192, 'loss/train': 2.080030679702759} +03/05/2022 16:14:46 - INFO - codeparrot_training - Skipping example with length 162 (seq_length=1024) +03/05/2022 16:14:51 - INFO - codeparrot_training - Step 43193: {'lr': 0.00041036376493805286, 'samples': 22115328, 'steps': 43193, 'loss/train': 1.2478511333465576} +03/05/2022 16:14:54 - INFO - codeparrot_training - Step 43194: {'lr': 0.0004103596937766773, 'samples': 22115840, 'steps': 43194, 'loss/train': 2.2631237506866455} +03/05/2022 16:14:54 - INFO - codeparrot_training - Skipping example with length 599 (seq_length=1024) +03/05/2022 16:14:59 - INFO - codeparrot_training - Step 43195: {'lr': 0.00041035562254304614, 'samples': 22116352, 'steps': 43195, 'loss/train': 1.521174669265747} +03/05/2022 16:15:02 - INFO - codeparrot_training - Step 43196: {'lr': 0.00041035155123716127, 'samples': 22116864, 'steps': 43196, 'loss/train': 1.5661031007766724} +03/05/2022 16:15:02 - INFO - codeparrot_training - Skipping example with length 516 (seq_length=1024) +03/05/2022 16:15:08 - INFO - codeparrot_training - Step 43197: {'lr': 0.00041034747985902446, 'samples': 22117376, 'steps': 43197, 'loss/train': 1.6527433395385742} +03/05/2022 16:15:11 - INFO - codeparrot_training - Step 43198: {'lr': 0.0004103434084086375, 'samples': 22117888, 'steps': 43198, 'loss/train': 2.2505178451538086} +03/05/2022 16:15:11 - INFO - codeparrot_training - Skipping example with length 943 (seq_length=1024) +03/05/2022 16:15:16 - INFO - codeparrot_training - Step 43199: {'lr': 0.0004103393368860023, 'samples': 22118400, 'steps': 43199, 'loss/train': 2.1395835876464844} +03/05/2022 16:15:19 - INFO - codeparrot_training - Step 43200: {'lr': 0.0004103352652911206, 'samples': 22118912, 'steps': 43200, 'loss/train': 0.9425106644630432} +03/05/2022 16:15:19 - INFO - codeparrot_training - Skipping example with length 491 (seq_length=1024) +03/05/2022 16:15:25 - INFO - codeparrot_training - Step 43201: {'lr': 0.0004103311936239944, 'samples': 22119424, 'steps': 43201, 'loss/train': 1.9217737913131714} +03/05/2022 16:15:28 - INFO - codeparrot_training - Step 43202: {'lr': 0.0004103271218846254, 'samples': 22119936, 'steps': 43202, 'loss/train': 1.6151843070983887} +03/05/2022 16:15:28 - INFO - codeparrot_training - Skipping example with length 247 (seq_length=1024) +03/05/2022 16:15:33 - INFO - codeparrot_training - Step 43203: {'lr': 0.00041032305007301554, 'samples': 22120448, 'steps': 43203, 'loss/train': 1.7006281614303589} +03/05/2022 16:15:36 - INFO - codeparrot_training - Skipping example with length 662 (seq_length=1024) +03/05/2022 16:15:38 - INFO - codeparrot_training - Step 43204: {'lr': 0.00041031897818916645, 'samples': 22120960, 'steps': 43204, 'loss/train': 1.8964240550994873} +03/05/2022 16:15:41 - INFO - codeparrot_training - Step 43205: {'lr': 0.0004103149062330802, 'samples': 22121472, 'steps': 43205, 'loss/train': 2.3115744590759277} +03/05/2022 16:15:44 - INFO - codeparrot_training - Skipping example with length 525 (seq_length=1024) +03/05/2022 16:15:47 - INFO - codeparrot_training - Step 43206: {'lr': 0.00041031083420475854, 'samples': 22121984, 'steps': 43206, 'loss/train': 2.4273064136505127} +03/05/2022 16:15:50 - INFO - codeparrot_training - Step 43207: {'lr': 0.00041030676210420324, 'samples': 22122496, 'steps': 43207, 'loss/train': 1.872268557548523} +03/05/2022 16:15:53 - INFO - codeparrot_training - Step 43208: {'lr': 0.0004103026899314162, 'samples': 22123008, 'steps': 43208, 'loss/train': 1.7310543060302734} +03/05/2022 16:15:53 - INFO - codeparrot_training - Skipping example with length 636 (seq_length=1024) +03/05/2022 16:15:58 - INFO - codeparrot_training - Step 43209: {'lr': 0.00041029861768639934, 'samples': 22123520, 'steps': 43209, 'loss/train': 1.9417424201965332} +03/05/2022 16:16:02 - INFO - codeparrot_training - Step 43210: {'lr': 0.0004102945453691542, 'samples': 22124032, 'steps': 43210, 'loss/train': 1.2173815965652466} +03/05/2022 16:16:02 - INFO - codeparrot_training - Skipping example with length 295 (seq_length=1024) +03/05/2022 16:16:07 - INFO - codeparrot_training - Step 43211: {'lr': 0.00041029047297968293, 'samples': 22124544, 'steps': 43211, 'loss/train': 2.1224944591522217} +03/05/2022 16:16:10 - INFO - codeparrot_training - Step 43212: {'lr': 0.00041028640051798726, 'samples': 22125056, 'steps': 43212, 'loss/train': 1.7711994647979736} +03/05/2022 16:16:11 - INFO - codeparrot_training - Skipping example with length 364 (seq_length=1024) +03/05/2022 16:16:16 - INFO - codeparrot_training - Step 43213: {'lr': 0.000410282327984069, 'samples': 22125568, 'steps': 43213, 'loss/train': 2.022165298461914} +03/05/2022 16:16:19 - INFO - codeparrot_training - Step 43214: {'lr': 0.00041027825537792993, 'samples': 22126080, 'steps': 43214, 'loss/train': 1.6552820205688477} +03/05/2022 16:16:19 - INFO - codeparrot_training - Skipping example with length 304 (seq_length=1024) +03/05/2022 16:16:24 - INFO - codeparrot_training - Step 43215: {'lr': 0.0004102741826995721, 'samples': 22126592, 'steps': 43215, 'loss/train': 1.7351596355438232} +03/05/2022 16:16:27 - INFO - codeparrot_training - Step 43216: {'lr': 0.000410270109948997, 'samples': 22127104, 'steps': 43216, 'loss/train': 1.6724952459335327} +03/05/2022 16:16:28 - INFO - codeparrot_training - Skipping example with length 62 (seq_length=1024) +03/05/2022 16:16:32 - INFO - codeparrot_training - Step 43217: {'lr': 0.0004102660371262068, 'samples': 22127616, 'steps': 43217, 'loss/train': 0.5624778270721436} +03/05/2022 16:16:36 - INFO - codeparrot_training - Step 43218: {'lr': 0.0004102619642312031, 'samples': 22128128, 'steps': 43218, 'loss/train': 1.2636492252349854} +03/05/2022 16:16:36 - INFO - codeparrot_training - Skipping example with length 215 (seq_length=1024) +03/05/2022 16:16:41 - INFO - codeparrot_training - Step 43219: {'lr': 0.00041025789126398793, 'samples': 22128640, 'steps': 43219, 'loss/train': 1.4325833320617676} +03/05/2022 16:16:44 - INFO - codeparrot_training - Step 43220: {'lr': 0.000410253818224563, 'samples': 22129152, 'steps': 43220, 'loss/train': 1.560046672821045} +03/05/2022 16:16:45 - INFO - codeparrot_training - Skipping example with length 414 (seq_length=1024) +03/05/2022 16:16:49 - INFO - codeparrot_training - Step 43221: {'lr': 0.0004102497451129302, 'samples': 22129664, 'steps': 43221, 'loss/train': 1.0455875396728516} +03/05/2022 16:16:53 - INFO - codeparrot_training - Step 43222: {'lr': 0.00041024567192909125, 'samples': 22130176, 'steps': 43222, 'loss/train': 1.946094274520874} +03/05/2022 16:16:53 - INFO - codeparrot_training - Skipping example with length 995 (seq_length=1024) +03/05/2022 16:16:58 - INFO - codeparrot_training - Step 43223: {'lr': 0.0004102415986730481, 'samples': 22130688, 'steps': 43223, 'loss/train': 2.1657912731170654} +03/05/2022 16:17:01 - INFO - codeparrot_training - Step 43224: {'lr': 0.0004102375253448026, 'samples': 22131200, 'steps': 43224, 'loss/train': 2.1871206760406494} +03/05/2022 16:17:02 - INFO - codeparrot_training - Skipping example with length 595 (seq_length=1024) +03/05/2022 16:17:06 - INFO - codeparrot_training - Step 43225: {'lr': 0.0004102334519443565, 'samples': 22131712, 'steps': 43225, 'loss/train': 1.885581612586975} +03/05/2022 16:17:10 - INFO - codeparrot_training - Step 43226: {'lr': 0.0004102293784717117, 'samples': 22132224, 'steps': 43226, 'loss/train': 2.225637912750244} +03/05/2022 16:17:11 - INFO - codeparrot_training - Skipping example with length 364 (seq_length=1024) +03/05/2022 16:17:15 - INFO - codeparrot_training - Step 43227: {'lr': 0.00041022530492687006, 'samples': 22132736, 'steps': 43227, 'loss/train': 1.4710289239883423} +03/05/2022 16:17:18 - INFO - codeparrot_training - Step 43228: {'lr': 0.0004102212313098333, 'samples': 22133248, 'steps': 43228, 'loss/train': 2.4985575675964355} +03/05/2022 16:17:19 - INFO - codeparrot_training - Skipping example with length 764 (seq_length=1024) +03/05/2022 16:17:23 - INFO - codeparrot_training - Step 43229: {'lr': 0.00041021715762060336, 'samples': 22133760, 'steps': 43229, 'loss/train': 1.7442725896835327} +03/05/2022 16:17:27 - INFO - codeparrot_training - Step 43230: {'lr': 0.000410213083859182, 'samples': 22134272, 'steps': 43230, 'loss/train': 2.0833709239959717} +03/05/2022 16:17:28 - INFO - codeparrot_training - Skipping example with length 918 (seq_length=1024) +03/05/2022 16:17:32 - INFO - codeparrot_training - Step 43231: {'lr': 0.0004102090100255711, 'samples': 22134784, 'steps': 43231, 'loss/train': 1.6969125270843506} +03/05/2022 16:17:35 - INFO - codeparrot_training - Step 43232: {'lr': 0.00041020493611977263, 'samples': 22135296, 'steps': 43232, 'loss/train': 2.641444206237793} +03/05/2022 16:17:36 - INFO - codeparrot_training - Skipping example with length 306 (seq_length=1024) +03/05/2022 16:17:40 - INFO - codeparrot_training - Step 43233: {'lr': 0.0004102008621417881, 'samples': 22135808, 'steps': 43233, 'loss/train': 1.8641031980514526} +03/05/2022 16:17:44 - INFO - codeparrot_training - Step 43234: {'lr': 0.0004101967880916196, 'samples': 22136320, 'steps': 43234, 'loss/train': 2.289092540740967} +03/05/2022 16:17:45 - INFO - codeparrot_training - Skipping example with length 221 (seq_length=1024) +03/05/2022 16:17:49 - INFO - codeparrot_training - Step 43235: {'lr': 0.00041019271396926894, 'samples': 22136832, 'steps': 43235, 'loss/train': 1.8249212503433228} +03/05/2022 16:17:52 - INFO - codeparrot_training - Step 43236: {'lr': 0.0004101886397747379, 'samples': 22137344, 'steps': 43236, 'loss/train': 1.6895931959152222} +03/05/2022 16:17:53 - INFO - codeparrot_training - Skipping example with length 326 (seq_length=1024) +03/05/2022 16:17:57 - INFO - codeparrot_training - Step 43237: {'lr': 0.0004101845655080283, 'samples': 22137856, 'steps': 43237, 'loss/train': 2.1939969062805176} +03/05/2022 16:18:00 - INFO - codeparrot_training - Step 43238: {'lr': 0.00041018049116914204, 'samples': 22138368, 'steps': 43238, 'loss/train': 1.3934543132781982} +03/05/2022 16:18:02 - INFO - codeparrot_training - Skipping example with length 417 (seq_length=1024) +03/05/2022 16:18:06 - INFO - codeparrot_training - Step 43239: {'lr': 0.00041017641675808095, 'samples': 22138880, 'steps': 43239, 'loss/train': 2.0057244300842285} +03/05/2022 16:18:09 - INFO - codeparrot_training - Step 43240: {'lr': 0.00041017234227484675, 'samples': 22139392, 'steps': 43240, 'loss/train': 1.9136486053466797} +03/05/2022 16:18:10 - INFO - codeparrot_training - Skipping example with length 668 (seq_length=1024) +03/05/2022 16:18:14 - INFO - codeparrot_training - Step 43241: {'lr': 0.0004101682677194414, 'samples': 22139904, 'steps': 43241, 'loss/train': 1.637627124786377} +03/05/2022 16:18:17 - INFO - codeparrot_training - Step 43242: {'lr': 0.0004101641930918667, 'samples': 22140416, 'steps': 43242, 'loss/train': 2.117596387863159} +03/05/2022 16:18:19 - INFO - codeparrot_training - Skipping example with length 149 (seq_length=1024) +03/05/2022 16:18:23 - INFO - codeparrot_training - Step 43243: {'lr': 0.00041016011839212446, 'samples': 22140928, 'steps': 43243, 'loss/train': 1.7380921840667725} +03/05/2022 16:18:26 - INFO - codeparrot_training - Step 43244: {'lr': 0.0004101560436202166, 'samples': 22141440, 'steps': 43244, 'loss/train': 0.13180974125862122} +03/05/2022 16:18:27 - INFO - codeparrot_training - Skipping example with length 341 (seq_length=1024) +03/05/2022 16:18:31 - INFO - codeparrot_training - Step 43245: {'lr': 0.0004101519687761449, 'samples': 22141952, 'steps': 43245, 'loss/train': 2.1192612648010254} +03/05/2022 16:18:34 - INFO - codeparrot_training - Step 43246: {'lr': 0.00041014789385991114, 'samples': 22142464, 'steps': 43246, 'loss/train': 2.1228344440460205} +03/05/2022 16:18:36 - INFO - codeparrot_training - Skipping example with length 525 (seq_length=1024) +03/05/2022 16:18:40 - INFO - codeparrot_training - Step 43247: {'lr': 0.00041014381887151727, 'samples': 22142976, 'steps': 43247, 'loss/train': 2.1630313396453857} +03/05/2022 16:18:43 - INFO - codeparrot_training - Step 43248: {'lr': 0.00041013974381096503, 'samples': 22143488, 'steps': 43248, 'loss/train': 1.7150437831878662} +03/05/2022 16:18:44 - INFO - codeparrot_training - Skipping example with length 835 (seq_length=1024) +03/05/2022 16:18:48 - INFO - codeparrot_training - Step 43249: {'lr': 0.00041013566867825627, 'samples': 22144000, 'steps': 43249, 'loss/train': 1.8639549016952515} +03/05/2022 16:18:51 - INFO - codeparrot_training - Step 43250: {'lr': 0.00041013159347339293, 'samples': 22144512, 'steps': 43250, 'loss/train': 1.2859350442886353} +03/05/2022 16:18:53 - INFO - codeparrot_training - Skipping example with length 764 (seq_length=1024) +03/05/2022 16:18:57 - INFO - codeparrot_training - Step 43251: {'lr': 0.0004101275181963767, 'samples': 22145024, 'steps': 43251, 'loss/train': 1.7344311475753784} +03/05/2022 16:19:00 - INFO - codeparrot_training - Step 43252: {'lr': 0.0004101234428472095, 'samples': 22145536, 'steps': 43252, 'loss/train': 2.007427930831909} +03/05/2022 16:19:02 - INFO - codeparrot_training - Skipping example with length 958 (seq_length=1024) +03/05/2022 16:19:05 - INFO - codeparrot_training - Step 43253: {'lr': 0.0004101193674258931, 'samples': 22146048, 'steps': 43253, 'loss/train': 0.5492433309555054} +03/05/2022 16:19:08 - INFO - codeparrot_training - Step 43254: {'lr': 0.00041011529193242947, 'samples': 22146560, 'steps': 43254, 'loss/train': 2.0629773139953613} +03/05/2022 16:19:10 - INFO - codeparrot_training - Skipping example with length 859 (seq_length=1024) +03/05/2022 16:19:14 - INFO - codeparrot_training - Step 43255: {'lr': 0.00041011121636682024, 'samples': 22147072, 'steps': 43255, 'loss/train': 1.9129955768585205} +03/05/2022 16:19:17 - INFO - codeparrot_training - Step 43256: {'lr': 0.0004101071407290675, 'samples': 22147584, 'steps': 43256, 'loss/train': 1.7603520154953003} +03/05/2022 16:19:19 - INFO - codeparrot_training - Skipping example with length 306 (seq_length=1024) +03/05/2022 16:19:22 - INFO - codeparrot_training - Step 43257: {'lr': 0.00041010306501917287, 'samples': 22148096, 'steps': 43257, 'loss/train': 1.894473671913147} +03/05/2022 16:19:25 - INFO - codeparrot_training - Step 43258: {'lr': 0.0004100989892371383, 'samples': 22148608, 'steps': 43258, 'loss/train': 0.7726580500602722} +03/05/2022 16:19:27 - INFO - codeparrot_training - Skipping example with length 16 (seq_length=1024) +03/05/2022 16:19:31 - INFO - codeparrot_training - Step 43259: {'lr': 0.00041009491338296557, 'samples': 22149120, 'steps': 43259, 'loss/train': 1.3996249437332153} +03/05/2022 16:19:34 - INFO - codeparrot_training - Step 43260: {'lr': 0.00041009083745665654, 'samples': 22149632, 'steps': 43260, 'loss/train': 2.183591365814209} +03/05/2022 16:19:37 - INFO - codeparrot_training - Step 43261: {'lr': 0.0004100867614582131, 'samples': 22150144, 'steps': 43261, 'loss/train': 6.1011528968811035} +03/05/2022 16:19:38 - INFO - codeparrot_training - Skipping example with length 613 (seq_length=1024) +03/05/2022 16:19:43 - INFO - codeparrot_training - Step 43262: {'lr': 0.00041008268538763703, 'samples': 22150656, 'steps': 43262, 'loss/train': 1.5988047122955322} +03/05/2022 16:19:46 - INFO - codeparrot_training - Step 43263: {'lr': 0.00041007860924493014, 'samples': 22151168, 'steps': 43263, 'loss/train': 2.19834566116333} +03/05/2022 16:19:47 - INFO - codeparrot_training - Skipping example with length 84 (seq_length=1024) +03/05/2022 16:19:51 - INFO - codeparrot_training - Step 43264: {'lr': 0.0004100745330300943, 'samples': 22151680, 'steps': 43264, 'loss/train': 1.936963677406311} +03/05/2022 16:19:54 - INFO - codeparrot_training - Step 43265: {'lr': 0.0004100704567431314, 'samples': 22152192, 'steps': 43265, 'loss/train': 2.0785539150238037} +03/05/2022 16:19:55 - INFO - codeparrot_training - Skipping example with length 46 (seq_length=1024) +03/05/2022 16:19:59 - INFO - codeparrot_training - Step 43266: {'lr': 0.0004100663803840431, 'samples': 22152704, 'steps': 43266, 'loss/train': 0.9911612868309021} +03/05/2022 16:20:03 - INFO - codeparrot_training - Step 43267: {'lr': 0.0004100623039528315, 'samples': 22153216, 'steps': 43267, 'loss/train': 1.738268494606018} +03/05/2022 16:20:04 - INFO - codeparrot_training - Skipping example with length 598 (seq_length=1024) +03/05/2022 16:20:08 - INFO - codeparrot_training - Step 43268: {'lr': 0.0004100582274494982, 'samples': 22153728, 'steps': 43268, 'loss/train': 1.795452356338501} +03/05/2022 16:20:11 - INFO - codeparrot_training - Step 43269: {'lr': 0.00041005415087404516, 'samples': 22154240, 'steps': 43269, 'loss/train': 1.401180386543274} +03/05/2022 16:20:13 - INFO - codeparrot_training - Skipping example with length 855 (seq_length=1024) +03/05/2022 16:20:16 - INFO - codeparrot_training - Step 43270: {'lr': 0.0004100500742264742, 'samples': 22154752, 'steps': 43270, 'loss/train': 1.5610294342041016} +03/05/2022 16:20:19 - INFO - codeparrot_training - Step 43271: {'lr': 0.0004100459975067871, 'samples': 22155264, 'steps': 43271, 'loss/train': 2.042935371398926} +03/05/2022 16:20:21 - INFO - codeparrot_training - Skipping example with length 984 (seq_length=1024) +03/05/2022 16:20:25 - INFO - codeparrot_training - Step 43272: {'lr': 0.0004100419207149858, 'samples': 22155776, 'steps': 43272, 'loss/train': 1.2920318841934204} +03/05/2022 16:20:28 - INFO - codeparrot_training - Step 43273: {'lr': 0.0004100378438510721, 'samples': 22156288, 'steps': 43273, 'loss/train': 1.3089075088500977} +03/05/2022 16:20:30 - INFO - codeparrot_training - Skipping example with length 952 (seq_length=1024) +03/05/2022 16:20:33 - INFO - codeparrot_training - Step 43274: {'lr': 0.00041003376691504777, 'samples': 22156800, 'steps': 43274, 'loss/train': 2.227097749710083} +03/05/2022 16:20:36 - INFO - codeparrot_training - Step 43275: {'lr': 0.0004100296899069147, 'samples': 22157312, 'steps': 43275, 'loss/train': 1.2516378164291382} +03/05/2022 16:20:39 - INFO - codeparrot_training - Skipping example with length 304 (seq_length=1024) +03/05/2022 16:20:42 - INFO - codeparrot_training - Step 43276: {'lr': 0.0004100256128266747, 'samples': 22157824, 'steps': 43276, 'loss/train': 1.8187108039855957} +03/05/2022 16:20:45 - INFO - codeparrot_training - Step 43277: {'lr': 0.00041002153567432965, 'samples': 22158336, 'steps': 43277, 'loss/train': 2.208329439163208} +03/05/2022 16:20:47 - INFO - codeparrot_training - Skipping example with length 553 (seq_length=1024) +03/05/2022 16:20:50 - INFO - codeparrot_training - Step 43278: {'lr': 0.00041001745844988134, 'samples': 22158848, 'steps': 43278, 'loss/train': 1.4814634323120117} +03/05/2022 16:20:53 - INFO - codeparrot_training - Step 43279: {'lr': 0.00041001338115333175, 'samples': 22159360, 'steps': 43279, 'loss/train': 1.9411381483078003} +03/05/2022 16:20:55 - INFO - codeparrot_training - Skipping example with length 521 (seq_length=1024) +03/05/2022 16:20:59 - INFO - codeparrot_training - Step 43280: {'lr': 0.0004100093037846825, 'samples': 22159872, 'steps': 43280, 'loss/train': 1.612526297569275} +03/05/2022 16:21:02 - INFO - codeparrot_training - Step 43281: {'lr': 0.0004100052263439355, 'samples': 22160384, 'steps': 43281, 'loss/train': 1.5964199304580688} +03/05/2022 16:21:03 - INFO - codeparrot_training - Skipping example with length 99 (seq_length=1024) +03/05/2022 16:21:07 - INFO - codeparrot_training - Step 43282: {'lr': 0.00041000114883109264, 'samples': 22160896, 'steps': 43282, 'loss/train': 1.277388334274292} +03/05/2022 16:21:10 - INFO - codeparrot_training - Step 43283: {'lr': 0.00040999707124615573, 'samples': 22161408, 'steps': 43283, 'loss/train': 1.9545600414276123} +03/05/2022 16:21:12 - INFO - codeparrot_training - Skipping example with length 132 (seq_length=1024) +03/05/2022 16:21:16 - INFO - codeparrot_training - Step 43284: {'lr': 0.00040999299358912664, 'samples': 22161920, 'steps': 43284, 'loss/train': 1.9750738143920898} +03/05/2022 16:21:19 - INFO - codeparrot_training - Step 43285: {'lr': 0.00040998891586000716, 'samples': 22162432, 'steps': 43285, 'loss/train': 1.8884276151657104} +03/05/2022 16:21:20 - INFO - codeparrot_training - Skipping example with length 143 (seq_length=1024) +03/05/2022 16:21:24 - INFO - codeparrot_training - Step 43286: {'lr': 0.0004099848380587992, 'samples': 22162944, 'steps': 43286, 'loss/train': 2.157059907913208} +03/05/2022 16:21:27 - INFO - codeparrot_training - Step 43287: {'lr': 0.00040998076018550444, 'samples': 22163456, 'steps': 43287, 'loss/train': 2.160559892654419} +03/05/2022 16:21:29 - INFO - codeparrot_training - Skipping example with length 806 (seq_length=1024) +03/05/2022 16:21:33 - INFO - codeparrot_training - Step 43288: {'lr': 0.00040997668224012485, 'samples': 22163968, 'steps': 43288, 'loss/train': 1.0545042753219604} +03/05/2022 16:21:36 - INFO - codeparrot_training - Step 43289: {'lr': 0.00040997260422266223, 'samples': 22164480, 'steps': 43289, 'loss/train': 1.4148080348968506} +03/05/2022 16:21:38 - INFO - codeparrot_training - Skipping example with length 333 (seq_length=1024) +03/05/2022 16:21:41 - INFO - codeparrot_training - Step 43290: {'lr': 0.00040996852613311844, 'samples': 22164992, 'steps': 43290, 'loss/train': 2.2292966842651367} +03/05/2022 16:21:44 - INFO - codeparrot_training - Step 43291: {'lr': 0.00040996444797149526, 'samples': 22165504, 'steps': 43291, 'loss/train': 1.8932799100875854} +03/05/2022 16:21:46 - INFO - codeparrot_training - Skipping example with length 998 (seq_length=1024) +03/05/2022 16:21:50 - INFO - codeparrot_training - Step 43292: {'lr': 0.0004099603697377946, 'samples': 22166016, 'steps': 43292, 'loss/train': 0.1687105894088745} +03/05/2022 16:21:53 - INFO - codeparrot_training - Step 43293: {'lr': 0.0004099562914320183, 'samples': 22166528, 'steps': 43293, 'loss/train': 1.3750301599502563} +03/05/2022 16:21:55 - INFO - codeparrot_training - Skipping example with length 902 (seq_length=1024) +03/05/2022 16:21:58 - INFO - codeparrot_training - Step 43294: {'lr': 0.0004099522130541681, 'samples': 22167040, 'steps': 43294, 'loss/train': 1.9272620677947998} +03/05/2022 16:22:01 - INFO - codeparrot_training - Step 43295: {'lr': 0.000409948134604246, 'samples': 22167552, 'steps': 43295, 'loss/train': 1.5363616943359375} +03/05/2022 16:22:03 - INFO - codeparrot_training - Skipping example with length 881 (seq_length=1024) +03/05/2022 16:22:07 - INFO - codeparrot_training - Step 43296: {'lr': 0.0004099440560822536, 'samples': 22168064, 'steps': 43296, 'loss/train': 1.2806549072265625} +03/05/2022 16:22:10 - INFO - codeparrot_training - Step 43297: {'lr': 0.000409939977488193, 'samples': 22168576, 'steps': 43297, 'loss/train': 0.5378865003585815} +03/05/2022 16:22:11 - INFO - codeparrot_training - Skipping example with length 276 (seq_length=1024) +03/05/2022 16:22:15 - INFO - codeparrot_training - Step 43298: {'lr': 0.0004099358988220658, 'samples': 22169088, 'steps': 43298, 'loss/train': 1.4013214111328125} +03/05/2022 16:22:18 - INFO - codeparrot_training - Step 43299: {'lr': 0.00040993182008387406, 'samples': 22169600, 'steps': 43299, 'loss/train': 2.9513144493103027} +03/05/2022 16:22:20 - INFO - codeparrot_training - Skipping example with length 404 (seq_length=1024) +03/05/2022 16:22:23 - INFO - codeparrot_training - Step 43300: {'lr': 0.0004099277412736195, 'samples': 22170112, 'steps': 43300, 'loss/train': 1.6932947635650635} +03/05/2022 16:22:27 - INFO - codeparrot_training - Step 43301: {'lr': 0.0004099236623913039, 'samples': 22170624, 'steps': 43301, 'loss/train': 1.8396910429000854} +03/05/2022 16:22:28 - INFO - codeparrot_training - Skipping example with length 736 (seq_length=1024) +03/05/2022 16:22:32 - INFO - codeparrot_training - Step 43302: {'lr': 0.0004099195834369292, 'samples': 22171136, 'steps': 43302, 'loss/train': 2.3354883193969727} +03/05/2022 16:22:35 - INFO - codeparrot_training - Step 43303: {'lr': 0.0004099155044104972, 'samples': 22171648, 'steps': 43303, 'loss/train': 1.7761167287826538} +03/05/2022 16:22:37 - INFO - codeparrot_training - Skipping example with length 681 (seq_length=1024) +03/05/2022 16:22:40 - INFO - codeparrot_training - Step 43304: {'lr': 0.00040991142531200973, 'samples': 22172160, 'steps': 43304, 'loss/train': 0.6873633861541748} +03/05/2022 16:22:44 - INFO - codeparrot_training - Step 43305: {'lr': 0.0004099073461414686, 'samples': 22172672, 'steps': 43305, 'loss/train': 2.08894419670105} +03/05/2022 16:22:45 - INFO - codeparrot_training - Skipping example with length 78 (seq_length=1024) +03/05/2022 16:22:49 - INFO - codeparrot_training - Step 43306: {'lr': 0.0004099032668988758, 'samples': 22173184, 'steps': 43306, 'loss/train': 2.921689987182617} +03/05/2022 16:22:52 - INFO - codeparrot_training - Step 43307: {'lr': 0.00040989918758423306, 'samples': 22173696, 'steps': 43307, 'loss/train': 1.773556113243103} +03/05/2022 16:22:55 - INFO - codeparrot_training - Skipping example with length 853 (seq_length=1024) +03/05/2022 16:22:58 - INFO - codeparrot_training - Step 43308: {'lr': 0.0004098951081975421, 'samples': 22174208, 'steps': 43308, 'loss/train': 2.1210813522338867} +03/05/2022 16:23:01 - INFO - codeparrot_training - Step 43309: {'lr': 0.0004098910287388049, 'samples': 22174720, 'steps': 43309, 'loss/train': 1.2710038423538208} +03/05/2022 16:23:04 - INFO - codeparrot_training - Skipping example with length 276 (seq_length=1024) +03/05/2022 16:23:06 - INFO - codeparrot_training - Step 43310: {'lr': 0.00040988694920802326, 'samples': 22175232, 'steps': 43310, 'loss/train': 1.8920053243637085} +03/05/2022 16:23:09 - INFO - codeparrot_training - Step 43311: {'lr': 0.0004098828696051991, 'samples': 22175744, 'steps': 43311, 'loss/train': 1.2835326194763184} +03/05/2022 16:23:12 - INFO - codeparrot_training - Skipping example with length 33 (seq_length=1024) +03/05/2022 16:23:15 - INFO - codeparrot_training - Step 43312: {'lr': 0.00040987878993033417, 'samples': 22176256, 'steps': 43312, 'loss/train': 1.5760188102722168} +03/05/2022 16:23:18 - INFO - codeparrot_training - Step 43313: {'lr': 0.0004098747101834303, 'samples': 22176768, 'steps': 43313, 'loss/train': 2.234300374984741} +03/05/2022 16:23:20 - INFO - codeparrot_training - Skipping example with length 1012 (seq_length=1024) +03/05/2022 16:23:23 - INFO - codeparrot_training - Step 43314: {'lr': 0.00040987063036448934, 'samples': 22177280, 'steps': 43314, 'loss/train': 2.0715091228485107} +03/05/2022 16:23:26 - INFO - codeparrot_training - Step 43315: {'lr': 0.0004098665504735132, 'samples': 22177792, 'steps': 43315, 'loss/train': 1.4630672931671143} +03/05/2022 16:23:29 - INFO - codeparrot_training - Skipping example with length 99 (seq_length=1024) +03/05/2022 16:23:32 - INFO - codeparrot_training - Step 43316: {'lr': 0.0004098624705105036, 'samples': 22178304, 'steps': 43316, 'loss/train': 1.4232174158096313} +03/05/2022 16:23:35 - INFO - codeparrot_training - Step 43317: {'lr': 0.00040985839047546243, 'samples': 22178816, 'steps': 43317, 'loss/train': 1.433431625366211} +03/05/2022 16:23:38 - INFO - codeparrot_training - Step 43318: {'lr': 0.00040985431036839155, 'samples': 22179328, 'steps': 43318, 'loss/train': 6.560598373413086} +03/05/2022 16:23:38 - INFO - codeparrot_training - Skipping example with length 526 (seq_length=1024) +03/05/2022 16:23:43 - INFO - codeparrot_training - Step 43319: {'lr': 0.00040985023018929277, 'samples': 22179840, 'steps': 43319, 'loss/train': 1.5397597551345825} +03/05/2022 16:23:47 - INFO - codeparrot_training - Step 43320: {'lr': 0.000409846149938168, 'samples': 22180352, 'steps': 43320, 'loss/train': 1.7565006017684937} +03/05/2022 16:23:47 - INFO - codeparrot_training - Skipping example with length 62 (seq_length=1024) +03/05/2022 16:23:52 - INFO - codeparrot_training - Step 43321: {'lr': 0.000409842069615019, 'samples': 22180864, 'steps': 43321, 'loss/train': 1.2176549434661865} +03/05/2022 16:23:55 - INFO - codeparrot_training - Step 43322: {'lr': 0.0004098379892198476, 'samples': 22181376, 'steps': 43322, 'loss/train': 1.4262795448303223} +03/05/2022 16:23:55 - INFO - codeparrot_training - Skipping example with length 1001 (seq_length=1024) +03/05/2022 16:24:00 - INFO - codeparrot_training - Step 43323: {'lr': 0.0004098339087526557, 'samples': 22181888, 'steps': 43323, 'loss/train': 2.227158784866333} +03/05/2022 16:24:03 - INFO - codeparrot_training - Step 43324: {'lr': 0.00040982982821344505, 'samples': 22182400, 'steps': 43324, 'loss/train': 1.4903699159622192} +03/05/2022 16:24:03 - INFO - codeparrot_training - Skipping example with length 249 (seq_length=1024) +03/05/2022 16:24:09 - INFO - codeparrot_training - Step 43325: {'lr': 0.0004098257476022176, 'samples': 22182912, 'steps': 43325, 'loss/train': 2.0726962089538574} +03/05/2022 16:24:12 - INFO - codeparrot_training - Step 43326: {'lr': 0.00040982166691897517, 'samples': 22183424, 'steps': 43326, 'loss/train': 1.7390996217727661} +03/05/2022 16:24:12 - INFO - codeparrot_training - Skipping example with length 111 (seq_length=1024) +03/05/2022 16:24:17 - INFO - codeparrot_training - Step 43327: {'lr': 0.00040981758616371943, 'samples': 22183936, 'steps': 43327, 'loss/train': 1.3204691410064697} +03/05/2022 16:24:21 - INFO - codeparrot_training - Step 43328: {'lr': 0.00040981350533645245, 'samples': 22184448, 'steps': 43328, 'loss/train': 2.1645500659942627} +03/05/2022 16:24:21 - INFO - codeparrot_training - Skipping example with length 846 (seq_length=1024) +03/05/2022 16:24:26 - INFO - codeparrot_training - Step 43329: {'lr': 0.00040980942443717596, 'samples': 22184960, 'steps': 43329, 'loss/train': 2.4344727993011475} +03/05/2022 16:24:29 - INFO - codeparrot_training - Step 43330: {'lr': 0.0004098053434658918, 'samples': 22185472, 'steps': 43330, 'loss/train': 2.095499277114868} +03/05/2022 16:24:29 - INFO - codeparrot_training - Skipping example with length 764 (seq_length=1024) +03/05/2022 16:24:34 - INFO - codeparrot_training - Step 43331: {'lr': 0.0004098012624226018, 'samples': 22185984, 'steps': 43331, 'loss/train': 1.8170472383499146} +03/05/2022 16:24:37 - INFO - codeparrot_training - Step 43332: {'lr': 0.00040979718130730786, 'samples': 22186496, 'steps': 43332, 'loss/train': 1.6126526594161987} +03/05/2022 16:24:37 - INFO - codeparrot_training - Skipping example with length 7 (seq_length=1024) +03/05/2022 16:24:43 - INFO - codeparrot_training - Step 43333: {'lr': 0.0004097931001200118, 'samples': 22187008, 'steps': 43333, 'loss/train': 0.8030344843864441} +03/05/2022 16:24:46 - INFO - codeparrot_training - Step 43334: {'lr': 0.00040978901886071543, 'samples': 22187520, 'steps': 43334, 'loss/train': 2.249089241027832} +03/05/2022 16:24:46 - INFO - codeparrot_training - Skipping example with length 380 (seq_length=1024) +03/05/2022 16:24:51 - INFO - codeparrot_training - Step 43335: {'lr': 0.0004097849375294205, 'samples': 22188032, 'steps': 43335, 'loss/train': 1.147964358329773} +03/05/2022 16:24:54 - INFO - codeparrot_training - Skipping example with length 249 (seq_length=1024) +03/05/2022 16:24:56 - INFO - codeparrot_training - Step 43336: {'lr': 0.000409780856126129, 'samples': 22188544, 'steps': 43336, 'loss/train': 1.7460064888000488} +03/05/2022 16:25:00 - INFO - codeparrot_training - Step 43337: {'lr': 0.00040977677465084275, 'samples': 22189056, 'steps': 43337, 'loss/train': 1.572378158569336} +03/05/2022 16:25:02 - INFO - codeparrot_training - Skipping example with length 66 (seq_length=1024) +03/05/2022 16:25:05 - INFO - codeparrot_training - Step 43338: {'lr': 0.00040977269310356345, 'samples': 22189568, 'steps': 43338, 'loss/train': 0.9728551506996155} +03/05/2022 16:25:08 - INFO - codeparrot_training - Step 43339: {'lr': 0.00040976861148429313, 'samples': 22190080, 'steps': 43339, 'loss/train': 0.36501577496528625} +03/05/2022 16:25:11 - INFO - codeparrot_training - Skipping example with length 932 (seq_length=1024) +03/05/2022 16:25:13 - INFO - codeparrot_training - Step 43340: {'lr': 0.0004097645297930335, 'samples': 22190592, 'steps': 43340, 'loss/train': 1.357017159461975} +03/05/2022 16:25:17 - INFO - codeparrot_training - Step 43341: {'lr': 0.00040976044802978645, 'samples': 22191104, 'steps': 43341, 'loss/train': 1.7620670795440674} +03/05/2022 16:25:20 - INFO - codeparrot_training - Skipping example with length 97 (seq_length=1024) +03/05/2022 16:25:22 - INFO - codeparrot_training - Step 43342: {'lr': 0.0004097563661945538, 'samples': 22191616, 'steps': 43342, 'loss/train': 1.441552996635437} +03/05/2022 16:25:25 - INFO - codeparrot_training - Step 43343: {'lr': 0.0004097522842873374, 'samples': 22192128, 'steps': 43343, 'loss/train': 2.7072691917419434} +03/05/2022 16:25:28 - INFO - codeparrot_training - Skipping example with length 954 (seq_length=1024) +03/05/2022 16:25:30 - INFO - codeparrot_training - Step 43344: {'lr': 0.0004097482023081391, 'samples': 22192640, 'steps': 43344, 'loss/train': 2.577505350112915} +03/05/2022 16:25:34 - INFO - codeparrot_training - Step 43345: {'lr': 0.00040974412025696067, 'samples': 22193152, 'steps': 43345, 'loss/train': 1.4229176044464111} +03/05/2022 16:25:36 - INFO - codeparrot_training - Skipping example with length 692 (seq_length=1024) +03/05/2022 16:25:39 - INFO - codeparrot_training - Step 43346: {'lr': 0.0004097400381338041, 'samples': 22193664, 'steps': 43346, 'loss/train': 1.234185814857483} +03/05/2022 16:25:42 - INFO - codeparrot_training - Step 43347: {'lr': 0.0004097359559386711, 'samples': 22194176, 'steps': 43347, 'loss/train': 1.3730961084365845} +03/05/2022 16:25:45 - INFO - codeparrot_training - Skipping example with length 634 (seq_length=1024) +03/05/2022 16:25:47 - INFO - codeparrot_training - Step 43348: {'lr': 0.0004097318736715635, 'samples': 22194688, 'steps': 43348, 'loss/train': 2.0351779460906982} +03/05/2022 16:25:51 - INFO - codeparrot_training - Step 43349: {'lr': 0.0004097277913324832, 'samples': 22195200, 'steps': 43349, 'loss/train': 1.8163080215454102} +03/05/2022 16:25:53 - INFO - codeparrot_training - Skipping example with length 12 (seq_length=1024) +03/05/2022 16:25:56 - INFO - codeparrot_training - Step 43350: {'lr': 0.000409723708921432, 'samples': 22195712, 'steps': 43350, 'loss/train': 1.5514692068099976} +03/05/2022 16:25:59 - INFO - codeparrot_training - Step 43351: {'lr': 0.0004097196264384118, 'samples': 22196224, 'steps': 43351, 'loss/train': 0.38539794087409973} +03/05/2022 16:26:01 - INFO - codeparrot_training - Skipping example with length 41 (seq_length=1024) +03/05/2022 16:26:04 - INFO - codeparrot_training - Step 43352: {'lr': 0.00040971554388342436, 'samples': 22196736, 'steps': 43352, 'loss/train': 2.0198237895965576} +03/05/2022 16:26:08 - INFO - codeparrot_training - Step 43353: {'lr': 0.00040971146125647165, 'samples': 22197248, 'steps': 43353, 'loss/train': 1.6382557153701782} +03/05/2022 16:26:11 - INFO - codeparrot_training - Step 43354: {'lr': 0.00040970737855755535, 'samples': 22197760, 'steps': 43354, 'loss/train': 1.3564033508300781} +03/05/2022 16:26:11 - INFO - codeparrot_training - Skipping example with length 894 (seq_length=1024) +03/05/2022 16:26:16 - INFO - codeparrot_training - Step 43355: {'lr': 0.00040970329578667735, 'samples': 22198272, 'steps': 43355, 'loss/train': 2.3902106285095215} +03/05/2022 16:26:19 - INFO - codeparrot_training - Step 43356: {'lr': 0.00040969921294383956, 'samples': 22198784, 'steps': 43356, 'loss/train': 3.4535341262817383} +03/05/2022 16:26:19 - INFO - codeparrot_training - Skipping example with length 31 (seq_length=1024) +03/05/2022 16:26:24 - INFO - codeparrot_training - Step 43357: {'lr': 0.00040969513002904375, 'samples': 22199296, 'steps': 43357, 'loss/train': 1.6258068084716797} +03/05/2022 16:26:28 - INFO - codeparrot_training - Step 43358: {'lr': 0.0004096910470422918, 'samples': 22199808, 'steps': 43358, 'loss/train': 1.2153639793395996} +03/05/2022 16:26:28 - INFO - codeparrot_training - Skipping example with length 670 (seq_length=1024) +03/05/2022 16:26:33 - INFO - codeparrot_training - Step 43359: {'lr': 0.0004096869639835855, 'samples': 22200320, 'steps': 43359, 'loss/train': 1.4235236644744873} +03/05/2022 16:26:36 - INFO - codeparrot_training - Step 43360: {'lr': 0.0004096828808529267, 'samples': 22200832, 'steps': 43360, 'loss/train': 1.9123384952545166} +03/05/2022 16:26:36 - INFO - codeparrot_training - Skipping example with length 365 (seq_length=1024) +03/05/2022 16:26:41 - INFO - codeparrot_training - Step 43361: {'lr': 0.0004096787976503173, 'samples': 22201344, 'steps': 43361, 'loss/train': 2.8673346042633057} +03/05/2022 16:26:45 - INFO - codeparrot_training - Step 43362: {'lr': 0.0004096747143757591, 'samples': 22201856, 'steps': 43362, 'loss/train': 1.7435725927352905} +03/05/2022 16:26:45 - INFO - codeparrot_training - Skipping example with length 131 (seq_length=1024) +03/05/2022 16:26:50 - INFO - codeparrot_training - Step 43363: {'lr': 0.0004096706310292539, 'samples': 22202368, 'steps': 43363, 'loss/train': 2.056272506713867} +03/05/2022 16:26:53 - INFO - codeparrot_training - Step 43364: {'lr': 0.0004096665476108036, 'samples': 22202880, 'steps': 43364, 'loss/train': 1.076832890510559} +03/05/2022 16:26:53 - INFO - codeparrot_training - Skipping example with length 642 (seq_length=1024) +03/05/2022 16:26:59 - INFO - codeparrot_training - Step 43365: {'lr': 0.00040966246412040995, 'samples': 22203392, 'steps': 43365, 'loss/train': 1.996356725692749} +03/05/2022 16:27:02 - INFO - codeparrot_training - Step 43366: {'lr': 0.00040965838055807493, 'samples': 22203904, 'steps': 43366, 'loss/train': 2.3094301223754883} +03/05/2022 16:27:03 - INFO - codeparrot_training - Skipping example with length 367 (seq_length=1024) +03/05/2022 16:27:07 - INFO - codeparrot_training - Step 43367: {'lr': 0.00040965429692380034, 'samples': 22204416, 'steps': 43367, 'loss/train': 2.051711082458496} +03/05/2022 16:27:10 - INFO - codeparrot_training - Step 43368: {'lr': 0.00040965021321758796, 'samples': 22204928, 'steps': 43368, 'loss/train': 1.2962428331375122} +03/05/2022 16:27:11 - INFO - codeparrot_training - Skipping example with length 356 (seq_length=1024) +03/05/2022 16:27:16 - INFO - codeparrot_training - Step 43369: {'lr': 0.00040964612943943964, 'samples': 22205440, 'steps': 43369, 'loss/train': 1.508206844329834} +03/05/2022 16:27:19 - INFO - codeparrot_training - Step 43370: {'lr': 0.00040964204558935726, 'samples': 22205952, 'steps': 43370, 'loss/train': 2.2256107330322266} +03/05/2022 16:27:19 - INFO - codeparrot_training - Skipping example with length 447 (seq_length=1024) +03/05/2022 16:27:24 - INFO - codeparrot_training - Step 43371: {'lr': 0.00040963796166734257, 'samples': 22206464, 'steps': 43371, 'loss/train': 1.6218324899673462} +03/05/2022 16:27:27 - INFO - codeparrot_training - Step 43372: {'lr': 0.00040963387767339757, 'samples': 22206976, 'steps': 43372, 'loss/train': 1.4922221899032593} +03/05/2022 16:27:28 - INFO - codeparrot_training - Skipping example with length 794 (seq_length=1024) +03/05/2022 16:27:32 - INFO - codeparrot_training - Step 43373: {'lr': 0.00040962979360752394, 'samples': 22207488, 'steps': 43373, 'loss/train': 2.066102981567383} +03/05/2022 16:27:36 - INFO - codeparrot_training - Step 43374: {'lr': 0.0004096257094697236, 'samples': 22208000, 'steps': 43374, 'loss/train': 1.886871576309204} +03/05/2022 16:27:36 - INFO - codeparrot_training - Skipping example with length 165 (seq_length=1024) +03/05/2022 16:27:41 - INFO - codeparrot_training - Step 43375: {'lr': 0.00040962162525999833, 'samples': 22208512, 'steps': 43375, 'loss/train': 2.311516761779785} +03/05/2022 16:27:44 - INFO - codeparrot_training - Step 43376: {'lr': 0.00040961754097835015, 'samples': 22209024, 'steps': 43376, 'loss/train': 2.1094181537628174} +03/05/2022 16:27:45 - INFO - codeparrot_training - Skipping example with length 128 (seq_length=1024) +03/05/2022 16:27:50 - INFO - codeparrot_training - Step 43377: {'lr': 0.00040961345662478065, 'samples': 22209536, 'steps': 43377, 'loss/train': 1.2571252584457397} +03/05/2022 16:27:53 - INFO - codeparrot_training - Step 43378: {'lr': 0.00040960937219929186, 'samples': 22210048, 'steps': 43378, 'loss/train': 1.975334882736206} +03/05/2022 16:27:53 - INFO - codeparrot_training - Skipping example with length 318 (seq_length=1024) +03/05/2022 16:27:58 - INFO - codeparrot_training - Step 43379: {'lr': 0.00040960528770188554, 'samples': 22210560, 'steps': 43379, 'loss/train': 1.0472302436828613} +03/05/2022 16:28:01 - INFO - codeparrot_training - Step 43380: {'lr': 0.00040960120313256356, 'samples': 22211072, 'steps': 43380, 'loss/train': 1.998261570930481} +03/05/2022 16:28:02 - INFO - codeparrot_training - Skipping example with length 368 (seq_length=1024) +03/05/2022 16:28:07 - INFO - codeparrot_training - Step 43381: {'lr': 0.0004095971184913277, 'samples': 22211584, 'steps': 43381, 'loss/train': 1.7994533777236938} +03/05/2022 16:28:10 - INFO - codeparrot_training - Step 43382: {'lr': 0.0004095930337781798, 'samples': 22212096, 'steps': 43382, 'loss/train': 1.8848236799240112} +03/05/2022 16:28:10 - INFO - codeparrot_training - Skipping example with length 36 (seq_length=1024) +03/05/2022 16:28:15 - INFO - codeparrot_training - Step 43383: {'lr': 0.00040958894899312183, 'samples': 22212608, 'steps': 43383, 'loss/train': 1.3643547296524048} +03/05/2022 16:28:18 - INFO - codeparrot_training - Step 43384: {'lr': 0.0004095848641361555, 'samples': 22213120, 'steps': 43384, 'loss/train': 2.758451461791992} +03/05/2022 16:28:20 - INFO - codeparrot_training - Skipping example with length 437 (seq_length=1024) +03/05/2022 16:28:24 - INFO - codeparrot_training - Step 43385: {'lr': 0.0004095807792072827, 'samples': 22213632, 'steps': 43385, 'loss/train': 1.6296025514602661} +03/05/2022 16:28:27 - INFO - codeparrot_training - Step 43386: {'lr': 0.00040957669420650525, 'samples': 22214144, 'steps': 43386, 'loss/train': 2.6608428955078125} +03/05/2022 16:28:28 - INFO - codeparrot_training - Skipping example with length 363 (seq_length=1024) +03/05/2022 16:28:32 - INFO - codeparrot_training - Step 43387: {'lr': 0.000409572609133825, 'samples': 22214656, 'steps': 43387, 'loss/train': 1.7277415990829468} +03/05/2022 16:28:35 - INFO - codeparrot_training - Step 43388: {'lr': 0.00040956852398924383, 'samples': 22215168, 'steps': 43388, 'loss/train': 1.0673612356185913} +03/05/2022 16:28:37 - INFO - codeparrot_training - Skipping example with length 839 (seq_length=1024) +03/05/2022 16:28:41 - INFO - codeparrot_training - Step 43389: {'lr': 0.0004095644387727635, 'samples': 22215680, 'steps': 43389, 'loss/train': 4.4261908531188965} +03/05/2022 16:28:44 - INFO - codeparrot_training - Step 43390: {'lr': 0.0004095603534843859, 'samples': 22216192, 'steps': 43390, 'loss/train': 1.3187798261642456} +03/05/2022 16:28:45 - INFO - codeparrot_training - Skipping example with length 667 (seq_length=1024) +03/05/2022 16:28:49 - INFO - codeparrot_training - Step 43391: {'lr': 0.00040955626812411297, 'samples': 22216704, 'steps': 43391, 'loss/train': 3.016716718673706} +03/05/2022 16:28:52 - INFO - codeparrot_training - Step 43392: {'lr': 0.0004095521826919463, 'samples': 22217216, 'steps': 43392, 'loss/train': 1.3573050498962402} +03/05/2022 16:28:54 - INFO - codeparrot_training - Skipping example with length 529 (seq_length=1024) +03/05/2022 16:28:57 - INFO - codeparrot_training - Step 43393: {'lr': 0.0004095480971878879, 'samples': 22217728, 'steps': 43393, 'loss/train': 2.000455379486084} +03/05/2022 16:29:01 - INFO - codeparrot_training - Step 43394: {'lr': 0.0004095440116119397, 'samples': 22218240, 'steps': 43394, 'loss/train': 2.1848514080047607} +03/05/2022 16:29:02 - INFO - codeparrot_training - Skipping example with length 815 (seq_length=1024) +03/05/2022 16:29:06 - INFO - codeparrot_training - Step 43395: {'lr': 0.00040953992596410335, 'samples': 22218752, 'steps': 43395, 'loss/train': 1.9530316591262817} +03/05/2022 16:29:09 - INFO - codeparrot_training - Step 43396: {'lr': 0.0004095358402443808, 'samples': 22219264, 'steps': 43396, 'loss/train': 1.3370455503463745} +03/05/2022 16:29:10 - INFO - codeparrot_training - Skipping example with length 115 (seq_length=1024) +03/05/2022 16:29:14 - INFO - codeparrot_training - Step 43397: {'lr': 0.0004095317544527738, 'samples': 22219776, 'steps': 43397, 'loss/train': 1.0897003412246704} +03/05/2022 16:29:17 - INFO - codeparrot_training - Step 43398: {'lr': 0.00040952766858928433, 'samples': 22220288, 'steps': 43398, 'loss/train': 2.345574140548706} +03/05/2022 16:29:19 - INFO - codeparrot_training - Skipping example with length 598 (seq_length=1024) +03/05/2022 16:29:23 - INFO - codeparrot_training - Step 43399: {'lr': 0.0004095235826539141, 'samples': 22220800, 'steps': 43399, 'loss/train': 1.7572458982467651} +03/05/2022 16:29:26 - INFO - codeparrot_training - Step 43400: {'lr': 0.00040951949664666504, 'samples': 22221312, 'steps': 43400, 'loss/train': 1.6472071409225464} +03/05/2022 16:29:27 - INFO - codeparrot_training - Skipping example with length 1013 (seq_length=1024) +03/05/2022 16:29:31 - INFO - codeparrot_training - Step 43401: {'lr': 0.00040951541056753895, 'samples': 22221824, 'steps': 43401, 'loss/train': 1.2369054555892944} +03/05/2022 16:29:34 - INFO - codeparrot_training - Step 43402: {'lr': 0.00040951132441653773, 'samples': 22222336, 'steps': 43402, 'loss/train': 2.183459997177124} +03/05/2022 16:29:36 - INFO - codeparrot_training - Skipping example with length 84 (seq_length=1024) +03/05/2022 16:29:40 - INFO - codeparrot_training - Step 43403: {'lr': 0.00040950723819366307, 'samples': 22222848, 'steps': 43403, 'loss/train': 1.8384268283843994} +03/05/2022 16:29:43 - INFO - codeparrot_training - Step 43404: {'lr': 0.000409503151898917, 'samples': 22223360, 'steps': 43404, 'loss/train': 1.941799521446228} +03/05/2022 16:29:44 - INFO - codeparrot_training - Skipping example with length 574 (seq_length=1024) +03/05/2022 16:29:48 - INFO - codeparrot_training - Step 43405: {'lr': 0.0004094990655323012, 'samples': 22223872, 'steps': 43405, 'loss/train': 1.082533597946167} +03/05/2022 16:29:51 - INFO - codeparrot_training - Step 43406: {'lr': 0.00040949497909381757, 'samples': 22224384, 'steps': 43406, 'loss/train': 1.0185636281967163} +03/05/2022 16:29:53 - INFO - codeparrot_training - Skipping example with length 425 (seq_length=1024) +03/05/2022 16:29:57 - INFO - codeparrot_training - Step 43407: {'lr': 0.000409490892583468, 'samples': 22224896, 'steps': 43407, 'loss/train': 1.0507245063781738} +03/05/2022 16:30:00 - INFO - codeparrot_training - Step 43408: {'lr': 0.0004094868060012543, 'samples': 22225408, 'steps': 43408, 'loss/train': 1.8886489868164062} +03/05/2022 16:30:01 - INFO - codeparrot_training - Skipping example with length 825 (seq_length=1024) +03/05/2022 16:30:05 - INFO - codeparrot_training - Step 43409: {'lr': 0.0004094827193471783, 'samples': 22225920, 'steps': 43409, 'loss/train': 1.2209147214889526} +03/05/2022 16:30:08 - INFO - codeparrot_training - Step 43410: {'lr': 0.00040947863262124186, 'samples': 22226432, 'steps': 43410, 'loss/train': 2.434523820877075} +03/05/2022 16:30:09 - INFO - codeparrot_training - Skipping example with length 281 (seq_length=1024) +03/05/2022 16:30:13 - INFO - codeparrot_training - Step 43411: {'lr': 0.0004094745458234468, 'samples': 22226944, 'steps': 43411, 'loss/train': 1.2861884832382202} +03/05/2022 16:30:17 - INFO - codeparrot_training - Step 43412: {'lr': 0.00040947045895379494, 'samples': 22227456, 'steps': 43412, 'loss/train': 1.6324081420898438} +03/05/2022 16:30:18 - INFO - codeparrot_training - Skipping example with length 571 (seq_length=1024) +03/05/2022 16:30:22 - INFO - codeparrot_training - Step 43413: {'lr': 0.00040946637201228815, 'samples': 22227968, 'steps': 43413, 'loss/train': 2.4248392581939697} +03/05/2022 16:30:25 - INFO - codeparrot_training - Step 43414: {'lr': 0.00040946228499892835, 'samples': 22228480, 'steps': 43414, 'loss/train': 1.2386106252670288} +03/05/2022 16:30:26 - INFO - codeparrot_training - Skipping example with length 932 (seq_length=1024) +03/05/2022 16:30:30 - INFO - codeparrot_training - Step 43415: {'lr': 0.0004094581979137172, 'samples': 22228992, 'steps': 43415, 'loss/train': 2.3843882083892822} +03/05/2022 16:30:33 - INFO - codeparrot_training - Step 43416: {'lr': 0.00040945411075665674, 'samples': 22229504, 'steps': 43416, 'loss/train': 1.8741120100021362} +03/05/2022 16:30:34 - INFO - codeparrot_training - Skipping example with length 42 (seq_length=1024) +03/05/2022 16:30:39 - INFO - codeparrot_training - Step 43417: {'lr': 0.0004094500235277486, 'samples': 22230016, 'steps': 43417, 'loss/train': 1.5433290004730225} +03/05/2022 16:30:42 - INFO - codeparrot_training - Step 43418: {'lr': 0.0004094459362269949, 'samples': 22230528, 'steps': 43418, 'loss/train': 2.6721668243408203} +03/05/2022 16:30:42 - INFO - codeparrot_training - Skipping example with length 139 (seq_length=1024) +03/05/2022 16:30:47 - INFO - codeparrot_training - Step 43419: {'lr': 0.0004094418488543972, 'samples': 22231040, 'steps': 43419, 'loss/train': 6.720859527587891} +03/05/2022 16:30:50 - INFO - codeparrot_training - Step 43420: {'lr': 0.00040943776140995756, 'samples': 22231552, 'steps': 43420, 'loss/train': 1.2261567115783691} +03/05/2022 16:30:52 - INFO - codeparrot_training - Skipping example with length 705 (seq_length=1024) +03/05/2022 16:30:55 - INFO - codeparrot_training - Step 43421: {'lr': 0.0004094336738936777, 'samples': 22232064, 'steps': 43421, 'loss/train': 1.625981330871582} +03/05/2022 16:30:59 - INFO - codeparrot_training - Step 43422: {'lr': 0.0004094295863055594, 'samples': 22232576, 'steps': 43422, 'loss/train': 1.865250825881958} +03/05/2022 16:31:00 - INFO - codeparrot_training - Skipping example with length 93 (seq_length=1024) +03/05/2022 16:31:04 - INFO - codeparrot_training - Step 43423: {'lr': 0.0004094254986456046, 'samples': 22233088, 'steps': 43423, 'loss/train': 1.1052261590957642} +03/05/2022 16:31:07 - INFO - codeparrot_training - Step 43424: {'lr': 0.0004094214109138152, 'samples': 22233600, 'steps': 43424, 'loss/train': 2.057037591934204} +03/05/2022 16:31:10 - INFO - codeparrot_training - Skipping example with length 77 (seq_length=1024) +03/05/2022 16:31:13 - INFO - codeparrot_training - Step 43425: {'lr': 0.000409417323110193, 'samples': 22234112, 'steps': 43425, 'loss/train': 1.785339593887329} +03/05/2022 16:31:16 - INFO - codeparrot_training - Step 43426: {'lr': 0.00040941323523473975, 'samples': 22234624, 'steps': 43426, 'loss/train': 2.082720994949341} +03/05/2022 16:31:19 - INFO - codeparrot_training - Step 43427: {'lr': 0.00040940914728745736, 'samples': 22235136, 'steps': 43427, 'loss/train': 2.516047239303589} +03/05/2022 16:31:19 - INFO - codeparrot_training - Skipping example with length 193 (seq_length=1024) +03/05/2022 16:31:24 - INFO - codeparrot_training - Step 43428: {'lr': 0.0004094050592683477, 'samples': 22235648, 'steps': 43428, 'loss/train': 1.8581115007400513} +03/05/2022 16:31:28 - INFO - codeparrot_training - Step 43429: {'lr': 0.00040940097117741255, 'samples': 22236160, 'steps': 43429, 'loss/train': 2.093256711959839} +03/05/2022 16:31:28 - INFO - codeparrot_training - Skipping example with length 420 (seq_length=1024) +03/05/2022 16:31:33 - INFO - codeparrot_training - Step 43430: {'lr': 0.00040939688301465377, 'samples': 22236672, 'steps': 43430, 'loss/train': 1.8424209356307983} +03/05/2022 16:31:36 - INFO - codeparrot_training - Step 43431: {'lr': 0.0004093927947800732, 'samples': 22237184, 'steps': 43431, 'loss/train': 2.3670756816864014} +03/05/2022 16:31:36 - INFO - codeparrot_training - Skipping example with length 145 (seq_length=1024) +03/05/2022 16:31:41 - INFO - codeparrot_training - Step 43432: {'lr': 0.00040938870647367275, 'samples': 22237696, 'steps': 43432, 'loss/train': 1.480378270149231} +03/05/2022 16:31:45 - INFO - codeparrot_training - Step 43433: {'lr': 0.0004093846180954542, 'samples': 22238208, 'steps': 43433, 'loss/train': 2.6717529296875} +03/05/2022 16:31:45 - INFO - codeparrot_training - Skipping example with length 731 (seq_length=1024) +03/05/2022 16:31:50 - INFO - codeparrot_training - Step 43434: {'lr': 0.00040938052964541936, 'samples': 22238720, 'steps': 43434, 'loss/train': 1.94635808467865} +03/05/2022 16:31:53 - INFO - codeparrot_training - Step 43435: {'lr': 0.0004093764411235702, 'samples': 22239232, 'steps': 43435, 'loss/train': 0.8725064992904663} +03/05/2022 16:31:53 - INFO - codeparrot_training - Skipping example with length 170 (seq_length=1024) +03/05/2022 16:31:58 - INFO - codeparrot_training - Step 43436: {'lr': 0.00040937235252990834, 'samples': 22239744, 'steps': 43436, 'loss/train': 1.1072663068771362} +03/05/2022 16:32:02 - INFO - codeparrot_training - Step 43437: {'lr': 0.00040936826386443585, 'samples': 22240256, 'steps': 43437, 'loss/train': 1.9217743873596191} +03/05/2022 16:32:02 - INFO - codeparrot_training - Skipping example with length 94 (seq_length=1024) +03/05/2022 16:32:07 - INFO - codeparrot_training - Step 43438: {'lr': 0.00040936417512715454, 'samples': 22240768, 'steps': 43438, 'loss/train': 1.182011365890503} +03/05/2022 16:32:10 - INFO - codeparrot_training - Skipping example with length 374 (seq_length=1024) +03/05/2022 16:32:12 - INFO - codeparrot_training - Step 43439: {'lr': 0.00040936008631806603, 'samples': 22241280, 'steps': 43439, 'loss/train': 1.537466049194336} +03/05/2022 16:32:15 - INFO - codeparrot_training - Step 43440: {'lr': 0.00040935599743717243, 'samples': 22241792, 'steps': 43440, 'loss/train': 1.962570309638977} +03/05/2022 16:32:18 - INFO - codeparrot_training - Skipping example with length 1000 (seq_length=1024) +03/05/2022 16:32:20 - INFO - codeparrot_training - Step 43441: {'lr': 0.00040935190848447544, 'samples': 22242304, 'steps': 43441, 'loss/train': 2.3581345081329346} +03/05/2022 16:32:24 - INFO - codeparrot_training - Step 43442: {'lr': 0.000409347819459977, 'samples': 22242816, 'steps': 43442, 'loss/train': 1.8138973712921143} +03/05/2022 16:32:27 - INFO - codeparrot_training - Step 43443: {'lr': 0.0004093437303636788, 'samples': 22243328, 'steps': 43443, 'loss/train': 2.073587417602539} +03/05/2022 16:32:27 - INFO - codeparrot_training - Skipping example with length 176 (seq_length=1024) +03/05/2022 16:32:32 - INFO - codeparrot_training - Step 43444: {'lr': 0.0004093396411955829, 'samples': 22243840, 'steps': 43444, 'loss/train': 1.656822681427002} +03/05/2022 16:32:35 - INFO - codeparrot_training - Step 43445: {'lr': 0.0004093355519556908, 'samples': 22244352, 'steps': 43445, 'loss/train': 7.034511089324951} +03/05/2022 16:32:35 - INFO - codeparrot_training - Skipping example with length 252 (seq_length=1024) +03/05/2022 16:32:41 - INFO - codeparrot_training - Step 43446: {'lr': 0.0004093314626440048, 'samples': 22244864, 'steps': 43446, 'loss/train': 1.6902729272842407} +03/05/2022 16:32:44 - INFO - codeparrot_training - Step 43447: {'lr': 0.0004093273732605264, 'samples': 22245376, 'steps': 43447, 'loss/train': 1.8294931650161743} +03/05/2022 16:32:44 - INFO - codeparrot_training - Skipping example with length 348 (seq_length=1024) +03/05/2022 16:32:49 - INFO - codeparrot_training - Step 43448: {'lr': 0.0004093232838052575, 'samples': 22245888, 'steps': 43448, 'loss/train': 1.8179471492767334} +03/05/2022 16:32:52 - INFO - codeparrot_training - Step 43449: {'lr': 0.0004093191942782001, 'samples': 22246400, 'steps': 43449, 'loss/train': 1.4248247146606445} +03/05/2022 16:32:52 - INFO - codeparrot_training - Skipping example with length 837 (seq_length=1024) +03/05/2022 16:32:58 - INFO - codeparrot_training - Step 43450: {'lr': 0.0004093151046793558, 'samples': 22246912, 'steps': 43450, 'loss/train': 1.8495866060256958} +03/05/2022 16:33:01 - INFO - codeparrot_training - Skipping example with length 417 (seq_length=1024) +03/05/2022 16:33:03 - INFO - codeparrot_training - Step 43451: {'lr': 0.00040931101500872656, 'samples': 22247424, 'steps': 43451, 'loss/train': 2.4566843509674072} +03/05/2022 16:33:06 - INFO - codeparrot_training - Step 43452: {'lr': 0.00040930692526631443, 'samples': 22247936, 'steps': 43452, 'loss/train': 1.5582078695297241} +03/05/2022 16:33:09 - INFO - codeparrot_training - Skipping example with length 425 (seq_length=1024) +03/05/2022 16:33:11 - INFO - codeparrot_training - Step 43453: {'lr': 0.0004093028354521209, 'samples': 22248448, 'steps': 43453, 'loss/train': 1.3538379669189453} +03/05/2022 16:33:15 - INFO - codeparrot_training - Step 43454: {'lr': 0.000409298745566148, 'samples': 22248960, 'steps': 43454, 'loss/train': 1.7898694276809692} +03/05/2022 16:33:17 - INFO - codeparrot_training - Skipping example with length 467 (seq_length=1024) +03/05/2022 16:33:20 - INFO - codeparrot_training - Step 43455: {'lr': 0.00040929465560839753, 'samples': 22249472, 'steps': 43455, 'loss/train': 1.5181999206542969} +03/05/2022 16:33:23 - INFO - codeparrot_training - Step 43456: {'lr': 0.00040929056557887137, 'samples': 22249984, 'steps': 43456, 'loss/train': 1.8687688112258911} +03/05/2022 16:33:25 - INFO - codeparrot_training - Skipping example with length 974 (seq_length=1024) +03/05/2022 16:33:28 - INFO - codeparrot_training - Step 43457: {'lr': 0.0004092864754775713, 'samples': 22250496, 'steps': 43457, 'loss/train': 1.209481954574585} +03/05/2022 16:33:31 - INFO - codeparrot_training - Step 43458: {'lr': 0.00040928238530449926, 'samples': 22251008, 'steps': 43458, 'loss/train': 1.4416886568069458} +03/05/2022 16:33:34 - INFO - codeparrot_training - Skipping example with length 517 (seq_length=1024) +03/05/2022 16:33:37 - INFO - codeparrot_training - Step 43459: {'lr': 0.00040927829505965694, 'samples': 22251520, 'steps': 43459, 'loss/train': 2.0315394401550293} +03/05/2022 16:33:40 - INFO - codeparrot_training - Step 43460: {'lr': 0.00040927420474304646, 'samples': 22252032, 'steps': 43460, 'loss/train': 1.5098377466201782} +03/05/2022 16:33:42 - INFO - codeparrot_training - Skipping example with length 724 (seq_length=1024) +03/05/2022 16:33:45 - INFO - codeparrot_training - Step 43461: {'lr': 0.00040927011435466933, 'samples': 22252544, 'steps': 43461, 'loss/train': 1.8692922592163086} +03/05/2022 16:33:48 - INFO - codeparrot_training - Step 43462: {'lr': 0.0004092660238945276, 'samples': 22253056, 'steps': 43462, 'loss/train': 0.7996223568916321} +03/05/2022 16:33:50 - INFO - codeparrot_training - Skipping example with length 529 (seq_length=1024) +03/05/2022 16:33:54 - INFO - codeparrot_training - Step 43463: {'lr': 0.00040926193336262304, 'samples': 22253568, 'steps': 43463, 'loss/train': 1.5729109048843384} +03/05/2022 16:33:57 - INFO - codeparrot_training - Step 43464: {'lr': 0.0004092578427589575, 'samples': 22254080, 'steps': 43464, 'loss/train': 2.0172555446624756} +03/05/2022 16:33:59 - INFO - codeparrot_training - Skipping example with length 28 (seq_length=1024) +03/05/2022 16:34:02 - INFO - codeparrot_training - Step 43465: {'lr': 0.0004092537520835328, 'samples': 22254592, 'steps': 43465, 'loss/train': 2.3409149646759033} +03/05/2022 16:34:05 - INFO - codeparrot_training - Step 43466: {'lr': 0.0004092496613363509, 'samples': 22255104, 'steps': 43466, 'loss/train': 1.2438368797302246} +03/05/2022 16:34:07 - INFO - codeparrot_training - Skipping example with length 813 (seq_length=1024) +03/05/2022 16:34:10 - INFO - codeparrot_training - Step 43467: {'lr': 0.0004092455705174135, 'samples': 22255616, 'steps': 43467, 'loss/train': 1.6945438385009766} +03/05/2022 16:34:14 - INFO - codeparrot_training - Step 43468: {'lr': 0.00040924147962672253, 'samples': 22256128, 'steps': 43468, 'loss/train': 1.7372034788131714} +03/05/2022 16:34:16 - INFO - codeparrot_training - Skipping example with length 389 (seq_length=1024) +03/05/2022 16:34:19 - INFO - codeparrot_training - Step 43469: {'lr': 0.00040923738866427986, 'samples': 22256640, 'steps': 43469, 'loss/train': 1.73540461063385} +03/05/2022 16:34:22 - INFO - codeparrot_training - Step 43470: {'lr': 0.00040923329763008714, 'samples': 22257152, 'steps': 43470, 'loss/train': 1.5015273094177246} +03/05/2022 16:34:24 - INFO - codeparrot_training - Skipping example with length 586 (seq_length=1024) +03/05/2022 16:34:27 - INFO - codeparrot_training - Step 43471: {'lr': 0.0004092292065241464, 'samples': 22257664, 'steps': 43471, 'loss/train': 1.4789345264434814} +03/05/2022 16:34:31 - INFO - codeparrot_training - Step 43472: {'lr': 0.00040922511534645953, 'samples': 22258176, 'steps': 43472, 'loss/train': 2.03440523147583} +03/05/2022 16:34:33 - INFO - codeparrot_training - Skipping example with length 249 (seq_length=1024) +03/05/2022 16:34:36 - INFO - codeparrot_training - Step 43473: {'lr': 0.0004092210240970282, 'samples': 22258688, 'steps': 43473, 'loss/train': 0.5834307074546814} +03/05/2022 16:34:39 - INFO - codeparrot_training - Step 43474: {'lr': 0.0004092169327758544, 'samples': 22259200, 'steps': 43474, 'loss/train': 0.5725497007369995} +03/05/2022 16:34:41 - INFO - codeparrot_training - Skipping example with length 72 (seq_length=1024) +03/05/2022 16:34:45 - INFO - codeparrot_training - Step 43475: {'lr': 0.0004092128413829398, 'samples': 22259712, 'steps': 43475, 'loss/train': 1.9388123750686646} +03/05/2022 16:34:48 - INFO - codeparrot_training - Step 43476: {'lr': 0.0004092087499182864, 'samples': 22260224, 'steps': 43476, 'loss/train': 1.5793097019195557} +03/05/2022 16:34:50 - INFO - codeparrot_training - Skipping example with length 452 (seq_length=1024) +03/05/2022 16:34:53 - INFO - codeparrot_training - Step 43477: {'lr': 0.000409204658381896, 'samples': 22260736, 'steps': 43477, 'loss/train': 0.8688333034515381} +03/05/2022 16:34:57 - INFO - codeparrot_training - Step 43478: {'lr': 0.00040920056677377047, 'samples': 22261248, 'steps': 43478, 'loss/train': 1.7755496501922607} +03/05/2022 16:34:59 - INFO - codeparrot_training - Skipping example with length 920 (seq_length=1024) +03/05/2022 16:35:02 - INFO - codeparrot_training - Step 43479: {'lr': 0.00040919647509391155, 'samples': 22261760, 'steps': 43479, 'loss/train': 1.1312000751495361} +03/05/2022 16:35:05 - INFO - codeparrot_training - Step 43480: {'lr': 0.0004091923833423212, 'samples': 22262272, 'steps': 43480, 'loss/train': 1.179079294204712} +03/05/2022 16:35:08 - INFO - codeparrot_training - Skipping example with length 85 (seq_length=1024) +03/05/2022 16:35:10 - INFO - codeparrot_training - Step 43481: {'lr': 0.00040918829151900127, 'samples': 22262784, 'steps': 43481, 'loss/train': 1.7857574224472046} +03/05/2022 16:35:14 - INFO - codeparrot_training - Step 43482: {'lr': 0.0004091841996239535, 'samples': 22263296, 'steps': 43482, 'loss/train': 1.3958780765533447} +03/05/2022 16:35:16 - INFO - codeparrot_training - Skipping example with length 429 (seq_length=1024) +03/05/2022 16:35:19 - INFO - codeparrot_training - Step 43483: {'lr': 0.00040918010765717976, 'samples': 22263808, 'steps': 43483, 'loss/train': 1.8963899612426758} +03/05/2022 16:35:22 - INFO - codeparrot_training - Step 43484: {'lr': 0.00040917601561868194, 'samples': 22264320, 'steps': 43484, 'loss/train': 1.1714063882827759} +03/05/2022 16:35:25 - INFO - codeparrot_training - Skipping example with length 461 (seq_length=1024) +03/05/2022 16:35:27 - INFO - codeparrot_training - Step 43485: {'lr': 0.00040917192350846187, 'samples': 22264832, 'steps': 43485, 'loss/train': 2.2142341136932373} +03/05/2022 16:35:31 - INFO - codeparrot_training - Step 43486: {'lr': 0.00040916783132652134, 'samples': 22265344, 'steps': 43486, 'loss/train': 1.316070556640625} +03/05/2022 16:35:33 - INFO - codeparrot_training - Skipping example with length 565 (seq_length=1024) +03/05/2022 16:35:36 - INFO - codeparrot_training - Step 43487: {'lr': 0.0004091637390728623, 'samples': 22265856, 'steps': 43487, 'loss/train': 1.7286927700042725} +03/05/2022 16:35:39 - INFO - codeparrot_training - Step 43488: {'lr': 0.00040915964674748665, 'samples': 22266368, 'steps': 43488, 'loss/train': 1.663365125656128} +03/05/2022 16:35:42 - INFO - codeparrot_training - Skipping example with length 1014 (seq_length=1024) +03/05/2022 16:35:45 - INFO - codeparrot_training - Step 43489: {'lr': 0.0004091555543503959, 'samples': 22266880, 'steps': 43489, 'loss/train': 1.930106282234192} +03/05/2022 16:35:48 - INFO - codeparrot_training - Step 43490: {'lr': 0.00040915146188159223, 'samples': 22267392, 'steps': 43490, 'loss/train': 0.0828438401222229} +03/05/2022 16:35:51 - INFO - codeparrot_training - Step 43491: {'lr': 0.0004091473693410773, 'samples': 22267904, 'steps': 43491, 'loss/train': 2.0211939811706543} +03/05/2022 16:35:51 - INFO - codeparrot_training - Skipping example with length 864 (seq_length=1024) +03/05/2022 16:35:56 - INFO - codeparrot_training - Step 43492: {'lr': 0.0004091432767288531, 'samples': 22268416, 'steps': 43492, 'loss/train': 1.9732705354690552} +03/05/2022 16:35:59 - INFO - codeparrot_training - Step 43493: {'lr': 0.0004091391840449213, 'samples': 22268928, 'steps': 43493, 'loss/train': 1.4866596460342407} +03/05/2022 16:35:59 - INFO - codeparrot_training - Skipping example with length 800 (seq_length=1024) +03/05/2022 16:36:05 - INFO - codeparrot_training - Step 43494: {'lr': 0.00040913509128928394, 'samples': 22269440, 'steps': 43494, 'loss/train': 1.7426174879074097} +03/05/2022 16:36:08 - INFO - codeparrot_training - Step 43495: {'lr': 0.00040913099846194274, 'samples': 22269952, 'steps': 43495, 'loss/train': 0.23369881510734558} +03/05/2022 16:36:08 - INFO - codeparrot_training - Skipping example with length 183 (seq_length=1024) +03/05/2022 16:36:13 - INFO - codeparrot_training - Step 43496: {'lr': 0.00040912690556289957, 'samples': 22270464, 'steps': 43496, 'loss/train': 1.9182265996932983} +03/05/2022 16:36:16 - INFO - codeparrot_training - Step 43497: {'lr': 0.0004091228125921562, 'samples': 22270976, 'steps': 43497, 'loss/train': 1.8995013236999512} +03/05/2022 16:36:16 - INFO - codeparrot_training - Skipping example with length 640 (seq_length=1024) +03/05/2022 16:36:22 - INFO - codeparrot_training - Step 43498: {'lr': 0.0004091187195497146, 'samples': 22271488, 'steps': 43498, 'loss/train': 1.671336054801941} +03/05/2022 16:36:25 - INFO - codeparrot_training - Step 43499: {'lr': 0.00040911462643557656, 'samples': 22272000, 'steps': 43499, 'loss/train': 1.895821213722229} +03/05/2022 16:36:25 - INFO - codeparrot_training - Skipping example with length 486 (seq_length=1024) +03/05/2022 16:36:30 - INFO - codeparrot_training - Step 43500: {'lr': 0.0004091105332497439, 'samples': 22272512, 'steps': 43500, 'loss/train': 2.290811777114868} +03/05/2022 16:36:33 - INFO - codeparrot_training - Step 43501: {'lr': 0.0004091064399922185, 'samples': 22273024, 'steps': 43501, 'loss/train': 1.6062957048416138} +03/05/2022 16:36:34 - INFO - codeparrot_training - Skipping example with length 21 (seq_length=1024) +03/05/2022 16:36:39 - INFO - codeparrot_training - Step 43502: {'lr': 0.0004091023466630023, 'samples': 22273536, 'steps': 43502, 'loss/train': 1.0313433408737183} +03/05/2022 16:36:42 - INFO - codeparrot_training - Step 43503: {'lr': 0.00040909825326209694, 'samples': 22274048, 'steps': 43503, 'loss/train': 1.9420777559280396} +03/05/2022 16:36:42 - INFO - codeparrot_training - Skipping example with length 504 (seq_length=1024) +03/05/2022 16:36:47 - INFO - codeparrot_training - Step 43504: {'lr': 0.0004090941597895043, 'samples': 22274560, 'steps': 43504, 'loss/train': 1.579280138015747} +03/05/2022 16:36:50 - INFO - codeparrot_training - Step 43505: {'lr': 0.0004090900662452264, 'samples': 22275072, 'steps': 43505, 'loss/train': 1.4133107662200928} +03/05/2022 16:36:50 - INFO - codeparrot_training - Skipping example with length 408 (seq_length=1024) +03/05/2022 16:36:56 - INFO - codeparrot_training - Step 43506: {'lr': 0.00040908597262926484, 'samples': 22275584, 'steps': 43506, 'loss/train': 1.4942498207092285} +03/05/2022 16:36:59 - INFO - codeparrot_training - Step 43507: {'lr': 0.0004090818789416217, 'samples': 22276096, 'steps': 43507, 'loss/train': 1.8641345500946045} +03/05/2022 16:36:59 - INFO - codeparrot_training - Skipping example with length 697 (seq_length=1024) +03/05/2022 16:37:04 - INFO - codeparrot_training - Step 43508: {'lr': 0.0004090777851822988, 'samples': 22276608, 'steps': 43508, 'loss/train': 1.3042681217193604} +03/05/2022 16:37:07 - INFO - codeparrot_training - Step 43509: {'lr': 0.0004090736913512977, 'samples': 22277120, 'steps': 43509, 'loss/train': 1.2210595607757568} +03/05/2022 16:37:07 - INFO - codeparrot_training - Skipping example with length 661 (seq_length=1024) +03/05/2022 16:37:12 - INFO - codeparrot_training - Step 43510: {'lr': 0.0004090695974486206, 'samples': 22277632, 'steps': 43510, 'loss/train': 2.2817001342773438} +03/05/2022 16:37:15 - INFO - codeparrot_training - Step 43511: {'lr': 0.00040906550347426907, 'samples': 22278144, 'steps': 43511, 'loss/train': 1.3206937313079834} +03/05/2022 16:37:16 - INFO - codeparrot_training - Skipping example with length 394 (seq_length=1024) +03/05/2022 16:37:21 - INFO - codeparrot_training - Step 43512: {'lr': 0.0004090614094282452, 'samples': 22278656, 'steps': 43512, 'loss/train': 1.8190135955810547} +03/05/2022 16:37:24 - INFO - codeparrot_training - Step 43513: {'lr': 0.00040905731531055067, 'samples': 22279168, 'steps': 43513, 'loss/train': 1.7540173530578613} +03/05/2022 16:37:24 - INFO - codeparrot_training - Skipping example with length 884 (seq_length=1024) +03/05/2022 16:37:29 - INFO - codeparrot_training - Step 43514: {'lr': 0.0004090532211211874, 'samples': 22279680, 'steps': 43514, 'loss/train': 1.7907708883285522} +03/05/2022 16:37:32 - INFO - codeparrot_training - Step 43515: {'lr': 0.0004090491268601572, 'samples': 22280192, 'steps': 43515, 'loss/train': 1.856278419494629} +03/05/2022 16:37:32 - INFO - codeparrot_training - Skipping example with length 209 (seq_length=1024) +03/05/2022 16:37:38 - INFO - codeparrot_training - Step 43516: {'lr': 0.0004090450325274618, 'samples': 22280704, 'steps': 43516, 'loss/train': 1.101977825164795} +03/05/2022 16:37:41 - INFO - codeparrot_training - Step 43517: {'lr': 0.0004090409381231033, 'samples': 22281216, 'steps': 43517, 'loss/train': 1.4714518785476685} +03/05/2022 16:37:41 - INFO - codeparrot_training - Skipping example with length 320 (seq_length=1024) +03/05/2022 16:37:46 - INFO - codeparrot_training - Step 43518: {'lr': 0.0004090368436470833, 'samples': 22281728, 'steps': 43518, 'loss/train': 1.5295124053955078} +03/05/2022 16:37:49 - INFO - codeparrot_training - Step 43519: {'lr': 0.0004090327490994038, 'samples': 22282240, 'steps': 43519, 'loss/train': 1.5267893075942993} +03/05/2022 16:37:49 - INFO - codeparrot_training - Skipping example with length 599 (seq_length=1024) +03/05/2022 16:37:55 - INFO - codeparrot_training - Step 43520: {'lr': 0.00040902865448006663, 'samples': 22282752, 'steps': 43520, 'loss/train': 2.32295560836792} +03/05/2022 16:37:58 - INFO - codeparrot_training - Step 43521: {'lr': 0.0004090245597890736, 'samples': 22283264, 'steps': 43521, 'loss/train': 1.7842472791671753} +03/05/2022 16:37:58 - INFO - codeparrot_training - Skipping example with length 16 (seq_length=1024) +03/05/2022 16:38:03 - INFO - codeparrot_training - Step 43522: {'lr': 0.00040902046502642656, 'samples': 22283776, 'steps': 43522, 'loss/train': 1.6596343517303467} +03/05/2022 16:38:06 - INFO - codeparrot_training - Step 43523: {'lr': 0.0004090163701921273, 'samples': 22284288, 'steps': 43523, 'loss/train': 2.123654365539551} +03/05/2022 16:38:06 - INFO - codeparrot_training - Skipping example with length 696 (seq_length=1024) +03/05/2022 16:38:12 - INFO - codeparrot_training - Step 43524: {'lr': 0.0004090122752861777, 'samples': 22284800, 'steps': 43524, 'loss/train': 2.0612683296203613} +03/05/2022 16:38:15 - INFO - codeparrot_training - Step 43525: {'lr': 0.0004090081803085797, 'samples': 22285312, 'steps': 43525, 'loss/train': 1.7806622982025146} +03/05/2022 16:38:15 - INFO - codeparrot_training - Skipping example with length 290 (seq_length=1024) +03/05/2022 16:38:20 - INFO - codeparrot_training - Step 43526: {'lr': 0.00040900408525933505, 'samples': 22285824, 'steps': 43526, 'loss/train': 1.4373328685760498} +03/05/2022 16:38:23 - INFO - codeparrot_training - Step 43527: {'lr': 0.0004089999901384456, 'samples': 22286336, 'steps': 43527, 'loss/train': 1.595618724822998} +03/05/2022 16:38:23 - INFO - codeparrot_training - Skipping example with length 662 (seq_length=1024) +03/05/2022 16:38:29 - INFO - codeparrot_training - Step 43528: {'lr': 0.00040899589494591316, 'samples': 22286848, 'steps': 43528, 'loss/train': 1.22590172290802} +03/05/2022 16:38:32 - INFO - codeparrot_training - Step 43529: {'lr': 0.0004089917996817397, 'samples': 22287360, 'steps': 43529, 'loss/train': 1.0005383491516113} +03/05/2022 16:38:32 - INFO - codeparrot_training - Skipping example with length 1003 (seq_length=1024) +03/05/2022 16:38:37 - INFO - codeparrot_training - Step 43530: {'lr': 0.00040898770434592694, 'samples': 22287872, 'steps': 43530, 'loss/train': 0.7258080840110779} +03/05/2022 16:38:40 - INFO - codeparrot_training - Step 43531: {'lr': 0.0004089836089384768, 'samples': 22288384, 'steps': 43531, 'loss/train': 2.315338373184204} +03/05/2022 16:38:40 - INFO - codeparrot_training - Skipping example with length 95 (seq_length=1024) +03/05/2022 16:38:45 - INFO - codeparrot_training - Step 43532: {'lr': 0.0004089795134593911, 'samples': 22288896, 'steps': 43532, 'loss/train': 1.788054347038269} +03/05/2022 16:38:48 - INFO - codeparrot_training - Step 43533: {'lr': 0.00040897541790867165, 'samples': 22289408, 'steps': 43533, 'loss/train': 1.650675654411316} +03/05/2022 16:38:48 - INFO - codeparrot_training - Skipping example with length 298 (seq_length=1024) +03/05/2022 16:38:54 - INFO - codeparrot_training - Step 43534: {'lr': 0.00040897132228632035, 'samples': 22289920, 'steps': 43534, 'loss/train': 2.0224924087524414} +03/05/2022 16:38:57 - INFO - codeparrot_training - Step 43535: {'lr': 0.000408967226592339, 'samples': 22290432, 'steps': 43535, 'loss/train': 2.995075225830078} +03/05/2022 16:38:57 - INFO - codeparrot_training - Skipping example with length 997 (seq_length=1024) +03/05/2022 16:39:02 - INFO - codeparrot_training - Step 43536: {'lr': 0.00040896313082672953, 'samples': 22290944, 'steps': 43536, 'loss/train': 0.3775224983692169} +03/05/2022 16:39:05 - INFO - codeparrot_training - Skipping example with length 688 (seq_length=1024) +03/05/2022 16:39:08 - INFO - codeparrot_training - Step 43537: {'lr': 0.0004089590349894937, 'samples': 22291456, 'steps': 43537, 'loss/train': 1.7950793504714966} +03/05/2022 16:39:11 - INFO - codeparrot_training - Step 43538: {'lr': 0.0004089549390806334, 'samples': 22291968, 'steps': 43538, 'loss/train': 1.1951911449432373} +03/05/2022 16:39:14 - INFO - codeparrot_training - Step 43539: {'lr': 0.0004089508431001504, 'samples': 22292480, 'steps': 43539, 'loss/train': 1.8888870477676392} +03/05/2022 16:39:14 - INFO - codeparrot_training - Skipping example with length 250 (seq_length=1024) +03/05/2022 16:39:19 - INFO - codeparrot_training - Step 43540: {'lr': 0.00040894674704804667, 'samples': 22292992, 'steps': 43540, 'loss/train': 1.8097847700119019} +03/05/2022 16:39:23 - INFO - codeparrot_training - Step 43541: {'lr': 0.00040894265092432397, 'samples': 22293504, 'steps': 43541, 'loss/train': 2.2973973751068115} +03/05/2022 16:39:23 - INFO - codeparrot_training - Skipping example with length 699 (seq_length=1024) +03/05/2022 16:39:28 - INFO - codeparrot_training - Step 43542: {'lr': 0.0004089385547289841, 'samples': 22294016, 'steps': 43542, 'loss/train': 2.192369222640991} +03/05/2022 16:39:31 - INFO - codeparrot_training - Step 43543: {'lr': 0.00040893445846202904, 'samples': 22294528, 'steps': 43543, 'loss/train': 1.159581184387207} +03/05/2022 16:39:33 - INFO - codeparrot_training - Skipping example with length 487 (seq_length=1024) +03/05/2022 16:39:37 - INFO - codeparrot_training - Step 43544: {'lr': 0.00040893036212346056, 'samples': 22295040, 'steps': 43544, 'loss/train': 1.4829267263412476} +03/05/2022 16:39:40 - INFO - codeparrot_training - Step 43545: {'lr': 0.00040892626571328053, 'samples': 22295552, 'steps': 43545, 'loss/train': 1.7142601013183594} +03/05/2022 16:39:41 - INFO - codeparrot_training - Skipping example with length 723 (seq_length=1024) +03/05/2022 16:39:45 - INFO - codeparrot_training - Step 43546: {'lr': 0.00040892216923149073, 'samples': 22296064, 'steps': 43546, 'loss/train': 2.046525716781616} +03/05/2022 16:39:48 - INFO - codeparrot_training - Step 43547: {'lr': 0.000408918072678093, 'samples': 22296576, 'steps': 43547, 'loss/train': 0.4935801327228546} +03/05/2022 16:39:49 - INFO - codeparrot_training - Skipping example with length 422 (seq_length=1024) +03/05/2022 16:39:53 - INFO - codeparrot_training - Step 43548: {'lr': 0.0004089139760530893, 'samples': 22297088, 'steps': 43548, 'loss/train': 3.584404706954956} +03/05/2022 16:39:57 - INFO - codeparrot_training - Step 43549: {'lr': 0.0004089098793564815, 'samples': 22297600, 'steps': 43549, 'loss/train': 2.512632369995117} +03/05/2022 16:39:58 - INFO - codeparrot_training - Skipping example with length 1001 (seq_length=1024) +03/05/2022 16:40:02 - INFO - codeparrot_training - Step 43550: {'lr': 0.00040890578258827125, 'samples': 22298112, 'steps': 43550, 'loss/train': 0.07745035737752914} +03/05/2022 16:40:05 - INFO - codeparrot_training - Step 43551: {'lr': 0.00040890168574846055, 'samples': 22298624, 'steps': 43551, 'loss/train': 1.2003059387207031} +03/05/2022 16:40:06 - INFO - codeparrot_training - Skipping example with length 841 (seq_length=1024) +03/05/2022 16:40:10 - INFO - codeparrot_training - Step 43552: {'lr': 0.0004088975888370512, 'samples': 22299136, 'steps': 43552, 'loss/train': 1.5562398433685303} +03/05/2022 16:40:13 - INFO - codeparrot_training - Step 43553: {'lr': 0.00040889349185404503, 'samples': 22299648, 'steps': 43553, 'loss/train': 1.4919451475143433} +03/05/2022 16:40:15 - INFO - codeparrot_training - Skipping example with length 248 (seq_length=1024) +03/05/2022 16:40:19 - INFO - codeparrot_training - Step 43554: {'lr': 0.00040888939479944385, 'samples': 22300160, 'steps': 43554, 'loss/train': 1.7951635122299194} +03/05/2022 16:40:22 - INFO - codeparrot_training - Step 43555: {'lr': 0.00040888529767324966, 'samples': 22300672, 'steps': 43555, 'loss/train': 2.021787643432617} +03/05/2022 16:40:23 - INFO - codeparrot_training - Skipping example with length 172 (seq_length=1024) +03/05/2022 16:40:27 - INFO - codeparrot_training - Step 43556: {'lr': 0.0004088812004754642, 'samples': 22301184, 'steps': 43556, 'loss/train': 2.0214481353759766} +03/05/2022 16:40:30 - INFO - codeparrot_training - Step 43557: {'lr': 0.00040887710320608927, 'samples': 22301696, 'steps': 43557, 'loss/train': 1.9364957809448242} +03/05/2022 16:40:31 - INFO - codeparrot_training - Skipping example with length 184 (seq_length=1024) +03/05/2022 16:40:36 - INFO - codeparrot_training - Step 43558: {'lr': 0.00040887300586512677, 'samples': 22302208, 'steps': 43558, 'loss/train': 1.3532919883728027} +03/05/2022 16:40:39 - INFO - codeparrot_training - Step 43559: {'lr': 0.0004088689084525786, 'samples': 22302720, 'steps': 43559, 'loss/train': 2.2497425079345703} +03/05/2022 16:40:40 - INFO - codeparrot_training - Skipping example with length 678 (seq_length=1024) +03/05/2022 16:40:44 - INFO - codeparrot_training - Step 43560: {'lr': 0.0004088648109684465, 'samples': 22303232, 'steps': 43560, 'loss/train': 1.405975580215454} +03/05/2022 16:40:47 - INFO - codeparrot_training - Step 43561: {'lr': 0.00040886071341273236, 'samples': 22303744, 'steps': 43561, 'loss/train': 2.108630657196045} +03/05/2022 16:40:48 - INFO - codeparrot_training - Skipping example with length 538 (seq_length=1024) +03/05/2022 16:40:53 - INFO - codeparrot_training - Step 43562: {'lr': 0.0004088566157854381, 'samples': 22304256, 'steps': 43562, 'loss/train': 1.5644049644470215} +03/05/2022 16:40:56 - INFO - codeparrot_training - Step 43563: {'lr': 0.0004088525180865654, 'samples': 22304768, 'steps': 43563, 'loss/train': 2.1927173137664795} +03/05/2022 16:40:57 - INFO - codeparrot_training - Skipping example with length 395 (seq_length=1024) +03/05/2022 16:41:01 - INFO - codeparrot_training - Step 43564: {'lr': 0.0004088484203161163, 'samples': 22305280, 'steps': 43564, 'loss/train': 1.9716377258300781} +03/05/2022 16:41:04 - INFO - codeparrot_training - Step 43565: {'lr': 0.0004088443224740925, 'samples': 22305792, 'steps': 43565, 'loss/train': 1.3490979671478271} +03/05/2022 16:41:05 - INFO - codeparrot_training - Skipping example with length 1013 (seq_length=1024) +03/05/2022 16:41:10 - INFO - codeparrot_training - Step 43566: {'lr': 0.00040884022456049595, 'samples': 22306304, 'steps': 43566, 'loss/train': 1.9948389530181885} +03/05/2022 16:41:13 - INFO - codeparrot_training - Step 43567: {'lr': 0.00040883612657532844, 'samples': 22306816, 'steps': 43567, 'loss/train': 3.5000038146972656} +03/05/2022 16:41:15 - INFO - codeparrot_training - Skipping example with length 498 (seq_length=1024) +03/05/2022 16:41:18 - INFO - codeparrot_training - Step 43568: {'lr': 0.0004088320285185918, 'samples': 22307328, 'steps': 43568, 'loss/train': 2.366649627685547} +03/05/2022 16:41:21 - INFO - codeparrot_training - Step 43569: {'lr': 0.0004088279303902879, 'samples': 22307840, 'steps': 43569, 'loss/train': 1.967574954032898} +03/05/2022 16:41:23 - INFO - codeparrot_training - Skipping example with length 876 (seq_length=1024) +03/05/2022 16:41:27 - INFO - codeparrot_training - Step 43570: {'lr': 0.0004088238321904185, 'samples': 22308352, 'steps': 43570, 'loss/train': 2.1393465995788574} +03/05/2022 16:41:30 - INFO - codeparrot_training - Step 43571: {'lr': 0.00040881973391898563, 'samples': 22308864, 'steps': 43571, 'loss/train': 1.1536953449249268} +03/05/2022 16:41:31 - INFO - codeparrot_training - Skipping example with length 257 (seq_length=1024) +03/05/2022 16:41:35 - INFO - codeparrot_training - Step 43572: {'lr': 0.00040881563557599107, 'samples': 22309376, 'steps': 43572, 'loss/train': 1.2718925476074219} +03/05/2022 16:41:38 - INFO - codeparrot_training - Step 43573: {'lr': 0.00040881153716143656, 'samples': 22309888, 'steps': 43573, 'loss/train': 7.445451736450195} +03/05/2022 16:41:40 - INFO - codeparrot_training - Skipping example with length 15 (seq_length=1024) +03/05/2022 16:41:44 - INFO - codeparrot_training - Step 43574: {'lr': 0.000408807438675324, 'samples': 22310400, 'steps': 43574, 'loss/train': 1.5592296123504639} +03/05/2022 16:41:47 - INFO - codeparrot_training - Step 43575: {'lr': 0.0004088033401176554, 'samples': 22310912, 'steps': 43575, 'loss/train': 2.3682451248168945} +03/05/2022 16:41:49 - INFO - codeparrot_training - Skipping example with length 924 (seq_length=1024) +03/05/2022 16:41:52 - INFO - codeparrot_training - Step 43576: {'lr': 0.00040879924148843233, 'samples': 22311424, 'steps': 43576, 'loss/train': 1.682289719581604} +03/05/2022 16:41:55 - INFO - codeparrot_training - Step 43577: {'lr': 0.00040879514278765685, 'samples': 22311936, 'steps': 43577, 'loss/train': 1.4882627725601196} +03/05/2022 16:41:57 - INFO - codeparrot_training - Skipping example with length 167 (seq_length=1024) +03/05/2022 16:42:00 - INFO - codeparrot_training - Step 43578: {'lr': 0.00040879104401533064, 'samples': 22312448, 'steps': 43578, 'loss/train': 2.209794521331787} +03/05/2022 16:42:04 - INFO - codeparrot_training - Step 43579: {'lr': 0.0004087869451714557, 'samples': 22312960, 'steps': 43579, 'loss/train': 1.7769020795822144} +03/05/2022 16:42:05 - INFO - codeparrot_training - Skipping example with length 182 (seq_length=1024) +03/05/2022 16:42:09 - INFO - codeparrot_training - Step 43580: {'lr': 0.0004087828462560338, 'samples': 22313472, 'steps': 43580, 'loss/train': 1.4512205123901367} +03/05/2022 16:42:12 - INFO - codeparrot_training - Step 43581: {'lr': 0.0004087787472690668, 'samples': 22313984, 'steps': 43581, 'loss/train': 2.010418653488159} +03/05/2022 16:42:14 - INFO - codeparrot_training - Skipping example with length 618 (seq_length=1024) +03/05/2022 16:42:17 - INFO - codeparrot_training - Step 43582: {'lr': 0.00040877464821055656, 'samples': 22314496, 'steps': 43582, 'loss/train': 1.9044525623321533} +03/05/2022 16:42:21 - INFO - codeparrot_training - Step 43583: {'lr': 0.00040877054908050495, 'samples': 22315008, 'steps': 43583, 'loss/train': 1.9305002689361572} +03/05/2022 16:42:22 - INFO - codeparrot_training - Skipping example with length 819 (seq_length=1024) +03/05/2022 16:42:26 - INFO - codeparrot_training - Step 43584: {'lr': 0.0004087664498789137, 'samples': 22315520, 'steps': 43584, 'loss/train': 1.8394944667816162} +03/05/2022 16:42:29 - INFO - codeparrot_training - Step 43585: {'lr': 0.00040876235060578476, 'samples': 22316032, 'steps': 43585, 'loss/train': 0.9854817986488342} +03/05/2022 16:42:31 - INFO - codeparrot_training - Skipping example with length 277 (seq_length=1024) +03/05/2022 16:42:34 - INFO - codeparrot_training - Step 43586: {'lr': 0.00040875825126112, 'samples': 22316544, 'steps': 43586, 'loss/train': 1.82539701461792} +03/05/2022 16:42:37 - INFO - codeparrot_training - Step 43587: {'lr': 0.00040875415184492113, 'samples': 22317056, 'steps': 43587, 'loss/train': 2.220182180404663} +03/05/2022 16:42:39 - INFO - codeparrot_training - Skipping example with length 323 (seq_length=1024) +03/05/2022 16:42:43 - INFO - codeparrot_training - Step 43588: {'lr': 0.0004087500523571902, 'samples': 22317568, 'steps': 43588, 'loss/train': 1.7767337560653687} +03/05/2022 16:42:46 - INFO - codeparrot_training - Step 43589: {'lr': 0.00040874595279792884, 'samples': 22318080, 'steps': 43589, 'loss/train': 2.4290528297424316} +03/05/2022 16:42:48 - INFO - codeparrot_training - Skipping example with length 766 (seq_length=1024) +03/05/2022 16:42:51 - INFO - codeparrot_training - Step 43590: {'lr': 0.00040874185316713905, 'samples': 22318592, 'steps': 43590, 'loss/train': 1.5387769937515259} +03/05/2022 16:42:54 - INFO - codeparrot_training - Step 43591: {'lr': 0.00040873775346482265, 'samples': 22319104, 'steps': 43591, 'loss/train': 1.6900389194488525} +03/05/2022 16:42:56 - INFO - codeparrot_training - Skipping example with length 886 (seq_length=1024) +03/05/2022 16:43:00 - INFO - codeparrot_training - Step 43592: {'lr': 0.0004087336536909815, 'samples': 22319616, 'steps': 43592, 'loss/train': 1.2524985074996948} +03/05/2022 16:43:03 - INFO - codeparrot_training - Step 43593: {'lr': 0.00040872955384561735, 'samples': 22320128, 'steps': 43593, 'loss/train': 1.3358381986618042} +03/05/2022 16:43:05 - INFO - codeparrot_training - Skipping example with length 911 (seq_length=1024) +03/05/2022 16:43:08 - INFO - codeparrot_training - Step 43594: {'lr': 0.00040872545392873214, 'samples': 22320640, 'steps': 43594, 'loss/train': 2.3314616680145264} +03/05/2022 16:43:11 - INFO - codeparrot_training - Step 43595: {'lr': 0.00040872135394032764, 'samples': 22321152, 'steps': 43595, 'loss/train': 1.562785267829895} +03/05/2022 16:43:14 - INFO - codeparrot_training - Skipping example with length 397 (seq_length=1024) +03/05/2022 16:43:17 - INFO - codeparrot_training - Step 43596: {'lr': 0.0004087172538804058, 'samples': 22321664, 'steps': 43596, 'loss/train': 1.869885802268982} +03/05/2022 16:43:20 - INFO - codeparrot_training - Step 43597: {'lr': 0.0004087131537489685, 'samples': 22322176, 'steps': 43597, 'loss/train': 1.979675531387329} +03/05/2022 16:43:22 - INFO - codeparrot_training - Skipping example with length 245 (seq_length=1024) +03/05/2022 16:43:25 - INFO - codeparrot_training - Step 43598: {'lr': 0.00040870905354601733, 'samples': 22322688, 'steps': 43598, 'loss/train': 1.2901482582092285} +03/05/2022 16:43:28 - INFO - codeparrot_training - Step 43599: {'lr': 0.0004087049532715544, 'samples': 22323200, 'steps': 43599, 'loss/train': 2.2488045692443848} +03/05/2022 16:43:31 - INFO - codeparrot_training - Skipping example with length 379 (seq_length=1024) +03/05/2022 16:43:34 - INFO - codeparrot_training - Step 43600: {'lr': 0.00040870085292558147, 'samples': 22323712, 'steps': 43600, 'loss/train': 1.8258154392242432} +03/05/2022 16:43:37 - INFO - codeparrot_training - Step 43601: {'lr': 0.0004086967525081003, 'samples': 22324224, 'steps': 43601, 'loss/train': 1.9085488319396973} +03/05/2022 16:43:39 - INFO - codeparrot_training - Skipping example with length 192 (seq_length=1024) +03/05/2022 16:43:42 - INFO - codeparrot_training - Step 43602: {'lr': 0.00040869265201911285, 'samples': 22324736, 'steps': 43602, 'loss/train': 1.9763127565383911} +03/05/2022 16:43:45 - INFO - codeparrot_training - Step 43603: {'lr': 0.00040868855145862105, 'samples': 22325248, 'steps': 43603, 'loss/train': 0.7025713920593262} +03/05/2022 16:43:47 - INFO - codeparrot_training - Skipping example with length 693 (seq_length=1024) +03/05/2022 16:43:50 - INFO - codeparrot_training - Step 43604: {'lr': 0.00040868445082662655, 'samples': 22325760, 'steps': 43604, 'loss/train': 1.475812554359436} +03/05/2022 16:43:54 - INFO - codeparrot_training - Step 43605: {'lr': 0.0004086803501231313, 'samples': 22326272, 'steps': 43605, 'loss/train': 1.27096426486969} +03/05/2022 16:43:56 - INFO - codeparrot_training - Skipping example with length 317 (seq_length=1024) +03/05/2022 16:43:59 - INFO - codeparrot_training - Step 43606: {'lr': 0.00040867624934813715, 'samples': 22326784, 'steps': 43606, 'loss/train': 1.0256454944610596} +03/05/2022 16:44:02 - INFO - codeparrot_training - Step 43607: {'lr': 0.00040867214850164594, 'samples': 22327296, 'steps': 43607, 'loss/train': 1.0828173160552979} +03/05/2022 16:44:05 - INFO - codeparrot_training - Skipping example with length 844 (seq_length=1024) +03/05/2022 16:44:07 - INFO - codeparrot_training - Step 43608: {'lr': 0.0004086680475836594, 'samples': 22327808, 'steps': 43608, 'loss/train': 1.5296202898025513} +03/05/2022 16:44:11 - INFO - codeparrot_training - Step 43609: {'lr': 0.0004086639465941796, 'samples': 22328320, 'steps': 43609, 'loss/train': 2.0186009407043457} +03/05/2022 16:44:13 - INFO - codeparrot_training - Skipping example with length 748 (seq_length=1024) +03/05/2022 16:44:16 - INFO - codeparrot_training - Step 43610: {'lr': 0.00040865984553320825, 'samples': 22328832, 'steps': 43610, 'loss/train': 2.0901713371276855} +03/05/2022 16:44:19 - INFO - codeparrot_training - Step 43611: {'lr': 0.0004086557444007472, 'samples': 22329344, 'steps': 43611, 'loss/train': 0.8491621017456055} +03/05/2022 16:44:21 - INFO - codeparrot_training - Skipping example with length 882 (seq_length=1024) +03/05/2022 16:44:24 - INFO - codeparrot_training - Step 43612: {'lr': 0.0004086516431967984, 'samples': 22329856, 'steps': 43612, 'loss/train': 1.3803293704986572} +03/05/2022 16:44:27 - INFO - codeparrot_training - Step 43613: {'lr': 0.0004086475419213635, 'samples': 22330368, 'steps': 43613, 'loss/train': 2.8201427459716797} +03/05/2022 16:44:29 - INFO - codeparrot_training - Skipping example with length 303 (seq_length=1024) +03/05/2022 16:44:33 - INFO - codeparrot_training - Step 43614: {'lr': 0.0004086434405744445, 'samples': 22330880, 'steps': 43614, 'loss/train': 1.27468740940094} +03/05/2022 16:44:36 - INFO - codeparrot_training - Step 43615: {'lr': 0.00040863933915604323, 'samples': 22331392, 'steps': 43615, 'loss/train': 1.8225866556167603} +03/05/2022 16:44:39 - INFO - codeparrot_training - Step 43616: {'lr': 0.00040863523766616157, 'samples': 22331904, 'steps': 43616, 'loss/train': 2.0748023986816406} +03/05/2022 16:44:40 - INFO - codeparrot_training - Skipping example with length 326 (seq_length=1024) +03/05/2022 16:44:45 - INFO - codeparrot_training - Step 43617: {'lr': 0.0004086311361048012, 'samples': 22332416, 'steps': 43617, 'loss/train': 2.3647472858428955} +03/05/2022 16:44:48 - INFO - codeparrot_training - Step 43618: {'lr': 0.0004086270344719642, 'samples': 22332928, 'steps': 43618, 'loss/train': 1.3587702512741089} +03/05/2022 16:44:48 - INFO - codeparrot_training - Skipping example with length 105 (seq_length=1024) +03/05/2022 16:44:53 - INFO - codeparrot_training - Step 43619: {'lr': 0.00040862293276765227, 'samples': 22333440, 'steps': 43619, 'loss/train': 1.7732042074203491} +03/05/2022 16:44:56 - INFO - codeparrot_training - Step 43620: {'lr': 0.00040861883099186725, 'samples': 22333952, 'steps': 43620, 'loss/train': 1.735103964805603} +03/05/2022 16:44:57 - INFO - codeparrot_training - Skipping example with length 142 (seq_length=1024) +03/05/2022 16:45:02 - INFO - codeparrot_training - Step 43621: {'lr': 0.0004086147291446111, 'samples': 22334464, 'steps': 43621, 'loss/train': 2.0094165802001953} +03/05/2022 16:45:05 - INFO - codeparrot_training - Step 43622: {'lr': 0.0004086106272258856, 'samples': 22334976, 'steps': 43622, 'loss/train': 1.8635659217834473} +03/05/2022 16:45:05 - INFO - codeparrot_training - Skipping example with length 738 (seq_length=1024) +03/05/2022 16:45:10 - INFO - codeparrot_training - Step 43623: {'lr': 0.0004086065252356925, 'samples': 22335488, 'steps': 43623, 'loss/train': 1.9533724784851074} +03/05/2022 16:45:13 - INFO - codeparrot_training - Step 43624: {'lr': 0.00040860242317403383, 'samples': 22336000, 'steps': 43624, 'loss/train': 1.4665549993515015} +03/05/2022 16:45:13 - INFO - codeparrot_training - Skipping example with length 351 (seq_length=1024) +03/05/2022 16:45:18 - INFO - codeparrot_training - Step 43625: {'lr': 0.0004085983210409114, 'samples': 22336512, 'steps': 43625, 'loss/train': 2.1034657955169678} +03/05/2022 16:45:22 - INFO - codeparrot_training - Step 43626: {'lr': 0.00040859421883632696, 'samples': 22337024, 'steps': 43626, 'loss/train': 2.37176775932312} +03/05/2022 16:45:22 - INFO - codeparrot_training - Skipping example with length 53 (seq_length=1024) +03/05/2022 16:45:27 - INFO - codeparrot_training - Step 43627: {'lr': 0.0004085901165602824, 'samples': 22337536, 'steps': 43627, 'loss/train': 0.41075319051742554} +03/05/2022 16:45:30 - INFO - codeparrot_training - Step 43628: {'lr': 0.00040858601421277956, 'samples': 22338048, 'steps': 43628, 'loss/train': 1.1877251863479614} +03/05/2022 16:45:31 - INFO - codeparrot_training - Skipping example with length 567 (seq_length=1024) +03/05/2022 16:45:35 - INFO - codeparrot_training - Step 43629: {'lr': 0.00040858191179382044, 'samples': 22338560, 'steps': 43629, 'loss/train': 2.111377239227295} +03/05/2022 16:45:39 - INFO - codeparrot_training - Step 43630: {'lr': 0.0004085778093034066, 'samples': 22339072, 'steps': 43630, 'loss/train': 2.2425715923309326} +03/05/2022 16:45:39 - INFO - codeparrot_training - Skipping example with length 685 (seq_length=1024) +03/05/2022 16:45:44 - INFO - codeparrot_training - Step 43631: {'lr': 0.0004085737067415401, 'samples': 22339584, 'steps': 43631, 'loss/train': 2.2668380737304688} +03/05/2022 16:45:47 - INFO - codeparrot_training - Step 43632: {'lr': 0.00040856960410822277, 'samples': 22340096, 'steps': 43632, 'loss/train': 1.8864325284957886} +03/05/2022 16:45:49 - INFO - codeparrot_training - Skipping example with length 829 (seq_length=1024) +03/05/2022 16:45:53 - INFO - codeparrot_training - Step 43633: {'lr': 0.0004085655014034564, 'samples': 22340608, 'steps': 43633, 'loss/train': 1.4593383073806763} +03/05/2022 16:45:56 - INFO - codeparrot_training - Step 43634: {'lr': 0.0004085613986272428, 'samples': 22341120, 'steps': 43634, 'loss/train': 1.6532355546951294} +03/05/2022 16:45:57 - INFO - codeparrot_training - Skipping example with length 355 (seq_length=1024) +03/05/2022 16:46:01 - INFO - codeparrot_training - Step 43635: {'lr': 0.0004085572957795839, 'samples': 22341632, 'steps': 43635, 'loss/train': 0.9712749123573303} +03/05/2022 16:46:05 - INFO - codeparrot_training - Step 43636: {'lr': 0.00040855319286048163, 'samples': 22342144, 'steps': 43636, 'loss/train': 1.8798185586929321} +03/05/2022 16:46:06 - INFO - codeparrot_training - Skipping example with length 162 (seq_length=1024) +03/05/2022 16:46:10 - INFO - codeparrot_training - Step 43637: {'lr': 0.0004085490898699377, 'samples': 22342656, 'steps': 43637, 'loss/train': 1.976017713546753} +03/05/2022 16:46:13 - INFO - codeparrot_training - Step 43638: {'lr': 0.0004085449868079539, 'samples': 22343168, 'steps': 43638, 'loss/train': 0.8378556966781616} +03/05/2022 16:46:14 - INFO - codeparrot_training - Skipping example with length 862 (seq_length=1024) +03/05/2022 16:46:19 - INFO - codeparrot_training - Step 43639: {'lr': 0.00040854088367453225, 'samples': 22343680, 'steps': 43639, 'loss/train': 1.745743751525879} +03/05/2022 16:46:22 - INFO - codeparrot_training - Step 43640: {'lr': 0.00040853678046967454, 'samples': 22344192, 'steps': 43640, 'loss/train': 1.9121558666229248} +03/05/2022 16:46:22 - INFO - codeparrot_training - Skipping example with length 307 (seq_length=1024) +03/05/2022 16:46:27 - INFO - codeparrot_training - Step 43641: {'lr': 0.00040853267719338256, 'samples': 22344704, 'steps': 43641, 'loss/train': 1.4722404479980469} +03/05/2022 16:46:30 - INFO - codeparrot_training - Step 43642: {'lr': 0.00040852857384565824, 'samples': 22345216, 'steps': 43642, 'loss/train': 1.7225645780563354} +03/05/2022 16:46:31 - INFO - codeparrot_training - Skipping example with length 836 (seq_length=1024) +03/05/2022 16:46:36 - INFO - codeparrot_training - Step 43643: {'lr': 0.00040852447042650337, 'samples': 22345728, 'steps': 43643, 'loss/train': 0.6063699126243591} +03/05/2022 16:46:39 - INFO - codeparrot_training - Step 43644: {'lr': 0.0004085203669359198, 'samples': 22346240, 'steps': 43644, 'loss/train': 1.1045125722885132} +03/05/2022 16:46:40 - INFO - codeparrot_training - Skipping example with length 356 (seq_length=1024) +03/05/2022 16:46:44 - INFO - codeparrot_training - Step 43645: {'lr': 0.0004085162633739095, 'samples': 22346752, 'steps': 43645, 'loss/train': 1.9208160638809204} +03/05/2022 16:46:47 - INFO - codeparrot_training - Step 43646: {'lr': 0.0004085121597404741, 'samples': 22347264, 'steps': 43646, 'loss/train': 2.7570877075195312} +03/05/2022 16:46:48 - INFO - codeparrot_training - Skipping example with length 721 (seq_length=1024) +03/05/2022 16:46:53 - INFO - codeparrot_training - Step 43647: {'lr': 0.0004085080560356156, 'samples': 22347776, 'steps': 43647, 'loss/train': 1.6683223247528076} +03/05/2022 16:46:56 - INFO - codeparrot_training - Step 43648: {'lr': 0.0004085039522593358, 'samples': 22348288, 'steps': 43648, 'loss/train': 1.5714131593704224} +03/05/2022 16:46:57 - INFO - codeparrot_training - Skipping example with length 953 (seq_length=1024) +03/05/2022 16:47:01 - INFO - codeparrot_training - Step 43649: {'lr': 0.0004084998484116366, 'samples': 22348800, 'steps': 43649, 'loss/train': 1.1651099920272827} +03/05/2022 16:47:04 - INFO - codeparrot_training - Step 43650: {'lr': 0.0004084957444925198, 'samples': 22349312, 'steps': 43650, 'loss/train': 1.1158400774002075} +03/05/2022 16:47:06 - INFO - codeparrot_training - Skipping example with length 516 (seq_length=1024) +03/05/2022 16:47:10 - INFO - codeparrot_training - Step 43651: {'lr': 0.0004084916405019873, 'samples': 22349824, 'steps': 43651, 'loss/train': 1.0685019493103027} +03/05/2022 16:47:13 - INFO - codeparrot_training - Step 43652: {'lr': 0.0004084875364400409, 'samples': 22350336, 'steps': 43652, 'loss/train': 1.8802387714385986} +03/05/2022 16:47:14 - INFO - codeparrot_training - Skipping example with length 207 (seq_length=1024) +03/05/2022 16:47:18 - INFO - codeparrot_training - Step 43653: {'lr': 0.0004084834323066824, 'samples': 22350848, 'steps': 43653, 'loss/train': 1.3390833139419556} +03/05/2022 16:47:21 - INFO - codeparrot_training - Step 43654: {'lr': 0.00040847932810191375, 'samples': 22351360, 'steps': 43654, 'loss/train': 1.6255507469177246} +03/05/2022 16:47:23 - INFO - codeparrot_training - Skipping example with length 905 (seq_length=1024) +03/05/2022 16:47:26 - INFO - codeparrot_training - Step 43655: {'lr': 0.00040847522382573675, 'samples': 22351872, 'steps': 43655, 'loss/train': 0.9801068305969238} +03/05/2022 16:47:30 - INFO - codeparrot_training - Step 43656: {'lr': 0.0004084711194781533, 'samples': 22352384, 'steps': 43656, 'loss/train': 2.205024003982544} +03/05/2022 16:47:31 - INFO - codeparrot_training - Skipping example with length 844 (seq_length=1024) +03/05/2022 16:47:35 - INFO - codeparrot_training - Step 43657: {'lr': 0.00040846701505916516, 'samples': 22352896, 'steps': 43657, 'loss/train': 1.8518301248550415} +03/05/2022 16:47:38 - INFO - codeparrot_training - Step 43658: {'lr': 0.00040846291056877425, 'samples': 22353408, 'steps': 43658, 'loss/train': 1.3448373079299927} +03/05/2022 16:47:39 - INFO - codeparrot_training - Skipping example with length 209 (seq_length=1024) +03/05/2022 16:47:43 - INFO - codeparrot_training - Step 43659: {'lr': 0.0004084588060069824, 'samples': 22353920, 'steps': 43659, 'loss/train': 1.3093796968460083} +03/05/2022 16:47:46 - INFO - codeparrot_training - Step 43660: {'lr': 0.0004084547013737915, 'samples': 22354432, 'steps': 43660, 'loss/train': 1.4651625156402588} +03/05/2022 16:47:48 - INFO - codeparrot_training - Skipping example with length 908 (seq_length=1024) +03/05/2022 16:47:52 - INFO - codeparrot_training - Step 43661: {'lr': 0.00040845059666920323, 'samples': 22354944, 'steps': 43661, 'loss/train': 1.9470903873443604} +03/05/2022 16:47:55 - INFO - codeparrot_training - Step 43662: {'lr': 0.0004084464918932197, 'samples': 22355456, 'steps': 43662, 'loss/train': 1.0761038064956665} +03/05/2022 16:47:56 - INFO - codeparrot_training - Skipping example with length 321 (seq_length=1024) +03/05/2022 16:48:01 - INFO - codeparrot_training - Step 43663: {'lr': 0.0004084423870458426, 'samples': 22355968, 'steps': 43663, 'loss/train': 6.507169723510742} +03/05/2022 16:48:04 - INFO - codeparrot_training - Step 43664: {'lr': 0.00040843828212707366, 'samples': 22356480, 'steps': 43664, 'loss/train': 2.264404296875} +03/05/2022 16:48:05 - INFO - codeparrot_training - Skipping example with length 514 (seq_length=1024) +03/05/2022 16:48:09 - INFO - codeparrot_training - Step 43665: {'lr': 0.00040843417713691505, 'samples': 22356992, 'steps': 43665, 'loss/train': 1.8914523124694824} +03/05/2022 16:48:12 - INFO - codeparrot_training - Step 43666: {'lr': 0.0004084300720753684, 'samples': 22357504, 'steps': 43666, 'loss/train': 1.4085049629211426} +03/05/2022 16:48:14 - INFO - codeparrot_training - Skipping example with length 565 (seq_length=1024) +03/05/2022 16:48:17 - INFO - codeparrot_training - Step 43667: {'lr': 0.0004084259669424356, 'samples': 22358016, 'steps': 43667, 'loss/train': 1.1709370613098145} +03/05/2022 16:48:21 - INFO - codeparrot_training - Step 43668: {'lr': 0.0004084218617381185, 'samples': 22358528, 'steps': 43668, 'loss/train': 1.9813975095748901} +03/05/2022 16:48:23 - INFO - codeparrot_training - Skipping example with length 282 (seq_length=1024) +03/05/2022 16:48:26 - INFO - codeparrot_training - Step 43669: {'lr': 0.00040841775646241897, 'samples': 22359040, 'steps': 43669, 'loss/train': 1.8351049423217773} +03/05/2022 16:48:29 - INFO - codeparrot_training - Step 43670: {'lr': 0.0004084136511153388, 'samples': 22359552, 'steps': 43670, 'loss/train': 1.079126238822937} +03/05/2022 16:48:31 - INFO - codeparrot_training - Skipping example with length 27 (seq_length=1024) +03/05/2022 16:48:34 - INFO - codeparrot_training - Step 43671: {'lr': 0.00040840954569687987, 'samples': 22360064, 'steps': 43671, 'loss/train': 1.3537046909332275} +03/05/2022 16:48:38 - INFO - codeparrot_training - Step 43672: {'lr': 0.0004084054402070441, 'samples': 22360576, 'steps': 43672, 'loss/train': 1.565490484237671} +03/05/2022 16:48:39 - INFO - codeparrot_training - Skipping example with length 951 (seq_length=1024) +03/05/2022 16:48:43 - INFO - codeparrot_training - Step 43673: {'lr': 0.0004084013346458333, 'samples': 22361088, 'steps': 43673, 'loss/train': 1.4023786783218384} +03/05/2022 16:48:46 - INFO - codeparrot_training - Step 43674: {'lr': 0.00040839722901324924, 'samples': 22361600, 'steps': 43674, 'loss/train': 1.7986925840377808} +03/05/2022 16:48:48 - INFO - codeparrot_training - Skipping example with length 374 (seq_length=1024) +03/05/2022 16:48:51 - INFO - codeparrot_training - Step 43675: {'lr': 0.00040839312330929377, 'samples': 22362112, 'steps': 43675, 'loss/train': 1.3742527961730957} +03/05/2022 16:48:54 - INFO - codeparrot_training - Step 43676: {'lr': 0.00040838901753396896, 'samples': 22362624, 'steps': 43676, 'loss/train': 1.335737705230713} +03/05/2022 16:48:56 - INFO - codeparrot_training - Skipping example with length 828 (seq_length=1024) +03/05/2022 16:49:00 - INFO - codeparrot_training - Step 43677: {'lr': 0.0004083849116872764, 'samples': 22363136, 'steps': 43677, 'loss/train': 1.172147512435913} +03/05/2022 16:49:03 - INFO - codeparrot_training - Step 43678: {'lr': 0.0004083808057692181, 'samples': 22363648, 'steps': 43678, 'loss/train': 1.688503623008728} +03/05/2022 16:49:05 - INFO - codeparrot_training - Skipping example with length 18 (seq_length=1024) +03/05/2022 16:49:08 - INFO - codeparrot_training - Step 43679: {'lr': 0.00040837669977979586, 'samples': 22364160, 'steps': 43679, 'loss/train': 2.137653112411499} +03/05/2022 16:49:11 - INFO - codeparrot_training - Step 43680: {'lr': 0.00040837259371901145, 'samples': 22364672, 'steps': 43680, 'loss/train': 1.1138176918029785} +03/05/2022 16:49:14 - INFO - codeparrot_training - Skipping example with length 944 (seq_length=1024) +03/05/2022 16:49:17 - INFO - codeparrot_training - Step 43681: {'lr': 0.00040836848758686687, 'samples': 22365184, 'steps': 43681, 'loss/train': 1.9679498672485352} +03/05/2022 16:49:20 - INFO - codeparrot_training - Step 43682: {'lr': 0.00040836438138336384, 'samples': 22365696, 'steps': 43682, 'loss/train': 1.266290545463562} +03/05/2022 16:49:22 - INFO - codeparrot_training - Skipping example with length 661 (seq_length=1024) +03/05/2022 16:49:25 - INFO - codeparrot_training - Step 43683: {'lr': 0.00040836027510850426, 'samples': 22366208, 'steps': 43683, 'loss/train': 1.095920205116272} +03/05/2022 16:49:28 - INFO - codeparrot_training - Step 43684: {'lr': 0.00040835616876229, 'samples': 22366720, 'steps': 43684, 'loss/train': 0.9919460415840149} +03/05/2022 16:49:31 - INFO - codeparrot_training - Skipping example with length 640 (seq_length=1024) +03/05/2022 16:49:34 - INFO - codeparrot_training - Step 43685: {'lr': 0.00040835206234472287, 'samples': 22367232, 'steps': 43685, 'loss/train': 1.6624979972839355} +03/05/2022 16:49:37 - INFO - codeparrot_training - Step 43686: {'lr': 0.0004083479558558048, 'samples': 22367744, 'steps': 43686, 'loss/train': 1.819217562675476} +03/05/2022 16:49:39 - INFO - codeparrot_training - Skipping example with length 137 (seq_length=1024) +03/05/2022 16:49:42 - INFO - codeparrot_training - Step 43687: {'lr': 0.0004083438492955376, 'samples': 22368256, 'steps': 43687, 'loss/train': 1.4410797357559204} +03/05/2022 16:49:45 - INFO - codeparrot_training - Step 43688: {'lr': 0.00040833974266392306, 'samples': 22368768, 'steps': 43688, 'loss/train': 1.7796001434326172} +03/05/2022 16:49:48 - INFO - codeparrot_training - Skipping example with length 844 (seq_length=1024) +03/05/2022 16:49:51 - INFO - codeparrot_training - Step 43689: {'lr': 0.00040833563596096305, 'samples': 22369280, 'steps': 43689, 'loss/train': 1.548547625541687} +03/05/2022 16:49:54 - INFO - codeparrot_training - Step 43690: {'lr': 0.0004083315291866595, 'samples': 22369792, 'steps': 43690, 'loss/train': 0.8119476437568665} +03/05/2022 16:49:56 - INFO - codeparrot_training - Skipping example with length 829 (seq_length=1024) +03/05/2022 16:49:59 - INFO - codeparrot_training - Step 43691: {'lr': 0.00040832742234101415, 'samples': 22370304, 'steps': 43691, 'loss/train': 1.9347418546676636} +03/05/2022 16:50:02 - INFO - codeparrot_training - Step 43692: {'lr': 0.00040832331542402895, 'samples': 22370816, 'steps': 43692, 'loss/train': 1.5216619968414307} +03/05/2022 16:50:05 - INFO - codeparrot_training - Skipping example with length 850 (seq_length=1024) +03/05/2022 16:50:08 - INFO - codeparrot_training - Step 43693: {'lr': 0.0004083192084357057, 'samples': 22371328, 'steps': 43693, 'loss/train': 1.1230177879333496} +03/05/2022 16:50:11 - INFO - codeparrot_training - Step 43694: {'lr': 0.0004083151013760462, 'samples': 22371840, 'steps': 43694, 'loss/train': 1.637165904045105} +03/05/2022 16:50:13 - INFO - codeparrot_training - Skipping example with length 238 (seq_length=1024) +03/05/2022 16:50:16 - INFO - codeparrot_training - Step 43695: {'lr': 0.0004083109942450524, 'samples': 22372352, 'steps': 43695, 'loss/train': 1.572172999382019} +03/05/2022 16:50:19 - INFO - codeparrot_training - Step 43696: {'lr': 0.00040830688704272615, 'samples': 22372864, 'steps': 43696, 'loss/train': 2.208292007446289} +03/05/2022 16:50:21 - INFO - codeparrot_training - Skipping example with length 180 (seq_length=1024) +03/05/2022 16:50:25 - INFO - codeparrot_training - Step 43697: {'lr': 0.0004083027797690693, 'samples': 22373376, 'steps': 43697, 'loss/train': 1.4846382141113281} +03/05/2022 16:50:28 - INFO - codeparrot_training - Step 43698: {'lr': 0.0004082986724240835, 'samples': 22373888, 'steps': 43698, 'loss/train': 1.4512183666229248} +03/05/2022 16:50:30 - INFO - codeparrot_training - Skipping example with length 673 (seq_length=1024) +03/05/2022 16:50:33 - INFO - codeparrot_training - Step 43699: {'lr': 0.00040829456500777084, 'samples': 22374400, 'steps': 43699, 'loss/train': 2.3024072647094727} +03/05/2022 16:50:36 - INFO - codeparrot_training - Step 43700: {'lr': 0.00040829045752013317, 'samples': 22374912, 'steps': 43700, 'loss/train': 1.728979468345642} +03/05/2022 16:50:39 - INFO - codeparrot_training - Skipping example with length 865 (seq_length=1024) +03/05/2022 16:50:42 - INFO - codeparrot_training - Step 43701: {'lr': 0.00040828634996117213, 'samples': 22375424, 'steps': 43701, 'loss/train': 1.0800122022628784} +03/05/2022 16:50:45 - INFO - codeparrot_training - Step 43702: {'lr': 0.0004082822423308897, 'samples': 22375936, 'steps': 43702, 'loss/train': 1.7643481492996216} +03/05/2022 16:50:47 - INFO - codeparrot_training - Skipping example with length 887 (seq_length=1024) +03/05/2022 16:50:50 - INFO - codeparrot_training - Step 43703: {'lr': 0.00040827813462928784, 'samples': 22376448, 'steps': 43703, 'loss/train': 1.5963966846466064} +03/05/2022 16:50:53 - INFO - codeparrot_training - Step 43704: {'lr': 0.0004082740268563683, 'samples': 22376960, 'steps': 43704, 'loss/train': 1.8335922956466675} +03/05/2022 16:50:56 - INFO - codeparrot_training - Skipping example with length 96 (seq_length=1024) +03/05/2022 16:50:58 - INFO - codeparrot_training - Step 43705: {'lr': 0.0004082699190121329, 'samples': 22377472, 'steps': 43705, 'loss/train': 1.871158242225647} +03/05/2022 16:51:02 - INFO - codeparrot_training - Step 43706: {'lr': 0.00040826581109658345, 'samples': 22377984, 'steps': 43706, 'loss/train': 1.2035809755325317} +03/05/2022 16:51:04 - INFO - codeparrot_training - Skipping example with length 894 (seq_length=1024) +03/05/2022 16:51:07 - INFO - codeparrot_training - Step 43707: {'lr': 0.00040826170310972196, 'samples': 22378496, 'steps': 43707, 'loss/train': 1.8745824098587036} +03/05/2022 16:51:10 - INFO - codeparrot_training - Step 43708: {'lr': 0.0004082575950515501, 'samples': 22379008, 'steps': 43708, 'loss/train': 2.0608975887298584} +03/05/2022 16:51:12 - INFO - codeparrot_training - Skipping example with length 632 (seq_length=1024) +03/05/2022 16:51:15 - INFO - codeparrot_training - Step 43709: {'lr': 0.00040825348692206985, 'samples': 22379520, 'steps': 43709, 'loss/train': 2.143752098083496} +03/05/2022 16:51:19 - INFO - codeparrot_training - Step 43710: {'lr': 0.0004082493787212831, 'samples': 22380032, 'steps': 43710, 'loss/train': 1.1901994943618774} +03/05/2022 16:51:21 - INFO - codeparrot_training - Skipping example with length 942 (seq_length=1024) +03/05/2022 16:51:24 - INFO - codeparrot_training - Step 43711: {'lr': 0.00040824527044919153, 'samples': 22380544, 'steps': 43711, 'loss/train': 1.492495059967041} +03/05/2022 16:51:27 - INFO - codeparrot_training - Step 43712: {'lr': 0.0004082411621057971, 'samples': 22381056, 'steps': 43712, 'loss/train': 1.803471565246582} +03/05/2022 16:51:30 - INFO - codeparrot_training - Skipping example with length 638 (seq_length=1024) +03/05/2022 16:51:32 - INFO - codeparrot_training - Step 43713: {'lr': 0.00040823705369110163, 'samples': 22381568, 'steps': 43713, 'loss/train': 0.9945499300956726} +03/05/2022 16:51:35 - INFO - codeparrot_training - Step 43714: {'lr': 0.000408232945205107, 'samples': 22382080, 'steps': 43714, 'loss/train': 0.09742530435323715} +03/05/2022 16:51:38 - INFO - codeparrot_training - Skipping example with length 974 (seq_length=1024) +03/05/2022 16:51:41 - INFO - codeparrot_training - Step 43715: {'lr': 0.00040822883664781506, 'samples': 22382592, 'steps': 43715, 'loss/train': 1.8790180683135986} +03/05/2022 16:51:44 - INFO - codeparrot_training - Step 43716: {'lr': 0.0004082247280192276, 'samples': 22383104, 'steps': 43716, 'loss/train': 1.2757371664047241} +03/05/2022 16:51:47 - INFO - codeparrot_training - Step 43717: {'lr': 0.00040822061931934656, 'samples': 22383616, 'steps': 43717, 'loss/train': 2.048994302749634} +03/05/2022 16:51:47 - INFO - codeparrot_training - Skipping example with length 858 (seq_length=1024) +03/05/2022 16:51:52 - INFO - codeparrot_training - Step 43718: {'lr': 0.00040821651054817376, 'samples': 22384128, 'steps': 43718, 'loss/train': 0.6961318850517273} +03/05/2022 16:51:56 - INFO - codeparrot_training - Step 43719: {'lr': 0.000408212401705711, 'samples': 22384640, 'steps': 43719, 'loss/train': 1.4624629020690918} +03/05/2022 16:51:56 - INFO - codeparrot_training - Skipping example with length 710 (seq_length=1024) +03/05/2022 16:52:01 - INFO - codeparrot_training - Step 43720: {'lr': 0.0004082082927919602, 'samples': 22385152, 'steps': 43720, 'loss/train': 1.5730066299438477} +03/05/2022 16:52:04 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/05/2022 16:52:06 - INFO - codeparrot_training - Step 43721: {'lr': 0.0004082041838069232, 'samples': 22385664, 'steps': 43721, 'loss/train': 1.944197177886963} +03/05/2022 16:52:09 - INFO - codeparrot_training - Step 43722: {'lr': 0.0004082000747506018, 'samples': 22386176, 'steps': 43722, 'loss/train': 1.5476070642471313} +03/05/2022 16:52:12 - INFO - codeparrot_training - Skipping example with length 614 (seq_length=1024) +03/05/2022 16:52:15 - INFO - codeparrot_training - Step 43723: {'lr': 0.00040819596562299793, 'samples': 22386688, 'steps': 43723, 'loss/train': 0.8925703763961792} +03/05/2022 16:52:18 - INFO - codeparrot_training - Step 43724: {'lr': 0.0004081918564241134, 'samples': 22387200, 'steps': 43724, 'loss/train': 2.050227403640747} +03/05/2022 16:52:21 - INFO - codeparrot_training - Step 43725: {'lr': 0.00040818774715395, 'samples': 22387712, 'steps': 43725, 'loss/train': 1.4173418283462524} +03/05/2022 16:52:23 - INFO - codeparrot_training - Skipping example with length 19 (seq_length=1024) +03/05/2022 16:52:27 - INFO - codeparrot_training - Step 43726: {'lr': 0.0004081836378125097, 'samples': 22388224, 'steps': 43726, 'loss/train': 1.8735824823379517} +03/05/2022 16:52:30 - INFO - codeparrot_training - Step 43727: {'lr': 0.00040817952839979424, 'samples': 22388736, 'steps': 43727, 'loss/train': 1.3684109449386597} +03/05/2022 16:52:32 - INFO - codeparrot_training - Skipping example with length 883 (seq_length=1024) +03/05/2022 16:52:35 - INFO - codeparrot_training - Step 43728: {'lr': 0.00040817541891580557, 'samples': 22389248, 'steps': 43728, 'loss/train': 1.1134504079818726} +03/05/2022 16:52:38 - INFO - codeparrot_training - Step 43729: {'lr': 0.00040817130936054546, 'samples': 22389760, 'steps': 43729, 'loss/train': 2.4697091579437256} +03/05/2022 16:52:40 - INFO - codeparrot_training - Skipping example with length 637 (seq_length=1024) +03/05/2022 16:52:44 - INFO - codeparrot_training - Step 43730: {'lr': 0.00040816719973401586, 'samples': 22390272, 'steps': 43730, 'loss/train': 1.5405408143997192} +03/05/2022 16:52:47 - INFO - codeparrot_training - Step 43731: {'lr': 0.0004081630900362185, 'samples': 22390784, 'steps': 43731, 'loss/train': 1.9429545402526855} +03/05/2022 16:52:49 - INFO - codeparrot_training - Skipping example with length 931 (seq_length=1024) +03/05/2022 16:52:52 - INFO - codeparrot_training - Step 43732: {'lr': 0.0004081589802671553, 'samples': 22391296, 'steps': 43732, 'loss/train': 1.8159990310668945} +03/05/2022 16:52:55 - INFO - codeparrot_training - Step 43733: {'lr': 0.00040815487042682814, 'samples': 22391808, 'steps': 43733, 'loss/train': 1.5074989795684814} +03/05/2022 16:52:57 - INFO - codeparrot_training - Skipping example with length 935 (seq_length=1024) +03/05/2022 16:53:00 - INFO - codeparrot_training - Step 43734: {'lr': 0.0004081507605152388, 'samples': 22392320, 'steps': 43734, 'loss/train': 2.0342931747436523} +03/05/2022 16:53:04 - INFO - codeparrot_training - Step 43735: {'lr': 0.0004081466505323892, 'samples': 22392832, 'steps': 43735, 'loss/train': 1.1064194440841675} +03/05/2022 16:53:06 - INFO - codeparrot_training - Skipping example with length 531 (seq_length=1024) +03/05/2022 16:53:09 - INFO - codeparrot_training - Step 43736: {'lr': 0.0004081425404782811, 'samples': 22393344, 'steps': 43736, 'loss/train': 1.9182690382003784} +03/05/2022 16:53:12 - INFO - codeparrot_training - Step 43737: {'lr': 0.00040813843035291655, 'samples': 22393856, 'steps': 43737, 'loss/train': 1.207209825515747} +03/05/2022 16:53:14 - INFO - codeparrot_training - Skipping example with length 954 (seq_length=1024) +03/05/2022 16:53:17 - INFO - codeparrot_training - Step 43738: {'lr': 0.00040813432015629714, 'samples': 22394368, 'steps': 43738, 'loss/train': 2.294670581817627} +03/05/2022 16:53:21 - INFO - codeparrot_training - Step 43739: {'lr': 0.0004081302098884249, 'samples': 22394880, 'steps': 43739, 'loss/train': 1.1051883697509766} +03/05/2022 16:53:22 - INFO - codeparrot_training - Skipping example with length 608 (seq_length=1024) +03/05/2022 16:53:26 - INFO - codeparrot_training - Step 43740: {'lr': 0.0004081260995493015, 'samples': 22395392, 'steps': 43740, 'loss/train': 1.69712233543396} +03/05/2022 16:53:29 - INFO - codeparrot_training - Step 43741: {'lr': 0.0004081219891389291, 'samples': 22395904, 'steps': 43741, 'loss/train': 1.7056206464767456} +03/05/2022 16:53:31 - INFO - codeparrot_training - Skipping example with length 429 (seq_length=1024) +03/05/2022 16:53:34 - INFO - codeparrot_training - Step 43742: {'lr': 0.0004081178786573092, 'samples': 22396416, 'steps': 43742, 'loss/train': 1.5444432497024536} +03/05/2022 16:53:38 - INFO - codeparrot_training - Step 43743: {'lr': 0.000408113768104444, 'samples': 22396928, 'steps': 43743, 'loss/train': 1.611764907836914} +03/05/2022 16:53:39 - INFO - codeparrot_training - Skipping example with length 364 (seq_length=1024) +03/05/2022 16:53:43 - INFO - codeparrot_training - Step 43744: {'lr': 0.0004081096574803351, 'samples': 22397440, 'steps': 43744, 'loss/train': 0.793775737285614} +03/05/2022 16:53:46 - INFO - codeparrot_training - Step 43745: {'lr': 0.00040810554678498434, 'samples': 22397952, 'steps': 43745, 'loss/train': 1.2236206531524658} +03/05/2022 16:53:48 - INFO - codeparrot_training - Skipping example with length 563 (seq_length=1024) +03/05/2022 16:53:51 - INFO - codeparrot_training - Step 43746: {'lr': 0.00040810143601839377, 'samples': 22398464, 'steps': 43746, 'loss/train': 2.298741579055786} +03/05/2022 16:53:55 - INFO - codeparrot_training - Step 43747: {'lr': 0.0004080973251805651, 'samples': 22398976, 'steps': 43747, 'loss/train': 1.856736421585083} +03/05/2022 16:53:57 - INFO - codeparrot_training - Skipping example with length 172 (seq_length=1024) +03/05/2022 16:54:00 - INFO - codeparrot_training - Step 43748: {'lr': 0.0004080932142715002, 'samples': 22399488, 'steps': 43748, 'loss/train': 1.2671951055526733} +03/05/2022 16:54:03 - INFO - codeparrot_training - Step 43749: {'lr': 0.000408089103291201, 'samples': 22400000, 'steps': 43749, 'loss/train': 1.8553847074508667} +03/05/2022 16:54:05 - INFO - codeparrot_training - Skipping example with length 88 (seq_length=1024) +03/05/2022 16:54:08 - INFO - codeparrot_training - Step 43750: {'lr': 0.0004080849922396692, 'samples': 22400512, 'steps': 43750, 'loss/train': 2.610322952270508} +03/05/2022 16:54:12 - INFO - codeparrot_training - Step 43751: {'lr': 0.00040808088111690677, 'samples': 22401024, 'steps': 43751, 'loss/train': 0.12095628678798676} +03/05/2022 16:54:14 - INFO - codeparrot_training - Skipping example with length 201 (seq_length=1024) +03/05/2022 16:54:17 - INFO - codeparrot_training - Step 43752: {'lr': 0.00040807676992291557, 'samples': 22401536, 'steps': 43752, 'loss/train': 1.3341021537780762} +03/05/2022 16:54:20 - INFO - codeparrot_training - Step 43753: {'lr': 0.0004080726586576974, 'samples': 22402048, 'steps': 43753, 'loss/train': 2.1739494800567627} +03/05/2022 16:54:22 - INFO - codeparrot_training - Skipping example with length 811 (seq_length=1024) +03/05/2022 16:54:25 - INFO - codeparrot_training - Step 43754: {'lr': 0.0004080685473212541, 'samples': 22402560, 'steps': 43754, 'loss/train': 1.2442368268966675} +03/05/2022 16:54:28 - INFO - codeparrot_training - Step 43755: {'lr': 0.0004080644359135876, 'samples': 22403072, 'steps': 43755, 'loss/train': 2.043179750442505} +03/05/2022 16:54:31 - INFO - codeparrot_training - Skipping example with length 749 (seq_length=1024) +03/05/2022 16:54:34 - INFO - codeparrot_training - Step 43756: {'lr': 0.00040806032443469967, 'samples': 22403584, 'steps': 43756, 'loss/train': 0.4384129047393799} +03/05/2022 16:54:37 - INFO - codeparrot_training - Step 43757: {'lr': 0.0004080562128845923, 'samples': 22404096, 'steps': 43757, 'loss/train': 1.2111958265304565} +03/05/2022 16:54:39 - INFO - codeparrot_training - Skipping example with length 882 (seq_length=1024) +03/05/2022 16:54:42 - INFO - codeparrot_training - Step 43758: {'lr': 0.0004080521012632671, 'samples': 22404608, 'steps': 43758, 'loss/train': 1.120803952217102} +03/05/2022 16:54:46 - INFO - codeparrot_training - Step 43759: {'lr': 0.00040804798957072607, 'samples': 22405120, 'steps': 43759, 'loss/train': 1.6617861986160278} +03/05/2022 16:54:48 - INFO - codeparrot_training - Skipping example with length 802 (seq_length=1024) +03/05/2022 16:54:51 - INFO - codeparrot_training - Step 43760: {'lr': 0.0004080438778069711, 'samples': 22405632, 'steps': 43760, 'loss/train': 2.115455150604248} +03/05/2022 16:54:54 - INFO - codeparrot_training - Step 43761: {'lr': 0.000408039765972004, 'samples': 22406144, 'steps': 43761, 'loss/train': 1.1934529542922974} +03/05/2022 16:54:56 - INFO - codeparrot_training - Skipping example with length 672 (seq_length=1024) +03/05/2022 16:54:59 - INFO - codeparrot_training - Step 43762: {'lr': 0.0004080356540658266, 'samples': 22406656, 'steps': 43762, 'loss/train': 1.0696488618850708} +03/05/2022 16:55:02 - INFO - codeparrot_training - Step 43763: {'lr': 0.00040803154208844086, 'samples': 22407168, 'steps': 43763, 'loss/train': 2.110583782196045} +03/05/2022 16:55:05 - INFO - codeparrot_training - Skipping example with length 445 (seq_length=1024) +03/05/2022 16:55:08 - INFO - codeparrot_training - Step 43764: {'lr': 0.00040802743003984845, 'samples': 22407680, 'steps': 43764, 'loss/train': 1.1371500492095947} +03/05/2022 16:55:11 - INFO - codeparrot_training - Step 43765: {'lr': 0.0004080233179200513, 'samples': 22408192, 'steps': 43765, 'loss/train': 2.5755507946014404} +03/05/2022 16:55:13 - INFO - codeparrot_training - Skipping example with length 441 (seq_length=1024) +03/05/2022 16:55:16 - INFO - codeparrot_training - Step 43766: {'lr': 0.00040801920572905133, 'samples': 22408704, 'steps': 43766, 'loss/train': 1.1315655708312988} +03/05/2022 16:55:19 - INFO - codeparrot_training - Step 43767: {'lr': 0.0004080150934668503, 'samples': 22409216, 'steps': 43767, 'loss/train': 1.925883173942566} +03/05/2022 16:55:21 - INFO - codeparrot_training - Skipping example with length 158 (seq_length=1024) +03/05/2022 16:55:25 - INFO - codeparrot_training - Step 43768: {'lr': 0.00040801098113345014, 'samples': 22409728, 'steps': 43768, 'loss/train': 1.7936030626296997} +03/05/2022 16:55:28 - INFO - codeparrot_training - Step 43769: {'lr': 0.00040800686872885267, 'samples': 22410240, 'steps': 43769, 'loss/train': 1.4563323259353638} +03/05/2022 16:55:30 - INFO - codeparrot_training - Skipping example with length 381 (seq_length=1024) +03/05/2022 16:55:33 - INFO - codeparrot_training - Step 43770: {'lr': 0.0004080027562530598, 'samples': 22410752, 'steps': 43770, 'loss/train': 2.625141143798828} +03/05/2022 16:55:36 - INFO - codeparrot_training - Step 43771: {'lr': 0.0004079986437060733, 'samples': 22411264, 'steps': 43771, 'loss/train': 1.2488816976547241} +03/05/2022 16:55:39 - INFO - codeparrot_training - Skipping example with length 426 (seq_length=1024) +03/05/2022 16:55:42 - INFO - codeparrot_training - Step 43772: {'lr': 0.00040799453108789497, 'samples': 22411776, 'steps': 43772, 'loss/train': 1.4593466520309448} +03/05/2022 16:55:45 - INFO - codeparrot_training - Step 43773: {'lr': 0.0004079904183985268, 'samples': 22412288, 'steps': 43773, 'loss/train': 2.231966257095337} +03/05/2022 16:55:47 - INFO - codeparrot_training - Skipping example with length 620 (seq_length=1024) +03/05/2022 16:55:50 - INFO - codeparrot_training - Step 43774: {'lr': 0.00040798630563797055, 'samples': 22412800, 'steps': 43774, 'loss/train': 2.2671263217926025} +03/05/2022 16:55:53 - INFO - codeparrot_training - Step 43775: {'lr': 0.00040798219280622816, 'samples': 22413312, 'steps': 43775, 'loss/train': 1.4242421388626099} +03/05/2022 16:55:56 - INFO - codeparrot_training - Skipping example with length 990 (seq_length=1024) +03/05/2022 16:55:59 - INFO - codeparrot_training - Step 43776: {'lr': 0.0004079780799033014, 'samples': 22413824, 'steps': 43776, 'loss/train': 2.207301378250122} +03/05/2022 16:56:02 - INFO - codeparrot_training - Step 43777: {'lr': 0.0004079739669291922, 'samples': 22414336, 'steps': 43777, 'loss/train': 1.8314064741134644} +03/05/2022 16:56:05 - INFO - codeparrot_training - Skipping example with length 259 (seq_length=1024) +03/05/2022 16:56:07 - INFO - codeparrot_training - Step 43778: {'lr': 0.0004079698538839023, 'samples': 22414848, 'steps': 43778, 'loss/train': 1.5699042081832886} +03/05/2022 16:56:10 - INFO - codeparrot_training - Step 43779: {'lr': 0.00040796574076743366, 'samples': 22415360, 'steps': 43779, 'loss/train': 1.917794942855835} +03/05/2022 16:56:13 - INFO - codeparrot_training - Skipping example with length 760 (seq_length=1024) +03/05/2022 16:56:16 - INFO - codeparrot_training - Step 43780: {'lr': 0.00040796162757978803, 'samples': 22415872, 'steps': 43780, 'loss/train': 1.334808111190796} +03/05/2022 16:56:19 - INFO - codeparrot_training - Step 43781: {'lr': 0.00040795751432096746, 'samples': 22416384, 'steps': 43781, 'loss/train': 2.1090517044067383} +03/05/2022 16:56:21 - INFO - codeparrot_training - Skipping example with length 739 (seq_length=1024) +03/05/2022 16:56:24 - INFO - codeparrot_training - Step 43782: {'lr': 0.00040795340099097357, 'samples': 22416896, 'steps': 43782, 'loss/train': 2.0725297927856445} +03/05/2022 16:56:27 - INFO - codeparrot_training - Step 43783: {'lr': 0.00040794928758980837, 'samples': 22417408, 'steps': 43783, 'loss/train': 0.06948641687631607} +03/05/2022 16:56:30 - INFO - codeparrot_training - Skipping example with length 597 (seq_length=1024) +03/05/2022 16:56:33 - INFO - codeparrot_training - Step 43784: {'lr': 0.0004079451741174737, 'samples': 22417920, 'steps': 43784, 'loss/train': 1.224715232849121} +03/05/2022 16:56:36 - INFO - codeparrot_training - Step 43785: {'lr': 0.00040794106057397123, 'samples': 22418432, 'steps': 43785, 'loss/train': 1.880058765411377} +03/05/2022 16:56:38 - INFO - codeparrot_training - Skipping example with length 106 (seq_length=1024) +03/05/2022 16:56:41 - INFO - codeparrot_training - Step 43786: {'lr': 0.00040793694695930304, 'samples': 22418944, 'steps': 43786, 'loss/train': 1.9487391710281372} +03/05/2022 16:56:44 - INFO - codeparrot_training - Step 43787: {'lr': 0.00040793283327347085, 'samples': 22419456, 'steps': 43787, 'loss/train': 1.438030481338501} +03/05/2022 16:56:46 - INFO - codeparrot_training - Skipping example with length 32 (seq_length=1024) +03/05/2022 16:56:49 - INFO - codeparrot_training - Step 43788: {'lr': 0.00040792871951647657, 'samples': 22419968, 'steps': 43788, 'loss/train': 1.9918346405029297} +03/05/2022 16:56:53 - INFO - codeparrot_training - Step 43789: {'lr': 0.00040792460568832214, 'samples': 22420480, 'steps': 43789, 'loss/train': 2.6037285327911377} +03/05/2022 16:56:54 - INFO - codeparrot_training - Skipping example with length 522 (seq_length=1024) +03/05/2022 16:56:58 - INFO - codeparrot_training - Step 43790: {'lr': 0.00040792049178900924, 'samples': 22420992, 'steps': 43790, 'loss/train': 1.5741981267929077} +03/05/2022 16:57:01 - INFO - codeparrot_training - Step 43791: {'lr': 0.00040791637781853983, 'samples': 22421504, 'steps': 43791, 'loss/train': 1.9002717733383179} +03/05/2022 16:57:03 - INFO - codeparrot_training - Skipping example with length 930 (seq_length=1024) +03/05/2022 16:57:06 - INFO - codeparrot_training - Step 43792: {'lr': 0.0004079122637769157, 'samples': 22422016, 'steps': 43792, 'loss/train': 1.9454540014266968} +03/05/2022 16:57:10 - INFO - codeparrot_training - Step 43793: {'lr': 0.0004079081496641388, 'samples': 22422528, 'steps': 43793, 'loss/train': 1.5321191549301147} +03/05/2022 16:57:11 - INFO - codeparrot_training - Skipping example with length 133 (seq_length=1024) +03/05/2022 16:57:15 - INFO - codeparrot_training - Step 43794: {'lr': 0.0004079040354802109, 'samples': 22423040, 'steps': 43794, 'loss/train': 1.175579309463501} +03/05/2022 16:57:18 - INFO - codeparrot_training - Step 43795: {'lr': 0.00040789992122513386, 'samples': 22423552, 'steps': 43795, 'loss/train': 1.3223199844360352} +03/05/2022 16:57:20 - INFO - codeparrot_training - Skipping example with length 964 (seq_length=1024) +03/05/2022 16:57:23 - INFO - codeparrot_training - Step 43796: {'lr': 0.00040789580689890953, 'samples': 22424064, 'steps': 43796, 'loss/train': 1.7115073204040527} +03/05/2022 16:57:27 - INFO - codeparrot_training - Step 43797: {'lr': 0.00040789169250153985, 'samples': 22424576, 'steps': 43797, 'loss/train': 2.380213499069214} +03/05/2022 16:57:29 - INFO - codeparrot_training - Skipping example with length 244 (seq_length=1024) +03/05/2022 16:57:32 - INFO - codeparrot_training - Step 43798: {'lr': 0.00040788757803302656, 'samples': 22425088, 'steps': 43798, 'loss/train': 1.8337550163269043} +03/05/2022 16:57:35 - INFO - codeparrot_training - Step 43799: {'lr': 0.00040788346349337156, 'samples': 22425600, 'steps': 43799, 'loss/train': 1.8149511814117432} +03/05/2022 16:57:38 - INFO - codeparrot_training - Skipping example with length 718 (seq_length=1024) +03/05/2022 16:57:40 - INFO - codeparrot_training - Step 43800: {'lr': 0.00040787934888257673, 'samples': 22426112, 'steps': 43800, 'loss/train': 1.8103344440460205} +03/05/2022 16:57:44 - INFO - codeparrot_training - Step 43801: {'lr': 0.00040787523420064394, 'samples': 22426624, 'steps': 43801, 'loss/train': 1.837502360343933} +03/05/2022 16:57:46 - INFO - codeparrot_training - Skipping example with length 900 (seq_length=1024) +03/05/2022 16:57:49 - INFO - codeparrot_training - Step 43802: {'lr': 0.00040787111944757496, 'samples': 22427136, 'steps': 43802, 'loss/train': 2.037440538406372} +03/05/2022 16:57:52 - INFO - codeparrot_training - Step 43803: {'lr': 0.0004078670046233717, 'samples': 22427648, 'steps': 43803, 'loss/train': 1.5786080360412598} +03/05/2022 16:57:55 - INFO - codeparrot_training - Skipping example with length 200 (seq_length=1024) +03/05/2022 16:57:57 - INFO - codeparrot_training - Step 43804: {'lr': 0.000407862889728036, 'samples': 22428160, 'steps': 43804, 'loss/train': 1.5454950332641602} +03/05/2022 16:58:01 - INFO - codeparrot_training - Step 43805: {'lr': 0.0004078587747615697, 'samples': 22428672, 'steps': 43805, 'loss/train': 1.5921725034713745} +03/05/2022 16:58:03 - INFO - codeparrot_training - Skipping example with length 939 (seq_length=1024) +03/05/2022 16:58:06 - INFO - codeparrot_training - Step 43806: {'lr': 0.00040785465972397475, 'samples': 22429184, 'steps': 43806, 'loss/train': 2.730365037918091} +03/05/2022 16:58:09 - INFO - codeparrot_training - Step 43807: {'lr': 0.0004078505446152528, 'samples': 22429696, 'steps': 43807, 'loss/train': 1.6530457735061646} +03/05/2022 16:58:11 - INFO - codeparrot_training - Skipping example with length 208 (seq_length=1024) +03/05/2022 16:58:14 - INFO - codeparrot_training - Step 43808: {'lr': 0.0004078464294354059, 'samples': 22430208, 'steps': 43808, 'loss/train': 2.3621537685394287} +03/05/2022 16:58:17 - INFO - codeparrot_training - Step 43809: {'lr': 0.00040784231418443585, 'samples': 22430720, 'steps': 43809, 'loss/train': 1.6734894514083862} +03/05/2022 16:58:20 - INFO - codeparrot_training - Skipping example with length 912 (seq_length=1024) +03/05/2022 16:58:23 - INFO - codeparrot_training - Step 43810: {'lr': 0.00040783819886234445, 'samples': 22431232, 'steps': 43810, 'loss/train': 1.0745031833648682} +03/05/2022 16:58:26 - INFO - codeparrot_training - Step 43811: {'lr': 0.00040783408346913366, 'samples': 22431744, 'steps': 43811, 'loss/train': 1.5872265100479126} +03/05/2022 16:58:28 - INFO - codeparrot_training - Skipping example with length 195 (seq_length=1024) +03/05/2022 16:58:31 - INFO - codeparrot_training - Step 43812: {'lr': 0.00040782996800480523, 'samples': 22432256, 'steps': 43812, 'loss/train': 2.334005355834961} +03/05/2022 16:58:34 - INFO - codeparrot_training - Step 43813: {'lr': 0.000407825852469361, 'samples': 22432768, 'steps': 43813, 'loss/train': 1.9279346466064453} +03/05/2022 16:58:37 - INFO - codeparrot_training - Skipping example with length 602 (seq_length=1024) +03/05/2022 16:58:40 - INFO - codeparrot_training - Step 43814: {'lr': 0.00040782173686280287, 'samples': 22433280, 'steps': 43814, 'loss/train': 1.9805338382720947} +03/05/2022 16:58:43 - INFO - codeparrot_training - Step 43815: {'lr': 0.0004078176211851328, 'samples': 22433792, 'steps': 43815, 'loss/train': 2.4957211017608643} +03/05/2022 16:58:45 - INFO - codeparrot_training - Skipping example with length 441 (seq_length=1024) +03/05/2022 16:58:48 - INFO - codeparrot_training - Step 43816: {'lr': 0.0004078135054363524, 'samples': 22434304, 'steps': 43816, 'loss/train': 1.9035494327545166} +03/05/2022 16:58:52 - INFO - codeparrot_training - Step 43817: {'lr': 0.00040780938961646385, 'samples': 22434816, 'steps': 43817, 'loss/train': 1.687270164489746} +03/05/2022 16:58:55 - INFO - codeparrot_training - Step 43818: {'lr': 0.00040780527372546874, 'samples': 22435328, 'steps': 43818, 'loss/train': 1.753762125968933} +03/05/2022 16:58:56 - INFO - codeparrot_training - Skipping example with length 665 (seq_length=1024) +03/05/2022 16:59:00 - INFO - codeparrot_training - Step 43819: {'lr': 0.000407801157763369, 'samples': 22435840, 'steps': 43819, 'loss/train': 1.92158842086792} +03/05/2022 16:59:03 - INFO - codeparrot_training - Step 43820: {'lr': 0.0004077970417301665, 'samples': 22436352, 'steps': 43820, 'loss/train': 1.331741452217102} +03/05/2022 16:59:04 - INFO - codeparrot_training - Skipping example with length 218 (seq_length=1024) +03/05/2022 16:59:08 - INFO - codeparrot_training - Step 43821: {'lr': 0.00040779292562586304, 'samples': 22436864, 'steps': 43821, 'loss/train': 2.6385693550109863} +03/05/2022 16:59:12 - INFO - codeparrot_training - Step 43822: {'lr': 0.0004077888094504606, 'samples': 22437376, 'steps': 43822, 'loss/train': 1.2542415857315063} +03/05/2022 16:59:12 - INFO - codeparrot_training - Skipping example with length 885 (seq_length=1024) +03/05/2022 16:59:17 - INFO - codeparrot_training - Step 43823: {'lr': 0.0004077846932039609, 'samples': 22437888, 'steps': 43823, 'loss/train': 1.5465662479400635} +03/05/2022 16:59:20 - INFO - codeparrot_training - Step 43824: {'lr': 0.00040778057688636594, 'samples': 22438400, 'steps': 43824, 'loss/train': 1.8899484872817993} +03/05/2022 16:59:21 - INFO - codeparrot_training - Skipping example with length 846 (seq_length=1024) +03/05/2022 16:59:25 - INFO - codeparrot_training - Step 43825: {'lr': 0.00040777646049767736, 'samples': 22438912, 'steps': 43825, 'loss/train': 2.057701349258423} +03/05/2022 16:59:28 - INFO - codeparrot_training - Step 43826: {'lr': 0.0004077723440378972, 'samples': 22439424, 'steps': 43826, 'loss/train': 1.5554970502853394} +03/05/2022 16:59:29 - INFO - codeparrot_training - Skipping example with length 200 (seq_length=1024) +03/05/2022 16:59:34 - INFO - codeparrot_training - Step 43827: {'lr': 0.0004077682275070273, 'samples': 22439936, 'steps': 43827, 'loss/train': 2.241832733154297} +03/05/2022 16:59:37 - INFO - codeparrot_training - Step 43828: {'lr': 0.00040776411090506944, 'samples': 22440448, 'steps': 43828, 'loss/train': 1.3987410068511963} +03/05/2022 16:59:37 - INFO - codeparrot_training - Skipping example with length 556 (seq_length=1024) +03/05/2022 16:59:42 - INFO - codeparrot_training - Step 43829: {'lr': 0.0004077599942320255, 'samples': 22440960, 'steps': 43829, 'loss/train': 1.2790558338165283} +03/05/2022 16:59:45 - INFO - codeparrot_training - Step 43830: {'lr': 0.00040775587748789733, 'samples': 22441472, 'steps': 43830, 'loss/train': 1.166007161140442} +03/05/2022 16:59:46 - INFO - codeparrot_training - Skipping example with length 88 (seq_length=1024) +03/05/2022 16:59:51 - INFO - codeparrot_training - Step 43831: {'lr': 0.0004077517606726868, 'samples': 22441984, 'steps': 43831, 'loss/train': 1.5942026376724243} +03/05/2022 16:59:54 - INFO - codeparrot_training - Step 43832: {'lr': 0.0004077476437863958, 'samples': 22442496, 'steps': 43832, 'loss/train': 1.0755010843276978} +03/05/2022 16:59:54 - INFO - codeparrot_training - Skipping example with length 622 (seq_length=1024) +03/05/2022 16:59:59 - INFO - codeparrot_training - Step 43833: {'lr': 0.0004077435268290261, 'samples': 22443008, 'steps': 43833, 'loss/train': 1.4858921766281128} +03/05/2022 17:00:02 - INFO - codeparrot_training - Step 43834: {'lr': 0.0004077394098005796, 'samples': 22443520, 'steps': 43834, 'loss/train': 1.7185603380203247} +03/05/2022 17:00:02 - INFO - codeparrot_training - Skipping example with length 451 (seq_length=1024) +03/05/2022 17:00:07 - INFO - codeparrot_training - Step 43835: {'lr': 0.00040773529270105816, 'samples': 22444032, 'steps': 43835, 'loss/train': 1.8293707370758057} +03/05/2022 17:00:11 - INFO - codeparrot_training - Step 43836: {'lr': 0.0004077311755304637, 'samples': 22444544, 'steps': 43836, 'loss/train': 1.56156325340271} +03/05/2022 17:00:11 - INFO - codeparrot_training - Skipping example with length 29 (seq_length=1024) +03/05/2022 17:00:16 - INFO - codeparrot_training - Step 43837: {'lr': 0.000407727058288798, 'samples': 22445056, 'steps': 43837, 'loss/train': 1.6813124418258667} +03/05/2022 17:00:19 - INFO - codeparrot_training - Step 43838: {'lr': 0.00040772294097606276, 'samples': 22445568, 'steps': 43838, 'loss/train': 1.2740013599395752} +03/05/2022 17:00:19 - INFO - codeparrot_training - Skipping example with length 303 (seq_length=1024) +03/05/2022 17:00:25 - INFO - codeparrot_training - Step 43839: {'lr': 0.0004077188235922601, 'samples': 22446080, 'steps': 43839, 'loss/train': 1.6485356092453003} +03/05/2022 17:00:28 - INFO - codeparrot_training - Step 43840: {'lr': 0.0004077147061373918, 'samples': 22446592, 'steps': 43840, 'loss/train': 2.145946502685547} +03/05/2022 17:00:30 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/05/2022 17:00:33 - INFO - codeparrot_training - Step 43841: {'lr': 0.00040771058861145963, 'samples': 22447104, 'steps': 43841, 'loss/train': 1.7775769233703613} +03/05/2022 17:00:36 - INFO - codeparrot_training - Step 43842: {'lr': 0.0004077064710144656, 'samples': 22447616, 'steps': 43842, 'loss/train': 2.2430338859558105} +03/05/2022 17:00:39 - INFO - codeparrot_training - Skipping example with length 22 (seq_length=1024) +03/05/2022 17:00:41 - INFO - codeparrot_training - Step 43843: {'lr': 0.0004077023533464114, 'samples': 22448128, 'steps': 43843, 'loss/train': 1.9397205114364624} +03/05/2022 17:00:45 - INFO - codeparrot_training - Step 43844: {'lr': 0.000407698235607299, 'samples': 22448640, 'steps': 43844, 'loss/train': 0.9130280017852783} +03/05/2022 17:00:47 - INFO - codeparrot_training - Skipping example with length 562 (seq_length=1024) +03/05/2022 17:00:50 - INFO - codeparrot_training - Step 43845: {'lr': 0.0004076941177971301, 'samples': 22449152, 'steps': 43845, 'loss/train': 0.5364644527435303} +03/05/2022 17:00:53 - INFO - codeparrot_training - Step 43846: {'lr': 0.0004076899999159067, 'samples': 22449664, 'steps': 43846, 'loss/train': 1.2022267580032349} +03/05/2022 17:00:55 - INFO - codeparrot_training - Skipping example with length 603 (seq_length=1024) +03/05/2022 17:00:58 - INFO - codeparrot_training - Step 43847: {'lr': 0.0004076858819636307, 'samples': 22450176, 'steps': 43847, 'loss/train': 1.6162538528442383} +03/05/2022 17:01:02 - INFO - codeparrot_training - Step 43848: {'lr': 0.0004076817639403038, 'samples': 22450688, 'steps': 43848, 'loss/train': 2.0896689891815186} +03/05/2022 17:01:04 - INFO - codeparrot_training - Skipping example with length 544 (seq_length=1024) +03/05/2022 17:01:07 - INFO - codeparrot_training - Step 43849: {'lr': 0.0004076776458459279, 'samples': 22451200, 'steps': 43849, 'loss/train': 1.6193335056304932} +03/05/2022 17:01:10 - INFO - codeparrot_training - Step 43850: {'lr': 0.00040767352768050503, 'samples': 22451712, 'steps': 43850, 'loss/train': 1.8727627992630005} +03/05/2022 17:01:12 - INFO - codeparrot_training - Skipping example with length 572 (seq_length=1024) +03/05/2022 17:01:15 - INFO - codeparrot_training - Step 43851: {'lr': 0.0004076694094440368, 'samples': 22452224, 'steps': 43851, 'loss/train': 0.19420208036899567} +03/05/2022 17:01:19 - INFO - codeparrot_training - Step 43852: {'lr': 0.0004076652911365252, 'samples': 22452736, 'steps': 43852, 'loss/train': 2.0492234230041504} +03/05/2022 17:01:21 - INFO - codeparrot_training - Skipping example with length 441 (seq_length=1024) +03/05/2022 17:01:24 - INFO - codeparrot_training - Step 43853: {'lr': 0.00040766117275797196, 'samples': 22453248, 'steps': 43853, 'loss/train': 0.8284090161323547} +03/05/2022 17:01:27 - INFO - codeparrot_training - Step 43854: {'lr': 0.0004076570543083792, 'samples': 22453760, 'steps': 43854, 'loss/train': 1.439929485321045} +03/05/2022 17:01:29 - INFO - codeparrot_training - Skipping example with length 779 (seq_length=1024) +03/05/2022 17:01:32 - INFO - codeparrot_training - Step 43855: {'lr': 0.0004076529357877485, 'samples': 22454272, 'steps': 43855, 'loss/train': 3.068974733352661} +03/05/2022 17:01:35 - INFO - codeparrot_training - Step 43856: {'lr': 0.00040764881719608184, 'samples': 22454784, 'steps': 43856, 'loss/train': 0.959490180015564} +03/05/2022 17:01:37 - INFO - codeparrot_training - Skipping example with length 444 (seq_length=1024) +03/05/2022 17:01:41 - INFO - codeparrot_training - Step 43857: {'lr': 0.000407644698533381, 'samples': 22455296, 'steps': 43857, 'loss/train': 1.4604078531265259} +03/05/2022 17:01:44 - INFO - codeparrot_training - Step 43858: {'lr': 0.00040764057979964793, 'samples': 22455808, 'steps': 43858, 'loss/train': 1.9327092170715332} +03/05/2022 17:01:47 - INFO - codeparrot_training - Step 43859: {'lr': 0.0004076364609948844, 'samples': 22456320, 'steps': 43859, 'loss/train': 3.244659423828125} +03/05/2022 17:01:47 - INFO - codeparrot_training - Skipping example with length 434 (seq_length=1024) +03/05/2022 17:01:53 - INFO - codeparrot_training - Step 43860: {'lr': 0.0004076323421190924, 'samples': 22456832, 'steps': 43860, 'loss/train': 1.955910563468933} +03/05/2022 17:01:56 - INFO - codeparrot_training - Skipping example with length 880 (seq_length=1024) +03/05/2022 17:01:58 - INFO - codeparrot_training - Step 43861: {'lr': 0.0004076282231722737, 'samples': 22457344, 'steps': 43861, 'loss/train': 2.271181344985962} +03/05/2022 17:02:01 - INFO - codeparrot_training - Step 43862: {'lr': 0.0004076241041544301, 'samples': 22457856, 'steps': 43862, 'loss/train': 1.480958342552185} +03/05/2022 17:02:05 - INFO - codeparrot_training - Step 43863: {'lr': 0.00040761998506556353, 'samples': 22458368, 'steps': 43863, 'loss/train': 0.4877408742904663} +03/05/2022 17:02:05 - INFO - codeparrot_training - Skipping example with length 476 (seq_length=1024) +03/05/2022 17:02:10 - INFO - codeparrot_training - Step 43864: {'lr': 0.0004076158659056758, 'samples': 22458880, 'steps': 43864, 'loss/train': 0.7723501324653625} +03/05/2022 17:02:13 - INFO - codeparrot_training - Step 43865: {'lr': 0.00040761174667476883, 'samples': 22459392, 'steps': 43865, 'loss/train': 1.313813328742981} +03/05/2022 17:02:13 - INFO - codeparrot_training - Skipping example with length 271 (seq_length=1024) +03/05/2022 17:02:18 - INFO - codeparrot_training - Step 43866: {'lr': 0.0004076076273728444, 'samples': 22459904, 'steps': 43866, 'loss/train': 2.0009844303131104} +03/05/2022 17:02:22 - INFO - codeparrot_training - Step 43867: {'lr': 0.0004076035079999045, 'samples': 22460416, 'steps': 43867, 'loss/train': 1.7654095888137817} +03/05/2022 17:02:22 - INFO - codeparrot_training - Skipping example with length 265 (seq_length=1024) +03/05/2022 17:02:27 - INFO - codeparrot_training - Step 43868: {'lr': 0.0004075993885559508, 'samples': 22460928, 'steps': 43868, 'loss/train': 1.6525152921676636} +03/05/2022 17:02:30 - INFO - codeparrot_training - Step 43869: {'lr': 0.0004075952690409852, 'samples': 22461440, 'steps': 43869, 'loss/train': 2.596529245376587} +03/05/2022 17:02:31 - INFO - codeparrot_training - Skipping example with length 599 (seq_length=1024) +03/05/2022 17:02:35 - INFO - codeparrot_training - Step 43870: {'lr': 0.00040759114945500974, 'samples': 22461952, 'steps': 43870, 'loss/train': 1.7956039905548096} +03/05/2022 17:02:38 - INFO - codeparrot_training - Step 43871: {'lr': 0.0004075870297980261, 'samples': 22462464, 'steps': 43871, 'loss/train': 1.3891104459762573} +03/05/2022 17:02:39 - INFO - codeparrot_training - Skipping example with length 570 (seq_length=1024) +03/05/2022 17:02:44 - INFO - codeparrot_training - Step 43872: {'lr': 0.0004075829100700361, 'samples': 22462976, 'steps': 43872, 'loss/train': 0.7473767399787903} +03/05/2022 17:02:47 - INFO - codeparrot_training - Step 43873: {'lr': 0.0004075787902710417, 'samples': 22463488, 'steps': 43873, 'loss/train': 1.9922208786010742} +03/05/2022 17:02:48 - INFO - codeparrot_training - Skipping example with length 828 (seq_length=1024) +03/05/2022 17:02:52 - INFO - codeparrot_training - Step 43874: {'lr': 0.0004075746704010448, 'samples': 22464000, 'steps': 43874, 'loss/train': 1.3193403482437134} +03/05/2022 17:02:55 - INFO - codeparrot_training - Step 43875: {'lr': 0.0004075705504600471, 'samples': 22464512, 'steps': 43875, 'loss/train': 2.283310890197754} +03/05/2022 17:02:56 - INFO - codeparrot_training - Skipping example with length 560 (seq_length=1024) +03/05/2022 17:03:01 - INFO - codeparrot_training - Step 43876: {'lr': 0.00040756643044805057, 'samples': 22465024, 'steps': 43876, 'loss/train': 2.2278008460998535} +03/05/2022 17:03:04 - INFO - codeparrot_training - Step 43877: {'lr': 0.0004075623103650571, 'samples': 22465536, 'steps': 43877, 'loss/train': 1.8205305337905884} +03/05/2022 17:03:04 - INFO - codeparrot_training - Skipping example with length 63 (seq_length=1024) +03/05/2022 17:03:09 - INFO - codeparrot_training - Step 43878: {'lr': 0.00040755819021106844, 'samples': 22466048, 'steps': 43878, 'loss/train': 2.260338306427002} +03/05/2022 17:03:12 - INFO - codeparrot_training - Step 43879: {'lr': 0.00040755406998608645, 'samples': 22466560, 'steps': 43879, 'loss/train': 0.7675941586494446} +03/05/2022 17:03:13 - INFO - codeparrot_training - Skipping example with length 284 (seq_length=1024) +03/05/2022 17:03:17 - INFO - codeparrot_training - Step 43880: {'lr': 0.00040754994969011306, 'samples': 22467072, 'steps': 43880, 'loss/train': 0.2639926075935364} +03/05/2022 17:03:21 - INFO - codeparrot_training - Step 43881: {'lr': 0.00040754582932315007, 'samples': 22467584, 'steps': 43881, 'loss/train': 1.5944522619247437} +03/05/2022 17:03:21 - INFO - codeparrot_training - Skipping example with length 587 (seq_length=1024) +03/05/2022 17:03:26 - INFO - codeparrot_training - Step 43882: {'lr': 0.0004075417088851994, 'samples': 22468096, 'steps': 43882, 'loss/train': 1.3711528778076172} +03/05/2022 17:03:29 - INFO - codeparrot_training - Step 43883: {'lr': 0.0004075375883762629, 'samples': 22468608, 'steps': 43883, 'loss/train': 1.6593151092529297} +03/05/2022 17:03:30 - INFO - codeparrot_training - Skipping example with length 794 (seq_length=1024) +03/05/2022 17:03:35 - INFO - codeparrot_training - Step 43884: {'lr': 0.0004075334677963423, 'samples': 22469120, 'steps': 43884, 'loss/train': 3.2335045337677} +03/05/2022 17:03:38 - INFO - codeparrot_training - Step 43885: {'lr': 0.0004075293471454396, 'samples': 22469632, 'steps': 43885, 'loss/train': 1.0621802806854248} +03/05/2022 17:03:38 - INFO - codeparrot_training - Skipping example with length 513 (seq_length=1024) +03/05/2022 17:03:43 - INFO - codeparrot_training - Step 43886: {'lr': 0.0004075252264235566, 'samples': 22470144, 'steps': 43886, 'loss/train': 0.9870612621307373} +03/05/2022 17:03:46 - INFO - codeparrot_training - Step 43887: {'lr': 0.0004075211056306951, 'samples': 22470656, 'steps': 43887, 'loss/train': 2.7605912685394287} +03/05/2022 17:03:47 - INFO - codeparrot_training - Skipping example with length 202 (seq_length=1024) +03/05/2022 17:03:51 - INFO - codeparrot_training - Step 43888: {'lr': 0.00040751698476685716, 'samples': 22471168, 'steps': 43888, 'loss/train': 1.9867603778839111} +03/05/2022 17:03:55 - INFO - codeparrot_training - Step 43889: {'lr': 0.00040751286383204437, 'samples': 22471680, 'steps': 43889, 'loss/train': 1.8042305707931519} +03/05/2022 17:03:55 - INFO - codeparrot_training - Skipping example with length 38 (seq_length=1024) +03/05/2022 17:04:00 - INFO - codeparrot_training - Step 43890: {'lr': 0.0004075087428262588, 'samples': 22472192, 'steps': 43890, 'loss/train': 6.065919876098633} +03/05/2022 17:04:03 - INFO - codeparrot_training - Step 43891: {'lr': 0.0004075046217495022, 'samples': 22472704, 'steps': 43891, 'loss/train': 1.916242003440857} +03/05/2022 17:04:05 - INFO - codeparrot_training - Skipping example with length 712 (seq_length=1024) +03/05/2022 17:04:08 - INFO - codeparrot_training - Step 43892: {'lr': 0.00040750050060177643, 'samples': 22473216, 'steps': 43892, 'loss/train': 2.263683795928955} +03/05/2022 17:04:12 - INFO - codeparrot_training - Step 43893: {'lr': 0.00040749637938308336, 'samples': 22473728, 'steps': 43893, 'loss/train': 1.5873838663101196} +03/05/2022 17:04:14 - INFO - codeparrot_training - Skipping example with length 856 (seq_length=1024) +03/05/2022 17:04:17 - INFO - codeparrot_training - Step 43894: {'lr': 0.00040749225809342485, 'samples': 22474240, 'steps': 43894, 'loss/train': 5.848801136016846} +03/05/2022 17:04:20 - INFO - codeparrot_training - Step 43895: {'lr': 0.00040748813673280277, 'samples': 22474752, 'steps': 43895, 'loss/train': 1.6977359056472778} +03/05/2022 17:04:24 - INFO - codeparrot_training - Step 43896: {'lr': 0.0004074840153012189, 'samples': 22475264, 'steps': 43896, 'loss/train': 2.025935173034668} +03/05/2022 17:04:24 - INFO - codeparrot_training - Skipping example with length 865 (seq_length=1024) +03/05/2022 17:04:29 - INFO - codeparrot_training - Step 43897: {'lr': 0.0004074798937986753, 'samples': 22475776, 'steps': 43897, 'loss/train': 0.9327576160430908} +03/05/2022 17:04:32 - INFO - codeparrot_training - Step 43898: {'lr': 0.00040747577222517364, 'samples': 22476288, 'steps': 43898, 'loss/train': 1.450350046157837} +03/05/2022 17:04:33 - INFO - codeparrot_training - Skipping example with length 695 (seq_length=1024) +03/05/2022 17:04:37 - INFO - codeparrot_training - Step 43899: {'lr': 0.0004074716505807158, 'samples': 22476800, 'steps': 43899, 'loss/train': 1.9614824056625366} +03/05/2022 17:04:41 - INFO - codeparrot_training - Step 43900: {'lr': 0.0004074675288653037, 'samples': 22477312, 'steps': 43900, 'loss/train': 2.097571849822998} +03/05/2022 17:04:41 - INFO - codeparrot_training - Skipping example with length 669 (seq_length=1024) +03/05/2022 17:04:46 - INFO - codeparrot_training - Step 43901: {'lr': 0.0004074634070789391, 'samples': 22477824, 'steps': 43901, 'loss/train': 1.5937901735305786} +03/05/2022 17:04:49 - INFO - codeparrot_training - Step 43902: {'lr': 0.0004074592852216239, 'samples': 22478336, 'steps': 43902, 'loss/train': 2.261753559112549} +03/05/2022 17:04:50 - INFO - codeparrot_training - Skipping example with length 635 (seq_length=1024) +03/05/2022 17:04:54 - INFO - codeparrot_training - Step 43903: {'lr': 0.0004074551632933601, 'samples': 22478848, 'steps': 43903, 'loss/train': 1.6696419715881348} +03/05/2022 17:04:58 - INFO - codeparrot_training - Step 43904: {'lr': 0.00040745104129414933, 'samples': 22479360, 'steps': 43904, 'loss/train': 2.5564606189727783} +03/05/2022 17:05:03 - INFO - codeparrot_training - Step 43905: {'lr': 0.0004074469192239936, 'samples': 22479872, 'steps': 43905, 'loss/train': 3.1162302494049072} +03/05/2022 17:05:06 - INFO - codeparrot_training - Step 43906: {'lr': 0.0004074427970828947, 'samples': 22480384, 'steps': 43906, 'loss/train': 1.4316298961639404} +03/05/2022 17:05:07 - INFO - codeparrot_training - Skipping example with length 990 (seq_length=1024) +03/05/2022 17:05:11 - INFO - codeparrot_training - Step 43907: {'lr': 0.00040743867487085444, 'samples': 22480896, 'steps': 43907, 'loss/train': 1.3243454694747925} +03/05/2022 17:05:15 - INFO - codeparrot_training - Step 43908: {'lr': 0.0004074345525878748, 'samples': 22481408, 'steps': 43908, 'loss/train': 1.1086838245391846} +03/05/2022 17:05:15 - INFO - codeparrot_training - Skipping example with length 1009 (seq_length=1024) +03/05/2022 17:05:20 - INFO - codeparrot_training - Step 43909: {'lr': 0.0004074304302339576, 'samples': 22481920, 'steps': 43909, 'loss/train': 0.9239487051963806} +03/05/2022 17:05:23 - INFO - codeparrot_training - Step 43910: {'lr': 0.0004074263078091046, 'samples': 22482432, 'steps': 43910, 'loss/train': 1.3239798545837402} +03/05/2022 17:05:24 - INFO - codeparrot_training - Skipping example with length 712 (seq_length=1024) +03/05/2022 17:05:28 - INFO - codeparrot_training - Step 43911: {'lr': 0.00040742218531331786, 'samples': 22482944, 'steps': 43911, 'loss/train': 1.6117647886276245} +03/05/2022 17:05:32 - INFO - codeparrot_training - Step 43912: {'lr': 0.0004074180627465991, 'samples': 22483456, 'steps': 43912, 'loss/train': 1.4238600730895996} +03/05/2022 17:05:32 - INFO - codeparrot_training - Skipping example with length 376 (seq_length=1024) +03/05/2022 17:05:37 - INFO - codeparrot_training - Step 43913: {'lr': 0.00040741394010895013, 'samples': 22483968, 'steps': 43913, 'loss/train': 2.1622467041015625} +03/05/2022 17:05:40 - INFO - codeparrot_training - Step 43914: {'lr': 0.0004074098174003729, 'samples': 22484480, 'steps': 43914, 'loss/train': 1.690324306488037} +03/05/2022 17:05:42 - INFO - codeparrot_training - Skipping example with length 174 (seq_length=1024) +03/05/2022 17:05:46 - INFO - codeparrot_training - Step 43915: {'lr': 0.0004074056946208692, 'samples': 22484992, 'steps': 43915, 'loss/train': 1.8282055854797363} +03/05/2022 17:05:49 - INFO - codeparrot_training - Step 43916: {'lr': 0.0004074015717704409, 'samples': 22485504, 'steps': 43916, 'loss/train': 1.258918046951294} +03/05/2022 17:05:51 - INFO - codeparrot_training - Skipping example with length 676 (seq_length=1024) +03/05/2022 17:05:54 - INFO - codeparrot_training - Step 43917: {'lr': 0.00040739744884908994, 'samples': 22486016, 'steps': 43917, 'loss/train': 1.503737449645996} +03/05/2022 17:05:57 - INFO - codeparrot_training - Step 43918: {'lr': 0.00040739332585681807, 'samples': 22486528, 'steps': 43918, 'loss/train': 1.3356200456619263} +03/05/2022 17:06:00 - INFO - codeparrot_training - Skipping example with length 1015 (seq_length=1024) +03/05/2022 17:06:03 - INFO - codeparrot_training - Step 43919: {'lr': 0.00040738920279362724, 'samples': 22487040, 'steps': 43919, 'loss/train': 1.6052608489990234} +03/05/2022 17:06:06 - INFO - codeparrot_training - Step 43920: {'lr': 0.00040738507965951923, 'samples': 22487552, 'steps': 43920, 'loss/train': 0.9439377188682556} +03/05/2022 17:06:08 - INFO - codeparrot_training - Skipping example with length 270 (seq_length=1024) +03/05/2022 17:06:11 - INFO - codeparrot_training - Step 43921: {'lr': 0.0004073809564544959, 'samples': 22488064, 'steps': 43921, 'loss/train': 1.9156811237335205} +03/05/2022 17:06:14 - INFO - codeparrot_training - Step 43922: {'lr': 0.0004073768331785592, 'samples': 22488576, 'steps': 43922, 'loss/train': 1.0275382995605469} +03/05/2022 17:06:17 - INFO - codeparrot_training - Skipping example with length 677 (seq_length=1024) +03/05/2022 17:06:19 - INFO - codeparrot_training - Step 43923: {'lr': 0.0004073727098317109, 'samples': 22489088, 'steps': 43923, 'loss/train': 1.474462628364563} +03/05/2022 17:06:22 - INFO - codeparrot_training - Step 43924: {'lr': 0.0004073685864139529, 'samples': 22489600, 'steps': 43924, 'loss/train': 1.7744269371032715} +03/05/2022 17:06:25 - INFO - codeparrot_training - Skipping example with length 713 (seq_length=1024) +03/05/2022 17:06:28 - INFO - codeparrot_training - Step 43925: {'lr': 0.00040736446292528704, 'samples': 22490112, 'steps': 43925, 'loss/train': 0.6797260642051697} +03/05/2022 17:06:31 - INFO - codeparrot_training - Step 43926: {'lr': 0.0004073603393657152, 'samples': 22490624, 'steps': 43926, 'loss/train': 1.4901611804962158} +03/05/2022 17:06:34 - INFO - codeparrot_training - Skipping example with length 865 (seq_length=1024) +03/05/2022 17:06:36 - INFO - codeparrot_training - Step 43927: {'lr': 0.0004073562157352392, 'samples': 22491136, 'steps': 43927, 'loss/train': 1.5622234344482422} +03/05/2022 17:06:40 - INFO - codeparrot_training - Step 43928: {'lr': 0.00040735209203386093, 'samples': 22491648, 'steps': 43928, 'loss/train': 1.8684837818145752} +03/05/2022 17:06:42 - INFO - codeparrot_training - Skipping example with length 816 (seq_length=1024) +03/05/2022 17:06:45 - INFO - codeparrot_training - Step 43929: {'lr': 0.00040734796826158226, 'samples': 22492160, 'steps': 43929, 'loss/train': 1.1911612749099731} +03/05/2022 17:06:48 - INFO - codeparrot_training - Step 43930: {'lr': 0.000407343844418405, 'samples': 22492672, 'steps': 43930, 'loss/train': 0.7678801417350769} +03/05/2022 17:06:51 - INFO - codeparrot_training - Skipping example with length 747 (seq_length=1024) +03/05/2022 17:06:53 - INFO - codeparrot_training - Step 43931: {'lr': 0.000407339720504331, 'samples': 22493184, 'steps': 43931, 'loss/train': 1.5152851343154907} +03/05/2022 17:06:56 - INFO - codeparrot_training - Step 43932: {'lr': 0.00040733559651936216, 'samples': 22493696, 'steps': 43932, 'loss/train': 1.4474480152130127} +03/05/2022 17:06:59 - INFO - codeparrot_training - Skipping example with length 1018 (seq_length=1024) +03/05/2022 17:07:02 - INFO - codeparrot_training - Step 43933: {'lr': 0.0004073314724635003, 'samples': 22494208, 'steps': 43933, 'loss/train': 1.9507339000701904} +03/05/2022 17:07:05 - INFO - codeparrot_training - Step 43934: {'lr': 0.0004073273483367474, 'samples': 22494720, 'steps': 43934, 'loss/train': 1.06267511844635} +03/05/2022 17:07:08 - INFO - codeparrot_training - Skipping example with length 508 (seq_length=1024) +03/05/2022 17:07:10 - INFO - codeparrot_training - Step 43935: {'lr': 0.0004073232241391052, 'samples': 22495232, 'steps': 43935, 'loss/train': 1.9124641418457031} +03/05/2022 17:07:14 - INFO - codeparrot_training - Step 43936: {'lr': 0.00040731909987057547, 'samples': 22495744, 'steps': 43936, 'loss/train': 0.9738065600395203} +03/05/2022 17:07:16 - INFO - codeparrot_training - Skipping example with length 219 (seq_length=1024) +03/05/2022 17:07:19 - INFO - codeparrot_training - Step 43937: {'lr': 0.0004073149755311603, 'samples': 22496256, 'steps': 43937, 'loss/train': 1.2642452716827393} +03/05/2022 17:07:22 - INFO - codeparrot_training - Step 43938: {'lr': 0.0004073108511208614, 'samples': 22496768, 'steps': 43938, 'loss/train': 1.7471874952316284} +03/05/2022 17:07:25 - INFO - codeparrot_training - Skipping example with length 41 (seq_length=1024) +03/05/2022 17:07:27 - INFO - codeparrot_training - Step 43939: {'lr': 0.0004073067266396807, 'samples': 22497280, 'steps': 43939, 'loss/train': 0.8563461899757385} +03/05/2022 17:07:30 - INFO - codeparrot_training - Step 43940: {'lr': 0.00040730260208761995, 'samples': 22497792, 'steps': 43940, 'loss/train': 1.312140941619873} +03/05/2022 17:07:33 - INFO - codeparrot_training - Skipping example with length 746 (seq_length=1024) +03/05/2022 17:07:36 - INFO - codeparrot_training - Step 43941: {'lr': 0.0004072984774646811, 'samples': 22498304, 'steps': 43941, 'loss/train': 0.9718369245529175} +03/05/2022 17:07:39 - INFO - codeparrot_training - Step 43942: {'lr': 0.0004072943527708659, 'samples': 22498816, 'steps': 43942, 'loss/train': 2.0765297412872314} +03/05/2022 17:07:42 - INFO - codeparrot_training - Step 43943: {'lr': 0.00040729022800617637, 'samples': 22499328, 'steps': 43943, 'loss/train': 0.7074843645095825} +03/05/2022 17:07:43 - INFO - codeparrot_training - Skipping example with length 345 (seq_length=1024) +03/05/2022 17:07:47 - INFO - codeparrot_training - Step 43944: {'lr': 0.00040728610317061433, 'samples': 22499840, 'steps': 43944, 'loss/train': 1.1918139457702637} +03/05/2022 17:07:51 - INFO - codeparrot_training - Step 43945: {'lr': 0.0004072819782641816, 'samples': 22500352, 'steps': 43945, 'loss/train': 2.35152268409729} +03/05/2022 17:07:51 - INFO - codeparrot_training - Skipping example with length 185 (seq_length=1024) +03/05/2022 17:07:56 - INFO - codeparrot_training - Step 43946: {'lr': 0.00040727785328687995, 'samples': 22500864, 'steps': 43946, 'loss/train': 1.532765507698059} +03/05/2022 17:07:59 - INFO - codeparrot_training - Step 43947: {'lr': 0.00040727372823871135, 'samples': 22501376, 'steps': 43947, 'loss/train': 1.9196816682815552} +03/05/2022 17:08:00 - INFO - codeparrot_training - Skipping example with length 869 (seq_length=1024) +03/05/2022 17:08:04 - INFO - codeparrot_training - Step 43948: {'lr': 0.00040726960311967766, 'samples': 22501888, 'steps': 43948, 'loss/train': 0.8873307704925537} +03/05/2022 17:08:08 - INFO - codeparrot_training - Step 43949: {'lr': 0.0004072654779297807, 'samples': 22502400, 'steps': 43949, 'loss/train': 2.539391040802002} +03/05/2022 17:08:08 - INFO - codeparrot_training - Skipping example with length 427 (seq_length=1024) +03/05/2022 17:08:13 - INFO - codeparrot_training - Step 43950: {'lr': 0.0004072613526690223, 'samples': 22502912, 'steps': 43950, 'loss/train': 2.182304859161377} +03/05/2022 17:08:16 - INFO - codeparrot_training - Step 43951: {'lr': 0.00040725722733740444, 'samples': 22503424, 'steps': 43951, 'loss/train': 1.8063043355941772} +03/05/2022 17:08:16 - INFO - codeparrot_training - Skipping example with length 862 (seq_length=1024) +03/05/2022 17:08:21 - INFO - codeparrot_training - Step 43952: {'lr': 0.0004072531019349289, 'samples': 22503936, 'steps': 43952, 'loss/train': 0.6131553649902344} +03/05/2022 17:08:24 - INFO - codeparrot_training - Skipping example with length 650 (seq_length=1024) +03/05/2022 17:08:27 - INFO - codeparrot_training - Step 43953: {'lr': 0.00040724897646159753, 'samples': 22504448, 'steps': 43953, 'loss/train': 1.4741004705429077} +03/05/2022 17:08:30 - INFO - codeparrot_training - Step 43954: {'lr': 0.0004072448509174121, 'samples': 22504960, 'steps': 43954, 'loss/train': 0.5555626749992371} +03/05/2022 17:08:33 - INFO - codeparrot_training - Step 43955: {'lr': 0.00040724072530237465, 'samples': 22505472, 'steps': 43955, 'loss/train': 1.4066179990768433} +03/05/2022 17:08:33 - INFO - codeparrot_training - Skipping example with length 131 (seq_length=1024) +03/05/2022 17:08:38 - INFO - codeparrot_training - Step 43956: {'lr': 0.00040723659961648694, 'samples': 22505984, 'steps': 43956, 'loss/train': 1.3820515871047974} +03/05/2022 17:08:41 - INFO - codeparrot_training - Skipping example with length 747 (seq_length=1024) +03/05/2022 17:08:44 - INFO - codeparrot_training - Step 43957: {'lr': 0.0004072324738597509, 'samples': 22506496, 'steps': 43957, 'loss/train': 1.2527287006378174} +03/05/2022 17:08:47 - INFO - codeparrot_training - Step 43958: {'lr': 0.00040722834803216834, 'samples': 22507008, 'steps': 43958, 'loss/train': 1.0778868198394775} +03/05/2022 17:08:50 - INFO - codeparrot_training - Step 43959: {'lr': 0.000407224222133741, 'samples': 22507520, 'steps': 43959, 'loss/train': 1.3982579708099365} +03/05/2022 17:08:50 - INFO - codeparrot_training - Skipping example with length 1018 (seq_length=1024) +03/05/2022 17:08:56 - INFO - codeparrot_training - Step 43960: {'lr': 0.00040722009616447094, 'samples': 22508032, 'steps': 43960, 'loss/train': 1.528932809829712} +03/05/2022 17:08:59 - INFO - codeparrot_training - Step 43961: {'lr': 0.0004072159701243599, 'samples': 22508544, 'steps': 43961, 'loss/train': 1.5923629999160767} +03/05/2022 17:09:01 - INFO - codeparrot_training - Skipping example with length 11 (seq_length=1024) +03/05/2022 17:09:04 - INFO - codeparrot_training - Step 43962: {'lr': 0.00040721184401340977, 'samples': 22509056, 'steps': 43962, 'loss/train': 2.082003355026245} +03/05/2022 17:09:07 - INFO - codeparrot_training - Step 43963: {'lr': 0.00040720771783162236, 'samples': 22509568, 'steps': 43963, 'loss/train': 1.7758045196533203} +03/05/2022 17:09:09 - INFO - codeparrot_training - Skipping example with length 91 (seq_length=1024) +03/05/2022 17:09:12 - INFO - codeparrot_training - Step 43964: {'lr': 0.0004072035915789997, 'samples': 22510080, 'steps': 43964, 'loss/train': 1.9124915599822998} +03/05/2022 17:09:16 - INFO - codeparrot_training - Step 43965: {'lr': 0.0004071994652555434, 'samples': 22510592, 'steps': 43965, 'loss/train': 0.6017963290214539} +03/05/2022 17:09:17 - INFO - codeparrot_training - Skipping example with length 689 (seq_length=1024) +03/05/2022 17:09:21 - INFO - codeparrot_training - Step 43966: {'lr': 0.0004071953388612555, 'samples': 22511104, 'steps': 43966, 'loss/train': 0.23642615973949432} +03/05/2022 17:09:24 - INFO - codeparrot_training - Step 43967: {'lr': 0.0004071912123961379, 'samples': 22511616, 'steps': 43967, 'loss/train': 1.3218365907669067} +03/05/2022 17:09:26 - INFO - codeparrot_training - Skipping example with length 620 (seq_length=1024) +03/05/2022 17:09:29 - INFO - codeparrot_training - Step 43968: {'lr': 0.00040718708586019226, 'samples': 22512128, 'steps': 43968, 'loss/train': 0.8795645236968994} +03/05/2022 17:09:32 - INFO - codeparrot_training - Step 43969: {'lr': 0.00040718295925342053, 'samples': 22512640, 'steps': 43969, 'loss/train': 1.238362193107605} +03/05/2022 17:09:34 - INFO - codeparrot_training - Skipping example with length 401 (seq_length=1024) +03/05/2022 17:09:38 - INFO - codeparrot_training - Step 43970: {'lr': 0.0004071788325758246, 'samples': 22513152, 'steps': 43970, 'loss/train': 1.7536237239837646} +03/05/2022 17:09:41 - INFO - codeparrot_training - Step 43971: {'lr': 0.00040717470582740634, 'samples': 22513664, 'steps': 43971, 'loss/train': 1.5556763410568237} +03/05/2022 17:09:42 - INFO - codeparrot_training - Skipping example with length 637 (seq_length=1024) +03/05/2022 17:09:46 - INFO - codeparrot_training - Step 43972: {'lr': 0.0004071705790081676, 'samples': 22514176, 'steps': 43972, 'loss/train': 1.7886947393417358} +03/05/2022 17:09:49 - INFO - codeparrot_training - Step 43973: {'lr': 0.0004071664521181102, 'samples': 22514688, 'steps': 43973, 'loss/train': 1.4601370096206665} +03/05/2022 17:09:50 - INFO - codeparrot_training - Skipping example with length 87 (seq_length=1024) +03/05/2022 17:09:55 - INFO - codeparrot_training - Step 43974: {'lr': 0.00040716232515723596, 'samples': 22515200, 'steps': 43974, 'loss/train': 0.8933852314949036} +03/05/2022 17:09:58 - INFO - codeparrot_training - Step 43975: {'lr': 0.00040715819812554686, 'samples': 22515712, 'steps': 43975, 'loss/train': 0.851549506187439} +03/05/2022 17:10:01 - INFO - codeparrot_training - Step 43976: {'lr': 0.0004071540710230447, 'samples': 22516224, 'steps': 43976, 'loss/train': 1.2024188041687012} +03/05/2022 17:10:02 - INFO - codeparrot_training - Skipping example with length 395 (seq_length=1024) +03/05/2022 17:10:07 - INFO - codeparrot_training - Step 43977: {'lr': 0.0004071499438497314, 'samples': 22516736, 'steps': 43977, 'loss/train': 1.3647747039794922} +03/05/2022 17:10:10 - INFO - codeparrot_training - Step 43978: {'lr': 0.0004071458166056087, 'samples': 22517248, 'steps': 43978, 'loss/train': 1.0749295949935913} +03/05/2022 17:10:11 - INFO - codeparrot_training - Skipping example with length 507 (seq_length=1024) +03/05/2022 17:10:15 - INFO - codeparrot_training - Step 43979: {'lr': 0.00040714168929067854, 'samples': 22517760, 'steps': 43979, 'loss/train': 1.9677598476409912} +03/05/2022 17:10:18 - INFO - codeparrot_training - Step 43980: {'lr': 0.0004071375619049427, 'samples': 22518272, 'steps': 43980, 'loss/train': 1.8645962476730347} +03/05/2022 17:10:19 - INFO - codeparrot_training - Skipping example with length 72 (seq_length=1024) +03/05/2022 17:10:24 - INFO - codeparrot_training - Step 43981: {'lr': 0.0004071334344484031, 'samples': 22518784, 'steps': 43981, 'loss/train': 2.1337404251098633} +03/05/2022 17:10:27 - INFO - codeparrot_training - Step 43982: {'lr': 0.00040712930692106164, 'samples': 22519296, 'steps': 43982, 'loss/train': 1.523332118988037} +03/05/2022 17:10:27 - INFO - codeparrot_training - Skipping example with length 613 (seq_length=1024) +03/05/2022 17:10:32 - INFO - codeparrot_training - Step 43983: {'lr': 0.00040712517932292016, 'samples': 22519808, 'steps': 43983, 'loss/train': 1.663478136062622} +03/05/2022 17:10:35 - INFO - codeparrot_training - Step 43984: {'lr': 0.00040712105165398044, 'samples': 22520320, 'steps': 43984, 'loss/train': 1.6610372066497803} +03/05/2022 17:10:36 - INFO - codeparrot_training - Skipping example with length 770 (seq_length=1024) +03/05/2022 17:10:41 - INFO - codeparrot_training - Step 43985: {'lr': 0.0004071169239142445, 'samples': 22520832, 'steps': 43985, 'loss/train': 1.7040987014770508} +03/05/2022 17:10:44 - INFO - codeparrot_training - Step 43986: {'lr': 0.000407112796103714, 'samples': 22521344, 'steps': 43986, 'loss/train': 1.4652185440063477} +03/05/2022 17:10:44 - INFO - codeparrot_training - Skipping example with length 28 (seq_length=1024) +03/05/2022 17:10:49 - INFO - codeparrot_training - Step 43987: {'lr': 0.0004071086682223909, 'samples': 22521856, 'steps': 43987, 'loss/train': 1.0532896518707275} +03/05/2022 17:10:52 - INFO - codeparrot_training - Step 43988: {'lr': 0.0004071045402702771, 'samples': 22522368, 'steps': 43988, 'loss/train': 2.0892438888549805} +03/05/2022 17:10:53 - INFO - codeparrot_training - Skipping example with length 780 (seq_length=1024) +03/05/2022 17:10:57 - INFO - codeparrot_training - Step 43989: {'lr': 0.0004071004122473744, 'samples': 22522880, 'steps': 43989, 'loss/train': 1.956278681755066} +03/05/2022 17:11:01 - INFO - codeparrot_training - Step 43990: {'lr': 0.0004070962841536847, 'samples': 22523392, 'steps': 43990, 'loss/train': 2.181784152984619} +03/05/2022 17:11:01 - INFO - codeparrot_training - Skipping example with length 391 (seq_length=1024) +03/05/2022 17:11:06 - INFO - codeparrot_training - Step 43991: {'lr': 0.0004070921559892098, 'samples': 22523904, 'steps': 43991, 'loss/train': 1.472664713859558} +03/05/2022 17:11:09 - INFO - codeparrot_training - Step 43992: {'lr': 0.00040708802775395165, 'samples': 22524416, 'steps': 43992, 'loss/train': 1.005934238433838} +03/05/2022 17:11:10 - INFO - codeparrot_training - Skipping example with length 810 (seq_length=1024) +03/05/2022 17:11:15 - INFO - codeparrot_training - Step 43993: {'lr': 0.000407083899447912, 'samples': 22524928, 'steps': 43993, 'loss/train': 2.130798578262329} +03/05/2022 17:11:18 - INFO - codeparrot_training - Step 43994: {'lr': 0.00040707977107109285, 'samples': 22525440, 'steps': 43994, 'loss/train': 1.9464563131332397} +03/05/2022 17:11:18 - INFO - codeparrot_training - Skipping example with length 617 (seq_length=1024) +03/05/2022 17:11:23 - INFO - codeparrot_training - Step 43995: {'lr': 0.00040707564262349594, 'samples': 22525952, 'steps': 43995, 'loss/train': 1.53965163230896} +03/05/2022 17:11:26 - INFO - codeparrot_training - Step 43996: {'lr': 0.0004070715141051231, 'samples': 22526464, 'steps': 43996, 'loss/train': 1.7634841203689575} +03/05/2022 17:11:27 - INFO - codeparrot_training - Skipping example with length 208 (seq_length=1024) +03/05/2022 17:11:32 - INFO - codeparrot_training - Step 43997: {'lr': 0.00040706738551597634, 'samples': 22526976, 'steps': 43997, 'loss/train': 1.2593517303466797} +03/05/2022 17:11:35 - INFO - codeparrot_training - Step 43998: {'lr': 0.0004070632568560574, 'samples': 22527488, 'steps': 43998, 'loss/train': 1.8829329013824463} +03/05/2022 17:11:35 - INFO - codeparrot_training - Skipping example with length 436 (seq_length=1024) +03/05/2022 17:11:40 - INFO - codeparrot_training - Step 43999: {'lr': 0.0004070591281253682, 'samples': 22528000, 'steps': 43999, 'loss/train': 0.640451192855835} +03/05/2022 17:11:43 - INFO - codeparrot_training - Step 44000: {'lr': 0.0004070549993239106, 'samples': 22528512, 'steps': 44000, 'loss/train': 1.8574333190917969} +03/05/2022 17:11:43 - INFO - codeparrot_training - Skipping example with length 497 (seq_length=1024) +03/05/2022 17:11:48 - INFO - codeparrot_training - Step 44001: {'lr': 0.0004070508704516864, 'samples': 22529024, 'steps': 44001, 'loss/train': 2.269308567047119} +03/05/2022 17:11:51 - INFO - codeparrot_training - Step 44002: {'lr': 0.00040704674150869753, 'samples': 22529536, 'steps': 44002, 'loss/train': 1.8191622495651245} +03/05/2022 17:11:52 - INFO - codeparrot_training - Skipping example with length 335 (seq_length=1024) +03/05/2022 17:11:57 - INFO - codeparrot_training - Step 44003: {'lr': 0.0004070426124949458, 'samples': 22530048, 'steps': 44003, 'loss/train': 2.4811599254608154} +03/05/2022 17:12:00 - INFO - codeparrot_training - Step 44004: {'lr': 0.00040703848341043313, 'samples': 22530560, 'steps': 44004, 'loss/train': 2.2507011890411377} +03/05/2022 17:12:00 - INFO - codeparrot_training - Skipping example with length 742 (seq_length=1024) +03/05/2022 17:12:05 - INFO - codeparrot_training - Step 44005: {'lr': 0.00040703435425516136, 'samples': 22531072, 'steps': 44005, 'loss/train': 1.3918503522872925} +03/05/2022 17:12:08 - INFO - codeparrot_training - Step 44006: {'lr': 0.0004070302250291322, 'samples': 22531584, 'steps': 44006, 'loss/train': 1.966025710105896} +03/05/2022 17:12:08 - INFO - codeparrot_training - Skipping example with length 467 (seq_length=1024) +03/05/2022 17:12:14 - INFO - codeparrot_training - Step 44007: {'lr': 0.0004070260957323478, 'samples': 22532096, 'steps': 44007, 'loss/train': 1.8530805110931396} +03/05/2022 17:12:17 - INFO - codeparrot_training - Step 44008: {'lr': 0.0004070219663648098, 'samples': 22532608, 'steps': 44008, 'loss/train': 2.3798091411590576} +03/05/2022 17:12:18 - INFO - codeparrot_training - Skipping example with length 835 (seq_length=1024) +03/05/2022 17:12:22 - INFO - codeparrot_training - Step 44009: {'lr': 0.0004070178369265201, 'samples': 22533120, 'steps': 44009, 'loss/train': 1.4462999105453491} +03/05/2022 17:12:25 - INFO - codeparrot_training - Step 44010: {'lr': 0.00040701370741748057, 'samples': 22533632, 'steps': 44010, 'loss/train': 1.7006003856658936} +03/05/2022 17:12:26 - INFO - codeparrot_training - Skipping example with length 505 (seq_length=1024) +03/05/2022 17:12:31 - INFO - codeparrot_training - Step 44011: {'lr': 0.0004070095778376932, 'samples': 22534144, 'steps': 44011, 'loss/train': 1.4745851755142212} +03/05/2022 17:12:34 - INFO - codeparrot_training - Step 44012: {'lr': 0.0004070054481871597, 'samples': 22534656, 'steps': 44012, 'loss/train': 2.1193764209747314} +03/05/2022 17:12:34 - INFO - codeparrot_training - Skipping example with length 221 (seq_length=1024) +03/05/2022 17:12:39 - INFO - codeparrot_training - Step 44013: {'lr': 0.00040700131846588185, 'samples': 22535168, 'steps': 44013, 'loss/train': 1.8424246311187744} +03/05/2022 17:12:42 - INFO - codeparrot_training - Step 44014: {'lr': 0.0004069971886738617, 'samples': 22535680, 'steps': 44014, 'loss/train': 2.0649259090423584} +03/05/2022 17:12:43 - INFO - codeparrot_training - Skipping example with length 765 (seq_length=1024) +03/05/2022 17:12:47 - INFO - codeparrot_training - Step 44015: {'lr': 0.00040699305881110103, 'samples': 22536192, 'steps': 44015, 'loss/train': 1.8393774032592773} +03/05/2022 17:12:51 - INFO - codeparrot_training - Step 44016: {'lr': 0.00040698892887760174, 'samples': 22536704, 'steps': 44016, 'loss/train': 1.8186222314834595} +03/05/2022 17:12:51 - INFO - codeparrot_training - Skipping example with length 232 (seq_length=1024) +03/05/2022 17:12:56 - INFO - codeparrot_training - Step 44017: {'lr': 0.00040698479887336567, 'samples': 22537216, 'steps': 44017, 'loss/train': 1.9768949747085571} +03/05/2022 17:12:59 - INFO - codeparrot_training - Step 44018: {'lr': 0.00040698066879839463, 'samples': 22537728, 'steps': 44018, 'loss/train': 1.4789793491363525} +03/05/2022 17:13:00 - INFO - codeparrot_training - Skipping example with length 70 (seq_length=1024) +03/05/2022 17:13:04 - INFO - codeparrot_training - Step 44019: {'lr': 0.00040697653865269057, 'samples': 22538240, 'steps': 44019, 'loss/train': 1.7451080083847046} +03/05/2022 17:13:08 - INFO - codeparrot_training - Step 44020: {'lr': 0.00040697240843625527, 'samples': 22538752, 'steps': 44020, 'loss/train': 1.9882631301879883} +03/05/2022 17:13:08 - INFO - codeparrot_training - Skipping example with length 162 (seq_length=1024) +03/05/2022 17:13:13 - INFO - codeparrot_training - Step 44021: {'lr': 0.00040696827814909063, 'samples': 22539264, 'steps': 44021, 'loss/train': 1.2893009185791016} +03/05/2022 17:13:16 - INFO - codeparrot_training - Step 44022: {'lr': 0.0004069641477911985, 'samples': 22539776, 'steps': 44022, 'loss/train': 1.097474217414856} +03/05/2022 17:13:16 - INFO - codeparrot_training - Skipping example with length 884 (seq_length=1024) +03/05/2022 17:13:21 - INFO - codeparrot_training - Step 44023: {'lr': 0.00040696001736258077, 'samples': 22540288, 'steps': 44023, 'loss/train': 1.616531491279602} +03/05/2022 17:13:24 - INFO - codeparrot_training - Step 44024: {'lr': 0.0004069558868632393, 'samples': 22540800, 'steps': 44024, 'loss/train': 1.322118878364563} +03/05/2022 17:13:24 - INFO - codeparrot_training - Skipping example with length 901 (seq_length=1024) +03/05/2022 17:13:30 - INFO - codeparrot_training - Step 44025: {'lr': 0.0004069517562931759, 'samples': 22541312, 'steps': 44025, 'loss/train': 1.130293607711792} +03/05/2022 17:13:33 - INFO - codeparrot_training - Step 44026: {'lr': 0.0004069476256523924, 'samples': 22541824, 'steps': 44026, 'loss/train': 1.8206464052200317} +03/05/2022 17:13:33 - INFO - codeparrot_training - Skipping example with length 765 (seq_length=1024) +03/05/2022 17:13:38 - INFO - codeparrot_training - Step 44027: {'lr': 0.0004069434949408908, 'samples': 22542336, 'steps': 44027, 'loss/train': 0.3178751766681671} +03/05/2022 17:13:41 - INFO - codeparrot_training - Step 44028: {'lr': 0.0004069393641586728, 'samples': 22542848, 'steps': 44028, 'loss/train': 1.928274393081665} +03/05/2022 17:13:41 - INFO - codeparrot_training - Skipping example with length 847 (seq_length=1024) +03/05/2022 17:13:46 - INFO - codeparrot_training - Step 44029: {'lr': 0.00040693523330574043, 'samples': 22543360, 'steps': 44029, 'loss/train': 0.5905219912528992} +03/05/2022 17:13:49 - INFO - codeparrot_training - Step 44030: {'lr': 0.0004069311023820954, 'samples': 22543872, 'steps': 44030, 'loss/train': 1.8269705772399902} +03/05/2022 17:13:49 - INFO - codeparrot_training - Skipping example with length 508 (seq_length=1024) +03/05/2022 17:13:55 - INFO - codeparrot_training - Step 44031: {'lr': 0.0004069269713877397, 'samples': 22544384, 'steps': 44031, 'loss/train': 1.456636905670166} +03/05/2022 17:13:58 - INFO - codeparrot_training - Step 44032: {'lr': 0.00040692284032267515, 'samples': 22544896, 'steps': 44032, 'loss/train': 1.481444001197815} +03/05/2022 17:13:58 - INFO - codeparrot_training - Skipping example with length 250 (seq_length=1024) +03/05/2022 17:14:03 - INFO - codeparrot_training - Step 44033: {'lr': 0.0004069187091869035, 'samples': 22545408, 'steps': 44033, 'loss/train': 1.2420271635055542} +03/05/2022 17:14:07 - INFO - codeparrot_training - Step 44034: {'lr': 0.00040691457798042673, 'samples': 22545920, 'steps': 44034, 'loss/train': 1.7530019283294678} +03/05/2022 17:14:08 - INFO - codeparrot_training - Skipping example with length 528 (seq_length=1024) +03/05/2022 17:14:12 - INFO - codeparrot_training - Step 44035: {'lr': 0.00040691044670324673, 'samples': 22546432, 'steps': 44035, 'loss/train': 1.104259729385376} +03/05/2022 17:14:15 - INFO - codeparrot_training - Step 44036: {'lr': 0.00040690631535536526, 'samples': 22546944, 'steps': 44036, 'loss/train': 2.0105106830596924} +03/05/2022 17:14:16 - INFO - codeparrot_training - Skipping example with length 26 (seq_length=1024) +03/05/2022 17:14:20 - INFO - codeparrot_training - Step 44037: {'lr': 0.00040690218393678426, 'samples': 22547456, 'steps': 44037, 'loss/train': 1.132739543914795} +03/05/2022 17:14:24 - INFO - codeparrot_training - Step 44038: {'lr': 0.0004068980524475054, 'samples': 22547968, 'steps': 44038, 'loss/train': 1.4528270959854126} +03/05/2022 17:14:24 - INFO - codeparrot_training - Skipping example with length 987 (seq_length=1024) +03/05/2022 17:14:29 - INFO - codeparrot_training - Step 44039: {'lr': 0.00040689392088753097, 'samples': 22548480, 'steps': 44039, 'loss/train': 2.0915627479553223} +03/05/2022 17:14:32 - INFO - codeparrot_training - Step 44040: {'lr': 0.00040688978925686235, 'samples': 22548992, 'steps': 44040, 'loss/train': 0.5961836576461792} +03/05/2022 17:14:33 - INFO - codeparrot_training - Skipping example with length 805 (seq_length=1024) +03/05/2022 17:14:37 - INFO - codeparrot_training - Step 44041: {'lr': 0.00040688565755550164, 'samples': 22549504, 'steps': 44041, 'loss/train': 2.7009055614471436} +03/05/2022 17:14:41 - INFO - codeparrot_training - Step 44042: {'lr': 0.00040688152578345074, 'samples': 22550016, 'steps': 44042, 'loss/train': 2.336779832839966} +03/05/2022 17:14:42 - INFO - codeparrot_training - Skipping example with length 779 (seq_length=1024) +03/05/2022 17:14:46 - INFO - codeparrot_training - Step 44043: {'lr': 0.0004068773939407114, 'samples': 22550528, 'steps': 44043, 'loss/train': 2.5585217475891113} +03/05/2022 17:14:49 - INFO - codeparrot_training - Step 44044: {'lr': 0.0004068732620272856, 'samples': 22551040, 'steps': 44044, 'loss/train': 2.274350881576538} +03/05/2022 17:14:50 - INFO - codeparrot_training - Skipping example with length 137 (seq_length=1024) +03/05/2022 17:14:55 - INFO - codeparrot_training - Step 44045: {'lr': 0.000406869130043175, 'samples': 22551552, 'steps': 44045, 'loss/train': 2.3533880710601807} +03/05/2022 17:14:58 - INFO - codeparrot_training - Step 44046: {'lr': 0.0004068649979883817, 'samples': 22552064, 'steps': 44046, 'loss/train': 0.9152523875236511} +03/05/2022 17:14:59 - INFO - codeparrot_training - Skipping example with length 657 (seq_length=1024) +03/05/2022 17:15:03 - INFO - codeparrot_training - Step 44047: {'lr': 0.0004068608658629074, 'samples': 22552576, 'steps': 44047, 'loss/train': 1.4622944593429565} +03/05/2022 17:15:06 - INFO - codeparrot_training - Step 44048: {'lr': 0.000406856733666754, 'samples': 22553088, 'steps': 44048, 'loss/train': 2.484221935272217} +03/05/2022 17:15:07 - INFO - codeparrot_training - Skipping example with length 789 (seq_length=1024) +03/05/2022 17:15:12 - INFO - codeparrot_training - Step 44049: {'lr': 0.00040685260139992343, 'samples': 22553600, 'steps': 44049, 'loss/train': 2.119645357131958} +03/05/2022 17:15:15 - INFO - codeparrot_training - Step 44050: {'lr': 0.00040684846906241745, 'samples': 22554112, 'steps': 44050, 'loss/train': 1.4676960706710815} +03/05/2022 17:15:16 - INFO - codeparrot_training - Skipping example with length 546 (seq_length=1024) +03/05/2022 17:15:20 - INFO - codeparrot_training - Step 44051: {'lr': 0.000406844336654238, 'samples': 22554624, 'steps': 44051, 'loss/train': 2.8545725345611572} +03/05/2022 17:15:23 - INFO - codeparrot_training - Step 44052: {'lr': 0.00040684020417538694, 'samples': 22555136, 'steps': 44052, 'loss/train': 1.3792974948883057} +03/05/2022 17:15:24 - INFO - codeparrot_training - Skipping example with length 582 (seq_length=1024) +03/05/2022 17:15:28 - INFO - codeparrot_training - Step 44053: {'lr': 0.00040683607162586604, 'samples': 22555648, 'steps': 44053, 'loss/train': 0.9823753237724304} +03/05/2022 17:15:32 - INFO - codeparrot_training - Step 44054: {'lr': 0.00040683193900567727, 'samples': 22556160, 'steps': 44054, 'loss/train': 1.8144932985305786} +03/05/2022 17:15:33 - INFO - codeparrot_training - Skipping example with length 575 (seq_length=1024) +03/05/2022 17:15:37 - INFO - codeparrot_training - Step 44055: {'lr': 0.00040682780631482243, 'samples': 22556672, 'steps': 44055, 'loss/train': 2.0380773544311523} +03/05/2022 17:15:40 - INFO - codeparrot_training - Step 44056: {'lr': 0.0004068236735533034, 'samples': 22557184, 'steps': 44056, 'loss/train': 1.3847761154174805} +03/05/2022 17:15:41 - INFO - codeparrot_training - Skipping example with length 827 (seq_length=1024) +03/05/2022 17:15:46 - INFO - codeparrot_training - Step 44057: {'lr': 0.00040681954072112206, 'samples': 22557696, 'steps': 44057, 'loss/train': 1.1825686693191528} +03/05/2022 17:15:49 - INFO - codeparrot_training - Step 44058: {'lr': 0.0004068154078182802, 'samples': 22558208, 'steps': 44058, 'loss/train': 1.8407344818115234} +03/05/2022 17:15:51 - INFO - codeparrot_training - Skipping example with length 171 (seq_length=1024) +03/05/2022 17:15:54 - INFO - codeparrot_training - Step 44059: {'lr': 0.00040681127484477983, 'samples': 22558720, 'steps': 44059, 'loss/train': 3.2797741889953613} +03/05/2022 17:15:57 - INFO - codeparrot_training - Step 44060: {'lr': 0.0004068071418006226, 'samples': 22559232, 'steps': 44060, 'loss/train': 2.179814100265503} +03/05/2022 17:15:59 - INFO - codeparrot_training - Skipping example with length 820 (seq_length=1024) +03/05/2022 17:16:02 - INFO - codeparrot_training - Step 44061: {'lr': 0.0004068030086858106, 'samples': 22559744, 'steps': 44061, 'loss/train': 0.994735062122345} +03/05/2022 17:16:06 - INFO - codeparrot_training - Step 44062: {'lr': 0.00040679887550034555, 'samples': 22560256, 'steps': 44062, 'loss/train': 1.9563329219818115} +03/05/2022 17:16:08 - INFO - codeparrot_training - Skipping example with length 435 (seq_length=1024) +03/05/2022 17:16:11 - INFO - codeparrot_training - Step 44063: {'lr': 0.0004067947422442293, 'samples': 22560768, 'steps': 44063, 'loss/train': 2.147077798843384} +03/05/2022 17:16:14 - INFO - codeparrot_training - Step 44064: {'lr': 0.00040679060891746384, 'samples': 22561280, 'steps': 44064, 'loss/train': 1.6807830333709717} +03/05/2022 17:16:16 - INFO - codeparrot_training - Skipping example with length 715 (seq_length=1024) +03/05/2022 17:16:19 - INFO - codeparrot_training - Step 44065: {'lr': 0.00040678647552005087, 'samples': 22561792, 'steps': 44065, 'loss/train': 2.1323800086975098} +03/05/2022 17:16:23 - INFO - codeparrot_training - Step 44066: {'lr': 0.00040678234205199237, 'samples': 22562304, 'steps': 44066, 'loss/train': 0.7981005311012268} +03/05/2022 17:16:24 - INFO - codeparrot_training - Skipping example with length 852 (seq_length=1024) +03/05/2022 17:16:28 - INFO - codeparrot_training - Step 44067: {'lr': 0.0004067782085132902, 'samples': 22562816, 'steps': 44067, 'loss/train': 1.384117841720581} +03/05/2022 17:16:31 - INFO - codeparrot_training - Step 44068: {'lr': 0.00040677407490394616, 'samples': 22563328, 'steps': 44068, 'loss/train': 1.7643357515335083} +03/05/2022 17:16:32 - INFO - codeparrot_training - Skipping example with length 155 (seq_length=1024) +03/05/2022 17:16:37 - INFO - codeparrot_training - Step 44069: {'lr': 0.0004067699412239622, 'samples': 22563840, 'steps': 44069, 'loss/train': 1.6844052076339722} +03/05/2022 17:16:40 - INFO - codeparrot_training - Step 44070: {'lr': 0.00040676580747334, 'samples': 22564352, 'steps': 44070, 'loss/train': 2.048553705215454} +03/05/2022 17:16:43 - INFO - codeparrot_training - Step 44071: {'lr': 0.0004067616736520816, 'samples': 22564864, 'steps': 44071, 'loss/train': 0.8057253956794739} +03/05/2022 17:16:45 - INFO - codeparrot_training - Skipping example with length 244 (seq_length=1024) +03/05/2022 17:16:49 - INFO - codeparrot_training - Step 44072: {'lr': 0.0004067575397601888, 'samples': 22565376, 'steps': 44072, 'loss/train': 1.9531939029693604} +03/05/2022 17:16:52 - INFO - codeparrot_training - Step 44073: {'lr': 0.0004067534057976635, 'samples': 22565888, 'steps': 44073, 'loss/train': 1.8498653173446655} +03/05/2022 17:16:54 - INFO - codeparrot_training - Skipping example with length 196 (seq_length=1024) +03/05/2022 17:16:57 - INFO - codeparrot_training - Step 44074: {'lr': 0.0004067492717645075, 'samples': 22566400, 'steps': 44074, 'loss/train': 1.168759822845459} +03/05/2022 17:17:01 - INFO - codeparrot_training - Step 44075: {'lr': 0.00040674513766072274, 'samples': 22566912, 'steps': 44075, 'loss/train': 1.3275723457336426} +03/05/2022 17:17:03 - INFO - codeparrot_training - Skipping example with length 393 (seq_length=1024) +03/05/2022 17:17:06 - INFO - codeparrot_training - Step 44076: {'lr': 0.000406741003486311, 'samples': 22567424, 'steps': 44076, 'loss/train': 1.8722443580627441} +03/05/2022 17:17:09 - INFO - codeparrot_training - Step 44077: {'lr': 0.00040673686924127416, 'samples': 22567936, 'steps': 44077, 'loss/train': 2.305825710296631} +03/05/2022 17:17:12 - INFO - codeparrot_training - Skipping example with length 810 (seq_length=1024) +03/05/2022 17:17:14 - INFO - codeparrot_training - Step 44078: {'lr': 0.0004067327349256142, 'samples': 22568448, 'steps': 44078, 'loss/train': 2.080634832382202} +03/05/2022 17:17:17 - INFO - codeparrot_training - Step 44079: {'lr': 0.00040672860053933286, 'samples': 22568960, 'steps': 44079, 'loss/train': 0.7083601951599121} +03/05/2022 17:17:21 - INFO - codeparrot_training - Step 44080: {'lr': 0.00040672446608243194, 'samples': 22569472, 'steps': 44080, 'loss/train': 1.1495527029037476} +03/05/2022 17:17:21 - INFO - codeparrot_training - Skipping example with length 880 (seq_length=1024) +03/05/2022 17:17:26 - INFO - codeparrot_training - Step 44081: {'lr': 0.0004067203315549135, 'samples': 22569984, 'steps': 44081, 'loss/train': 1.797181248664856} +03/05/2022 17:17:29 - INFO - codeparrot_training - Step 44082: {'lr': 0.00040671619695677923, 'samples': 22570496, 'steps': 44082, 'loss/train': 2.24397349357605} +03/05/2022 17:17:29 - INFO - codeparrot_training - Skipping example with length 427 (seq_length=1024) +03/05/2022 17:17:34 - INFO - codeparrot_training - Step 44083: {'lr': 0.00040671206228803117, 'samples': 22571008, 'steps': 44083, 'loss/train': 2.1217551231384277} +03/05/2022 17:17:38 - INFO - codeparrot_training - Step 44084: {'lr': 0.0004067079275486709, 'samples': 22571520, 'steps': 44084, 'loss/train': 1.2476714849472046} +03/05/2022 17:17:38 - INFO - codeparrot_training - Skipping example with length 445 (seq_length=1024) +03/05/2022 17:17:43 - INFO - codeparrot_training - Step 44085: {'lr': 0.00040670379273870054, 'samples': 22572032, 'steps': 44085, 'loss/train': 2.2558465003967285} +03/05/2022 17:17:46 - INFO - codeparrot_training - Step 44086: {'lr': 0.00040669965785812193, 'samples': 22572544, 'steps': 44086, 'loss/train': 1.746035099029541} +03/05/2022 17:17:46 - INFO - codeparrot_training - Skipping example with length 555 (seq_length=1024) +03/05/2022 17:17:51 - INFO - codeparrot_training - Step 44087: {'lr': 0.00040669552290693677, 'samples': 22573056, 'steps': 44087, 'loss/train': 1.1192668676376343} +03/05/2022 17:17:54 - INFO - codeparrot_training - Step 44088: {'lr': 0.0004066913878851471, 'samples': 22573568, 'steps': 44088, 'loss/train': 1.8575859069824219} +03/05/2022 17:17:55 - INFO - codeparrot_training - Skipping example with length 994 (seq_length=1024) +03/05/2022 17:18:00 - INFO - codeparrot_training - Step 44089: {'lr': 0.00040668725279275464, 'samples': 22574080, 'steps': 44089, 'loss/train': 1.3202877044677734} +03/05/2022 17:18:03 - INFO - codeparrot_training - Step 44090: {'lr': 0.0004066831176297614, 'samples': 22574592, 'steps': 44090, 'loss/train': 1.9421015977859497} +03/05/2022 17:18:03 - INFO - codeparrot_training - Skipping example with length 18 (seq_length=1024) +03/05/2022 17:18:08 - INFO - codeparrot_training - Step 44091: {'lr': 0.0004066789823961691, 'samples': 22575104, 'steps': 44091, 'loss/train': 0.726119339466095} +03/05/2022 17:18:12 - INFO - codeparrot_training - Step 44092: {'lr': 0.00040667484709197967, 'samples': 22575616, 'steps': 44092, 'loss/train': 1.767790675163269} +03/05/2022 17:18:12 - INFO - codeparrot_training - Skipping example with length 362 (seq_length=1024) +03/05/2022 17:18:17 - INFO - codeparrot_training - Step 44093: {'lr': 0.00040667071171719503, 'samples': 22576128, 'steps': 44093, 'loss/train': 2.221348524093628} +03/05/2022 17:18:20 - INFO - codeparrot_training - Step 44094: {'lr': 0.00040666657627181697, 'samples': 22576640, 'steps': 44094, 'loss/train': 1.8517422676086426} +03/05/2022 17:18:20 - INFO - codeparrot_training - Skipping example with length 616 (seq_length=1024) +03/05/2022 17:18:25 - INFO - codeparrot_training - Step 44095: {'lr': 0.00040666244075584736, 'samples': 22577152, 'steps': 44095, 'loss/train': 1.9456379413604736} +03/05/2022 17:18:28 - INFO - codeparrot_training - Step 44096: {'lr': 0.000406658305169288, 'samples': 22577664, 'steps': 44096, 'loss/train': 1.577863097190857} +03/05/2022 17:18:29 - INFO - codeparrot_training - Skipping example with length 28 (seq_length=1024) +03/05/2022 17:18:34 - INFO - codeparrot_training - Step 44097: {'lr': 0.000406654169512141, 'samples': 22578176, 'steps': 44097, 'loss/train': 2.1799745559692383} +03/05/2022 17:18:37 - INFO - codeparrot_training - Step 44098: {'lr': 0.0004066500337844078, 'samples': 22578688, 'steps': 44098, 'loss/train': 2.5567805767059326} +03/05/2022 17:18:39 - INFO - codeparrot_training - Skipping example with length 120 (seq_length=1024) +03/05/2022 17:18:43 - INFO - codeparrot_training - Step 44099: {'lr': 0.0004066458979860907, 'samples': 22579200, 'steps': 44099, 'loss/train': 0.20462945103645325} +03/05/2022 17:18:46 - INFO - codeparrot_training - Step 44100: {'lr': 0.00040664176211719136, 'samples': 22579712, 'steps': 44100, 'loss/train': 1.6339561939239502} +03/05/2022 17:18:48 - INFO - codeparrot_training - Skipping example with length 490 (seq_length=1024) +03/05/2022 17:18:51 - INFO - codeparrot_training - Step 44101: {'lr': 0.00040663762617771163, 'samples': 22580224, 'steps': 44101, 'loss/train': 2.400203227996826} +03/05/2022 17:18:54 - INFO - codeparrot_training - Step 44102: {'lr': 0.00040663349016765337, 'samples': 22580736, 'steps': 44102, 'loss/train': 2.187974452972412} +03/05/2022 17:18:57 - INFO - codeparrot_training - Skipping example with length 654 (seq_length=1024) +03/05/2022 17:19:00 - INFO - codeparrot_training - Step 44103: {'lr': 0.00040662935408701853, 'samples': 22581248, 'steps': 44103, 'loss/train': 1.8474748134613037} +03/05/2022 17:19:03 - INFO - codeparrot_training - Step 44104: {'lr': 0.00040662521793580886, 'samples': 22581760, 'steps': 44104, 'loss/train': 1.6552462577819824} +03/05/2022 17:19:05 - INFO - codeparrot_training - Skipping example with length 132 (seq_length=1024) +03/05/2022 17:19:08 - INFO - codeparrot_training - Step 44105: {'lr': 0.0004066210817140263, 'samples': 22582272, 'steps': 44105, 'loss/train': 0.7009512186050415} +03/05/2022 17:19:11 - INFO - codeparrot_training - Step 44106: {'lr': 0.0004066169454216727, 'samples': 22582784, 'steps': 44106, 'loss/train': 1.1058763265609741} +03/05/2022 17:19:13 - INFO - codeparrot_training - Skipping example with length 251 (seq_length=1024) +03/05/2022 17:19:17 - INFO - codeparrot_training - Step 44107: {'lr': 0.00040661280905875, 'samples': 22583296, 'steps': 44107, 'loss/train': 1.967161774635315} +03/05/2022 17:19:20 - INFO - codeparrot_training - Step 44108: {'lr': 0.0004066086726252599, 'samples': 22583808, 'steps': 44108, 'loss/train': 1.7949830293655396} +03/05/2022 17:19:21 - INFO - codeparrot_training - Skipping example with length 872 (seq_length=1024) +03/05/2022 17:19:25 - INFO - codeparrot_training - Step 44109: {'lr': 0.0004066045361212043, 'samples': 22584320, 'steps': 44109, 'loss/train': 1.9241244792938232} +03/05/2022 17:19:28 - INFO - codeparrot_training - Step 44110: {'lr': 0.00040660039954658523, 'samples': 22584832, 'steps': 44110, 'loss/train': 2.247014284133911} +03/05/2022 17:19:30 - INFO - codeparrot_training - Skipping example with length 900 (seq_length=1024) +03/05/2022 17:19:33 - INFO - codeparrot_training - Step 44111: {'lr': 0.0004065962629014044, 'samples': 22585344, 'steps': 44111, 'loss/train': 1.7103047370910645} +03/05/2022 17:19:37 - INFO - codeparrot_training - Step 44112: {'lr': 0.00040659212618566364, 'samples': 22585856, 'steps': 44112, 'loss/train': 1.8774996995925903} +03/05/2022 17:19:38 - INFO - codeparrot_training - Skipping example with length 347 (seq_length=1024) +03/05/2022 17:19:42 - INFO - codeparrot_training - Step 44113: {'lr': 0.000406587989399365, 'samples': 22586368, 'steps': 44113, 'loss/train': 0.5324177742004395} +03/05/2022 17:19:45 - INFO - codeparrot_training - Step 44114: {'lr': 0.0004065838525425102, 'samples': 22586880, 'steps': 44114, 'loss/train': 3.025786876678467} +03/05/2022 17:19:47 - INFO - codeparrot_training - Skipping example with length 731 (seq_length=1024) +03/05/2022 17:19:50 - INFO - codeparrot_training - Step 44115: {'lr': 0.00040657971561510104, 'samples': 22587392, 'steps': 44115, 'loss/train': 1.2008098363876343} +03/05/2022 17:19:53 - INFO - codeparrot_training - Step 44116: {'lr': 0.00040657557861713956, 'samples': 22587904, 'steps': 44116, 'loss/train': 1.639552116394043} +03/05/2022 17:19:55 - INFO - codeparrot_training - Skipping example with length 375 (seq_length=1024) +03/05/2022 17:19:59 - INFO - codeparrot_training - Step 44117: {'lr': 0.00040657144154862746, 'samples': 22588416, 'steps': 44117, 'loss/train': 2.1843345165252686} +03/05/2022 17:20:02 - INFO - codeparrot_training - Step 44118: {'lr': 0.00040656730440956677, 'samples': 22588928, 'steps': 44118, 'loss/train': 1.7968913316726685} +03/05/2022 17:20:03 - INFO - codeparrot_training - Skipping example with length 577 (seq_length=1024) +03/05/2022 17:20:07 - INFO - codeparrot_training - Step 44119: {'lr': 0.0004065631671999592, 'samples': 22589440, 'steps': 44119, 'loss/train': 2.2209715843200684} +03/05/2022 17:20:10 - INFO - codeparrot_training - Step 44120: {'lr': 0.0004065590299198068, 'samples': 22589952, 'steps': 44120, 'loss/train': 1.8904602527618408} +03/05/2022 17:20:11 - INFO - codeparrot_training - Skipping example with length 507 (seq_length=1024) +03/05/2022 17:20:16 - INFO - codeparrot_training - Step 44121: {'lr': 0.00040655489256911123, 'samples': 22590464, 'steps': 44121, 'loss/train': 2.152430772781372} +03/05/2022 17:20:19 - INFO - codeparrot_training - Step 44122: {'lr': 0.00040655075514787445, 'samples': 22590976, 'steps': 44122, 'loss/train': 1.513514757156372} +03/05/2022 17:20:20 - INFO - codeparrot_training - Skipping example with length 917 (seq_length=1024) +03/05/2022 17:20:24 - INFO - codeparrot_training - Step 44123: {'lr': 0.0004065466176560983, 'samples': 22591488, 'steps': 44123, 'loss/train': 1.5352094173431396} +03/05/2022 17:20:27 - INFO - codeparrot_training - Step 44124: {'lr': 0.0004065424800937847, 'samples': 22592000, 'steps': 44124, 'loss/train': 1.9511908292770386} +03/05/2022 17:20:32 - INFO - codeparrot_training - Step 44125: {'lr': 0.0004065383424609354, 'samples': 22592512, 'steps': 44125, 'loss/train': 2.2775039672851562} +03/05/2022 17:20:36 - INFO - codeparrot_training - Step 44126: {'lr': 0.00040653420475755245, 'samples': 22593024, 'steps': 44126, 'loss/train': 1.63613760471344} +03/05/2022 17:20:36 - INFO - codeparrot_training - Skipping example with length 235 (seq_length=1024) +03/05/2022 17:20:41 - INFO - codeparrot_training - Step 44127: {'lr': 0.0004065300669836375, 'samples': 22593536, 'steps': 44127, 'loss/train': 1.7480061054229736} +03/05/2022 17:20:44 - INFO - codeparrot_training - Step 44128: {'lr': 0.0004065259291391926, 'samples': 22594048, 'steps': 44128, 'loss/train': 1.218375325202942} +03/05/2022 17:20:46 - INFO - codeparrot_training - Skipping example with length 664 (seq_length=1024) +03/05/2022 17:20:49 - INFO - codeparrot_training - Step 44129: {'lr': 0.0004065217912242195, 'samples': 22594560, 'steps': 44129, 'loss/train': 1.7834889888763428} +03/05/2022 17:20:53 - INFO - codeparrot_training - Step 44130: {'lr': 0.00040651765323872, 'samples': 22595072, 'steps': 44130, 'loss/train': 1.6898303031921387} +03/05/2022 17:20:55 - INFO - codeparrot_training - Skipping example with length 185 (seq_length=1024) +03/05/2022 17:20:58 - INFO - codeparrot_training - Step 44131: {'lr': 0.0004065135151826962, 'samples': 22595584, 'steps': 44131, 'loss/train': 1.7827272415161133} +03/05/2022 17:21:01 - INFO - codeparrot_training - Step 44132: {'lr': 0.00040650937705614975, 'samples': 22596096, 'steps': 44132, 'loss/train': 1.8567818403244019} +03/05/2022 17:21:03 - INFO - codeparrot_training - Skipping example with length 555 (seq_length=1024) +03/05/2022 17:21:06 - INFO - codeparrot_training - Step 44133: {'lr': 0.0004065052388590826, 'samples': 22596608, 'steps': 44133, 'loss/train': 2.2471628189086914} +03/05/2022 17:21:10 - INFO - codeparrot_training - Step 44134: {'lr': 0.00040650110059149664, 'samples': 22597120, 'steps': 44134, 'loss/train': 4.080743312835693} +03/05/2022 17:21:11 - INFO - codeparrot_training - Skipping example with length 544 (seq_length=1024) +03/05/2022 17:21:15 - INFO - codeparrot_training - Step 44135: {'lr': 0.0004064969622533937, 'samples': 22597632, 'steps': 44135, 'loss/train': 1.1336251497268677} +03/05/2022 17:21:18 - INFO - codeparrot_training - Step 44136: {'lr': 0.0004064928238447756, 'samples': 22598144, 'steps': 44136, 'loss/train': 2.3095993995666504} +03/05/2022 17:21:20 - INFO - codeparrot_training - Skipping example with length 149 (seq_length=1024) +03/05/2022 17:21:23 - INFO - codeparrot_training - Step 44137: {'lr': 0.00040648868536564427, 'samples': 22598656, 'steps': 44137, 'loss/train': 1.6412612199783325} +03/05/2022 17:21:26 - INFO - codeparrot_training - Step 44138: {'lr': 0.00040648454681600153, 'samples': 22599168, 'steps': 44138, 'loss/train': 1.8414227962493896} +03/05/2022 17:21:28 - INFO - codeparrot_training - Skipping example with length 664 (seq_length=1024) +03/05/2022 17:21:32 - INFO - codeparrot_training - Step 44139: {'lr': 0.0004064804081958493, 'samples': 22599680, 'steps': 44139, 'loss/train': 2.0570695400238037} +03/05/2022 17:21:35 - INFO - codeparrot_training - Step 44140: {'lr': 0.00040647626950518945, 'samples': 22600192, 'steps': 44140, 'loss/train': 2.1817712783813477} +03/05/2022 17:21:37 - INFO - codeparrot_training - Skipping example with length 427 (seq_length=1024) +03/05/2022 17:21:40 - INFO - codeparrot_training - Step 44141: {'lr': 0.00040647213074402374, 'samples': 22600704, 'steps': 44141, 'loss/train': 1.9372197389602661} +03/05/2022 17:21:44 - INFO - codeparrot_training - Step 44142: {'lr': 0.0004064679919123541, 'samples': 22601216, 'steps': 44142, 'loss/train': 1.8611416816711426} +03/05/2022 17:21:46 - INFO - codeparrot_training - Skipping example with length 270 (seq_length=1024) +03/05/2022 17:21:49 - INFO - codeparrot_training - Step 44143: {'lr': 0.00040646385301018243, 'samples': 22601728, 'steps': 44143, 'loss/train': 0.36438289284706116} +03/05/2022 17:21:52 - INFO - codeparrot_training - Step 44144: {'lr': 0.0004064597140375105, 'samples': 22602240, 'steps': 44144, 'loss/train': 1.1619759798049927} +03/05/2022 17:21:56 - INFO - codeparrot_training - Step 44145: {'lr': 0.00040645557499434035, 'samples': 22602752, 'steps': 44145, 'loss/train': 1.7439614534378052} +03/05/2022 17:21:56 - INFO - codeparrot_training - Skipping example with length 986 (seq_length=1024) +03/05/2022 17:22:01 - INFO - codeparrot_training - Step 44146: {'lr': 0.0004064514358806737, 'samples': 22603264, 'steps': 44146, 'loss/train': 1.7756513357162476} +03/05/2022 17:22:04 - INFO - codeparrot_training - Skipping example with length 190 (seq_length=1024) +03/05/2022 17:22:07 - INFO - codeparrot_training - Step 44147: {'lr': 0.00040644729669651235, 'samples': 22603776, 'steps': 44147, 'loss/train': 0.7803305387496948} +03/05/2022 17:22:10 - INFO - codeparrot_training - Step 44148: {'lr': 0.0004064431574418583, 'samples': 22604288, 'steps': 44148, 'loss/train': 1.7715725898742676} +03/05/2022 17:22:13 - INFO - codeparrot_training - Step 44149: {'lr': 0.00040643901811671345, 'samples': 22604800, 'steps': 44149, 'loss/train': 2.224050760269165} +03/05/2022 17:22:15 - INFO - codeparrot_training - Skipping example with length 993 (seq_length=1024) +03/05/2022 17:22:18 - INFO - codeparrot_training - Step 44150: {'lr': 0.0004064348787210795, 'samples': 22605312, 'steps': 44150, 'loss/train': 0.6646870374679565} +03/05/2022 17:22:22 - INFO - codeparrot_training - Step 44151: {'lr': 0.0004064307392549585, 'samples': 22605824, 'steps': 44151, 'loss/train': 1.684786319732666} +03/05/2022 17:22:24 - INFO - codeparrot_training - Skipping example with length 883 (seq_length=1024) +03/05/2022 17:22:27 - INFO - codeparrot_training - Step 44152: {'lr': 0.00040642659971835217, 'samples': 22606336, 'steps': 44152, 'loss/train': 0.9003918766975403} +03/05/2022 17:22:30 - INFO - codeparrot_training - Step 44153: {'lr': 0.0004064224601112625, 'samples': 22606848, 'steps': 44153, 'loss/train': 1.4049586057662964} +03/05/2022 17:22:33 - INFO - codeparrot_training - Skipping example with length 561 (seq_length=1024) +03/05/2022 17:22:35 - INFO - codeparrot_training - Step 44154: {'lr': 0.0004064183204336912, 'samples': 22607360, 'steps': 44154, 'loss/train': 2.39943528175354} +03/05/2022 17:22:39 - INFO - codeparrot_training - Step 44155: {'lr': 0.00040641418068564024, 'samples': 22607872, 'steps': 44155, 'loss/train': 1.779072642326355} +03/05/2022 17:22:41 - INFO - codeparrot_training - Skipping example with length 331 (seq_length=1024) +03/05/2022 17:22:44 - INFO - codeparrot_training - Step 44156: {'lr': 0.0004064100408671114, 'samples': 22608384, 'steps': 44156, 'loss/train': 1.7507083415985107} +03/05/2022 17:22:47 - INFO - codeparrot_training - Step 44157: {'lr': 0.0004064059009781067, 'samples': 22608896, 'steps': 44157, 'loss/train': 0.24615629017353058} +03/05/2022 17:22:50 - INFO - codeparrot_training - Skipping example with length 820 (seq_length=1024) +03/05/2022 17:22:52 - INFO - codeparrot_training - Step 44158: {'lr': 0.0004064017610186279, 'samples': 22609408, 'steps': 44158, 'loss/train': 2.985563039779663} +03/05/2022 17:22:55 - INFO - codeparrot_training - Step 44159: {'lr': 0.00040639762098867684, 'samples': 22609920, 'steps': 44159, 'loss/train': 1.4311683177947998} +03/05/2022 17:22:58 - INFO - codeparrot_training - Skipping example with length 978 (seq_length=1024) +03/05/2022 17:23:01 - INFO - codeparrot_training - Step 44160: {'lr': 0.0004063934808882555, 'samples': 22610432, 'steps': 44160, 'loss/train': 2.2088539600372314} +03/05/2022 17:23:04 - INFO - codeparrot_training - Step 44161: {'lr': 0.0004063893407173656, 'samples': 22610944, 'steps': 44161, 'loss/train': 1.7430106401443481} +03/05/2022 17:23:06 - INFO - codeparrot_training - Skipping example with length 842 (seq_length=1024) +03/05/2022 17:23:09 - INFO - codeparrot_training - Step 44162: {'lr': 0.00040638520047600916, 'samples': 22611456, 'steps': 44162, 'loss/train': 1.9264702796936035} +03/05/2022 17:23:12 - INFO - codeparrot_training - Step 44163: {'lr': 0.00040638106016418785, 'samples': 22611968, 'steps': 44163, 'loss/train': 1.5527173280715942} +03/05/2022 17:23:15 - INFO - codeparrot_training - Skipping example with length 9 (seq_length=1024) +03/05/2022 17:23:18 - INFO - codeparrot_training - Step 44164: {'lr': 0.0004063769197819037, 'samples': 22612480, 'steps': 44164, 'loss/train': 1.6649202108383179} +03/05/2022 17:23:21 - INFO - codeparrot_training - Step 44165: {'lr': 0.0004063727793291585, 'samples': 22612992, 'steps': 44165, 'loss/train': 1.7936458587646484} +03/05/2022 17:23:23 - INFO - codeparrot_training - Skipping example with length 531 (seq_length=1024) +03/05/2022 17:23:26 - INFO - codeparrot_training - Step 44166: {'lr': 0.00040636863880595415, 'samples': 22613504, 'steps': 44166, 'loss/train': 1.2894933223724365} +03/05/2022 17:23:29 - INFO - codeparrot_training - Step 44167: {'lr': 0.0004063644982122926, 'samples': 22614016, 'steps': 44167, 'loss/train': 1.063032627105713} +03/05/2022 17:23:32 - INFO - codeparrot_training - Skipping example with length 893 (seq_length=1024) +03/05/2022 17:23:34 - INFO - codeparrot_training - Step 44168: {'lr': 0.00040636035754817545, 'samples': 22614528, 'steps': 44168, 'loss/train': 1.2600059509277344} +03/05/2022 17:23:38 - INFO - codeparrot_training - Step 44169: {'lr': 0.00040635621681360485, 'samples': 22615040, 'steps': 44169, 'loss/train': 1.1489697694778442} +03/05/2022 17:23:40 - INFO - codeparrot_training - Skipping example with length 941 (seq_length=1024) +03/05/2022 17:23:43 - INFO - codeparrot_training - Step 44170: {'lr': 0.00040635207600858247, 'samples': 22615552, 'steps': 44170, 'loss/train': 2.1656484603881836} +03/05/2022 17:23:46 - INFO - codeparrot_training - Step 44171: {'lr': 0.00040634793513311037, 'samples': 22616064, 'steps': 44171, 'loss/train': 1.8448591232299805} +03/05/2022 17:23:48 - INFO - codeparrot_training - Skipping example with length 388 (seq_length=1024) +03/05/2022 17:23:51 - INFO - codeparrot_training - Step 44172: {'lr': 0.0004063437941871903, 'samples': 22616576, 'steps': 44172, 'loss/train': 1.7750264406204224} +03/05/2022 17:23:54 - INFO - codeparrot_training - Step 44173: {'lr': 0.000406339653170824, 'samples': 22617088, 'steps': 44173, 'loss/train': 1.9253851175308228} +03/05/2022 17:23:57 - INFO - codeparrot_training - Skipping example with length 728 (seq_length=1024) +03/05/2022 17:24:00 - INFO - codeparrot_training - Step 44174: {'lr': 0.00040633551208401356, 'samples': 22617600, 'steps': 44174, 'loss/train': 1.7712149620056152} +03/05/2022 17:24:03 - INFO - codeparrot_training - Step 44175: {'lr': 0.0004063313709267607, 'samples': 22618112, 'steps': 44175, 'loss/train': 0.5307133793830872} +03/05/2022 17:24:05 - INFO - codeparrot_training - Skipping example with length 245 (seq_length=1024) +03/05/2022 17:24:09 - INFO - codeparrot_training - Step 44176: {'lr': 0.0004063272296990674, 'samples': 22618624, 'steps': 44176, 'loss/train': 1.5242984294891357} +03/05/2022 17:24:12 - INFO - codeparrot_training - Step 44177: {'lr': 0.00040632308840093533, 'samples': 22619136, 'steps': 44177, 'loss/train': 1.6285719871520996} +03/05/2022 17:24:15 - INFO - codeparrot_training - Step 44178: {'lr': 0.0004063189470323666, 'samples': 22619648, 'steps': 44178, 'loss/train': 0.6017926931381226} +03/05/2022 17:24:16 - INFO - codeparrot_training - Skipping example with length 111 (seq_length=1024) +03/05/2022 17:24:20 - INFO - codeparrot_training - Step 44179: {'lr': 0.000406314805593363, 'samples': 22620160, 'steps': 44179, 'loss/train': 0.7794324159622192} +03/05/2022 17:24:23 - INFO - codeparrot_training - Step 44180: {'lr': 0.00040631066408392636, 'samples': 22620672, 'steps': 44180, 'loss/train': 1.0582078695297241} +03/05/2022 17:24:24 - INFO - codeparrot_training - Skipping example with length 241 (seq_length=1024) +03/05/2022 17:24:29 - INFO - codeparrot_training - Step 44181: {'lr': 0.0004063065225040584, 'samples': 22621184, 'steps': 44181, 'loss/train': 1.6098616123199463} +03/05/2022 17:24:32 - INFO - codeparrot_training - Step 44182: {'lr': 0.0004063023808537613, 'samples': 22621696, 'steps': 44182, 'loss/train': 0.9810751676559448} +03/05/2022 17:24:32 - INFO - codeparrot_training - Skipping example with length 890 (seq_length=1024) +03/05/2022 17:24:37 - INFO - codeparrot_training - Step 44183: {'lr': 0.00040629823913303665, 'samples': 22622208, 'steps': 44183, 'loss/train': 2.161726474761963} +03/05/2022 17:24:40 - INFO - codeparrot_training - Step 44184: {'lr': 0.0004062940973418865, 'samples': 22622720, 'steps': 44184, 'loss/train': 1.0879919528961182} +03/05/2022 17:24:42 - INFO - codeparrot_training - Skipping example with length 410 (seq_length=1024) +03/05/2022 17:24:46 - INFO - codeparrot_training - Step 44185: {'lr': 0.00040628995548031254, 'samples': 22623232, 'steps': 44185, 'loss/train': 1.6878407001495361} +03/05/2022 17:24:49 - INFO - codeparrot_training - Step 44186: {'lr': 0.00040628581354831687, 'samples': 22623744, 'steps': 44186, 'loss/train': 1.3284821510314941} +03/05/2022 17:24:50 - INFO - codeparrot_training - Skipping example with length 782 (seq_length=1024) +03/05/2022 17:24:54 - INFO - codeparrot_training - Step 44187: {'lr': 0.0004062816715459011, 'samples': 22624256, 'steps': 44187, 'loss/train': 1.4101626873016357} +03/05/2022 17:24:57 - INFO - codeparrot_training - Step 44188: {'lr': 0.0004062775294730673, 'samples': 22624768, 'steps': 44188, 'loss/train': 1.5895625352859497} +03/05/2022 17:24:58 - INFO - codeparrot_training - Skipping example with length 520 (seq_length=1024) +03/05/2022 17:25:03 - INFO - codeparrot_training - Step 44189: {'lr': 0.0004062733873298172, 'samples': 22625280, 'steps': 44189, 'loss/train': 1.8601839542388916} +03/05/2022 17:25:06 - INFO - codeparrot_training - Step 44190: {'lr': 0.0004062692451161528, 'samples': 22625792, 'steps': 44190, 'loss/train': 1.711069107055664} +03/05/2022 17:25:07 - INFO - codeparrot_training - Skipping example with length 472 (seq_length=1024) +03/05/2022 17:25:11 - INFO - codeparrot_training - Step 44191: {'lr': 0.00040626510283207586, 'samples': 22626304, 'steps': 44191, 'loss/train': 2.0865375995635986} +03/05/2022 17:25:14 - INFO - codeparrot_training - Step 44192: {'lr': 0.00040626096047758823, 'samples': 22626816, 'steps': 44192, 'loss/train': 1.5760648250579834} +03/05/2022 17:25:15 - INFO - codeparrot_training - Skipping example with length 484 (seq_length=1024) +03/05/2022 17:25:19 - INFO - codeparrot_training - Step 44193: {'lr': 0.0004062568180526919, 'samples': 22627328, 'steps': 44193, 'loss/train': 2.306217670440674} +03/05/2022 17:25:23 - INFO - codeparrot_training - Step 44194: {'lr': 0.0004062526755573886, 'samples': 22627840, 'steps': 44194, 'loss/train': 1.9531458616256714} +03/05/2022 17:25:24 - INFO - codeparrot_training - Skipping example with length 864 (seq_length=1024) +03/05/2022 17:25:29 - INFO - codeparrot_training - Step 44195: {'lr': 0.00040624853299168025, 'samples': 22628352, 'steps': 44195, 'loss/train': 1.2127224206924438} +03/05/2022 17:25:32 - INFO - codeparrot_training - Step 44196: {'lr': 0.0004062443903555687, 'samples': 22628864, 'steps': 44196, 'loss/train': 2.1385610103607178} +03/05/2022 17:25:35 - INFO - codeparrot_training - Step 44197: {'lr': 0.0004062402476490559, 'samples': 22629376, 'steps': 44197, 'loss/train': 1.872276782989502} +03/05/2022 17:25:35 - INFO - codeparrot_training - Skipping example with length 706 (seq_length=1024) +03/05/2022 17:25:40 - INFO - codeparrot_training - Step 44198: {'lr': 0.00040623610487214366, 'samples': 22629888, 'steps': 44198, 'loss/train': 1.7696211338043213} +03/05/2022 17:25:43 - INFO - codeparrot_training - Skipping example with length 394 (seq_length=1024) +03/05/2022 17:25:45 - INFO - codeparrot_training - Step 44199: {'lr': 0.0004062319620248338, 'samples': 22630400, 'steps': 44199, 'loss/train': 1.431330680847168} +03/05/2022 17:25:49 - INFO - codeparrot_training - Step 44200: {'lr': 0.00040622781910712826, 'samples': 22630912, 'steps': 44200, 'loss/train': 1.4680168628692627} +03/05/2022 17:25:51 - INFO - codeparrot_training - Skipping example with length 657 (seq_length=1024) +03/05/2022 17:25:54 - INFO - codeparrot_training - Step 44201: {'lr': 0.00040622367611902886, 'samples': 22631424, 'steps': 44201, 'loss/train': 1.5872743129730225} +03/05/2022 17:25:57 - INFO - codeparrot_training - Step 44202: {'lr': 0.0004062195330605375, 'samples': 22631936, 'steps': 44202, 'loss/train': 1.863154649734497} +03/05/2022 17:26:00 - INFO - codeparrot_training - Step 44203: {'lr': 0.000406215389931656, 'samples': 22632448, 'steps': 44203, 'loss/train': 1.862103819847107} +03/05/2022 17:26:00 - INFO - codeparrot_training - Skipping example with length 479 (seq_length=1024) +03/05/2022 17:26:06 - INFO - codeparrot_training - Step 44204: {'lr': 0.0004062112467323863, 'samples': 22632960, 'steps': 44204, 'loss/train': 1.1745718717575073} +03/05/2022 17:26:09 - INFO - codeparrot_training - Step 44205: {'lr': 0.00040620710346273015, 'samples': 22633472, 'steps': 44205, 'loss/train': 2.060042381286621} +03/05/2022 17:26:09 - INFO - codeparrot_training - Skipping example with length 622 (seq_length=1024) +03/05/2022 17:26:14 - INFO - codeparrot_training - Step 44206: {'lr': 0.00040620296012268956, 'samples': 22633984, 'steps': 44206, 'loss/train': 1.7059396505355835} +03/05/2022 17:26:17 - INFO - codeparrot_training - Step 44207: {'lr': 0.0004061988167122663, 'samples': 22634496, 'steps': 44207, 'loss/train': 1.564478874206543} +03/05/2022 17:26:17 - INFO - codeparrot_training - Skipping example with length 136 (seq_length=1024) +03/05/2022 17:26:23 - INFO - codeparrot_training - Step 44208: {'lr': 0.00040619467323146224, 'samples': 22635008, 'steps': 44208, 'loss/train': 2.2497904300689697} +03/05/2022 17:26:25 - INFO - codeparrot_training - Skipping example with length 672 (seq_length=1024) +03/05/2022 17:26:28 - INFO - codeparrot_training - Step 44209: {'lr': 0.0004061905296802793, 'samples': 22635520, 'steps': 44209, 'loss/train': 2.525846242904663} +03/05/2022 17:26:31 - INFO - codeparrot_training - Step 44210: {'lr': 0.00040618638605871934, 'samples': 22636032, 'steps': 44210, 'loss/train': 1.5402861833572388} +03/05/2022 17:26:34 - INFO - codeparrot_training - Step 44211: {'lr': 0.00040618224236678413, 'samples': 22636544, 'steps': 44211, 'loss/train': 1.8342820405960083} +03/05/2022 17:26:34 - INFO - codeparrot_training - Skipping example with length 486 (seq_length=1024) +03/05/2022 17:26:40 - INFO - codeparrot_training - Step 44212: {'lr': 0.00040617809860447564, 'samples': 22637056, 'steps': 44212, 'loss/train': 1.5014450550079346} +03/05/2022 17:26:43 - INFO - codeparrot_training - Step 44213: {'lr': 0.00040617395477179577, 'samples': 22637568, 'steps': 44213, 'loss/train': 1.8359342813491821} +03/05/2022 17:26:43 - INFO - codeparrot_training - Skipping example with length 855 (seq_length=1024) +03/05/2022 17:26:48 - INFO - codeparrot_training - Step 44214: {'lr': 0.0004061698108687463, 'samples': 22638080, 'steps': 44214, 'loss/train': 1.4320462942123413} +03/05/2022 17:26:51 - INFO - codeparrot_training - Skipping example with length 994 (seq_length=1024) +03/05/2022 17:26:53 - INFO - codeparrot_training - Step 44215: {'lr': 0.00040616566689532905, 'samples': 22638592, 'steps': 44215, 'loss/train': 1.0408624410629272} +03/05/2022 17:26:57 - INFO - codeparrot_training - Step 44216: {'lr': 0.00040616152285154607, 'samples': 22639104, 'steps': 44216, 'loss/train': 1.4557064771652222} +03/05/2022 17:26:59 - INFO - codeparrot_training - Skipping example with length 948 (seq_length=1024) +03/05/2022 17:27:02 - INFO - codeparrot_training - Step 44217: {'lr': 0.000406157378737399, 'samples': 22639616, 'steps': 44217, 'loss/train': 1.5556249618530273} +03/05/2022 17:27:05 - INFO - codeparrot_training - Step 44218: {'lr': 0.0004061532345528899, 'samples': 22640128, 'steps': 44218, 'loss/train': 1.7960172891616821} +03/05/2022 17:27:08 - INFO - codeparrot_training - Step 44219: {'lr': 0.00040614909029802054, 'samples': 22640640, 'steps': 44219, 'loss/train': 1.5340495109558105} +03/05/2022 17:27:09 - INFO - codeparrot_training - Skipping example with length 837 (seq_length=1024) +03/05/2022 17:27:14 - INFO - codeparrot_training - Step 44220: {'lr': 0.0004061449459727928, 'samples': 22641152, 'steps': 44220, 'loss/train': 1.5006184577941895} +03/05/2022 17:27:17 - INFO - codeparrot_training - Step 44221: {'lr': 0.0004061408015772086, 'samples': 22641664, 'steps': 44221, 'loss/train': 0.9735212326049805} +03/05/2022 17:27:17 - INFO - codeparrot_training - Skipping example with length 306 (seq_length=1024) +03/05/2022 17:27:22 - INFO - codeparrot_training - Step 44222: {'lr': 0.0004061366571112698, 'samples': 22642176, 'steps': 44222, 'loss/train': 2.436394691467285} +03/05/2022 17:27:25 - INFO - codeparrot_training - Step 44223: {'lr': 0.0004061325125749781, 'samples': 22642688, 'steps': 44223, 'loss/train': 1.5800831317901611} +03/05/2022 17:27:25 - INFO - codeparrot_training - Skipping example with length 291 (seq_length=1024) +03/05/2022 17:27:30 - INFO - codeparrot_training - Step 44224: {'lr': 0.00040612836796833556, 'samples': 22643200, 'steps': 44224, 'loss/train': 2.2592363357543945} +03/05/2022 17:27:34 - INFO - codeparrot_training - Step 44225: {'lr': 0.000406124223291344, 'samples': 22643712, 'steps': 44225, 'loss/train': 1.3619349002838135} +03/05/2022 17:27:34 - INFO - codeparrot_training - Skipping example with length 216 (seq_length=1024) +03/05/2022 17:27:39 - INFO - codeparrot_training - Step 44226: {'lr': 0.0004061200785440052, 'samples': 22644224, 'steps': 44226, 'loss/train': 1.5682839155197144} +03/05/2022 17:27:42 - INFO - codeparrot_training - Step 44227: {'lr': 0.0004061159337263213, 'samples': 22644736, 'steps': 44227, 'loss/train': 1.3725106716156006} +03/05/2022 17:27:42 - INFO - codeparrot_training - Skipping example with length 57 (seq_length=1024) +03/05/2022 17:27:47 - INFO - codeparrot_training - Step 44228: {'lr': 0.0004061117888382938, 'samples': 22645248, 'steps': 44228, 'loss/train': 1.986914038658142} +03/05/2022 17:27:50 - INFO - codeparrot_training - Step 44229: {'lr': 0.00040610764387992475, 'samples': 22645760, 'steps': 44229, 'loss/train': 1.6895278692245483} +03/05/2022 17:27:50 - INFO - codeparrot_training - Skipping example with length 236 (seq_length=1024) +03/05/2022 17:27:56 - INFO - codeparrot_training - Step 44230: {'lr': 0.0004061034988512161, 'samples': 22646272, 'steps': 44230, 'loss/train': 2.2384049892425537} +03/05/2022 17:27:59 - INFO - codeparrot_training - Step 44231: {'lr': 0.0004060993537521695, 'samples': 22646784, 'steps': 44231, 'loss/train': 1.9903305768966675} +03/05/2022 17:27:59 - INFO - codeparrot_training - Skipping example with length 792 (seq_length=1024) +03/05/2022 17:28:04 - INFO - codeparrot_training - Step 44232: {'lr': 0.00040609520858278704, 'samples': 22647296, 'steps': 44232, 'loss/train': 2.9708755016326904} +03/05/2022 17:28:07 - INFO - codeparrot_training - Skipping example with length 263 (seq_length=1024) +03/05/2022 17:28:10 - INFO - codeparrot_training - Step 44233: {'lr': 0.0004060910633430704, 'samples': 22647808, 'steps': 44233, 'loss/train': 1.7914518117904663} +03/05/2022 17:28:13 - INFO - codeparrot_training - Step 44234: {'lr': 0.0004060869180330216, 'samples': 22648320, 'steps': 44234, 'loss/train': 1.0123517513275146} +03/05/2022 17:28:16 - INFO - codeparrot_training - Skipping example with length 839 (seq_length=1024) +03/05/2022 17:28:18 - INFO - codeparrot_training - Step 44235: {'lr': 0.00040608277265264243, 'samples': 22648832, 'steps': 44235, 'loss/train': 1.6269587278366089} +03/05/2022 17:28:21 - INFO - codeparrot_training - Step 44236: {'lr': 0.0004060786272019348, 'samples': 22649344, 'steps': 44236, 'loss/train': 1.8582043647766113} +03/05/2022 17:28:24 - INFO - codeparrot_training - Step 44237: {'lr': 0.00040607448168090044, 'samples': 22649856, 'steps': 44237, 'loss/train': 1.754643201828003} +03/05/2022 17:28:24 - INFO - codeparrot_training - Skipping example with length 711 (seq_length=1024) +03/05/2022 17:28:30 - INFO - codeparrot_training - Step 44238: {'lr': 0.00040607033608954136, 'samples': 22650368, 'steps': 44238, 'loss/train': 1.1515828371047974} +03/05/2022 17:28:33 - INFO - codeparrot_training - Skipping example with length 783 (seq_length=1024) +03/05/2022 17:28:35 - INFO - codeparrot_training - Step 44239: {'lr': 0.0004060661904278595, 'samples': 22650880, 'steps': 44239, 'loss/train': 0.7805472612380981} +03/05/2022 17:28:38 - INFO - codeparrot_training - Step 44240: {'lr': 0.0004060620446958565, 'samples': 22651392, 'steps': 44240, 'loss/train': 1.9941775798797607} +03/05/2022 17:28:41 - INFO - codeparrot_training - Skipping example with length 924 (seq_length=1024) +03/05/2022 17:28:43 - INFO - codeparrot_training - Step 44241: {'lr': 0.00040605789889353445, 'samples': 22651904, 'steps': 44241, 'loss/train': 1.3391072750091553} +03/05/2022 17:28:47 - INFO - codeparrot_training - Step 44242: {'lr': 0.00040605375302089507, 'samples': 22652416, 'steps': 44242, 'loss/train': 2.1510162353515625} +03/05/2022 17:28:49 - INFO - codeparrot_training - Skipping example with length 154 (seq_length=1024) +03/05/2022 17:28:52 - INFO - codeparrot_training - Step 44243: {'lr': 0.00040604960707794023, 'samples': 22652928, 'steps': 44243, 'loss/train': 1.7340010404586792} +03/05/2022 17:28:55 - INFO - codeparrot_training - Step 44244: {'lr': 0.00040604546106467196, 'samples': 22653440, 'steps': 44244, 'loss/train': 2.185940980911255} +03/05/2022 17:28:58 - INFO - codeparrot_training - Step 44245: {'lr': 0.00040604131498109193, 'samples': 22653952, 'steps': 44245, 'loss/train': 1.9358073472976685} +03/05/2022 17:28:59 - INFO - codeparrot_training - Skipping example with length 527 (seq_length=1024) +03/05/2022 17:29:04 - INFO - codeparrot_training - Step 44246: {'lr': 0.0004060371688272021, 'samples': 22654464, 'steps': 44246, 'loss/train': 3.466005563735962} +03/05/2022 17:29:07 - INFO - codeparrot_training - Step 44247: {'lr': 0.00040603302260300435, 'samples': 22654976, 'steps': 44247, 'loss/train': 1.296218991279602} +03/05/2022 17:29:07 - INFO - codeparrot_training - Skipping example with length 1008 (seq_length=1024) +03/05/2022 17:29:12 - INFO - codeparrot_training - Step 44248: {'lr': 0.00040602887630850055, 'samples': 22655488, 'steps': 44248, 'loss/train': 2.0522453784942627} +03/05/2022 17:29:16 - INFO - codeparrot_training - Step 44249: {'lr': 0.0004060247299436925, 'samples': 22656000, 'steps': 44249, 'loss/train': 1.918499231338501} +03/05/2022 17:29:16 - INFO - codeparrot_training - Skipping example with length 203 (seq_length=1024) +03/05/2022 17:29:21 - INFO - codeparrot_training - Step 44250: {'lr': 0.0004060205835085821, 'samples': 22656512, 'steps': 44250, 'loss/train': 0.5569151043891907} +03/05/2022 17:29:24 - INFO - codeparrot_training - Step 44251: {'lr': 0.00040601643700317126, 'samples': 22657024, 'steps': 44251, 'loss/train': 2.1788322925567627} +03/05/2022 17:29:24 - INFO - codeparrot_training - Skipping example with length 735 (seq_length=1024) +03/05/2022 17:29:29 - INFO - codeparrot_training - Step 44252: {'lr': 0.0004060122904274618, 'samples': 22657536, 'steps': 44252, 'loss/train': 2.37040376663208} +03/05/2022 17:29:32 - INFO - codeparrot_training - Step 44253: {'lr': 0.0004060081437814557, 'samples': 22658048, 'steps': 44253, 'loss/train': 2.1400182247161865} +03/05/2022 17:29:33 - INFO - codeparrot_training - Skipping example with length 835 (seq_length=1024) +03/05/2022 17:29:38 - INFO - codeparrot_training - Step 44254: {'lr': 0.00040600399706515466, 'samples': 22658560, 'steps': 44254, 'loss/train': 1.7423216104507446} +03/05/2022 17:29:41 - INFO - codeparrot_training - Step 44255: {'lr': 0.0004059998502785606, 'samples': 22659072, 'steps': 44255, 'loss/train': 2.2348949909210205} +03/05/2022 17:29:41 - INFO - codeparrot_training - Skipping example with length 11 (seq_length=1024) +03/05/2022 17:29:46 - INFO - codeparrot_training - Step 44256: {'lr': 0.0004059957034216755, 'samples': 22659584, 'steps': 44256, 'loss/train': 1.8978272676467896} +03/05/2022 17:29:49 - INFO - codeparrot_training - Step 44257: {'lr': 0.00040599155649450106, 'samples': 22660096, 'steps': 44257, 'loss/train': 2.0367531776428223} +03/05/2022 17:29:50 - INFO - codeparrot_training - Skipping example with length 212 (seq_length=1024) +03/05/2022 17:29:55 - INFO - codeparrot_training - Step 44258: {'lr': 0.00040598740949703927, 'samples': 22660608, 'steps': 44258, 'loss/train': 1.0906745195388794} +03/05/2022 17:29:58 - INFO - codeparrot_training - Step 44259: {'lr': 0.00040598326242929195, 'samples': 22661120, 'steps': 44259, 'loss/train': 1.2955445051193237} +03/05/2022 17:29:58 - INFO - codeparrot_training - Skipping example with length 570 (seq_length=1024) +03/05/2022 17:30:03 - INFO - codeparrot_training - Step 44260: {'lr': 0.00040597911529126096, 'samples': 22661632, 'steps': 44260, 'loss/train': 1.4031363725662231} +03/05/2022 17:30:06 - INFO - codeparrot_training - Step 44261: {'lr': 0.00040597496808294825, 'samples': 22662144, 'steps': 44261, 'loss/train': 1.5550216436386108} +03/05/2022 17:30:06 - INFO - codeparrot_training - Skipping example with length 412 (seq_length=1024) +03/05/2022 17:30:12 - INFO - codeparrot_training - Step 44262: {'lr': 0.0004059708208043556, 'samples': 22662656, 'steps': 44262, 'loss/train': 2.021296977996826} +03/05/2022 17:30:15 - INFO - codeparrot_training - Step 44263: {'lr': 0.00040596667345548486, 'samples': 22663168, 'steps': 44263, 'loss/train': 1.1141352653503418} +03/05/2022 17:30:15 - INFO - codeparrot_training - Skipping example with length 713 (seq_length=1024) +03/05/2022 17:30:20 - INFO - codeparrot_training - Step 44264: {'lr': 0.00040596252603633797, 'samples': 22663680, 'steps': 44264, 'loss/train': 1.9386119842529297} +03/05/2022 17:30:23 - INFO - codeparrot_training - Step 44265: {'lr': 0.0004059583785469168, 'samples': 22664192, 'steps': 44265, 'loss/train': 0.07800960540771484} +03/05/2022 17:30:24 - INFO - codeparrot_training - Skipping example with length 313 (seq_length=1024) +03/05/2022 17:30:29 - INFO - codeparrot_training - Step 44266: {'lr': 0.00040595423098722315, 'samples': 22664704, 'steps': 44266, 'loss/train': 1.2277593612670898} +03/05/2022 17:30:32 - INFO - codeparrot_training - Step 44267: {'lr': 0.000405950083357259, 'samples': 22665216, 'steps': 44267, 'loss/train': 2.001199245452881} +03/05/2022 17:30:32 - INFO - codeparrot_training - Skipping example with length 949 (seq_length=1024) +03/05/2022 17:30:37 - INFO - codeparrot_training - Step 44268: {'lr': 0.0004059459356570261, 'samples': 22665728, 'steps': 44268, 'loss/train': 1.0723458528518677} +03/05/2022 17:30:41 - INFO - codeparrot_training - Step 44269: {'lr': 0.00040594178788652636, 'samples': 22666240, 'steps': 44269, 'loss/train': 2.020496129989624} +03/05/2022 17:30:41 - INFO - codeparrot_training - Skipping example with length 243 (seq_length=1024) +03/05/2022 17:30:46 - INFO - codeparrot_training - Step 44270: {'lr': 0.00040593764004576166, 'samples': 22666752, 'steps': 44270, 'loss/train': 1.7703511714935303} +03/05/2022 17:30:49 - INFO - codeparrot_training - Step 44271: {'lr': 0.0004059334921347339, 'samples': 22667264, 'steps': 44271, 'loss/train': 1.4566230773925781} +03/05/2022 17:30:50 - INFO - codeparrot_training - Skipping example with length 186 (seq_length=1024) +03/05/2022 17:30:54 - INFO - codeparrot_training - Step 44272: {'lr': 0.00040592934415344486, 'samples': 22667776, 'steps': 44272, 'loss/train': 1.9302688837051392} +03/05/2022 17:30:58 - INFO - codeparrot_training - Step 44273: {'lr': 0.0004059251961018965, 'samples': 22668288, 'steps': 44273, 'loss/train': 1.6099084615707397} +03/05/2022 17:30:58 - INFO - codeparrot_training - Skipping example with length 656 (seq_length=1024) +03/05/2022 17:31:03 - INFO - codeparrot_training - Step 44274: {'lr': 0.00040592104798009066, 'samples': 22668800, 'steps': 44274, 'loss/train': 1.2225390672683716} +03/05/2022 17:31:06 - INFO - codeparrot_training - Step 44275: {'lr': 0.00040591689978802917, 'samples': 22669312, 'steps': 44275, 'loss/train': 2.022488594055176} +03/05/2022 17:31:06 - INFO - codeparrot_training - Skipping example with length 855 (seq_length=1024) +03/05/2022 17:31:11 - INFO - codeparrot_training - Step 44276: {'lr': 0.0004059127515257139, 'samples': 22669824, 'steps': 44276, 'loss/train': 1.531888484954834} +03/05/2022 17:31:15 - INFO - codeparrot_training - Step 44277: {'lr': 0.0004059086031931468, 'samples': 22670336, 'steps': 44277, 'loss/train': 1.5432149171829224} +03/05/2022 17:31:15 - INFO - codeparrot_training - Skipping example with length 286 (seq_length=1024) +03/05/2022 17:31:20 - INFO - codeparrot_training - Step 44278: {'lr': 0.00040590445479032965, 'samples': 22670848, 'steps': 44278, 'loss/train': 1.3079187870025635} +03/05/2022 17:31:23 - INFO - codeparrot_training - Step 44279: {'lr': 0.0004059003063172644, 'samples': 22671360, 'steps': 44279, 'loss/train': 1.9915398359298706} +03/05/2022 17:31:24 - INFO - codeparrot_training - Skipping example with length 995 (seq_length=1024) +03/05/2022 17:31:28 - INFO - codeparrot_training - Step 44280: {'lr': 0.0004058961577739529, 'samples': 22671872, 'steps': 44280, 'loss/train': 2.3141958713531494} +03/05/2022 17:31:31 - INFO - codeparrot_training - Step 44281: {'lr': 0.00040589200916039703, 'samples': 22672384, 'steps': 44281, 'loss/train': 1.3934261798858643} +03/05/2022 17:31:32 - INFO - codeparrot_training - Skipping example with length 676 (seq_length=1024) +03/05/2022 17:31:37 - INFO - codeparrot_training - Step 44282: {'lr': 0.0004058878604765985, 'samples': 22672896, 'steps': 44282, 'loss/train': 1.7871838808059692} +03/05/2022 17:31:40 - INFO - codeparrot_training - Step 44283: {'lr': 0.00040588371172255936, 'samples': 22673408, 'steps': 44283, 'loss/train': 2.268709182739258} +03/05/2022 17:31:40 - INFO - codeparrot_training - Skipping example with length 990 (seq_length=1024) +03/05/2022 17:31:45 - INFO - codeparrot_training - Step 44284: {'lr': 0.0004058795628982814, 'samples': 22673920, 'steps': 44284, 'loss/train': 1.6187013387680054} +03/05/2022 17:31:48 - INFO - codeparrot_training - Step 44285: {'lr': 0.0004058754140037666, 'samples': 22674432, 'steps': 44285, 'loss/train': 1.9640285968780518} +03/05/2022 17:31:49 - INFO - codeparrot_training - Skipping example with length 224 (seq_length=1024) +03/05/2022 17:31:54 - INFO - codeparrot_training - Step 44286: {'lr': 0.00040587126503901664, 'samples': 22674944, 'steps': 44286, 'loss/train': 0.9832519888877869} +03/05/2022 17:31:57 - INFO - codeparrot_training - Step 44287: {'lr': 0.0004058671160040336, 'samples': 22675456, 'steps': 44287, 'loss/train': 0.11797816306352615} +03/05/2022 17:31:58 - INFO - codeparrot_training - Skipping example with length 234 (seq_length=1024) +03/05/2022 17:32:03 - INFO - codeparrot_training - Step 44288: {'lr': 0.0004058629668988192, 'samples': 22675968, 'steps': 44288, 'loss/train': 1.7761491537094116} +03/05/2022 17:32:06 - INFO - codeparrot_training - Step 44289: {'lr': 0.0004058588177233753, 'samples': 22676480, 'steps': 44289, 'loss/train': 2.0908308029174805} +03/05/2022 17:32:09 - INFO - codeparrot_training - Step 44290: {'lr': 0.0004058546684777039, 'samples': 22676992, 'steps': 44290, 'loss/train': 2.1169846057891846} +03/05/2022 17:32:10 - INFO - codeparrot_training - Skipping example with length 856 (seq_length=1024) +03/05/2022 17:32:14 - INFO - codeparrot_training - Step 44291: {'lr': 0.0004058505191618067, 'samples': 22677504, 'steps': 44291, 'loss/train': 2.3530478477478027} +03/05/2022 17:32:18 - INFO - codeparrot_training - Step 44292: {'lr': 0.00040584636977568573, 'samples': 22678016, 'steps': 44292, 'loss/train': 2.309929370880127} +03/05/2022 17:32:18 - INFO - codeparrot_training - Skipping example with length 466 (seq_length=1024) +03/05/2022 17:32:23 - INFO - codeparrot_training - Step 44293: {'lr': 0.0004058422203193428, 'samples': 22678528, 'steps': 44293, 'loss/train': 1.1104202270507812} +03/05/2022 17:32:26 - INFO - codeparrot_training - Step 44294: {'lr': 0.0004058380707927798, 'samples': 22679040, 'steps': 44294, 'loss/train': 1.776258945465088} +03/05/2022 17:32:26 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/05/2022 17:32:31 - INFO - codeparrot_training - Step 44295: {'lr': 0.00040583392119599847, 'samples': 22679552, 'steps': 44295, 'loss/train': 1.234447956085205} +03/05/2022 17:32:34 - INFO - codeparrot_training - Step 44296: {'lr': 0.0004058297715290008, 'samples': 22680064, 'steps': 44296, 'loss/train': 0.7288317680358887} +03/05/2022 17:32:34 - INFO - codeparrot_training - Skipping example with length 572 (seq_length=1024) +03/05/2022 17:32:40 - INFO - codeparrot_training - Step 44297: {'lr': 0.00040582562179178864, 'samples': 22680576, 'steps': 44297, 'loss/train': 1.183093547821045} +03/05/2022 17:32:43 - INFO - codeparrot_training - Step 44298: {'lr': 0.0004058214719843639, 'samples': 22681088, 'steps': 44298, 'loss/train': 1.232583999633789} +03/05/2022 17:32:43 - INFO - codeparrot_training - Skipping example with length 557 (seq_length=1024) +03/05/2022 17:32:48 - INFO - codeparrot_training - Step 44299: {'lr': 0.0004058173221067284, 'samples': 22681600, 'steps': 44299, 'loss/train': 1.935325026512146} +03/05/2022 17:32:51 - INFO - codeparrot_training - Skipping example with length 830 (seq_length=1024) +03/05/2022 17:32:53 - INFO - codeparrot_training - Step 44300: {'lr': 0.00040581317215888403, 'samples': 22682112, 'steps': 44300, 'loss/train': 2.2374117374420166} +03/05/2022 17:32:57 - INFO - codeparrot_training - Step 44301: {'lr': 0.0004058090221408326, 'samples': 22682624, 'steps': 44301, 'loss/train': 1.585386037826538} +03/05/2022 17:32:59 - INFO - codeparrot_training - Skipping example with length 606 (seq_length=1024) +03/05/2022 17:33:02 - INFO - codeparrot_training - Step 44302: {'lr': 0.0004058048720525761, 'samples': 22683136, 'steps': 44302, 'loss/train': 1.8854091167449951} +03/05/2022 17:33:05 - INFO - codeparrot_training - Step 44303: {'lr': 0.00040580072189411626, 'samples': 22683648, 'steps': 44303, 'loss/train': 1.2929693460464478} +03/05/2022 17:33:08 - INFO - codeparrot_training - Skipping example with length 634 (seq_length=1024) +03/05/2022 17:33:10 - INFO - codeparrot_training - Step 44304: {'lr': 0.00040579657166545503, 'samples': 22684160, 'steps': 44304, 'loss/train': 2.6327004432678223} +03/05/2022 17:33:13 - INFO - codeparrot_training - Step 44305: {'lr': 0.0004057924213665943, 'samples': 22684672, 'steps': 44305, 'loss/train': 2.460472583770752} +03/05/2022 17:33:16 - INFO - codeparrot_training - Skipping example with length 263 (seq_length=1024) +03/05/2022 17:33:19 - INFO - codeparrot_training - Step 44306: {'lr': 0.0004057882709975359, 'samples': 22685184, 'steps': 44306, 'loss/train': 1.272274374961853} +03/05/2022 17:33:22 - INFO - codeparrot_training - Step 44307: {'lr': 0.0004057841205582817, 'samples': 22685696, 'steps': 44307, 'loss/train': 1.69756281375885} +03/05/2022 17:33:25 - INFO - codeparrot_training - Skipping example with length 267 (seq_length=1024) +03/05/2022 17:33:27 - INFO - codeparrot_training - Step 44308: {'lr': 0.0004057799700488336, 'samples': 22686208, 'steps': 44308, 'loss/train': 2.4113969802856445} +03/05/2022 17:33:30 - INFO - codeparrot_training - Step 44309: {'lr': 0.0004057758194691934, 'samples': 22686720, 'steps': 44309, 'loss/train': 1.5247724056243896} +03/05/2022 17:33:33 - INFO - codeparrot_training - Skipping example with length 835 (seq_length=1024) +03/05/2022 17:33:36 - INFO - codeparrot_training - Step 44310: {'lr': 0.00040577166881936304, 'samples': 22687232, 'steps': 44310, 'loss/train': 1.5832611322402954} +03/05/2022 17:33:39 - INFO - codeparrot_training - Step 44311: {'lr': 0.0004057675180993444, 'samples': 22687744, 'steps': 44311, 'loss/train': 2.4507689476013184} +03/05/2022 17:33:42 - INFO - codeparrot_training - Step 44312: {'lr': 0.00040576336730913933, 'samples': 22688256, 'steps': 44312, 'loss/train': 1.8700530529022217} +03/05/2022 17:33:42 - INFO - codeparrot_training - Skipping example with length 426 (seq_length=1024) +03/05/2022 17:33:47 - INFO - codeparrot_training - Step 44313: {'lr': 0.00040575921644874966, 'samples': 22688768, 'steps': 44313, 'loss/train': 1.8455910682678223} +03/05/2022 17:33:51 - INFO - codeparrot_training - Step 44314: {'lr': 0.00040575506551817725, 'samples': 22689280, 'steps': 44314, 'loss/train': 1.8016554117202759} +03/05/2022 17:33:51 - INFO - codeparrot_training - Skipping example with length 52 (seq_length=1024) +03/05/2022 17:33:56 - INFO - codeparrot_training - Step 44315: {'lr': 0.00040575091451742405, 'samples': 22689792, 'steps': 44315, 'loss/train': 1.6167188882827759} +03/05/2022 17:33:59 - INFO - codeparrot_training - Step 44316: {'lr': 0.0004057467634464919, 'samples': 22690304, 'steps': 44316, 'loss/train': 2.163917303085327} +03/05/2022 17:33:59 - INFO - codeparrot_training - Skipping example with length 797 (seq_length=1024) +03/05/2022 17:34:05 - INFO - codeparrot_training - Step 44317: {'lr': 0.00040574261230538267, 'samples': 22690816, 'steps': 44317, 'loss/train': 1.5848383903503418} +03/05/2022 17:34:08 - INFO - codeparrot_training - Step 44318: {'lr': 0.0004057384610940982, 'samples': 22691328, 'steps': 44318, 'loss/train': 0.7450827956199646} +03/05/2022 17:34:08 - INFO - codeparrot_training - Skipping example with length 509 (seq_length=1024) +03/05/2022 17:34:13 - INFO - codeparrot_training - Step 44319: {'lr': 0.0004057343098126404, 'samples': 22691840, 'steps': 44319, 'loss/train': 1.5624040365219116} +03/05/2022 17:34:16 - INFO - codeparrot_training - Step 44320: {'lr': 0.0004057301584610111, 'samples': 22692352, 'steps': 44320, 'loss/train': 1.9303901195526123} +03/05/2022 17:34:16 - INFO - codeparrot_training - Skipping example with length 517 (seq_length=1024) +03/05/2022 17:34:21 - INFO - codeparrot_training - Step 44321: {'lr': 0.00040572600703921223, 'samples': 22692864, 'steps': 44321, 'loss/train': 2.0855484008789062} +03/05/2022 17:34:24 - INFO - codeparrot_training - Skipping example with length 758 (seq_length=1024) +03/05/2022 17:34:27 - INFO - codeparrot_training - Step 44322: {'lr': 0.0004057218555472456, 'samples': 22693376, 'steps': 44322, 'loss/train': 1.8772763013839722} +03/05/2022 17:34:30 - INFO - codeparrot_training - Step 44323: {'lr': 0.0004057177039851131, 'samples': 22693888, 'steps': 44323, 'loss/train': 2.0798118114471436} +03/05/2022 17:34:33 - INFO - codeparrot_training - Skipping example with length 890 (seq_length=1024) +03/05/2022 17:34:35 - INFO - codeparrot_training - Step 44324: {'lr': 0.00040571355235281657, 'samples': 22694400, 'steps': 44324, 'loss/train': 1.8698501586914062} +03/05/2022 17:34:38 - INFO - codeparrot_training - Step 44325: {'lr': 0.00040570940065035797, 'samples': 22694912, 'steps': 44325, 'loss/train': 2.201124906539917} +03/05/2022 17:34:42 - INFO - codeparrot_training - Step 44326: {'lr': 0.0004057052488777392, 'samples': 22695424, 'steps': 44326, 'loss/train': 1.5931663513183594} +03/05/2022 17:34:42 - INFO - codeparrot_training - Skipping example with length 462 (seq_length=1024) +03/05/2022 17:34:47 - INFO - codeparrot_training - Step 44327: {'lr': 0.0004057010970349619, 'samples': 22695936, 'steps': 44327, 'loss/train': 1.904085636138916} +03/05/2022 17:34:50 - INFO - codeparrot_training - Skipping example with length 928 (seq_length=1024) +03/05/2022 17:34:52 - INFO - codeparrot_training - Step 44328: {'lr': 0.00040569694512202815, 'samples': 22696448, 'steps': 44328, 'loss/train': 1.9694002866744995} +03/05/2022 17:34:55 - INFO - codeparrot_training - Step 44329: {'lr': 0.00040569279313893976, 'samples': 22696960, 'steps': 44329, 'loss/train': 2.118342161178589} +03/05/2022 17:34:58 - INFO - codeparrot_training - Skipping example with length 474 (seq_length=1024) +03/05/2022 17:35:01 - INFO - codeparrot_training - Step 44330: {'lr': 0.0004056886410856986, 'samples': 22697472, 'steps': 44330, 'loss/train': 1.8492714166641235} +03/05/2022 17:35:04 - INFO - codeparrot_training - Step 44331: {'lr': 0.0004056844889623065, 'samples': 22697984, 'steps': 44331, 'loss/train': 1.0148179531097412} +03/05/2022 17:35:06 - INFO - codeparrot_training - Skipping example with length 923 (seq_length=1024) +03/05/2022 17:35:09 - INFO - codeparrot_training - Step 44332: {'lr': 0.0004056803367687654, 'samples': 22698496, 'steps': 44332, 'loss/train': 3.929713487625122} +03/05/2022 17:35:12 - INFO - codeparrot_training - Step 44333: {'lr': 0.0004056761845050772, 'samples': 22699008, 'steps': 44333, 'loss/train': 1.8974043130874634} +03/05/2022 17:35:15 - INFO - codeparrot_training - Skipping example with length 773 (seq_length=1024) +03/05/2022 17:35:18 - INFO - codeparrot_training - Step 44334: {'lr': 0.0004056720321712436, 'samples': 22699520, 'steps': 44334, 'loss/train': 2.4527041912078857} +03/05/2022 17:35:21 - INFO - codeparrot_training - Step 44335: {'lr': 0.00040566787976726665, 'samples': 22700032, 'steps': 44335, 'loss/train': 2.197052001953125} +03/05/2022 17:35:23 - INFO - codeparrot_training - Skipping example with length 71 (seq_length=1024) +03/05/2022 17:35:26 - INFO - codeparrot_training - Step 44336: {'lr': 0.00040566372729314813, 'samples': 22700544, 'steps': 44336, 'loss/train': 1.222960114479065} +03/05/2022 17:35:29 - INFO - codeparrot_training - Step 44337: {'lr': 0.00040565957474889, 'samples': 22701056, 'steps': 44337, 'loss/train': 1.3258520364761353} +03/05/2022 17:35:32 - INFO - codeparrot_training - Skipping example with length 124 (seq_length=1024) +03/05/2022 17:35:35 - INFO - codeparrot_training - Step 44338: {'lr': 0.000405655422134494, 'samples': 22701568, 'steps': 44338, 'loss/train': 2.017596483230591} +03/05/2022 17:35:38 - INFO - codeparrot_training - Step 44339: {'lr': 0.0004056512694499621, 'samples': 22702080, 'steps': 44339, 'loss/train': 1.9503427743911743} +03/05/2022 17:35:40 - INFO - codeparrot_training - Skipping example with length 826 (seq_length=1024) +03/05/2022 17:35:43 - INFO - codeparrot_training - Step 44340: {'lr': 0.0004056471166952961, 'samples': 22702592, 'steps': 44340, 'loss/train': 1.949346899986267} +03/05/2022 17:35:46 - INFO - codeparrot_training - Step 44341: {'lr': 0.0004056429638704979, 'samples': 22703104, 'steps': 44341, 'loss/train': 2.3849599361419678} +03/05/2022 17:35:48 - INFO - codeparrot_training - Skipping example with length 630 (seq_length=1024) +03/05/2022 17:35:51 - INFO - codeparrot_training - Step 44342: {'lr': 0.0004056388109755695, 'samples': 22703616, 'steps': 44342, 'loss/train': 1.7819781303405762} +03/05/2022 17:35:55 - INFO - codeparrot_training - Step 44343: {'lr': 0.0004056346580105126, 'samples': 22704128, 'steps': 44343, 'loss/train': 1.524506688117981} +03/05/2022 17:35:57 - INFO - codeparrot_training - Skipping example with length 426 (seq_length=1024) +03/05/2022 17:36:00 - INFO - codeparrot_training - Step 44344: {'lr': 0.00040563050497532905, 'samples': 22704640, 'steps': 44344, 'loss/train': 1.819542407989502} +03/05/2022 17:36:03 - INFO - codeparrot_training - Step 44345: {'lr': 0.00040562635187002083, 'samples': 22705152, 'steps': 44345, 'loss/train': 1.957534909248352} +03/05/2022 17:36:05 - INFO - codeparrot_training - Skipping example with length 858 (seq_length=1024) +03/05/2022 17:36:09 - INFO - codeparrot_training - Step 44346: {'lr': 0.0004056221986945898, 'samples': 22705664, 'steps': 44346, 'loss/train': 1.6556142568588257} +03/05/2022 17:36:12 - INFO - codeparrot_training - Step 44347: {'lr': 0.0004056180454490378, 'samples': 22706176, 'steps': 44347, 'loss/train': 0.5252249836921692} +03/05/2022 17:36:14 - INFO - codeparrot_training - Skipping example with length 163 (seq_length=1024) +03/05/2022 17:36:17 - INFO - codeparrot_training - Step 44348: {'lr': 0.00040561389213336673, 'samples': 22706688, 'steps': 44348, 'loss/train': 1.2725273370742798} +03/05/2022 17:36:20 - INFO - codeparrot_training - Step 44349: {'lr': 0.00040560973874757844, 'samples': 22707200, 'steps': 44349, 'loss/train': 1.342358946800232} +03/05/2022 17:36:23 - INFO - codeparrot_training - Skipping example with length 984 (seq_length=1024) +03/05/2022 17:36:26 - INFO - codeparrot_training - Step 44350: {'lr': 0.0004056055852916748, 'samples': 22707712, 'steps': 44350, 'loss/train': 3.893906831741333} +03/05/2022 17:36:29 - INFO - codeparrot_training - Step 44351: {'lr': 0.0004056014317656577, 'samples': 22708224, 'steps': 44351, 'loss/train': 1.267573356628418} +03/05/2022 17:36:32 - INFO - codeparrot_training - Skipping example with length 744 (seq_length=1024) +03/05/2022 17:36:34 - INFO - codeparrot_training - Step 44352: {'lr': 0.00040559727816952897, 'samples': 22708736, 'steps': 44352, 'loss/train': 1.8451135158538818} +03/05/2022 17:36:37 - INFO - codeparrot_training - Step 44353: {'lr': 0.0004055931245032904, 'samples': 22709248, 'steps': 44353, 'loss/train': 1.0475108623504639} +03/05/2022 17:36:40 - INFO - codeparrot_training - Skipping example with length 722 (seq_length=1024) +03/05/2022 17:36:43 - INFO - codeparrot_training - Step 44354: {'lr': 0.0004055889707669441, 'samples': 22709760, 'steps': 44354, 'loss/train': 1.7637732028961182} +03/05/2022 17:36:46 - INFO - codeparrot_training - Step 44355: {'lr': 0.0004055848169604919, 'samples': 22710272, 'steps': 44355, 'loss/train': 1.0791488885879517} +03/05/2022 17:36:49 - INFO - codeparrot_training - Skipping example with length 514 (seq_length=1024) +03/05/2022 17:36:51 - INFO - codeparrot_training - Step 44356: {'lr': 0.00040558066308393536, 'samples': 22710784, 'steps': 44356, 'loss/train': 0.8122849464416504} +03/05/2022 17:36:54 - INFO - codeparrot_training - Step 44357: {'lr': 0.0004055765091372767, 'samples': 22711296, 'steps': 44357, 'loss/train': 1.8502172231674194} +03/05/2022 17:36:57 - INFO - codeparrot_training - Skipping example with length 773 (seq_length=1024) +03/05/2022 17:37:00 - INFO - codeparrot_training - Step 44358: {'lr': 0.0004055723551205177, 'samples': 22711808, 'steps': 44358, 'loss/train': 1.2112109661102295} +03/05/2022 17:37:03 - INFO - codeparrot_training - Step 44359: {'lr': 0.0004055682010336601, 'samples': 22712320, 'steps': 44359, 'loss/train': 1.5805211067199707} +03/05/2022 17:37:06 - INFO - codeparrot_training - Skipping example with length 350 (seq_length=1024) +03/05/2022 17:37:08 - INFO - codeparrot_training - Step 44360: {'lr': 0.0004055640468767059, 'samples': 22712832, 'steps': 44360, 'loss/train': 1.5246962308883667} +03/05/2022 17:37:11 - INFO - codeparrot_training - Step 44361: {'lr': 0.000405559892649657, 'samples': 22713344, 'steps': 44361, 'loss/train': 2.3209383487701416} +03/05/2022 17:37:14 - INFO - codeparrot_training - Skipping example with length 397 (seq_length=1024) +03/05/2022 17:37:17 - INFO - codeparrot_training - Step 44362: {'lr': 0.00040555573835251513, 'samples': 22713856, 'steps': 44362, 'loss/train': 0.39647552371025085} +03/05/2022 17:37:20 - INFO - codeparrot_training - Step 44363: {'lr': 0.00040555158398528237, 'samples': 22714368, 'steps': 44363, 'loss/train': 1.816434383392334} +03/05/2022 17:37:23 - INFO - codeparrot_training - Skipping example with length 744 (seq_length=1024) +03/05/2022 17:37:25 - INFO - codeparrot_training - Step 44364: {'lr': 0.0004055474295479603, 'samples': 22714880, 'steps': 44364, 'loss/train': 1.4701064825057983} +03/05/2022 17:37:28 - INFO - codeparrot_training - Step 44365: {'lr': 0.00040554327504055106, 'samples': 22715392, 'steps': 44365, 'loss/train': 1.8862247467041016} +03/05/2022 17:37:31 - INFO - codeparrot_training - Skipping example with length 815 (seq_length=1024) +03/05/2022 17:37:34 - INFO - codeparrot_training - Step 44366: {'lr': 0.0004055391204630564, 'samples': 22715904, 'steps': 44366, 'loss/train': 1.5576316118240356} +03/05/2022 17:37:37 - INFO - codeparrot_training - Step 44367: {'lr': 0.0004055349658154782, 'samples': 22716416, 'steps': 44367, 'loss/train': 1.4703962802886963} +03/05/2022 17:37:39 - INFO - codeparrot_training - Skipping example with length 591 (seq_length=1024) +03/05/2022 17:37:42 - INFO - codeparrot_training - Step 44368: {'lr': 0.00040553081109781844, 'samples': 22716928, 'steps': 44368, 'loss/train': 1.6526309251785278} +03/05/2022 17:37:45 - INFO - codeparrot_training - Step 44369: {'lr': 0.0004055266563100788, 'samples': 22717440, 'steps': 44369, 'loss/train': 1.5990407466888428} +03/05/2022 17:37:48 - INFO - codeparrot_training - Skipping example with length 1014 (seq_length=1024) +03/05/2022 17:37:51 - INFO - codeparrot_training - Step 44370: {'lr': 0.00040552250145226124, 'samples': 22717952, 'steps': 44370, 'loss/train': 1.7175841331481934} +03/05/2022 17:37:54 - INFO - codeparrot_training - Step 44371: {'lr': 0.0004055183465243676, 'samples': 22718464, 'steps': 44371, 'loss/train': 3.3338072299957275} +03/05/2022 17:37:56 - INFO - codeparrot_training - Skipping example with length 622 (seq_length=1024) +03/05/2022 17:37:59 - INFO - codeparrot_training - Step 44372: {'lr': 0.0004055141915263999, 'samples': 22718976, 'steps': 44372, 'loss/train': 1.2665890455245972} +03/05/2022 17:38:02 - INFO - codeparrot_training - Step 44373: {'lr': 0.0004055100364583598, 'samples': 22719488, 'steps': 44373, 'loss/train': 2.001713752746582} +03/05/2022 17:38:05 - INFO - codeparrot_training - Skipping example with length 61 (seq_length=1024) +03/05/2022 17:38:07 - INFO - codeparrot_training - Step 44374: {'lr': 0.0004055058813202493, 'samples': 22720000, 'steps': 44374, 'loss/train': 0.5432716608047485} +03/05/2022 17:38:11 - INFO - codeparrot_training - Step 44375: {'lr': 0.0004055017261120704, 'samples': 22720512, 'steps': 44375, 'loss/train': 1.7799867391586304} +03/05/2022 17:38:13 - INFO - codeparrot_training - Skipping example with length 566 (seq_length=1024) +03/05/2022 17:38:16 - INFO - codeparrot_training - Step 44376: {'lr': 0.00040549757083382465, 'samples': 22721024, 'steps': 44376, 'loss/train': 1.4332581758499146} +03/05/2022 17:38:19 - INFO - codeparrot_training - Step 44377: {'lr': 0.00040549341548551415, 'samples': 22721536, 'steps': 44377, 'loss/train': 1.5444945096969604} +03/05/2022 17:38:21 - INFO - codeparrot_training - Skipping example with length 491 (seq_length=1024) +03/05/2022 17:38:24 - INFO - codeparrot_training - Step 44378: {'lr': 0.0004054892600671407, 'samples': 22722048, 'steps': 44378, 'loss/train': 0.9422029852867126} +03/05/2022 17:38:28 - INFO - codeparrot_training - Step 44379: {'lr': 0.00040548510457870623, 'samples': 22722560, 'steps': 44379, 'loss/train': 1.5879135131835938} +03/05/2022 17:38:30 - INFO - codeparrot_training - Skipping example with length 195 (seq_length=1024) +03/05/2022 17:38:33 - INFO - codeparrot_training - Step 44380: {'lr': 0.00040548094902021257, 'samples': 22723072, 'steps': 44380, 'loss/train': 1.230666160583496} +03/05/2022 17:38:36 - INFO - codeparrot_training - Step 44381: {'lr': 0.00040547679339166155, 'samples': 22723584, 'steps': 44381, 'loss/train': 1.8003977537155151} +03/05/2022 17:38:39 - INFO - codeparrot_training - Skipping example with length 237 (seq_length=1024) +03/05/2022 17:38:41 - INFO - codeparrot_training - Step 44382: {'lr': 0.0004054726376930551, 'samples': 22724096, 'steps': 44382, 'loss/train': 1.00468111038208} +03/05/2022 17:38:45 - INFO - codeparrot_training - Step 44383: {'lr': 0.0004054684819243951, 'samples': 22724608, 'steps': 44383, 'loss/train': 1.1212267875671387} +03/05/2022 17:38:47 - INFO - codeparrot_training - Skipping example with length 856 (seq_length=1024) +03/05/2022 17:38:50 - INFO - codeparrot_training - Step 44384: {'lr': 0.0004054643260856834, 'samples': 22725120, 'steps': 44384, 'loss/train': 2.0551135540008545} +03/05/2022 17:38:53 - INFO - codeparrot_training - Step 44385: {'lr': 0.00040546017017692183, 'samples': 22725632, 'steps': 44385, 'loss/train': 2.2011301517486572} +03/05/2022 17:38:55 - INFO - codeparrot_training - Skipping example with length 905 (seq_length=1024) +03/05/2022 17:38:58 - INFO - codeparrot_training - Step 44386: {'lr': 0.00040545601419811236, 'samples': 22726144, 'steps': 44386, 'loss/train': 1.9660754203796387} +03/05/2022 17:39:01 - INFO - codeparrot_training - Step 44387: {'lr': 0.00040545185814925676, 'samples': 22726656, 'steps': 44387, 'loss/train': 1.4196072816848755} +03/05/2022 17:39:04 - INFO - codeparrot_training - Skipping example with length 922 (seq_length=1024) +03/05/2022 17:39:07 - INFO - codeparrot_training - Step 44388: {'lr': 0.00040544770203035705, 'samples': 22727168, 'steps': 44388, 'loss/train': 1.6499969959259033} +03/05/2022 17:39:10 - INFO - codeparrot_training - Step 44389: {'lr': 0.0004054435458414149, 'samples': 22727680, 'steps': 44389, 'loss/train': 1.765320897102356} +03/05/2022 17:39:12 - INFO - codeparrot_training - Skipping example with length 296 (seq_length=1024) +03/05/2022 17:39:15 - INFO - codeparrot_training - Step 44390: {'lr': 0.0004054393895824323, 'samples': 22728192, 'steps': 44390, 'loss/train': 2.1852099895477295} +03/05/2022 17:39:18 - INFO - codeparrot_training - Step 44391: {'lr': 0.00040543523325341116, 'samples': 22728704, 'steps': 44391, 'loss/train': 1.5233827829360962} +03/05/2022 17:39:21 - INFO - codeparrot_training - Skipping example with length 376 (seq_length=1024) +03/05/2022 17:39:24 - INFO - codeparrot_training - Step 44392: {'lr': 0.0004054310768543532, 'samples': 22729216, 'steps': 44392, 'loss/train': 1.9495404958724976} +03/05/2022 17:39:27 - INFO - codeparrot_training - Step 44393: {'lr': 0.00040542692038526054, 'samples': 22729728, 'steps': 44393, 'loss/train': 1.916357398033142} +03/05/2022 17:39:29 - INFO - codeparrot_training - Skipping example with length 547 (seq_length=1024) +03/05/2022 17:39:32 - INFO - codeparrot_training - Step 44394: {'lr': 0.0004054227638461348, 'samples': 22730240, 'steps': 44394, 'loss/train': 1.5309916734695435} +03/05/2022 17:39:35 - INFO - codeparrot_training - Step 44395: {'lr': 0.000405418607236978, 'samples': 22730752, 'steps': 44395, 'loss/train': 1.7917648553848267} +03/05/2022 17:39:38 - INFO - codeparrot_training - Skipping example with length 295 (seq_length=1024) +03/05/2022 17:39:41 - INFO - codeparrot_training - Step 44396: {'lr': 0.00040541445055779197, 'samples': 22731264, 'steps': 44396, 'loss/train': 1.0602810382843018} +03/05/2022 17:39:44 - INFO - codeparrot_training - Step 44397: {'lr': 0.0004054102938085786, 'samples': 22731776, 'steps': 44397, 'loss/train': 1.4008042812347412} +03/05/2022 17:39:47 - INFO - codeparrot_training - Skipping example with length 1003 (seq_length=1024) +03/05/2022 17:39:49 - INFO - codeparrot_training - Step 44398: {'lr': 0.0004054061369893397, 'samples': 22732288, 'steps': 44398, 'loss/train': 1.741020917892456} +03/05/2022 17:39:52 - INFO - codeparrot_training - Step 44399: {'lr': 0.0004054019801000772, 'samples': 22732800, 'steps': 44399, 'loss/train': 1.0626550912857056} +03/05/2022 17:39:56 - INFO - codeparrot_training - Step 44400: {'lr': 0.00040539782314079304, 'samples': 22733312, 'steps': 44400, 'loss/train': 1.6804600954055786} +03/05/2022 17:39:56 - INFO - codeparrot_training - Skipping example with length 60 (seq_length=1024) +03/05/2022 17:40:01 - INFO - codeparrot_training - Step 44401: {'lr': 0.000405393666111489, 'samples': 22733824, 'steps': 44401, 'loss/train': 1.4758058786392212} +03/05/2022 17:40:04 - INFO - codeparrot_training - Step 44402: {'lr': 0.0004053895090121669, 'samples': 22734336, 'steps': 44402, 'loss/train': 1.4195891618728638} +03/05/2022 17:40:04 - INFO - codeparrot_training - Skipping example with length 284 (seq_length=1024) +03/05/2022 17:40:09 - INFO - codeparrot_training - Step 44403: {'lr': 0.00040538535184282877, 'samples': 22734848, 'steps': 44403, 'loss/train': 1.4407435655593872} +03/05/2022 17:40:12 - INFO - codeparrot_training - Skipping example with length 859 (seq_length=1024) +03/05/2022 17:40:15 - INFO - codeparrot_training - Step 44404: {'lr': 0.00040538119460347636, 'samples': 22735360, 'steps': 44404, 'loss/train': 1.682533860206604} +03/05/2022 17:40:18 - INFO - codeparrot_training - Step 44405: {'lr': 0.0004053770372941116, 'samples': 22735872, 'steps': 44405, 'loss/train': 2.895479440689087} +03/05/2022 17:40:21 - INFO - codeparrot_training - Step 44406: {'lr': 0.00040537287991473627, 'samples': 22736384, 'steps': 44406, 'loss/train': 1.7776925563812256} +03/05/2022 17:40:22 - INFO - codeparrot_training - Skipping example with length 863 (seq_length=1024) +03/05/2022 17:40:27 - INFO - codeparrot_training - Step 44407: {'lr': 0.0004053687224653524, 'samples': 22736896, 'steps': 44407, 'loss/train': 0.8730904459953308} +03/05/2022 17:40:30 - INFO - codeparrot_training - Step 44408: {'lr': 0.0004053645649459617, 'samples': 22737408, 'steps': 44408, 'loss/train': 1.8097878694534302} +03/05/2022 17:40:30 - INFO - codeparrot_training - Skipping example with length 347 (seq_length=1024) +03/05/2022 17:40:35 - INFO - codeparrot_training - Step 44409: {'lr': 0.0004053604073565662, 'samples': 22737920, 'steps': 44409, 'loss/train': 1.6748708486557007} +03/05/2022 17:40:38 - INFO - codeparrot_training - Step 44410: {'lr': 0.0004053562496971677, 'samples': 22738432, 'steps': 44410, 'loss/train': 1.4018229246139526} +03/05/2022 17:40:38 - INFO - codeparrot_training - Skipping example with length 165 (seq_length=1024) +03/05/2022 17:40:43 - INFO - codeparrot_training - Step 44411: {'lr': 0.00040535209196776803, 'samples': 22738944, 'steps': 44411, 'loss/train': 1.7083628177642822} +03/05/2022 17:40:47 - INFO - codeparrot_training - Step 44412: {'lr': 0.00040534793416836915, 'samples': 22739456, 'steps': 44412, 'loss/train': 1.480506420135498} +03/05/2022 17:40:47 - INFO - codeparrot_training - Skipping example with length 923 (seq_length=1024) +03/05/2022 17:40:52 - INFO - codeparrot_training - Step 44413: {'lr': 0.00040534377629897276, 'samples': 22739968, 'steps': 44413, 'loss/train': 1.6851592063903809} +03/05/2022 17:40:55 - INFO - codeparrot_training - Step 44414: {'lr': 0.000405339618359581, 'samples': 22740480, 'steps': 44414, 'loss/train': 0.8146441578865051} +03/05/2022 17:40:55 - INFO - codeparrot_training - Skipping example with length 250 (seq_length=1024) +03/05/2022 17:41:00 - INFO - codeparrot_training - Step 44415: {'lr': 0.0004053354603501956, 'samples': 22740992, 'steps': 44415, 'loss/train': 2.245663642883301} +03/05/2022 17:41:03 - INFO - codeparrot_training - Skipping example with length 821 (seq_length=1024) +03/05/2022 17:41:06 - INFO - codeparrot_training - Step 44416: {'lr': 0.0004053313022708184, 'samples': 22741504, 'steps': 44416, 'loss/train': 1.7629432678222656} +03/05/2022 17:41:09 - INFO - codeparrot_training - Step 44417: {'lr': 0.00040532714412145135, 'samples': 22742016, 'steps': 44417, 'loss/train': 2.186368227005005} +03/05/2022 17:41:12 - INFO - codeparrot_training - Skipping example with length 588 (seq_length=1024) +03/05/2022 17:41:14 - INFO - codeparrot_training - Step 44418: {'lr': 0.0004053229859020962, 'samples': 22742528, 'steps': 44418, 'loss/train': 1.169312834739685} +03/05/2022 17:41:17 - INFO - codeparrot_training - Step 44419: {'lr': 0.00040531882761275496, 'samples': 22743040, 'steps': 44419, 'loss/train': 1.262188196182251} +03/05/2022 17:41:20 - INFO - codeparrot_training - Skipping example with length 736 (seq_length=1024) +03/05/2022 17:41:23 - INFO - codeparrot_training - Step 44420: {'lr': 0.00040531466925342947, 'samples': 22743552, 'steps': 44420, 'loss/train': 1.795200228691101} +03/05/2022 17:41:26 - INFO - codeparrot_training - Step 44421: {'lr': 0.0004053105108241216, 'samples': 22744064, 'steps': 44421, 'loss/train': 1.1145625114440918} +03/05/2022 17:41:29 - INFO - codeparrot_training - Step 44422: {'lr': 0.0004053063523248331, 'samples': 22744576, 'steps': 44422, 'loss/train': 1.5458983182907104} +03/05/2022 17:41:29 - INFO - codeparrot_training - Skipping example with length 339 (seq_length=1024) +03/05/2022 17:41:34 - INFO - codeparrot_training - Step 44423: {'lr': 0.0004053021937555661, 'samples': 22745088, 'steps': 44423, 'loss/train': 1.9506492614746094} +03/05/2022 17:41:37 - INFO - codeparrot_training - Step 44424: {'lr': 0.00040529803511632224, 'samples': 22745600, 'steps': 44424, 'loss/train': 2.1964194774627686} +03/05/2022 17:41:37 - INFO - codeparrot_training - Skipping example with length 791 (seq_length=1024) +03/05/2022 17:41:43 - INFO - codeparrot_training - Step 44425: {'lr': 0.0004052938764071035, 'samples': 22746112, 'steps': 44425, 'loss/train': 1.9158647060394287} +03/05/2022 17:41:46 - INFO - codeparrot_training - Step 44426: {'lr': 0.00040528971762791177, 'samples': 22746624, 'steps': 44426, 'loss/train': 1.4619933366775513} +03/05/2022 17:41:46 - INFO - codeparrot_training - Skipping example with length 831 (seq_length=1024) +03/05/2022 17:41:51 - INFO - codeparrot_training - Step 44427: {'lr': 0.0004052855587787488, 'samples': 22747136, 'steps': 44427, 'loss/train': 2.0949246883392334} +03/05/2022 17:41:54 - INFO - codeparrot_training - Step 44428: {'lr': 0.0004052813998596167, 'samples': 22747648, 'steps': 44428, 'loss/train': 0.8605336546897888} +03/05/2022 17:41:54 - INFO - codeparrot_training - Skipping example with length 442 (seq_length=1024) +03/05/2022 17:42:00 - INFO - codeparrot_training - Step 44429: {'lr': 0.0004052772408705171, 'samples': 22748160, 'steps': 44429, 'loss/train': 1.686609148979187} +03/05/2022 17:42:03 - INFO - codeparrot_training - Skipping example with length 200 (seq_length=1024) +03/05/2022 17:42:05 - INFO - codeparrot_training - Step 44430: {'lr': 0.000405273081811452, 'samples': 22748672, 'steps': 44430, 'loss/train': 1.2404718399047852} +03/05/2022 17:42:08 - INFO - codeparrot_training - Step 44431: {'lr': 0.0004052689226824232, 'samples': 22749184, 'steps': 44431, 'loss/train': 1.9039555788040161} +03/05/2022 17:42:11 - INFO - codeparrot_training - Skipping example with length 528 (seq_length=1024) +03/05/2022 17:42:13 - INFO - codeparrot_training - Step 44432: {'lr': 0.0004052647634834327, 'samples': 22749696, 'steps': 44432, 'loss/train': 1.4113500118255615} +03/05/2022 17:42:17 - INFO - codeparrot_training - Step 44433: {'lr': 0.00040526060421448216, 'samples': 22750208, 'steps': 44433, 'loss/train': 1.552963137626648} +03/05/2022 17:42:19 - INFO - codeparrot_training - Skipping example with length 727 (seq_length=1024) +03/05/2022 17:42:22 - INFO - codeparrot_training - Step 44434: {'lr': 0.00040525644487557366, 'samples': 22750720, 'steps': 44434, 'loss/train': 1.4932875633239746} +03/05/2022 17:42:25 - INFO - codeparrot_training - Step 44435: {'lr': 0.000405252285466709, 'samples': 22751232, 'steps': 44435, 'loss/train': 1.7451249361038208} +03/05/2022 17:42:28 - INFO - codeparrot_training - Skipping example with length 239 (seq_length=1024) +03/05/2022 17:42:30 - INFO - codeparrot_training - Step 44436: {'lr': 0.0004052481259878901, 'samples': 22751744, 'steps': 44436, 'loss/train': 2.5450356006622314} +03/05/2022 17:42:34 - INFO - codeparrot_training - Step 44437: {'lr': 0.00040524396643911874, 'samples': 22752256, 'steps': 44437, 'loss/train': 1.4278934001922607} +03/05/2022 17:42:36 - INFO - codeparrot_training - Skipping example with length 769 (seq_length=1024) +03/05/2022 17:42:39 - INFO - codeparrot_training - Step 44438: {'lr': 0.00040523980682039684, 'samples': 22752768, 'steps': 44438, 'loss/train': 1.8868465423583984} +03/05/2022 17:42:42 - INFO - codeparrot_training - Step 44439: {'lr': 0.00040523564713172634, 'samples': 22753280, 'steps': 44439, 'loss/train': 1.4967641830444336} +03/05/2022 17:42:45 - INFO - codeparrot_training - Skipping example with length 243 (seq_length=1024) +03/05/2022 17:42:47 - INFO - codeparrot_training - Step 44440: {'lr': 0.000405231487373109, 'samples': 22753792, 'steps': 44440, 'loss/train': 1.5777785778045654} +03/05/2022 17:42:50 - INFO - codeparrot_training - Step 44441: {'lr': 0.00040522732754454674, 'samples': 22754304, 'steps': 44441, 'loss/train': 1.3023382425308228} +03/05/2022 17:42:53 - INFO - codeparrot_training - Skipping example with length 21 (seq_length=1024) +03/05/2022 17:42:56 - INFO - codeparrot_training - Step 44442: {'lr': 0.0004052231676460415, 'samples': 22754816, 'steps': 44442, 'loss/train': 1.3849462270736694} +03/05/2022 17:42:59 - INFO - codeparrot_training - Step 44443: {'lr': 0.000405219007677595, 'samples': 22755328, 'steps': 44443, 'loss/train': 1.744574785232544} +03/05/2022 17:43:01 - INFO - codeparrot_training - Skipping example with length 668 (seq_length=1024) +03/05/2022 17:43:04 - INFO - codeparrot_training - Step 44444: {'lr': 0.0004052148476392093, 'samples': 22755840, 'steps': 44444, 'loss/train': 0.7807927131652832} +03/05/2022 17:43:07 - INFO - codeparrot_training - Step 44445: {'lr': 0.00040521068753088615, 'samples': 22756352, 'steps': 44445, 'loss/train': 1.7067683935165405} +03/05/2022 17:43:10 - INFO - codeparrot_training - Skipping example with length 477 (seq_length=1024) +03/05/2022 17:43:13 - INFO - codeparrot_training - Step 44446: {'lr': 0.0004052065273526274, 'samples': 22756864, 'steps': 44446, 'loss/train': 0.7421154379844666} +03/05/2022 17:43:16 - INFO - codeparrot_training - Step 44447: {'lr': 0.0004052023671044351, 'samples': 22757376, 'steps': 44447, 'loss/train': 1.2303489446640015} +03/05/2022 17:43:18 - INFO - codeparrot_training - Skipping example with length 608 (seq_length=1024) +03/05/2022 17:43:21 - INFO - codeparrot_training - Step 44448: {'lr': 0.0004051982067863109, 'samples': 22757888, 'steps': 44448, 'loss/train': 1.8352282047271729} +03/05/2022 17:43:25 - INFO - codeparrot_training - Step 44449: {'lr': 0.0004051940463982569, 'samples': 22758400, 'steps': 44449, 'loss/train': 2.184796094894409} +03/05/2022 17:43:27 - INFO - codeparrot_training - Skipping example with length 442 (seq_length=1024) +03/05/2022 17:43:30 - INFO - codeparrot_training - Step 44450: {'lr': 0.0004051898859402748, 'samples': 22758912, 'steps': 44450, 'loss/train': 1.5347357988357544} +03/05/2022 17:43:33 - INFO - codeparrot_training - Step 44451: {'lr': 0.00040518572541236653, 'samples': 22759424, 'steps': 44451, 'loss/train': 1.9890998601913452} +03/05/2022 17:43:36 - INFO - codeparrot_training - Skipping example with length 464 (seq_length=1024) +03/05/2022 17:43:38 - INFO - codeparrot_training - Step 44452: {'lr': 0.00040518156481453397, 'samples': 22759936, 'steps': 44452, 'loss/train': 1.9883694648742676} +03/05/2022 17:43:41 - INFO - codeparrot_training - Step 44453: {'lr': 0.0004051774041467789, 'samples': 22760448, 'steps': 44453, 'loss/train': 1.8207718133926392} +03/05/2022 17:43:44 - INFO - codeparrot_training - Skipping example with length 561 (seq_length=1024) +03/05/2022 17:43:47 - INFO - codeparrot_training - Step 44454: {'lr': 0.00040517324340910347, 'samples': 22760960, 'steps': 44454, 'loss/train': 1.9154024124145508} +03/05/2022 17:43:50 - INFO - codeparrot_training - Step 44455: {'lr': 0.0004051690826015092, 'samples': 22761472, 'steps': 44455, 'loss/train': 2.128943681716919} +03/05/2022 17:43:52 - INFO - codeparrot_training - Skipping example with length 290 (seq_length=1024) +03/05/2022 17:43:55 - INFO - codeparrot_training - Step 44456: {'lr': 0.0004051649217239982, 'samples': 22761984, 'steps': 44456, 'loss/train': 1.8793174028396606} +03/05/2022 17:43:58 - INFO - codeparrot_training - Step 44457: {'lr': 0.00040516076077657233, 'samples': 22762496, 'steps': 44457, 'loss/train': 1.732297658920288} +03/05/2022 17:44:01 - INFO - codeparrot_training - Skipping example with length 985 (seq_length=1024) +03/05/2022 17:44:04 - INFO - codeparrot_training - Step 44458: {'lr': 0.0004051565997592334, 'samples': 22763008, 'steps': 44458, 'loss/train': 1.5476731061935425} +03/05/2022 17:44:07 - INFO - codeparrot_training - Step 44459: {'lr': 0.0004051524386719832, 'samples': 22763520, 'steps': 44459, 'loss/train': 1.9968795776367188} +03/05/2022 17:44:09 - INFO - codeparrot_training - Skipping example with length 781 (seq_length=1024) +03/05/2022 17:44:12 - INFO - codeparrot_training - Step 44460: {'lr': 0.0004051482775148238, 'samples': 22764032, 'steps': 44460, 'loss/train': 0.412308007478714} +03/05/2022 17:44:15 - INFO - codeparrot_training - Step 44461: {'lr': 0.00040514411628775695, 'samples': 22764544, 'steps': 44461, 'loss/train': 1.6842602491378784} +03/05/2022 17:44:17 - INFO - codeparrot_training - Skipping example with length 399 (seq_length=1024) +03/05/2022 17:44:21 - INFO - codeparrot_training - Step 44462: {'lr': 0.0004051399549907846, 'samples': 22765056, 'steps': 44462, 'loss/train': 2.3949553966522217} +03/05/2022 17:44:24 - INFO - codeparrot_training - Step 44463: {'lr': 0.0004051357936239085, 'samples': 22765568, 'steps': 44463, 'loss/train': 0.28637149930000305} +03/05/2022 17:44:26 - INFO - codeparrot_training - Skipping example with length 23 (seq_length=1024) +03/05/2022 17:44:29 - INFO - codeparrot_training - Step 44464: {'lr': 0.0004051316321871307, 'samples': 22766080, 'steps': 44464, 'loss/train': 0.5928150415420532} +03/05/2022 17:44:32 - INFO - codeparrot_training - Step 44465: {'lr': 0.0004051274706804529, 'samples': 22766592, 'steps': 44465, 'loss/train': 1.8269596099853516} +03/05/2022 17:44:34 - INFO - codeparrot_training - Skipping example with length 598 (seq_length=1024) +03/05/2022 17:44:37 - INFO - codeparrot_training - Step 44466: {'lr': 0.00040512330910387706, 'samples': 22767104, 'steps': 44466, 'loss/train': 0.8838698863983154} +03/05/2022 17:44:41 - INFO - codeparrot_training - Step 44467: {'lr': 0.0004051191474574051, 'samples': 22767616, 'steps': 44467, 'loss/train': 2.1834588050842285} +03/05/2022 17:44:43 - INFO - codeparrot_training - Skipping example with length 420 (seq_length=1024) +03/05/2022 17:44:46 - INFO - codeparrot_training - Step 44468: {'lr': 0.0004051149857410388, 'samples': 22768128, 'steps': 44468, 'loss/train': 0.7807205319404602} +03/05/2022 17:44:49 - INFO - codeparrot_training - Step 44469: {'lr': 0.00040511082395478014, 'samples': 22768640, 'steps': 44469, 'loss/train': 1.894767165184021} +03/05/2022 17:44:51 - INFO - codeparrot_training - Skipping example with length 549 (seq_length=1024) +03/05/2022 17:44:54 - INFO - codeparrot_training - Step 44470: {'lr': 0.0004051066620986309, 'samples': 22769152, 'steps': 44470, 'loss/train': 1.3567360639572144} +03/05/2022 17:44:58 - INFO - codeparrot_training - Step 44471: {'lr': 0.00040510250017259297, 'samples': 22769664, 'steps': 44471, 'loss/train': 1.066641092300415} +03/05/2022 17:45:00 - INFO - codeparrot_training - Skipping example with length 6 (seq_length=1024) +03/05/2022 17:45:03 - INFO - codeparrot_training - Step 44472: {'lr': 0.0004050983381766683, 'samples': 22770176, 'steps': 44472, 'loss/train': 0.9498510956764221} +03/05/2022 17:45:06 - INFO - codeparrot_training - Step 44473: {'lr': 0.00040509417611085864, 'samples': 22770688, 'steps': 44473, 'loss/train': 1.679363489151001} +03/05/2022 17:45:08 - INFO - codeparrot_training - Skipping example with length 760 (seq_length=1024) +03/05/2022 17:45:11 - INFO - codeparrot_training - Step 44474: {'lr': 0.000405090013975166, 'samples': 22771200, 'steps': 44474, 'loss/train': 1.8203885555267334} +03/05/2022 17:45:15 - INFO - codeparrot_training - Step 44475: {'lr': 0.0004050858517695921, 'samples': 22771712, 'steps': 44475, 'loss/train': 1.4564977884292603} +03/05/2022 17:45:17 - INFO - codeparrot_training - Skipping example with length 164 (seq_length=1024) +03/05/2022 17:45:20 - INFO - codeparrot_training - Step 44476: {'lr': 0.00040508168949413904, 'samples': 22772224, 'steps': 44476, 'loss/train': 1.9062387943267822} +03/05/2022 17:45:23 - INFO - codeparrot_training - Step 44477: {'lr': 0.00040507752714880854, 'samples': 22772736, 'steps': 44477, 'loss/train': 1.298945426940918} +03/05/2022 17:45:25 - INFO - codeparrot_training - Skipping example with length 36 (seq_length=1024) +03/05/2022 17:45:28 - INFO - codeparrot_training - Step 44478: {'lr': 0.0004050733647336024, 'samples': 22773248, 'steps': 44478, 'loss/train': 1.0678116083145142} +03/05/2022 17:45:32 - INFO - codeparrot_training - Step 44479: {'lr': 0.00040506920224852265, 'samples': 22773760, 'steps': 44479, 'loss/train': 1.7555063962936401} +03/05/2022 17:45:34 - INFO - codeparrot_training - Skipping example with length 829 (seq_length=1024) +03/05/2022 17:45:37 - INFO - codeparrot_training - Step 44480: {'lr': 0.0004050650396935711, 'samples': 22774272, 'steps': 44480, 'loss/train': 2.0964548587799072} +03/05/2022 17:45:40 - INFO - codeparrot_training - Step 44481: {'lr': 0.00040506087706874966, 'samples': 22774784, 'steps': 44481, 'loss/train': 1.492376446723938} +03/05/2022 17:45:43 - INFO - codeparrot_training - Skipping example with length 794 (seq_length=1024) +03/05/2022 17:45:45 - INFO - codeparrot_training - Step 44482: {'lr': 0.00040505671437406017, 'samples': 22775296, 'steps': 44482, 'loss/train': 2.030416965484619} +03/05/2022 17:45:49 - INFO - codeparrot_training - Step 44483: {'lr': 0.00040505255160950453, 'samples': 22775808, 'steps': 44483, 'loss/train': 2.010260820388794} +03/05/2022 17:45:51 - INFO - codeparrot_training - Skipping example with length 673 (seq_length=1024) +03/05/2022 17:45:54 - INFO - codeparrot_training - Step 44484: {'lr': 0.00040504838877508464, 'samples': 22776320, 'steps': 44484, 'loss/train': 2.675980806350708} +03/05/2022 17:45:57 - INFO - codeparrot_training - Step 44485: {'lr': 0.0004050442258708022, 'samples': 22776832, 'steps': 44485, 'loss/train': 1.7337102890014648} +03/05/2022 17:46:00 - INFO - codeparrot_training - Step 44486: {'lr': 0.0004050400628966594, 'samples': 22777344, 'steps': 44486, 'loss/train': 1.4738069772720337} +03/05/2022 17:46:00 - INFO - codeparrot_training - Skipping example with length 868 (seq_length=1024) +03/05/2022 17:46:06 - INFO - codeparrot_training - Step 44487: {'lr': 0.0004050358998526578, 'samples': 22777856, 'steps': 44487, 'loss/train': 2.0928032398223877} +03/05/2022 17:46:09 - INFO - codeparrot_training - Step 44488: {'lr': 0.00040503173673879945, 'samples': 22778368, 'steps': 44488, 'loss/train': 1.773941993713379} +03/05/2022 17:46:09 - INFO - codeparrot_training - Skipping example with length 207 (seq_length=1024) +03/05/2022 17:46:14 - INFO - codeparrot_training - Step 44489: {'lr': 0.00040502757355508626, 'samples': 22778880, 'steps': 44489, 'loss/train': 2.341097116470337} +03/05/2022 17:46:17 - INFO - codeparrot_training - Step 44490: {'lr': 0.00040502341030152, 'samples': 22779392, 'steps': 44490, 'loss/train': 1.0215282440185547} +03/05/2022 17:46:18 - INFO - codeparrot_training - Skipping example with length 882 (seq_length=1024) +03/05/2022 17:46:23 - INFO - codeparrot_training - Step 44491: {'lr': 0.0004050192469781025, 'samples': 22779904, 'steps': 44491, 'loss/train': 1.4235018491744995} +03/05/2022 17:46:26 - INFO - codeparrot_training - Step 44492: {'lr': 0.00040501508358483583, 'samples': 22780416, 'steps': 44492, 'loss/train': 1.6066782474517822} +03/05/2022 17:46:27 - INFO - codeparrot_training - Skipping example with length 104 (seq_length=1024) +03/05/2022 17:46:31 - INFO - codeparrot_training - Step 44493: {'lr': 0.00040501092012172173, 'samples': 22780928, 'steps': 44493, 'loss/train': 0.7041665315628052} +03/05/2022 17:46:34 - INFO - codeparrot_training - Step 44494: {'lr': 0.0004050067565887621, 'samples': 22781440, 'steps': 44494, 'loss/train': 2.097938060760498} +03/05/2022 17:46:35 - INFO - codeparrot_training - Skipping example with length 360 (seq_length=1024) +03/05/2022 17:46:40 - INFO - codeparrot_training - Step 44495: {'lr': 0.00040500259298595874, 'samples': 22781952, 'steps': 44495, 'loss/train': 1.8860644102096558} +03/05/2022 17:46:43 - INFO - codeparrot_training - Step 44496: {'lr': 0.00040499842931331374, 'samples': 22782464, 'steps': 44496, 'loss/train': 1.9293296337127686} +03/05/2022 17:46:44 - INFO - codeparrot_training - Skipping example with length 167 (seq_length=1024) +03/05/2022 17:46:48 - INFO - codeparrot_training - Step 44497: {'lr': 0.0004049942655708287, 'samples': 22782976, 'steps': 44497, 'loss/train': 1.6213634014129639} +03/05/2022 17:46:51 - INFO - codeparrot_training - Step 44498: {'lr': 0.0004049901017585058, 'samples': 22783488, 'steps': 44498, 'loss/train': 1.051899790763855} +03/05/2022 17:46:52 - INFO - codeparrot_training - Skipping example with length 10 (seq_length=1024) +03/05/2022 17:46:56 - INFO - codeparrot_training - Step 44499: {'lr': 0.00040498593787634664, 'samples': 22784000, 'steps': 44499, 'loss/train': 1.6054790019989014} +03/05/2022 17:46:59 - INFO - codeparrot_training - Step 44500: {'lr': 0.0004049817739243532, 'samples': 22784512, 'steps': 44500, 'loss/train': 4.172098159790039} +03/05/2022 17:47:00 - INFO - codeparrot_training - Skipping example with length 383 (seq_length=1024) +03/05/2022 17:47:05 - INFO - codeparrot_training - Step 44501: {'lr': 0.0004049776099025274, 'samples': 22785024, 'steps': 44501, 'loss/train': 1.3041694164276123} +03/05/2022 17:47:08 - INFO - codeparrot_training - Step 44502: {'lr': 0.000404973445810871, 'samples': 22785536, 'steps': 44502, 'loss/train': 1.8268110752105713} +03/05/2022 17:47:09 - INFO - codeparrot_training - Skipping example with length 854 (seq_length=1024) +03/05/2022 17:47:14 - INFO - codeparrot_training - Step 44503: {'lr': 0.00040496928164938614, 'samples': 22786048, 'steps': 44503, 'loss/train': 2.2680890560150146} +03/05/2022 17:47:17 - INFO - codeparrot_training - Step 44504: {'lr': 0.0004049651174180744, 'samples': 22786560, 'steps': 44504, 'loss/train': 1.6472947597503662} +03/05/2022 17:47:20 - INFO - codeparrot_training - Step 44505: {'lr': 0.00040496095311693775, 'samples': 22787072, 'steps': 44505, 'loss/train': 1.658734917640686} +03/05/2022 17:47:20 - INFO - codeparrot_training - Skipping example with length 876 (seq_length=1024) +03/05/2022 17:47:25 - INFO - codeparrot_training - Step 44506: {'lr': 0.0004049567887459781, 'samples': 22787584, 'steps': 44506, 'loss/train': 1.7881848812103271} +03/05/2022 17:47:29 - INFO - codeparrot_training - Step 44507: {'lr': 0.0004049526243051973, 'samples': 22788096, 'steps': 44507, 'loss/train': 2.472562074661255} +03/05/2022 17:47:29 - INFO - codeparrot_training - Skipping example with length 430 (seq_length=1024) +03/05/2022 17:47:34 - INFO - codeparrot_training - Step 44508: {'lr': 0.0004049484597945973, 'samples': 22788608, 'steps': 44508, 'loss/train': 2.091200828552246} +03/05/2022 17:47:37 - INFO - codeparrot_training - Step 44509: {'lr': 0.00040494429521417983, 'samples': 22789120, 'steps': 44509, 'loss/train': 1.7041828632354736} +03/05/2022 17:47:37 - INFO - codeparrot_training - Skipping example with length 902 (seq_length=1024) +03/05/2022 17:47:43 - INFO - codeparrot_training - Step 44510: {'lr': 0.0004049401305639469, 'samples': 22789632, 'steps': 44510, 'loss/train': 2.0982089042663574} +03/05/2022 17:47:46 - INFO - codeparrot_training - Step 44511: {'lr': 0.00040493596584390034, 'samples': 22790144, 'steps': 44511, 'loss/train': 1.921012043952942} +03/05/2022 17:47:47 - INFO - codeparrot_training - Skipping example with length 105 (seq_length=1024) +03/05/2022 17:47:51 - INFO - codeparrot_training - Step 44512: {'lr': 0.00040493180105404203, 'samples': 22790656, 'steps': 44512, 'loss/train': 1.5537750720977783} +03/05/2022 17:47:54 - INFO - codeparrot_training - Step 44513: {'lr': 0.0004049276361943738, 'samples': 22791168, 'steps': 44513, 'loss/train': 1.5890827178955078} +03/05/2022 17:47:55 - INFO - codeparrot_training - Skipping example with length 1012 (seq_length=1024) +03/05/2022 17:47:59 - INFO - codeparrot_training - Step 44514: {'lr': 0.0004049234712648976, 'samples': 22791680, 'steps': 44514, 'loss/train': 1.277435064315796} +03/05/2022 17:48:03 - INFO - codeparrot_training - Step 44515: {'lr': 0.00040491930626561525, 'samples': 22792192, 'steps': 44515, 'loss/train': 2.004828929901123} +03/05/2022 17:48:03 - INFO - codeparrot_training - Skipping example with length 515 (seq_length=1024) +03/05/2022 17:48:08 - INFO - codeparrot_training - Step 44516: {'lr': 0.00040491514119652875, 'samples': 22792704, 'steps': 44516, 'loss/train': 1.8087157011032104} +03/05/2022 17:48:11 - INFO - codeparrot_training - Step 44517: {'lr': 0.00040491097605763974, 'samples': 22793216, 'steps': 44517, 'loss/train': 1.6327451467514038} +03/05/2022 17:48:11 - INFO - codeparrot_training - Skipping example with length 385 (seq_length=1024) +03/05/2022 17:48:16 - INFO - codeparrot_training - Step 44518: {'lr': 0.00040490681084895034, 'samples': 22793728, 'steps': 44518, 'loss/train': 2.096229076385498} +03/05/2022 17:48:19 - INFO - codeparrot_training - Step 44519: {'lr': 0.00040490264557046217, 'samples': 22794240, 'steps': 44519, 'loss/train': 0.5677652359008789} +03/05/2022 17:48:20 - INFO - codeparrot_training - Skipping example with length 336 (seq_length=1024) +03/05/2022 17:48:25 - INFO - codeparrot_training - Step 44520: {'lr': 0.0004048984802221774, 'samples': 22794752, 'steps': 44520, 'loss/train': 2.47418475151062} +03/05/2022 17:48:28 - INFO - codeparrot_training - Step 44521: {'lr': 0.0004048943148040977, 'samples': 22795264, 'steps': 44521, 'loss/train': 1.6227787733078003} +03/05/2022 17:48:28 - INFO - codeparrot_training - Skipping example with length 327 (seq_length=1024) +03/05/2022 17:48:33 - INFO - codeparrot_training - Step 44522: {'lr': 0.0004048901493162251, 'samples': 22795776, 'steps': 44522, 'loss/train': 1.6965560913085938} +03/05/2022 17:48:36 - INFO - codeparrot_training - Step 44523: {'lr': 0.00040488598375856133, 'samples': 22796288, 'steps': 44523, 'loss/train': 1.7054929733276367} +03/05/2022 17:48:36 - INFO - codeparrot_training - Skipping example with length 616 (seq_length=1024) +03/05/2022 17:48:42 - INFO - codeparrot_training - Step 44524: {'lr': 0.0004048818181311083, 'samples': 22796800, 'steps': 44524, 'loss/train': 2.8673338890075684} +03/05/2022 17:48:45 - INFO - codeparrot_training - Step 44525: {'lr': 0.00040487765243386794, 'samples': 22797312, 'steps': 44525, 'loss/train': 1.1606882810592651} +03/05/2022 17:48:45 - INFO - codeparrot_training - Skipping example with length 416 (seq_length=1024) +03/05/2022 17:48:50 - INFO - codeparrot_training - Step 44526: {'lr': 0.0004048734866668421, 'samples': 22797824, 'steps': 44526, 'loss/train': 1.677412986755371} +03/05/2022 17:48:53 - INFO - codeparrot_training - Step 44527: {'lr': 0.0004048693208300327, 'samples': 22798336, 'steps': 44527, 'loss/train': 1.0053110122680664} +03/05/2022 17:48:53 - INFO - codeparrot_training - Skipping example with length 778 (seq_length=1024) +03/05/2022 17:48:59 - INFO - codeparrot_training - Step 44528: {'lr': 0.00040486515492344145, 'samples': 22798848, 'steps': 44528, 'loss/train': 1.4556457996368408} +03/05/2022 17:49:02 - INFO - codeparrot_training - Step 44529: {'lr': 0.00040486098894707044, 'samples': 22799360, 'steps': 44529, 'loss/train': 1.0868699550628662} +03/05/2022 17:49:02 - INFO - codeparrot_training - Skipping example with length 223 (seq_length=1024) +03/05/2022 17:49:07 - INFO - codeparrot_training - Step 44530: {'lr': 0.00040485682290092144, 'samples': 22799872, 'steps': 44530, 'loss/train': 1.2931398153305054} +03/05/2022 17:49:10 - INFO - codeparrot_training - Step 44531: {'lr': 0.0004048526567849964, 'samples': 22800384, 'steps': 44531, 'loss/train': 1.568174958229065} +03/05/2022 17:49:10 - INFO - codeparrot_training - Skipping example with length 380 (seq_length=1024) +03/05/2022 17:49:15 - INFO - codeparrot_training - Step 44532: {'lr': 0.00040484849059929705, 'samples': 22800896, 'steps': 44532, 'loss/train': 1.634194016456604} +03/05/2022 17:49:19 - INFO - codeparrot_training - Step 44533: {'lr': 0.00040484432434382547, 'samples': 22801408, 'steps': 44533, 'loss/train': 1.099392056465149} +03/05/2022 17:49:19 - INFO - codeparrot_training - Skipping example with length 524 (seq_length=1024) +03/05/2022 17:49:24 - INFO - codeparrot_training - Step 44534: {'lr': 0.0004048401580185833, 'samples': 22801920, 'steps': 44534, 'loss/train': 1.608557105064392} +03/05/2022 17:49:27 - INFO - codeparrot_training - Step 44535: {'lr': 0.00040483599162357257, 'samples': 22802432, 'steps': 44535, 'loss/train': 1.4663537740707397} +03/05/2022 17:49:27 - INFO - codeparrot_training - Skipping example with length 241 (seq_length=1024) +03/05/2022 17:49:32 - INFO - codeparrot_training - Step 44536: {'lr': 0.0004048318251587952, 'samples': 22802944, 'steps': 44536, 'loss/train': 1.623267412185669} +03/05/2022 17:49:36 - INFO - codeparrot_training - Step 44537: {'lr': 0.000404827658624253, 'samples': 22803456, 'steps': 44537, 'loss/train': 1.6842097043991089} +03/05/2022 17:49:36 - INFO - codeparrot_training - Skipping example with length 856 (seq_length=1024) +03/05/2022 17:49:41 - INFO - codeparrot_training - Step 44538: {'lr': 0.00040482349201994785, 'samples': 22803968, 'steps': 44538, 'loss/train': 1.3043562173843384} +03/05/2022 17:49:44 - INFO - codeparrot_training - Step 44539: {'lr': 0.00040481932534588153, 'samples': 22804480, 'steps': 44539, 'loss/train': 1.3490920066833496} +03/05/2022 17:49:44 - INFO - codeparrot_training - Skipping example with length 185 (seq_length=1024) +03/05/2022 17:49:49 - INFO - codeparrot_training - Step 44540: {'lr': 0.00040481515860205607, 'samples': 22804992, 'steps': 44540, 'loss/train': 1.8216818571090698} +03/05/2022 17:49:53 - INFO - codeparrot_training - Step 44541: {'lr': 0.00040481099178847326, 'samples': 22805504, 'steps': 44541, 'loss/train': 1.32760751247406} +03/05/2022 17:49:53 - INFO - codeparrot_training - Skipping example with length 806 (seq_length=1024) +03/05/2022 17:49:58 - INFO - codeparrot_training - Step 44542: {'lr': 0.000404806824905135, 'samples': 22806016, 'steps': 44542, 'loss/train': 1.7145955562591553} +03/05/2022 17:50:01 - INFO - codeparrot_training - Step 44543: {'lr': 0.0004048026579520433, 'samples': 22806528, 'steps': 44543, 'loss/train': 1.6666427850723267} +03/05/2022 17:50:02 - INFO - codeparrot_training - Skipping example with length 1000 (seq_length=1024) +03/05/2022 17:50:07 - INFO - codeparrot_training - Step 44544: {'lr': 0.00040479849092919974, 'samples': 22807040, 'steps': 44544, 'loss/train': 1.376805067062378} +03/05/2022 17:50:10 - INFO - codeparrot_training - Step 44545: {'lr': 0.00040479432383660644, 'samples': 22807552, 'steps': 44545, 'loss/train': 1.2782899141311646} +03/05/2022 17:50:10 - INFO - codeparrot_training - Skipping example with length 735 (seq_length=1024) +03/05/2022 17:50:15 - INFO - codeparrot_training - Step 44546: {'lr': 0.00040479015667426523, 'samples': 22808064, 'steps': 44546, 'loss/train': 2.152052402496338} +03/05/2022 17:50:18 - INFO - codeparrot_training - Step 44547: {'lr': 0.00040478598944217794, 'samples': 22808576, 'steps': 44547, 'loss/train': 1.6980112791061401} +03/05/2022 17:50:19 - INFO - codeparrot_training - Skipping example with length 698 (seq_length=1024) +03/05/2022 17:50:24 - INFO - codeparrot_training - Step 44548: {'lr': 0.0004047818221403464, 'samples': 22809088, 'steps': 44548, 'loss/train': 2.021150588989258} +03/05/2022 17:50:27 - INFO - codeparrot_training - Step 44549: {'lr': 0.0004047776547687727, 'samples': 22809600, 'steps': 44549, 'loss/train': 1.5869733095169067} +03/05/2022 17:50:27 - INFO - codeparrot_training - Skipping example with length 857 (seq_length=1024) +03/05/2022 17:50:32 - INFO - codeparrot_training - Step 44550: {'lr': 0.00040477348732745853, 'samples': 22810112, 'steps': 44550, 'loss/train': 2.583287000656128} +03/05/2022 17:50:36 - INFO - codeparrot_training - Step 44551: {'lr': 0.0004047693198164058, 'samples': 22810624, 'steps': 44551, 'loss/train': 1.6966031789779663} +03/05/2022 17:50:36 - INFO - codeparrot_training - Skipping example with length 310 (seq_length=1024) +03/05/2022 17:50:41 - INFO - codeparrot_training - Step 44552: {'lr': 0.0004047651522356164, 'samples': 22811136, 'steps': 44552, 'loss/train': 2.473597764968872} +03/05/2022 17:50:44 - INFO - codeparrot_training - Step 44553: {'lr': 0.0004047609845850922, 'samples': 22811648, 'steps': 44553, 'loss/train': 1.9031518697738647} +03/05/2022 17:50:45 - INFO - codeparrot_training - Skipping example with length 471 (seq_length=1024) +03/05/2022 17:50:49 - INFO - codeparrot_training - Step 44554: {'lr': 0.0004047568168648351, 'samples': 22812160, 'steps': 44554, 'loss/train': 1.0363484621047974} +03/05/2022 17:50:53 - INFO - codeparrot_training - Step 44555: {'lr': 0.00040475264907484696, 'samples': 22812672, 'steps': 44555, 'loss/train': 1.5753082036972046} +03/05/2022 17:50:53 - INFO - codeparrot_training - Skipping example with length 243 (seq_length=1024) +03/05/2022 17:50:58 - INFO - codeparrot_training - Step 44556: {'lr': 0.0004047484812151296, 'samples': 22813184, 'steps': 44556, 'loss/train': 2.182079792022705} +03/05/2022 17:51:01 - INFO - codeparrot_training - Step 44557: {'lr': 0.00040474431328568506, 'samples': 22813696, 'steps': 44557, 'loss/train': 1.1794798374176025} +03/05/2022 17:51:02 - INFO - codeparrot_training - Skipping example with length 430 (seq_length=1024) +03/05/2022 17:51:06 - INFO - codeparrot_training - Step 44558: {'lr': 0.00040474014528651514, 'samples': 22814208, 'steps': 44558, 'loss/train': 1.146364688873291} +03/05/2022 17:51:10 - INFO - codeparrot_training - Step 44559: {'lr': 0.00040473597721762164, 'samples': 22814720, 'steps': 44559, 'loss/train': 1.7546452283859253} +03/05/2022 17:51:10 - INFO - codeparrot_training - Skipping example with length 749 (seq_length=1024) +03/05/2022 17:51:15 - INFO - codeparrot_training - Step 44560: {'lr': 0.00040473180907900645, 'samples': 22815232, 'steps': 44560, 'loss/train': 0.8381884694099426} +03/05/2022 17:51:18 - INFO - codeparrot_training - Step 44561: {'lr': 0.0004047276408706716, 'samples': 22815744, 'steps': 44561, 'loss/train': 0.7167012095451355} +03/05/2022 17:51:18 - INFO - codeparrot_training - Skipping example with length 854 (seq_length=1024) +03/05/2022 17:51:23 - INFO - codeparrot_training - Step 44562: {'lr': 0.00040472347259261875, 'samples': 22816256, 'steps': 44562, 'loss/train': 2.192063570022583} +03/05/2022 17:51:27 - INFO - codeparrot_training - Step 44563: {'lr': 0.00040471930424485, 'samples': 22816768, 'steps': 44563, 'loss/train': 1.1742388010025024} +03/05/2022 17:51:27 - INFO - codeparrot_training - Skipping example with length 816 (seq_length=1024) +03/05/2022 17:51:32 - INFO - codeparrot_training - Step 44564: {'lr': 0.0004047151358273671, 'samples': 22817280, 'steps': 44564, 'loss/train': 2.216261386871338} +03/05/2022 17:51:35 - INFO - codeparrot_training - Step 44565: {'lr': 0.00040471096734017185, 'samples': 22817792, 'steps': 44565, 'loss/train': 1.5933125019073486} +03/05/2022 17:51:35 - INFO - codeparrot_training - Skipping example with length 563 (seq_length=1024) +03/05/2022 17:51:40 - INFO - codeparrot_training - Step 44566: {'lr': 0.0004047067987832663, 'samples': 22818304, 'steps': 44566, 'loss/train': 0.7027168273925781} +03/05/2022 17:51:43 - INFO - codeparrot_training - Step 44567: {'lr': 0.00040470263015665234, 'samples': 22818816, 'steps': 44567, 'loss/train': 1.7794424295425415} +03/05/2022 17:51:44 - INFO - codeparrot_training - Skipping example with length 20 (seq_length=1024) +03/05/2022 17:51:49 - INFO - codeparrot_training - Step 44568: {'lr': 0.00040469846146033164, 'samples': 22819328, 'steps': 44568, 'loss/train': 1.461206316947937} +03/05/2022 17:51:52 - INFO - codeparrot_training - Step 44569: {'lr': 0.00040469429269430617, 'samples': 22819840, 'steps': 44569, 'loss/train': 0.896517276763916} +03/05/2022 17:51:52 - INFO - codeparrot_training - Skipping example with length 249 (seq_length=1024) +03/05/2022 17:51:57 - INFO - codeparrot_training - Step 44570: {'lr': 0.00040469012385857794, 'samples': 22820352, 'steps': 44570, 'loss/train': 1.209571361541748} +03/05/2022 17:52:00 - INFO - codeparrot_training - Step 44571: {'lr': 0.0004046859549531487, 'samples': 22820864, 'steps': 44571, 'loss/train': 1.5327390432357788} +03/05/2022 17:52:00 - INFO - codeparrot_training - Skipping example with length 577 (seq_length=1024) +03/05/2022 17:52:06 - INFO - codeparrot_training - Step 44572: {'lr': 0.0004046817859780203, 'samples': 22821376, 'steps': 44572, 'loss/train': 1.8687971830368042} +03/05/2022 17:52:09 - INFO - codeparrot_training - Step 44573: {'lr': 0.00040467761693319473, 'samples': 22821888, 'steps': 44573, 'loss/train': 1.6438744068145752} +03/05/2022 17:52:09 - INFO - codeparrot_training - Skipping example with length 114 (seq_length=1024) +03/05/2022 17:52:14 - INFO - codeparrot_training - Step 44574: {'lr': 0.0004046734478186738, 'samples': 22822400, 'steps': 44574, 'loss/train': 1.8528239727020264} +03/05/2022 17:52:17 - INFO - codeparrot_training - Step 44575: {'lr': 0.0004046692786344594, 'samples': 22822912, 'steps': 44575, 'loss/train': 1.8717046976089478} +03/05/2022 17:52:18 - INFO - codeparrot_training - Skipping example with length 250 (seq_length=1024) +03/05/2022 17:52:23 - INFO - codeparrot_training - Step 44576: {'lr': 0.0004046651093805534, 'samples': 22823424, 'steps': 44576, 'loss/train': 1.5234545469284058} +03/05/2022 17:52:26 - INFO - codeparrot_training - Step 44577: {'lr': 0.0004046609400569577, 'samples': 22823936, 'steps': 44577, 'loss/train': 1.6831412315368652} +03/05/2022 17:52:26 - INFO - codeparrot_training - Skipping example with length 723 (seq_length=1024) +03/05/2022 17:52:31 - INFO - codeparrot_training - Step 44578: {'lr': 0.00040465677066367424, 'samples': 22824448, 'steps': 44578, 'loss/train': 1.78238046169281} +03/05/2022 17:52:34 - INFO - codeparrot_training - Step 44579: {'lr': 0.0004046526012007047, 'samples': 22824960, 'steps': 44579, 'loss/train': 1.1103743314743042} +03/05/2022 17:52:35 - INFO - codeparrot_training - Skipping example with length 418 (seq_length=1024) +03/05/2022 17:52:39 - INFO - codeparrot_training - Step 44580: {'lr': 0.0004046484316680511, 'samples': 22825472, 'steps': 44580, 'loss/train': 2.2987775802612305} +03/05/2022 17:52:43 - INFO - codeparrot_training - Step 44581: {'lr': 0.0004046442620657154, 'samples': 22825984, 'steps': 44581, 'loss/train': 1.830420970916748} +03/05/2022 17:52:43 - INFO - codeparrot_training - Skipping example with length 622 (seq_length=1024) +03/05/2022 17:52:48 - INFO - codeparrot_training - Step 44582: {'lr': 0.00040464009239369925, 'samples': 22826496, 'steps': 44582, 'loss/train': 1.899353265762329} +03/05/2022 17:52:51 - INFO - codeparrot_training - Step 44583: {'lr': 0.0004046359226520048, 'samples': 22827008, 'steps': 44583, 'loss/train': 1.379555344581604} +03/05/2022 17:52:52 - INFO - codeparrot_training - Skipping example with length 489 (seq_length=1024) +03/05/2022 17:52:56 - INFO - codeparrot_training - Step 44584: {'lr': 0.0004046317528406337, 'samples': 22827520, 'steps': 44584, 'loss/train': 1.4669467210769653} +03/05/2022 17:53:00 - INFO - codeparrot_training - Step 44585: {'lr': 0.0004046275829595879, 'samples': 22828032, 'steps': 44585, 'loss/train': 2.1820998191833496} +03/05/2022 17:53:00 - INFO - codeparrot_training - Skipping example with length 901 (seq_length=1024) +03/05/2022 17:53:05 - INFO - codeparrot_training - Step 44586: {'lr': 0.0004046234130088694, 'samples': 22828544, 'steps': 44586, 'loss/train': 1.9344310760498047} +03/05/2022 17:53:08 - INFO - codeparrot_training - Step 44587: {'lr': 0.00040461924298847987, 'samples': 22829056, 'steps': 44587, 'loss/train': 0.8054443001747131} +03/05/2022 17:53:08 - INFO - codeparrot_training - Skipping example with length 567 (seq_length=1024) +03/05/2022 17:53:13 - INFO - codeparrot_training - Step 44588: {'lr': 0.0004046150728984214, 'samples': 22829568, 'steps': 44588, 'loss/train': 2.062788724899292} +03/05/2022 17:53:16 - INFO - codeparrot_training - Step 44589: {'lr': 0.00040461090273869566, 'samples': 22830080, 'steps': 44589, 'loss/train': 0.3859576880931854} +03/05/2022 17:53:17 - INFO - codeparrot_training - Skipping example with length 564 (seq_length=1024) +03/05/2022 17:53:22 - INFO - codeparrot_training - Step 44590: {'lr': 0.0004046067325093047, 'samples': 22830592, 'steps': 44590, 'loss/train': 1.8838670253753662} +03/05/2022 17:53:25 - INFO - codeparrot_training - Step 44591: {'lr': 0.00040460256221025025, 'samples': 22831104, 'steps': 44591, 'loss/train': 1.2600703239440918} +03/05/2022 17:53:25 - INFO - codeparrot_training - Skipping example with length 979 (seq_length=1024) +03/05/2022 17:53:30 - INFO - codeparrot_training - Step 44592: {'lr': 0.00040459839184153436, 'samples': 22831616, 'steps': 44592, 'loss/train': 0.9063652753829956} +03/05/2022 17:53:33 - INFO - codeparrot_training - Step 44593: {'lr': 0.00040459422140315876, 'samples': 22832128, 'steps': 44593, 'loss/train': 0.4345346689224243} +03/05/2022 17:53:33 - INFO - codeparrot_training - Skipping example with length 448 (seq_length=1024) +03/05/2022 17:53:38 - INFO - codeparrot_training - Step 44594: {'lr': 0.00040459005089512544, 'samples': 22832640, 'steps': 44594, 'loss/train': 1.607822299003601} +03/05/2022 17:53:42 - INFO - codeparrot_training - Step 44595: {'lr': 0.0004045858803174362, 'samples': 22833152, 'steps': 44595, 'loss/train': 1.5277867317199707} +03/05/2022 17:53:42 - INFO - codeparrot_training - Skipping example with length 594 (seq_length=1024) +03/05/2022 17:53:47 - INFO - codeparrot_training - Step 44596: {'lr': 0.0004045817096700929, 'samples': 22833664, 'steps': 44596, 'loss/train': 1.6344398260116577} +03/05/2022 17:53:50 - INFO - codeparrot_training - Step 44597: {'lr': 0.0004045775389530976, 'samples': 22834176, 'steps': 44597, 'loss/train': 0.3275397717952728} +03/05/2022 17:53:51 - INFO - codeparrot_training - Skipping example with length 950 (seq_length=1024) +03/05/2022 17:53:56 - INFO - codeparrot_training - Step 44598: {'lr': 0.00040457336816645195, 'samples': 22834688, 'steps': 44598, 'loss/train': 1.3535548448562622} +03/05/2022 17:53:59 - INFO - codeparrot_training - Step 44599: {'lr': 0.000404569197310158, 'samples': 22835200, 'steps': 44599, 'loss/train': 0.49079734086990356} +03/05/2022 17:53:59 - INFO - codeparrot_training - Skipping example with length 658 (seq_length=1024) +03/05/2022 17:54:04 - INFO - codeparrot_training - Step 44600: {'lr': 0.0004045650263842174, 'samples': 22835712, 'steps': 44600, 'loss/train': 0.6516032814979553} +03/05/2022 17:54:07 - INFO - codeparrot_training - Step 44601: {'lr': 0.0004045608553886323, 'samples': 22836224, 'steps': 44601, 'loss/train': 1.6543772220611572} +03/05/2022 17:54:08 - INFO - codeparrot_training - Skipping example with length 776 (seq_length=1024) +03/05/2022 17:54:13 - INFO - codeparrot_training - Step 44602: {'lr': 0.0004045566843234044, 'samples': 22836736, 'steps': 44602, 'loss/train': 1.015753149986267} +03/05/2022 17:54:16 - INFO - codeparrot_training - Step 44603: {'lr': 0.0004045525131885357, 'samples': 22837248, 'steps': 44603, 'loss/train': 1.889438271522522} +03/05/2022 17:54:18 - INFO - codeparrot_training - Skipping example with length 584 (seq_length=1024) +03/05/2022 17:54:21 - INFO - codeparrot_training - Step 44604: {'lr': 0.0004045483419840281, 'samples': 22837760, 'steps': 44604, 'loss/train': 2.1295838356018066} +03/05/2022 17:54:24 - INFO - codeparrot_training - Step 44605: {'lr': 0.00040454417070988325, 'samples': 22838272, 'steps': 44605, 'loss/train': 2.057110548019409} +03/05/2022 17:54:26 - INFO - codeparrot_training - Skipping example with length 722 (seq_length=1024) +03/05/2022 17:54:30 - INFO - codeparrot_training - Step 44606: {'lr': 0.0004045399993661033, 'samples': 22838784, 'steps': 44606, 'loss/train': 1.609908103942871} +03/05/2022 17:54:33 - INFO - codeparrot_training - Step 44607: {'lr': 0.00040453582795268994, 'samples': 22839296, 'steps': 44607, 'loss/train': 1.2735306024551392} +03/05/2022 17:54:34 - INFO - codeparrot_training - Skipping example with length 571 (seq_length=1024) +03/05/2022 17:54:38 - INFO - codeparrot_training - Step 44608: {'lr': 0.00040453165646964505, 'samples': 22839808, 'steps': 44608, 'loss/train': 2.1764392852783203} +03/05/2022 17:54:41 - INFO - codeparrot_training - Step 44609: {'lr': 0.00040452748491697074, 'samples': 22840320, 'steps': 44609, 'loss/train': 2.0880839824676514} +03/05/2022 17:54:43 - INFO - codeparrot_training - Skipping example with length 287 (seq_length=1024) +03/05/2022 17:54:46 - INFO - codeparrot_training - Step 44610: {'lr': 0.00040452331329466864, 'samples': 22840832, 'steps': 44610, 'loss/train': 2.4466021060943604} +03/05/2022 17:54:50 - INFO - codeparrot_training - Step 44611: {'lr': 0.0004045191416027407, 'samples': 22841344, 'steps': 44611, 'loss/train': 1.6570193767547607} +03/05/2022 17:54:51 - INFO - codeparrot_training - Skipping example with length 194 (seq_length=1024) +03/05/2022 17:54:55 - INFO - codeparrot_training - Step 44612: {'lr': 0.0004045149698411889, 'samples': 22841856, 'steps': 44612, 'loss/train': 1.5675358772277832} +03/05/2022 17:54:58 - INFO - codeparrot_training - Step 44613: {'lr': 0.000404510798010015, 'samples': 22842368, 'steps': 44613, 'loss/train': 1.6530590057373047} +03/05/2022 17:54:59 - INFO - codeparrot_training - Skipping example with length 130 (seq_length=1024) +03/05/2022 17:55:03 - INFO - codeparrot_training - Step 44614: {'lr': 0.0004045066261092209, 'samples': 22842880, 'steps': 44614, 'loss/train': 1.7235081195831299} +03/05/2022 17:55:06 - INFO - codeparrot_training - Step 44615: {'lr': 0.0004045024541388085, 'samples': 22843392, 'steps': 44615, 'loss/train': 1.8354604244232178} +03/05/2022 17:55:08 - INFO - codeparrot_training - Skipping example with length 419 (seq_length=1024) +03/05/2022 17:55:12 - INFO - codeparrot_training - Step 44616: {'lr': 0.0004044982820987797, 'samples': 22843904, 'steps': 44616, 'loss/train': 1.9907817840576172} +03/05/2022 17:55:15 - INFO - codeparrot_training - Step 44617: {'lr': 0.0004044941099891364, 'samples': 22844416, 'steps': 44617, 'loss/train': 1.4687731266021729} +03/05/2022 17:55:16 - INFO - codeparrot_training - Skipping example with length 541 (seq_length=1024) +03/05/2022 17:55:20 - INFO - codeparrot_training - Step 44618: {'lr': 0.0004044899378098803, 'samples': 22844928, 'steps': 44618, 'loss/train': 3.456974506378174} +03/05/2022 17:55:24 - INFO - codeparrot_training - Step 44619: {'lr': 0.00040448576556101356, 'samples': 22845440, 'steps': 44619, 'loss/train': 1.8188203573226929} +03/05/2022 17:55:25 - INFO - codeparrot_training - Skipping example with length 56 (seq_length=1024) +03/05/2022 17:55:30 - INFO - codeparrot_training - Step 44620: {'lr': 0.0004044815932425379, 'samples': 22845952, 'steps': 44620, 'loss/train': 2.1145966053009033} +03/05/2022 17:55:33 - INFO - codeparrot_training - Step 44621: {'lr': 0.0004044774208544551, 'samples': 22846464, 'steps': 44621, 'loss/train': 1.9617079496383667} +03/05/2022 17:55:36 - INFO - codeparrot_training - Step 44622: {'lr': 0.00040447324839676727, 'samples': 22846976, 'steps': 44622, 'loss/train': 2.4888126850128174} +03/05/2022 17:55:37 - INFO - codeparrot_training - Skipping example with length 359 (seq_length=1024) +03/05/2022 17:55:41 - INFO - codeparrot_training - Step 44623: {'lr': 0.00040446907586947614, 'samples': 22847488, 'steps': 44623, 'loss/train': 1.0431649684906006} +03/05/2022 17:55:44 - INFO - codeparrot_training - Step 44624: {'lr': 0.0004044649032725836, 'samples': 22848000, 'steps': 44624, 'loss/train': 0.9656731486320496} +03/05/2022 17:55:45 - INFO - codeparrot_training - Skipping example with length 854 (seq_length=1024) +03/05/2022 17:55:50 - INFO - codeparrot_training - Step 44625: {'lr': 0.00040446073060609156, 'samples': 22848512, 'steps': 44625, 'loss/train': 2.0026378631591797} +03/05/2022 17:55:53 - INFO - codeparrot_training - Step 44626: {'lr': 0.00040445655787000196, 'samples': 22849024, 'steps': 44626, 'loss/train': 1.5878280401229858} +03/05/2022 17:55:54 - INFO - codeparrot_training - Skipping example with length 698 (seq_length=1024) +03/05/2022 17:55:58 - INFO - codeparrot_training - Step 44627: {'lr': 0.0004044523850643166, 'samples': 22849536, 'steps': 44627, 'loss/train': 1.425322413444519} +03/05/2022 17:56:01 - INFO - codeparrot_training - Step 44628: {'lr': 0.0004044482121890374, 'samples': 22850048, 'steps': 44628, 'loss/train': 1.6432218551635742} +03/05/2022 17:56:02 - INFO - codeparrot_training - Skipping example with length 181 (seq_length=1024) +03/05/2022 17:56:07 - INFO - codeparrot_training - Step 44629: {'lr': 0.00040444403924416614, 'samples': 22850560, 'steps': 44629, 'loss/train': 1.741536021232605} +03/05/2022 17:56:10 - INFO - codeparrot_training - Step 44630: {'lr': 0.00040443986622970486, 'samples': 22851072, 'steps': 44630, 'loss/train': 1.5783207416534424} +03/05/2022 17:56:11 - INFO - codeparrot_training - Skipping example with length 605 (seq_length=1024) +03/05/2022 17:56:15 - INFO - codeparrot_training - Step 44631: {'lr': 0.0004044356931456553, 'samples': 22851584, 'steps': 44631, 'loss/train': 2.2249045372009277} +03/05/2022 17:56:19 - INFO - codeparrot_training - Step 44632: {'lr': 0.00040443151999201946, 'samples': 22852096, 'steps': 44632, 'loss/train': 0.5955970883369446} +03/05/2022 17:56:22 - INFO - codeparrot_training - Step 44633: {'lr': 0.00040442734676879907, 'samples': 22852608, 'steps': 44633, 'loss/train': 1.5316474437713623} +03/05/2022 17:56:22 - INFO - codeparrot_training - Skipping example with length 245 (seq_length=1024) +03/05/2022 17:56:27 - INFO - codeparrot_training - Step 44634: {'lr': 0.0004044231734759961, 'samples': 22853120, 'steps': 44634, 'loss/train': 1.938407301902771} +03/05/2022 17:56:30 - INFO - codeparrot_training - Step 44635: {'lr': 0.00040441900011361256, 'samples': 22853632, 'steps': 44635, 'loss/train': 1.0903067588806152} +03/05/2022 17:56:30 - INFO - codeparrot_training - Skipping example with length 181 (seq_length=1024) +03/05/2022 17:56:36 - INFO - codeparrot_training - Step 44636: {'lr': 0.0004044148266816501, 'samples': 22854144, 'steps': 44636, 'loss/train': 2.070053815841675} +03/05/2022 17:56:39 - INFO - codeparrot_training - Step 44637: {'lr': 0.0004044106531801107, 'samples': 22854656, 'steps': 44637, 'loss/train': 1.3121535778045654} +03/05/2022 17:56:39 - INFO - codeparrot_training - Skipping example with length 438 (seq_length=1024) +03/05/2022 17:56:45 - INFO - codeparrot_training - Step 44638: {'lr': 0.0004044064796089963, 'samples': 22855168, 'steps': 44638, 'loss/train': 0.9306910037994385} +03/05/2022 17:56:48 - INFO - codeparrot_training - Step 44639: {'lr': 0.0004044023059683087, 'samples': 22855680, 'steps': 44639, 'loss/train': 2.1859676837921143} +03/05/2022 17:56:49 - INFO - codeparrot_training - Skipping example with length 613 (seq_length=1024) +03/05/2022 17:56:53 - INFO - codeparrot_training - Step 44640: {'lr': 0.00040439813225804977, 'samples': 22856192, 'steps': 44640, 'loss/train': 1.3294568061828613} +03/05/2022 17:56:56 - INFO - codeparrot_training - Step 44641: {'lr': 0.00040439395847822145, 'samples': 22856704, 'steps': 44641, 'loss/train': 1.7839032411575317} +03/05/2022 17:56:57 - INFO - codeparrot_training - Skipping example with length 52 (seq_length=1024) +03/05/2022 17:57:02 - INFO - codeparrot_training - Step 44642: {'lr': 0.00040438978462882557, 'samples': 22857216, 'steps': 44642, 'loss/train': 0.5730567574501038} +03/05/2022 17:57:05 - INFO - codeparrot_training - Step 44643: {'lr': 0.0004043856107098641, 'samples': 22857728, 'steps': 44643, 'loss/train': 2.8155109882354736} +03/05/2022 17:57:06 - INFO - codeparrot_training - Skipping example with length 853 (seq_length=1024) +03/05/2022 17:57:10 - INFO - codeparrot_training - Step 44644: {'lr': 0.0004043814367213388, 'samples': 22858240, 'steps': 44644, 'loss/train': 1.6502482891082764} +03/05/2022 17:57:13 - INFO - codeparrot_training - Step 44645: {'lr': 0.00040437726266325164, 'samples': 22858752, 'steps': 44645, 'loss/train': 2.124210834503174} +03/05/2022 17:57:15 - INFO - codeparrot_training - Skipping example with length 92 (seq_length=1024) +03/05/2022 17:57:19 - INFO - codeparrot_training - Step 44646: {'lr': 0.00040437308853560444, 'samples': 22859264, 'steps': 44646, 'loss/train': 1.5798161029815674} +03/05/2022 17:57:22 - INFO - codeparrot_training - Step 44647: {'lr': 0.0004043689143383991, 'samples': 22859776, 'steps': 44647, 'loss/train': 1.7014262676239014} +03/05/2022 17:57:23 - INFO - codeparrot_training - Skipping example with length 365 (seq_length=1024) +03/05/2022 17:57:27 - INFO - codeparrot_training - Step 44648: {'lr': 0.00040436474007163754, 'samples': 22860288, 'steps': 44648, 'loss/train': 1.972253680229187} +03/05/2022 17:57:30 - INFO - codeparrot_training - Step 44649: {'lr': 0.0004043605657353216, 'samples': 22860800, 'steps': 44649, 'loss/train': 1.939107060432434} +03/05/2022 17:57:31 - INFO - codeparrot_training - Skipping example with length 1000 (seq_length=1024) +03/05/2022 17:57:35 - INFO - codeparrot_training - Step 44650: {'lr': 0.00040435639132945314, 'samples': 22861312, 'steps': 44650, 'loss/train': 1.3760052919387817} +03/05/2022 17:57:39 - INFO - codeparrot_training - Step 44651: {'lr': 0.0004043522168540341, 'samples': 22861824, 'steps': 44651, 'loss/train': 1.3720450401306152} +03/05/2022 17:57:40 - INFO - codeparrot_training - Skipping example with length 123 (seq_length=1024) +03/05/2022 17:57:44 - INFO - codeparrot_training - Step 44652: {'lr': 0.0004043480423090664, 'samples': 22862336, 'steps': 44652, 'loss/train': 1.9588886499404907} +03/05/2022 17:57:47 - INFO - codeparrot_training - Step 44653: {'lr': 0.0004043438676945518, 'samples': 22862848, 'steps': 44653, 'loss/train': 1.4956998825073242} +03/05/2022 17:57:48 - INFO - codeparrot_training - Skipping example with length 487 (seq_length=1024) +03/05/2022 17:57:52 - INFO - codeparrot_training - Step 44654: {'lr': 0.0004043396930104922, 'samples': 22863360, 'steps': 44654, 'loss/train': 1.8448140621185303} +03/05/2022 17:57:55 - INFO - codeparrot_training - Step 44655: {'lr': 0.0004043355182568895, 'samples': 22863872, 'steps': 44655, 'loss/train': 1.2778677940368652} +03/05/2022 17:57:57 - INFO - codeparrot_training - Skipping example with length 387 (seq_length=1024) +03/05/2022 17:58:01 - INFO - codeparrot_training - Step 44656: {'lr': 0.00040433134343374565, 'samples': 22864384, 'steps': 44656, 'loss/train': 2.1149306297302246} +03/05/2022 17:58:04 - INFO - codeparrot_training - Step 44657: {'lr': 0.0004043271685410625, 'samples': 22864896, 'steps': 44657, 'loss/train': 1.0620455741882324} +03/05/2022 17:58:05 - INFO - codeparrot_training - Skipping example with length 318 (seq_length=1024) +03/05/2022 17:58:09 - INFO - codeparrot_training - Step 44658: {'lr': 0.00040432299357884185, 'samples': 22865408, 'steps': 44658, 'loss/train': 1.153825283050537} +03/05/2022 17:58:13 - INFO - codeparrot_training - Step 44659: {'lr': 0.0004043188185470856, 'samples': 22865920, 'steps': 44659, 'loss/train': 1.006649136543274} +03/05/2022 17:58:14 - INFO - codeparrot_training - Skipping example with length 894 (seq_length=1024) +03/05/2022 17:58:18 - INFO - codeparrot_training - Step 44660: {'lr': 0.00040431464344579585, 'samples': 22866432, 'steps': 44660, 'loss/train': 1.7236055135726929} +03/05/2022 17:58:21 - INFO - codeparrot_training - Step 44661: {'lr': 0.00040431046827497415, 'samples': 22866944, 'steps': 44661, 'loss/train': 1.9976431131362915} +03/05/2022 17:58:22 - INFO - codeparrot_training - Skipping example with length 928 (seq_length=1024) +03/05/2022 17:58:26 - INFO - codeparrot_training - Step 44662: {'lr': 0.00040430629303462256, 'samples': 22867456, 'steps': 44662, 'loss/train': 2.4302759170532227} +03/05/2022 17:58:29 - INFO - codeparrot_training - Step 44663: {'lr': 0.000404302117724743, 'samples': 22867968, 'steps': 44663, 'loss/train': 2.144680976867676} +03/05/2022 17:58:31 - INFO - codeparrot_training - Skipping example with length 338 (seq_length=1024) +03/05/2022 17:58:34 - INFO - codeparrot_training - Step 44664: {'lr': 0.00040429794234533726, 'samples': 22868480, 'steps': 44664, 'loss/train': 2.2752296924591064} +03/05/2022 17:58:38 - INFO - codeparrot_training - Step 44665: {'lr': 0.0004042937668964072, 'samples': 22868992, 'steps': 44665, 'loss/train': 1.6671215295791626} +03/05/2022 17:58:39 - INFO - codeparrot_training - Skipping example with length 991 (seq_length=1024) +03/05/2022 17:58:43 - INFO - codeparrot_training - Step 44666: {'lr': 0.00040428959137795475, 'samples': 22869504, 'steps': 44666, 'loss/train': 1.5526199340820312} +03/05/2022 17:58:46 - INFO - codeparrot_training - Step 44667: {'lr': 0.0004042854157899818, 'samples': 22870016, 'steps': 44667, 'loss/train': 1.8484325408935547} +03/05/2022 17:58:48 - INFO - codeparrot_training - Skipping example with length 562 (seq_length=1024) +03/05/2022 17:58:52 - INFO - codeparrot_training - Step 44668: {'lr': 0.0004042812401324902, 'samples': 22870528, 'steps': 44668, 'loss/train': 1.860159993171692} +03/05/2022 17:58:55 - INFO - codeparrot_training - Step 44669: {'lr': 0.0004042770644054819, 'samples': 22871040, 'steps': 44669, 'loss/train': 1.5979058742523193} +03/05/2022 17:58:57 - INFO - codeparrot_training - Skipping example with length 307 (seq_length=1024) +03/05/2022 17:59:00 - INFO - codeparrot_training - Step 44670: {'lr': 0.0004042728886089587, 'samples': 22871552, 'steps': 44670, 'loss/train': 1.4401867389678955} +03/05/2022 17:59:03 - INFO - codeparrot_training - Step 44671: {'lr': 0.00040426871274292257, 'samples': 22872064, 'steps': 44671, 'loss/train': 1.4919852018356323} +03/05/2022 17:59:05 - INFO - codeparrot_training - Skipping example with length 946 (seq_length=1024) +03/05/2022 17:59:08 - INFO - codeparrot_training - Step 44672: {'lr': 0.00040426453680737534, 'samples': 22872576, 'steps': 44672, 'loss/train': 1.7116501331329346} +03/05/2022 17:59:12 - INFO - codeparrot_training - Step 44673: {'lr': 0.0004042603608023189, 'samples': 22873088, 'steps': 44673, 'loss/train': 1.9249149560928345} +03/05/2022 17:59:14 - INFO - codeparrot_training - Skipping example with length 940 (seq_length=1024) +03/05/2022 17:59:17 - INFO - codeparrot_training - Step 44674: {'lr': 0.00040425618472775504, 'samples': 22873600, 'steps': 44674, 'loss/train': 1.7871631383895874} +03/05/2022 17:59:20 - INFO - codeparrot_training - Step 44675: {'lr': 0.0004042520085836857, 'samples': 22874112, 'steps': 44675, 'loss/train': 1.433358073234558} +03/05/2022 17:59:22 - INFO - codeparrot_training - Skipping example with length 952 (seq_length=1024) +03/05/2022 17:59:26 - INFO - codeparrot_training - Step 44676: {'lr': 0.0004042478323701129, 'samples': 22874624, 'steps': 44676, 'loss/train': 0.9810197353363037} +03/05/2022 17:59:29 - INFO - codeparrot_training - Step 44677: {'lr': 0.00040424365608703836, 'samples': 22875136, 'steps': 44677, 'loss/train': 1.318591833114624} +03/05/2022 17:59:31 - INFO - codeparrot_training - Skipping example with length 602 (seq_length=1024) +03/05/2022 17:59:34 - INFO - codeparrot_training - Step 44678: {'lr': 0.00040423947973446404, 'samples': 22875648, 'steps': 44678, 'loss/train': 1.9786490201950073} +03/05/2022 17:59:37 - INFO - codeparrot_training - Step 44679: {'lr': 0.00040423530331239177, 'samples': 22876160, 'steps': 44679, 'loss/train': 2.048063039779663} +03/05/2022 17:59:39 - INFO - codeparrot_training - Skipping example with length 913 (seq_length=1024) +03/05/2022 17:59:43 - INFO - codeparrot_training - Step 44680: {'lr': 0.0004042311268208234, 'samples': 22876672, 'steps': 44680, 'loss/train': 1.1694409847259521} +03/05/2022 17:59:46 - INFO - codeparrot_training - Step 44681: {'lr': 0.00040422695025976084, 'samples': 22877184, 'steps': 44681, 'loss/train': 1.512878179550171} +03/05/2022 17:59:48 - INFO - codeparrot_training - Skipping example with length 284 (seq_length=1024) +03/05/2022 17:59:51 - INFO - codeparrot_training - Step 44682: {'lr': 0.00040422277362920614, 'samples': 22877696, 'steps': 44682, 'loss/train': 1.7980293035507202} +03/05/2022 17:59:54 - INFO - codeparrot_training - Step 44683: {'lr': 0.0004042185969291609, 'samples': 22878208, 'steps': 44683, 'loss/train': 1.7321280241012573} +03/05/2022 17:59:57 - INFO - codeparrot_training - Skipping example with length 279 (seq_length=1024) +03/05/2022 18:00:00 - INFO - codeparrot_training - Step 44684: {'lr': 0.00040421442015962727, 'samples': 22878720, 'steps': 44684, 'loss/train': 1.7772185802459717} +03/05/2022 18:00:03 - INFO - codeparrot_training - Step 44685: {'lr': 0.0004042102433206069, 'samples': 22879232, 'steps': 44685, 'loss/train': 1.0352022647857666} +03/05/2022 18:00:05 - INFO - codeparrot_training - Skipping example with length 40 (seq_length=1024) +03/05/2022 18:00:08 - INFO - codeparrot_training - Step 44686: {'lr': 0.0004042060664121018, 'samples': 22879744, 'steps': 44686, 'loss/train': 0.8618285059928894} +03/05/2022 18:00:11 - INFO - codeparrot_training - Step 44687: {'lr': 0.00040420188943411385, 'samples': 22880256, 'steps': 44687, 'loss/train': 1.0239217281341553} +03/05/2022 18:00:14 - INFO - codeparrot_training - Skipping example with length 943 (seq_length=1024) +03/05/2022 18:00:17 - INFO - codeparrot_training - Step 44688: {'lr': 0.0004041977123866448, 'samples': 22880768, 'steps': 44688, 'loss/train': 1.0650559663772583} +03/05/2022 18:00:20 - INFO - codeparrot_training - Step 44689: {'lr': 0.0004041935352696968, 'samples': 22881280, 'steps': 44689, 'loss/train': 0.9315967559814453} +03/05/2022 18:00:23 - INFO - codeparrot_training - Skipping example with length 878 (seq_length=1024) +03/05/2022 18:00:25 - INFO - codeparrot_training - Step 44690: {'lr': 0.00040418935808327153, 'samples': 22881792, 'steps': 44690, 'loss/train': 1.9698084592819214} +03/05/2022 18:00:28 - INFO - codeparrot_training - Step 44691: {'lr': 0.00040418518082737087, 'samples': 22882304, 'steps': 44691, 'loss/train': 3.1027791500091553} +03/05/2022 18:00:31 - INFO - codeparrot_training - Skipping example with length 478 (seq_length=1024) +03/05/2022 18:00:33 - INFO - codeparrot_training - Step 44692: {'lr': 0.0004041810035019967, 'samples': 22882816, 'steps': 44692, 'loss/train': 1.7663195133209229} +03/05/2022 18:00:37 - INFO - codeparrot_training - Step 44693: {'lr': 0.00040417682610715107, 'samples': 22883328, 'steps': 44693, 'loss/train': 0.7426781058311462} +03/05/2022 18:00:39 - INFO - codeparrot_training - Skipping example with length 244 (seq_length=1024) +03/05/2022 18:00:42 - INFO - codeparrot_training - Step 44694: {'lr': 0.00040417264864283563, 'samples': 22883840, 'steps': 44694, 'loss/train': 1.9624840021133423} +03/05/2022 18:00:45 - INFO - codeparrot_training - Step 44695: {'lr': 0.00040416847110905243, 'samples': 22884352, 'steps': 44695, 'loss/train': 2.138732671737671} +03/05/2022 18:00:48 - INFO - codeparrot_training - Skipping example with length 818 (seq_length=1024) +03/05/2022 18:00:50 - INFO - codeparrot_training - Step 44696: {'lr': 0.0004041642935058033, 'samples': 22884864, 'steps': 44696, 'loss/train': 0.8881983757019043} +03/05/2022 18:00:53 - INFO - codeparrot_training - Step 44697: {'lr': 0.0004041601158330901, 'samples': 22885376, 'steps': 44697, 'loss/train': 1.7319632768630981} +03/05/2022 18:00:56 - INFO - codeparrot_training - Skipping example with length 738 (seq_length=1024) +03/05/2022 18:00:59 - INFO - codeparrot_training - Step 44698: {'lr': 0.0004041559380909148, 'samples': 22885888, 'steps': 44698, 'loss/train': 1.7242436408996582} +03/05/2022 18:01:02 - INFO - codeparrot_training - Step 44699: {'lr': 0.00040415176027927915, 'samples': 22886400, 'steps': 44699, 'loss/train': 1.6091818809509277} +03/05/2022 18:01:05 - INFO - codeparrot_training - Step 44700: {'lr': 0.00040414758239818506, 'samples': 22886912, 'steps': 44700, 'loss/train': 1.5661252737045288} +03/05/2022 18:01:06 - INFO - codeparrot_training - Skipping example with length 951 (seq_length=1024) +03/05/2022 18:01:11 - INFO - codeparrot_training - Step 44701: {'lr': 0.00040414340444763455, 'samples': 22887424, 'steps': 44701, 'loss/train': 1.155908465385437} +03/05/2022 18:01:14 - INFO - codeparrot_training - Step 44702: {'lr': 0.0004041392264276292, 'samples': 22887936, 'steps': 44702, 'loss/train': 1.9351023435592651} +03/05/2022 18:01:14 - INFO - codeparrot_training - Skipping example with length 958 (seq_length=1024) +03/05/2022 18:01:19 - INFO - codeparrot_training - Step 44703: {'lr': 0.00040413504833817127, 'samples': 22888448, 'steps': 44703, 'loss/train': 1.3385438919067383} +03/05/2022 18:01:22 - INFO - codeparrot_training - Step 44704: {'lr': 0.0004041308701792625, 'samples': 22888960, 'steps': 44704, 'loss/train': 0.7219630479812622} +03/05/2022 18:01:23 - INFO - codeparrot_training - Skipping example with length 208 (seq_length=1024) +03/05/2022 18:01:28 - INFO - codeparrot_training - Step 44705: {'lr': 0.00040412669195090466, 'samples': 22889472, 'steps': 44705, 'loss/train': 1.3339745998382568} +03/05/2022 18:01:31 - INFO - codeparrot_training - Step 44706: {'lr': 0.0004041225136530997, 'samples': 22889984, 'steps': 44706, 'loss/train': 0.8325849771499634} +03/05/2022 18:01:31 - INFO - codeparrot_training - Skipping example with length 299 (seq_length=1024) +03/05/2022 18:01:36 - INFO - codeparrot_training - Step 44707: {'lr': 0.0004041183352858495, 'samples': 22890496, 'steps': 44707, 'loss/train': 1.157829999923706} +03/05/2022 18:01:39 - INFO - codeparrot_training - Step 44708: {'lr': 0.00040411415684915596, 'samples': 22891008, 'steps': 44708, 'loss/train': 1.410601258277893} +03/05/2022 18:01:39 - INFO - codeparrot_training - Skipping example with length 810 (seq_length=1024) +03/05/2022 18:01:44 - INFO - codeparrot_training - Step 44709: {'lr': 0.000404109978343021, 'samples': 22891520, 'steps': 44709, 'loss/train': 1.8412503004074097} +03/05/2022 18:01:48 - INFO - codeparrot_training - Step 44710: {'lr': 0.0004041057997674464, 'samples': 22892032, 'steps': 44710, 'loss/train': 1.4749433994293213} +03/05/2022 18:01:48 - INFO - codeparrot_training - Skipping example with length 201 (seq_length=1024) +03/05/2022 18:01:53 - INFO - codeparrot_training - Step 44711: {'lr': 0.0004041016211224342, 'samples': 22892544, 'steps': 44711, 'loss/train': 1.165158987045288} +03/05/2022 18:01:56 - INFO - codeparrot_training - Step 44712: {'lr': 0.0004040974424079862, 'samples': 22893056, 'steps': 44712, 'loss/train': 1.8320212364196777} +03/05/2022 18:01:57 - INFO - codeparrot_training - Skipping example with length 444 (seq_length=1024) +03/05/2022 18:02:01 - INFO - codeparrot_training - Step 44713: {'lr': 0.00040409326362410416, 'samples': 22893568, 'steps': 44713, 'loss/train': 1.5248128175735474} +03/05/2022 18:02:05 - INFO - codeparrot_training - Step 44714: {'lr': 0.0004040890847707901, 'samples': 22894080, 'steps': 44714, 'loss/train': 1.8229146003723145} +03/05/2022 18:02:05 - INFO - codeparrot_training - Skipping example with length 773 (seq_length=1024) +03/05/2022 18:02:10 - INFO - codeparrot_training - Step 44715: {'lr': 0.0004040849058480459, 'samples': 22894592, 'steps': 44715, 'loss/train': 1.7934163808822632} +03/05/2022 18:02:13 - INFO - codeparrot_training - Step 44716: {'lr': 0.0004040807268558734, 'samples': 22895104, 'steps': 44716, 'loss/train': 2.1351511478424072} +03/05/2022 18:02:14 - INFO - codeparrot_training - Skipping example with length 420 (seq_length=1024) +03/05/2022 18:02:18 - INFO - codeparrot_training - Step 44717: {'lr': 0.0004040765477942745, 'samples': 22895616, 'steps': 44717, 'loss/train': 1.5345141887664795} +03/05/2022 18:02:21 - INFO - codeparrot_training - Step 44718: {'lr': 0.0004040723686632512, 'samples': 22896128, 'steps': 44718, 'loss/train': 1.7860428094863892} +03/05/2022 18:02:22 - INFO - codeparrot_training - Skipping example with length 315 (seq_length=1024) +03/05/2022 18:02:27 - INFO - codeparrot_training - Step 44719: {'lr': 0.00040406818946280514, 'samples': 22896640, 'steps': 44719, 'loss/train': 0.9843380451202393} +03/05/2022 18:02:30 - INFO - codeparrot_training - Step 44720: {'lr': 0.0004040640101929384, 'samples': 22897152, 'steps': 44720, 'loss/train': 1.941428780555725} +03/05/2022 18:02:30 - INFO - codeparrot_training - Skipping example with length 458 (seq_length=1024) +03/05/2022 18:02:35 - INFO - codeparrot_training - Step 44721: {'lr': 0.0004040598308536527, 'samples': 22897664, 'steps': 44721, 'loss/train': 1.6175614595413208} +03/05/2022 18:02:38 - INFO - codeparrot_training - Step 44722: {'lr': 0.0004040556514449501, 'samples': 22898176, 'steps': 44722, 'loss/train': 2.392336368560791} +03/05/2022 18:02:38 - INFO - codeparrot_training - Skipping example with length 407 (seq_length=1024) +03/05/2022 18:02:43 - INFO - codeparrot_training - Step 44723: {'lr': 0.0004040514719668324, 'samples': 22898688, 'steps': 44723, 'loss/train': 1.6978230476379395} +03/05/2022 18:02:47 - INFO - codeparrot_training - Step 44724: {'lr': 0.00040404729241930144, 'samples': 22899200, 'steps': 44724, 'loss/train': 1.69320809841156} +03/05/2022 18:02:47 - INFO - codeparrot_training - Skipping example with length 214 (seq_length=1024) +03/05/2022 18:02:52 - INFO - codeparrot_training - Step 44725: {'lr': 0.0004040431128023592, 'samples': 22899712, 'steps': 44725, 'loss/train': 2.0603320598602295} +03/05/2022 18:02:55 - INFO - codeparrot_training - Step 44726: {'lr': 0.0004040389331160075, 'samples': 22900224, 'steps': 44726, 'loss/train': 2.0827510356903076} +03/05/2022 18:02:56 - INFO - codeparrot_training - Skipping example with length 863 (seq_length=1024) +03/05/2022 18:03:00 - INFO - codeparrot_training - Step 44727: {'lr': 0.00040403475336024816, 'samples': 22900736, 'steps': 44727, 'loss/train': 1.467716932296753} +03/05/2022 18:03:04 - INFO - codeparrot_training - Step 44728: {'lr': 0.0004040305735350832, 'samples': 22901248, 'steps': 44728, 'loss/train': 0.8228468894958496} +03/05/2022 18:03:04 - INFO - codeparrot_training - Skipping example with length 113 (seq_length=1024) +03/05/2022 18:03:09 - INFO - codeparrot_training - Step 44729: {'lr': 0.00040402639364051443, 'samples': 22901760, 'steps': 44729, 'loss/train': 1.2233939170837402} +03/05/2022 18:03:12 - INFO - codeparrot_training - Step 44730: {'lr': 0.0004040222136765437, 'samples': 22902272, 'steps': 44730, 'loss/train': 2.7464325428009033} +03/05/2022 18:03:13 - INFO - codeparrot_training - Skipping example with length 947 (seq_length=1024) +03/05/2022 18:03:17 - INFO - codeparrot_training - Step 44731: {'lr': 0.000404018033643173, 'samples': 22902784, 'steps': 44731, 'loss/train': 1.8208836317062378} +03/05/2022 18:03:21 - INFO - codeparrot_training - Step 44732: {'lr': 0.00040401385354040415, 'samples': 22903296, 'steps': 44732, 'loss/train': 1.7777717113494873} +03/05/2022 18:03:21 - INFO - codeparrot_training - Skipping example with length 439 (seq_length=1024) +03/05/2022 18:03:26 - INFO - codeparrot_training - Step 44733: {'lr': 0.00040400967336823903, 'samples': 22903808, 'steps': 44733, 'loss/train': 1.181457281112671} +03/05/2022 18:03:29 - INFO - codeparrot_training - Step 44734: {'lr': 0.0004040054931266795, 'samples': 22904320, 'steps': 44734, 'loss/train': 1.2707637548446655} +03/05/2022 18:03:29 - INFO - codeparrot_training - Skipping example with length 532 (seq_length=1024) +03/05/2022 18:03:34 - INFO - codeparrot_training - Step 44735: {'lr': 0.0004040013128157275, 'samples': 22904832, 'steps': 44735, 'loss/train': 1.1931248903274536} +03/05/2022 18:03:38 - INFO - codeparrot_training - Step 44736: {'lr': 0.00040399713243538483, 'samples': 22905344, 'steps': 44736, 'loss/train': 1.4922715425491333} +03/05/2022 18:03:38 - INFO - codeparrot_training - Skipping example with length 47 (seq_length=1024) +03/05/2022 18:03:43 - INFO - codeparrot_training - Step 44737: {'lr': 0.00040399295198565344, 'samples': 22905856, 'steps': 44737, 'loss/train': 1.8285335302352905} +03/05/2022 18:03:46 - INFO - codeparrot_training - Step 44738: {'lr': 0.0004039887714665352, 'samples': 22906368, 'steps': 44738, 'loss/train': 1.448887825012207} +03/05/2022 18:03:46 - INFO - codeparrot_training - Skipping example with length 570 (seq_length=1024) +03/05/2022 18:03:51 - INFO - codeparrot_training - Step 44739: {'lr': 0.0004039845908780321, 'samples': 22906880, 'steps': 44739, 'loss/train': 1.2528409957885742} +03/05/2022 18:03:55 - INFO - codeparrot_training - Step 44740: {'lr': 0.00040398041022014585, 'samples': 22907392, 'steps': 44740, 'loss/train': 1.7456319332122803} +03/05/2022 18:03:55 - INFO - codeparrot_training - Skipping example with length 706 (seq_length=1024) +03/05/2022 18:04:00 - INFO - codeparrot_training - Step 44741: {'lr': 0.0004039762294928784, 'samples': 22907904, 'steps': 44741, 'loss/train': 1.465722680091858} +03/05/2022 18:04:03 - INFO - codeparrot_training - Step 44742: {'lr': 0.0004039720486962316, 'samples': 22908416, 'steps': 44742, 'loss/train': 1.469164490699768} +03/05/2022 18:04:03 - INFO - codeparrot_training - Skipping example with length 123 (seq_length=1024) +03/05/2022 18:04:08 - INFO - codeparrot_training - Step 44743: {'lr': 0.00040396786783020747, 'samples': 22908928, 'steps': 44743, 'loss/train': 1.6062458753585815} +03/05/2022 18:04:11 - INFO - codeparrot_training - Step 44744: {'lr': 0.00040396368689480766, 'samples': 22909440, 'steps': 44744, 'loss/train': 2.29386043548584} +03/05/2022 18:04:11 - INFO - codeparrot_training - Skipping example with length 985 (seq_length=1024) +03/05/2022 18:04:17 - INFO - codeparrot_training - Step 44745: {'lr': 0.00040395950589003425, 'samples': 22909952, 'steps': 44745, 'loss/train': 2.72452712059021} +03/05/2022 18:04:20 - INFO - codeparrot_training - Step 44746: {'lr': 0.00040395532481588914, 'samples': 22910464, 'steps': 44746, 'loss/train': 1.93293297290802} +03/05/2022 18:04:20 - INFO - codeparrot_training - Skipping example with length 953 (seq_length=1024) +03/05/2022 18:04:25 - INFO - codeparrot_training - Step 44747: {'lr': 0.00040395114367237407, 'samples': 22910976, 'steps': 44747, 'loss/train': 2.1386055946350098} +03/05/2022 18:04:28 - INFO - codeparrot_training - Step 44748: {'lr': 0.00040394696245949093, 'samples': 22911488, 'steps': 44748, 'loss/train': 1.6204466819763184} +03/05/2022 18:04:29 - INFO - codeparrot_training - Skipping example with length 913 (seq_length=1024) +03/05/2022 18:04:34 - INFO - codeparrot_training - Step 44749: {'lr': 0.0004039427811772417, 'samples': 22912000, 'steps': 44749, 'loss/train': 1.7062934637069702} +03/05/2022 18:04:37 - INFO - codeparrot_training - Step 44750: {'lr': 0.0004039385998256283, 'samples': 22912512, 'steps': 44750, 'loss/train': 1.934909701347351} +03/05/2022 18:04:37 - INFO - codeparrot_training - Skipping example with length 1001 (seq_length=1024) +03/05/2022 18:04:43 - INFO - codeparrot_training - Step 44751: {'lr': 0.0004039344184046525, 'samples': 22913024, 'steps': 44751, 'loss/train': 1.1866395473480225} +03/05/2022 18:04:46 - INFO - codeparrot_training - Step 44752: {'lr': 0.00040393023691431617, 'samples': 22913536, 'steps': 44752, 'loss/train': 1.999855399131775} +03/05/2022 18:04:47 - INFO - codeparrot_training - Skipping example with length 557 (seq_length=1024) +03/05/2022 18:04:51 - INFO - codeparrot_training - Step 44753: {'lr': 0.00040392605535462137, 'samples': 22914048, 'steps': 44753, 'loss/train': 1.30694580078125} +03/05/2022 18:04:54 - INFO - codeparrot_training - Step 44754: {'lr': 0.00040392187372556977, 'samples': 22914560, 'steps': 44754, 'loss/train': 1.4586372375488281} +03/05/2022 18:04:56 - INFO - codeparrot_training - Skipping example with length 873 (seq_length=1024) +03/05/2022 18:05:00 - INFO - codeparrot_training - Step 44755: {'lr': 0.00040391769202716333, 'samples': 22915072, 'steps': 44755, 'loss/train': 0.947270929813385} +03/05/2022 18:05:03 - INFO - codeparrot_training - Step 44756: {'lr': 0.00040391351025940406, 'samples': 22915584, 'steps': 44756, 'loss/train': 1.3067102432250977} +03/05/2022 18:05:04 - INFO - codeparrot_training - Skipping example with length 106 (seq_length=1024) +03/05/2022 18:05:08 - INFO - codeparrot_training - Step 44757: {'lr': 0.00040390932842229363, 'samples': 22916096, 'steps': 44757, 'loss/train': 1.604349970817566} +03/05/2022 18:05:11 - INFO - codeparrot_training - Step 44758: {'lr': 0.0004039051465158341, 'samples': 22916608, 'steps': 44758, 'loss/train': 1.7319743633270264} +03/05/2022 18:05:12 - INFO - codeparrot_training - Skipping example with length 764 (seq_length=1024) +03/05/2022 18:05:16 - INFO - codeparrot_training - Step 44759: {'lr': 0.0004039009645400272, 'samples': 22917120, 'steps': 44759, 'loss/train': 1.6737405061721802} +03/05/2022 18:05:20 - INFO - codeparrot_training - Step 44760: {'lr': 0.00040389678249487504, 'samples': 22917632, 'steps': 44760, 'loss/train': 1.6790167093276978} +03/05/2022 18:05:21 - INFO - codeparrot_training - Skipping example with length 979 (seq_length=1024) +03/05/2022 18:05:25 - INFO - codeparrot_training - Step 44761: {'lr': 0.00040389260038037924, 'samples': 22918144, 'steps': 44761, 'loss/train': 2.531528949737549} +03/05/2022 18:05:28 - INFO - codeparrot_training - Step 44762: {'lr': 0.0004038884181965419, 'samples': 22918656, 'steps': 44762, 'loss/train': 1.1833093166351318} +03/05/2022 18:05:29 - INFO - codeparrot_training - Skipping example with length 489 (seq_length=1024) +03/05/2022 18:05:34 - INFO - codeparrot_training - Step 44763: {'lr': 0.0004038842359433647, 'samples': 22919168, 'steps': 44763, 'loss/train': 1.6753454208374023} +03/05/2022 18:05:37 - INFO - codeparrot_training - Step 44764: {'lr': 0.0004038800536208497, 'samples': 22919680, 'steps': 44764, 'loss/train': 2.1583492755889893} +03/05/2022 18:05:39 - INFO - codeparrot_training - Skipping example with length 1016 (seq_length=1024) +03/05/2022 18:05:42 - INFO - codeparrot_training - Step 44765: {'lr': 0.00040387587122899877, 'samples': 22920192, 'steps': 44765, 'loss/train': 0.8816787600517273} +03/05/2022 18:05:45 - INFO - codeparrot_training - Step 44766: {'lr': 0.0004038716887678137, 'samples': 22920704, 'steps': 44766, 'loss/train': 1.5429033041000366} +03/05/2022 18:05:47 - INFO - codeparrot_training - Skipping example with length 664 (seq_length=1024) +03/05/2022 18:05:51 - INFO - codeparrot_training - Step 44767: {'lr': 0.0004038675062372964, 'samples': 22921216, 'steps': 44767, 'loss/train': 1.9629766941070557} +03/05/2022 18:05:54 - INFO - codeparrot_training - Step 44768: {'lr': 0.00040386332363744884, 'samples': 22921728, 'steps': 44768, 'loss/train': 1.8926219940185547} +03/05/2022 18:05:55 - INFO - codeparrot_training - Skipping example with length 183 (seq_length=1024) +03/05/2022 18:05:59 - INFO - codeparrot_training - Step 44769: {'lr': 0.0004038591409682728, 'samples': 22922240, 'steps': 44769, 'loss/train': 2.050028085708618} +03/05/2022 18:06:02 - INFO - codeparrot_training - Step 44770: {'lr': 0.00040385495822977015, 'samples': 22922752, 'steps': 44770, 'loss/train': 1.8652485609054565} +03/05/2022 18:06:04 - INFO - codeparrot_training - Skipping example with length 56 (seq_length=1024) +03/05/2022 18:06:07 - INFO - codeparrot_training - Step 44771: {'lr': 0.00040385077542194294, 'samples': 22923264, 'steps': 44771, 'loss/train': 1.6996325254440308} +03/05/2022 18:06:11 - INFO - codeparrot_training - Step 44772: {'lr': 0.0004038465925447929, 'samples': 22923776, 'steps': 44772, 'loss/train': 1.9292057752609253} +03/05/2022 18:06:12 - INFO - codeparrot_training - Skipping example with length 80 (seq_length=1024) +03/05/2022 18:06:16 - INFO - codeparrot_training - Step 44773: {'lr': 0.00040384240959832196, 'samples': 22924288, 'steps': 44773, 'loss/train': 1.6491014957427979} +03/05/2022 18:06:19 - INFO - codeparrot_training - Step 44774: {'lr': 0.000403838226582532, 'samples': 22924800, 'steps': 44774, 'loss/train': 1.7343530654907227} +03/05/2022 18:06:20 - INFO - codeparrot_training - Skipping example with length 41 (seq_length=1024) +03/05/2022 18:06:24 - INFO - codeparrot_training - Step 44775: {'lr': 0.00040383404349742484, 'samples': 22925312, 'steps': 44775, 'loss/train': 1.5150847434997559} +03/05/2022 18:06:27 - INFO - codeparrot_training - Step 44776: {'lr': 0.0004038298603430025, 'samples': 22925824, 'steps': 44776, 'loss/train': 1.566767692565918} +03/05/2022 18:06:29 - INFO - codeparrot_training - Skipping example with length 170 (seq_length=1024) +03/05/2022 18:06:33 - INFO - codeparrot_training - Step 44777: {'lr': 0.0004038256771192668, 'samples': 22926336, 'steps': 44777, 'loss/train': 1.926650047302246} +03/05/2022 18:06:36 - INFO - codeparrot_training - Step 44778: {'lr': 0.00040382149382621967, 'samples': 22926848, 'steps': 44778, 'loss/train': 1.8420392274856567} +03/05/2022 18:06:37 - INFO - codeparrot_training - Skipping example with length 306 (seq_length=1024) +03/05/2022 18:06:41 - INFO - codeparrot_training - Step 44779: {'lr': 0.00040381731046386295, 'samples': 22927360, 'steps': 44779, 'loss/train': 1.264277458190918} +03/05/2022 18:06:44 - INFO - codeparrot_training - Step 44780: {'lr': 0.0004038131270321984, 'samples': 22927872, 'steps': 44780, 'loss/train': 1.203944444656372} +03/05/2022 18:06:45 - INFO - codeparrot_training - Skipping example with length 874 (seq_length=1024) +03/05/2022 18:06:50 - INFO - codeparrot_training - Step 44781: {'lr': 0.0004038089435312281, 'samples': 22928384, 'steps': 44781, 'loss/train': 3.237008810043335} +03/05/2022 18:06:53 - INFO - codeparrot_training - Step 44782: {'lr': 0.0004038047599609539, 'samples': 22928896, 'steps': 44782, 'loss/train': 1.7700474262237549} +03/05/2022 18:06:53 - INFO - codeparrot_training - Skipping example with length 359 (seq_length=1024) +03/05/2022 18:06:58 - INFO - codeparrot_training - Step 44783: {'lr': 0.00040380057632137756, 'samples': 22929408, 'steps': 44783, 'loss/train': 1.3696911334991455} +03/05/2022 18:07:01 - INFO - codeparrot_training - Step 44784: {'lr': 0.0004037963926125011, 'samples': 22929920, 'steps': 44784, 'loss/train': 1.3393715620040894} +03/05/2022 18:07:02 - INFO - codeparrot_training - Skipping example with length 186 (seq_length=1024) +03/05/2022 18:07:07 - INFO - codeparrot_training - Step 44785: {'lr': 0.00040379220883432644, 'samples': 22930432, 'steps': 44785, 'loss/train': 1.1983351707458496} +03/05/2022 18:07:10 - INFO - codeparrot_training - Step 44786: {'lr': 0.0004037880249868553, 'samples': 22930944, 'steps': 44786, 'loss/train': 1.852432131767273} +03/05/2022 18:07:11 - INFO - codeparrot_training - Skipping example with length 165 (seq_length=1024) +03/05/2022 18:07:15 - INFO - codeparrot_training - Step 44787: {'lr': 0.00040378384107008967, 'samples': 22931456, 'steps': 44787, 'loss/train': 3.012937068939209} +03/05/2022 18:07:18 - INFO - codeparrot_training - Step 44788: {'lr': 0.00040377965708403133, 'samples': 22931968, 'steps': 44788, 'loss/train': 1.446277379989624} +03/05/2022 18:07:20 - INFO - codeparrot_training - Skipping example with length 739 (seq_length=1024) +03/05/2022 18:07:24 - INFO - codeparrot_training - Step 44789: {'lr': 0.00040377547302868235, 'samples': 22932480, 'steps': 44789, 'loss/train': 2.1813833713531494} +03/05/2022 18:07:27 - INFO - codeparrot_training - Step 44790: {'lr': 0.00040377128890404444, 'samples': 22932992, 'steps': 44790, 'loss/train': 1.3893228769302368} +03/05/2022 18:07:29 - INFO - codeparrot_training - Skipping example with length 815 (seq_length=1024) +03/05/2022 18:07:32 - INFO - codeparrot_training - Step 44791: {'lr': 0.00040376710471011967, 'samples': 22933504, 'steps': 44791, 'loss/train': 0.5895149111747742} +03/05/2022 18:07:36 - INFO - codeparrot_training - Step 44792: {'lr': 0.0004037629204469098, 'samples': 22934016, 'steps': 44792, 'loss/train': 2.247985363006592} +03/05/2022 18:07:38 - INFO - codeparrot_training - Skipping example with length 101 (seq_length=1024) +03/05/2022 18:07:41 - INFO - codeparrot_training - Step 44793: {'lr': 0.0004037587361144166, 'samples': 22934528, 'steps': 44793, 'loss/train': 1.6632044315338135} +03/05/2022 18:07:44 - INFO - codeparrot_training - Step 44794: {'lr': 0.0004037545517126422, 'samples': 22935040, 'steps': 44794, 'loss/train': 1.826718807220459} +03/05/2022 18:07:46 - INFO - codeparrot_training - Skipping example with length 629 (seq_length=1024) +03/05/2022 18:07:49 - INFO - codeparrot_training - Step 44795: {'lr': 0.0004037503672415883, 'samples': 22935552, 'steps': 44795, 'loss/train': 0.7954122424125671} +03/05/2022 18:07:52 - INFO - codeparrot_training - Step 44796: {'lr': 0.000403746182701257, 'samples': 22936064, 'steps': 44796, 'loss/train': 1.3659608364105225} +03/05/2022 18:07:54 - INFO - codeparrot_training - Skipping example with length 689 (seq_length=1024) +03/05/2022 18:07:58 - INFO - codeparrot_training - Step 44797: {'lr': 0.0004037419980916499, 'samples': 22936576, 'steps': 44797, 'loss/train': 0.5968263149261475} +03/05/2022 18:08:01 - INFO - codeparrot_training - Step 44798: {'lr': 0.00040373781341276904, 'samples': 22937088, 'steps': 44798, 'loss/train': 1.1562955379486084} +03/05/2022 18:08:03 - INFO - codeparrot_training - Skipping example with length 574 (seq_length=1024) +03/05/2022 18:08:06 - INFO - codeparrot_training - Step 44799: {'lr': 0.00040373362866461633, 'samples': 22937600, 'steps': 44799, 'loss/train': 1.068071722984314} +03/05/2022 18:08:09 - INFO - codeparrot_training - Step 44800: {'lr': 0.0004037294438471936, 'samples': 22938112, 'steps': 44800, 'loss/train': 1.5918644666671753} +03/05/2022 18:08:11 - INFO - codeparrot_training - Skipping example with length 815 (seq_length=1024) +03/05/2022 18:08:15 - INFO - codeparrot_training - Step 44801: {'lr': 0.00040372525896050285, 'samples': 22938624, 'steps': 44801, 'loss/train': 1.641677975654602} +03/05/2022 18:08:18 - INFO - codeparrot_training - Step 44802: {'lr': 0.0004037210740045457, 'samples': 22939136, 'steps': 44802, 'loss/train': 1.9541047811508179} +03/05/2022 18:08:23 - INFO - codeparrot_training - Step 44803: {'lr': 0.0004037168889793243, 'samples': 22939648, 'steps': 44803, 'loss/train': 1.513360857963562} +03/05/2022 18:08:26 - INFO - codeparrot_training - Step 44804: {'lr': 0.0004037127038848404, 'samples': 22940160, 'steps': 44804, 'loss/train': 1.5611988306045532} +03/05/2022 18:08:28 - INFO - codeparrot_training - Skipping example with length 759 (seq_length=1024) +03/05/2022 18:08:31 - INFO - codeparrot_training - Step 44805: {'lr': 0.00040370851872109604, 'samples': 22940672, 'steps': 44805, 'loss/train': 1.2371946573257446} +03/05/2022 18:08:35 - INFO - codeparrot_training - Step 44806: {'lr': 0.0004037043334880929, 'samples': 22941184, 'steps': 44806, 'loss/train': 2.403083086013794} +03/05/2022 18:08:36 - INFO - codeparrot_training - Skipping example with length 16 (seq_length=1024) +03/05/2022 18:08:40 - INFO - codeparrot_training - Step 44807: {'lr': 0.000403700148185833, 'samples': 22941696, 'steps': 44807, 'loss/train': 1.6951184272766113} +03/05/2022 18:08:43 - INFO - codeparrot_training - Step 44808: {'lr': 0.00040369596281431816, 'samples': 22942208, 'steps': 44808, 'loss/train': 1.3963836431503296} +03/05/2022 18:08:45 - INFO - codeparrot_training - Skipping example with length 199 (seq_length=1024) +03/05/2022 18:08:48 - INFO - codeparrot_training - Step 44809: {'lr': 0.0004036917773735502, 'samples': 22942720, 'steps': 44809, 'loss/train': 1.5122878551483154} +03/05/2022 18:08:52 - INFO - codeparrot_training - Step 44810: {'lr': 0.00040368759186353123, 'samples': 22943232, 'steps': 44810, 'loss/train': 0.5519545078277588} +03/05/2022 18:08:53 - INFO - codeparrot_training - Skipping example with length 10 (seq_length=1024) +03/05/2022 18:08:57 - INFO - codeparrot_training - Step 44811: {'lr': 0.0004036834062842629, 'samples': 22943744, 'steps': 44811, 'loss/train': 2.0449063777923584} +03/05/2022 18:09:00 - INFO - codeparrot_training - Step 44812: {'lr': 0.00040367922063574735, 'samples': 22944256, 'steps': 44812, 'loss/train': 1.9026380777359009} +03/05/2022 18:09:02 - INFO - codeparrot_training - Skipping example with length 100 (seq_length=1024) +03/05/2022 18:09:05 - INFO - codeparrot_training - Step 44813: {'lr': 0.0004036750349179862, 'samples': 22944768, 'steps': 44813, 'loss/train': 0.47377830743789673} +03/05/2022 18:09:08 - INFO - codeparrot_training - Step 44814: {'lr': 0.00040367084913098153, 'samples': 22945280, 'steps': 44814, 'loss/train': 1.7244184017181396} +03/05/2022 18:09:10 - INFO - codeparrot_training - Skipping example with length 473 (seq_length=1024) +03/05/2022 18:09:14 - INFO - codeparrot_training - Step 44815: {'lr': 0.000403666663274735, 'samples': 22945792, 'steps': 44815, 'loss/train': 1.5762567520141602} +03/05/2022 18:09:17 - INFO - codeparrot_training - Step 44816: {'lr': 0.0004036624773492488, 'samples': 22946304, 'steps': 44816, 'loss/train': 1.7458164691925049} +03/05/2022 18:09:19 - INFO - codeparrot_training - Skipping example with length 941 (seq_length=1024) +03/05/2022 18:09:22 - INFO - codeparrot_training - Step 44817: {'lr': 0.0004036582913545246, 'samples': 22946816, 'steps': 44817, 'loss/train': 1.249213457107544} +03/05/2022 18:09:25 - INFO - codeparrot_training - Step 44818: {'lr': 0.0004036541052905643, 'samples': 22947328, 'steps': 44818, 'loss/train': 1.8429397344589233} +03/05/2022 18:09:27 - INFO - codeparrot_training - Skipping example with length 838 (seq_length=1024) +03/05/2022 18:09:31 - INFO - codeparrot_training - Step 44819: {'lr': 0.0004036499191573699, 'samples': 22947840, 'steps': 44819, 'loss/train': 2.037851572036743} +03/05/2022 18:09:34 - INFO - codeparrot_training - Step 44820: {'lr': 0.00040364573295494316, 'samples': 22948352, 'steps': 44820, 'loss/train': 0.39653921127319336} +03/05/2022 18:09:36 - INFO - codeparrot_training - Skipping example with length 914 (seq_length=1024) +03/05/2022 18:09:39 - INFO - codeparrot_training - Step 44821: {'lr': 0.00040364154668328604, 'samples': 22948864, 'steps': 44821, 'loss/train': 1.0286256074905396} +03/05/2022 18:09:42 - INFO - codeparrot_training - Step 44822: {'lr': 0.0004036373603424004, 'samples': 22949376, 'steps': 44822, 'loss/train': 1.5136947631835938} +03/05/2022 18:09:48 - INFO - codeparrot_training - Step 44823: {'lr': 0.00040363317393228814, 'samples': 22949888, 'steps': 44823, 'loss/train': 1.829037070274353} +03/05/2022 18:09:51 - INFO - codeparrot_training - Step 44824: {'lr': 0.00040362898745295117, 'samples': 22950400, 'steps': 44824, 'loss/train': 1.7559410333633423} +03/05/2022 18:09:53 - INFO - codeparrot_training - Skipping example with length 299 (seq_length=1024) +03/05/2022 18:09:56 - INFO - codeparrot_training - Step 44825: {'lr': 0.00040362480090439136, 'samples': 22950912, 'steps': 44825, 'loss/train': 2.6628947257995605} +03/05/2022 18:09:59 - INFO - codeparrot_training - Step 44826: {'lr': 0.00040362061428661055, 'samples': 22951424, 'steps': 44826, 'loss/train': 1.542106032371521} +03/05/2022 18:10:02 - INFO - codeparrot_training - Skipping example with length 408 (seq_length=1024) +03/05/2022 18:10:05 - INFO - codeparrot_training - Step 44827: {'lr': 0.0004036164275996107, 'samples': 22951936, 'steps': 44827, 'loss/train': 1.7421807050704956} +03/05/2022 18:10:08 - INFO - codeparrot_training - Step 44828: {'lr': 0.00040361224084339365, 'samples': 22952448, 'steps': 44828, 'loss/train': 1.2294347286224365} +03/05/2022 18:10:10 - INFO - codeparrot_training - Skipping example with length 787 (seq_length=1024) +03/05/2022 18:10:13 - INFO - codeparrot_training - Step 44829: {'lr': 0.00040360805401796124, 'samples': 22952960, 'steps': 44829, 'loss/train': 0.35329821705818176} +03/05/2022 18:10:16 - INFO - codeparrot_training - Step 44830: {'lr': 0.0004036038671233154, 'samples': 22953472, 'steps': 44830, 'loss/train': 2.22332501411438} +03/05/2022 18:10:20 - INFO - codeparrot_training - Step 44831: {'lr': 0.00040359968015945814, 'samples': 22953984, 'steps': 44831, 'loss/train': 1.1991316080093384} +03/05/2022 18:10:20 - INFO - codeparrot_training - Skipping example with length 815 (seq_length=1024) +03/05/2022 18:10:25 - INFO - codeparrot_training - Step 44832: {'lr': 0.0004035954931263912, 'samples': 22954496, 'steps': 44832, 'loss/train': 1.5273411273956299} +03/05/2022 18:10:28 - INFO - codeparrot_training - Step 44833: {'lr': 0.00040359130602411644, 'samples': 22955008, 'steps': 44833, 'loss/train': 2.10172438621521} +03/05/2022 18:10:28 - INFO - codeparrot_training - Skipping example with length 886 (seq_length=1024) +03/05/2022 18:10:33 - INFO - codeparrot_training - Step 44834: {'lr': 0.0004035871188526358, 'samples': 22955520, 'steps': 44834, 'loss/train': 1.650887131690979} +03/05/2022 18:10:37 - INFO - codeparrot_training - Step 44835: {'lr': 0.00040358293161195125, 'samples': 22956032, 'steps': 44835, 'loss/train': 1.2172049283981323} +03/05/2022 18:10:37 - INFO - codeparrot_training - Skipping example with length 963 (seq_length=1024) +03/05/2022 18:10:42 - INFO - codeparrot_training - Step 44836: {'lr': 0.0004035787443020645, 'samples': 22956544, 'steps': 44836, 'loss/train': 1.6648024320602417} +03/05/2022 18:10:45 - INFO - codeparrot_training - Step 44837: {'lr': 0.00040357455692297765, 'samples': 22957056, 'steps': 44837, 'loss/train': 0.8538150191307068} +03/05/2022 18:10:45 - INFO - codeparrot_training - Skipping example with length 852 (seq_length=1024) +03/05/2022 18:10:50 - INFO - codeparrot_training - Step 44838: {'lr': 0.0004035703694746924, 'samples': 22957568, 'steps': 44838, 'loss/train': 0.5543742775917053} +03/05/2022 18:10:54 - INFO - codeparrot_training - Step 44839: {'lr': 0.0004035661819572108, 'samples': 22958080, 'steps': 44839, 'loss/train': 1.2282826900482178} +03/05/2022 18:10:54 - INFO - codeparrot_training - Skipping example with length 987 (seq_length=1024) +03/05/2022 18:10:59 - INFO - codeparrot_training - Step 44840: {'lr': 0.0004035619943705345, 'samples': 22958592, 'steps': 44840, 'loss/train': 2.2534658908843994} +03/05/2022 18:11:02 - INFO - codeparrot_training - Step 44841: {'lr': 0.0004035578067146657, 'samples': 22959104, 'steps': 44841, 'loss/train': 1.5484944581985474} +03/05/2022 18:11:03 - INFO - codeparrot_training - Skipping example with length 261 (seq_length=1024) +03/05/2022 18:11:07 - INFO - codeparrot_training - Step 44842: {'lr': 0.000403553618989606, 'samples': 22959616, 'steps': 44842, 'loss/train': 1.2229118347167969} +03/05/2022 18:11:10 - INFO - codeparrot_training - Step 44843: {'lr': 0.0004035494311953575, 'samples': 22960128, 'steps': 44843, 'loss/train': 2.5433695316314697} +03/05/2022 18:11:11 - INFO - codeparrot_training - Skipping example with length 568 (seq_length=1024) +03/05/2022 18:11:16 - INFO - codeparrot_training - Step 44844: {'lr': 0.0004035452433319219, 'samples': 22960640, 'steps': 44844, 'loss/train': 2.018252372741699} +03/05/2022 18:11:19 - INFO - codeparrot_training - Step 44845: {'lr': 0.0004035410553993012, 'samples': 22961152, 'steps': 44845, 'loss/train': 2.3787059783935547} +03/05/2022 18:11:19 - INFO - codeparrot_training - Skipping example with length 617 (seq_length=1024) +03/05/2022 18:11:24 - INFO - codeparrot_training - Step 44846: {'lr': 0.00040353686739749733, 'samples': 22961664, 'steps': 44846, 'loss/train': 2.578923463821411} +03/05/2022 18:11:27 - INFO - codeparrot_training - Step 44847: {'lr': 0.0004035326793265121, 'samples': 22962176, 'steps': 44847, 'loss/train': 2.3171775341033936} +03/05/2022 18:11:28 - INFO - codeparrot_training - Skipping example with length 722 (seq_length=1024) +03/05/2022 18:11:33 - INFO - codeparrot_training - Step 44848: {'lr': 0.0004035284911863474, 'samples': 22962688, 'steps': 44848, 'loss/train': 1.2615833282470703} +03/05/2022 18:11:36 - INFO - codeparrot_training - Step 44849: {'lr': 0.00040352430297700513, 'samples': 22963200, 'steps': 44849, 'loss/train': 1.3438448905944824} +03/05/2022 18:11:36 - INFO - codeparrot_training - Skipping example with length 185 (seq_length=1024) +03/05/2022 18:11:41 - INFO - codeparrot_training - Step 44850: {'lr': 0.00040352011469848713, 'samples': 22963712, 'steps': 44850, 'loss/train': 1.82333505153656} +03/05/2022 18:11:44 - INFO - codeparrot_training - Step 44851: {'lr': 0.00040351592635079535, 'samples': 22964224, 'steps': 44851, 'loss/train': 1.966168999671936} +03/05/2022 18:11:45 - INFO - codeparrot_training - Skipping example with length 83 (seq_length=1024) +03/05/2022 18:11:50 - INFO - codeparrot_training - Step 44852: {'lr': 0.0004035117379339318, 'samples': 22964736, 'steps': 44852, 'loss/train': 0.8026823401451111} +03/05/2022 18:11:53 - INFO - codeparrot_training - Step 44853: {'lr': 0.00040350754944789815, 'samples': 22965248, 'steps': 44853, 'loss/train': 1.0080134868621826} +03/05/2022 18:11:53 - INFO - codeparrot_training - Skipping example with length 776 (seq_length=1024) +03/05/2022 18:11:58 - INFO - codeparrot_training - Step 44854: {'lr': 0.0004035033608926963, 'samples': 22965760, 'steps': 44854, 'loss/train': 2.3097434043884277} +03/05/2022 18:12:01 - INFO - codeparrot_training - Step 44855: {'lr': 0.0004034991722683282, 'samples': 22966272, 'steps': 44855, 'loss/train': 1.7704964876174927} +03/05/2022 18:12:02 - INFO - codeparrot_training - Skipping example with length 49 (seq_length=1024) +03/05/2022 18:12:07 - INFO - codeparrot_training - Step 44856: {'lr': 0.0004034949835747958, 'samples': 22966784, 'steps': 44856, 'loss/train': 1.0299673080444336} +03/05/2022 18:12:10 - INFO - codeparrot_training - Step 44857: {'lr': 0.00040349079481210096, 'samples': 22967296, 'steps': 44857, 'loss/train': 1.6317873001098633} +03/05/2022 18:12:10 - INFO - codeparrot_training - Skipping example with length 1014 (seq_length=1024) +03/05/2022 18:12:15 - INFO - codeparrot_training - Step 44858: {'lr': 0.00040348660598024547, 'samples': 22967808, 'steps': 44858, 'loss/train': 1.7027883529663086} +03/05/2022 18:12:18 - INFO - codeparrot_training - Step 44859: {'lr': 0.0004034824170792313, 'samples': 22968320, 'steps': 44859, 'loss/train': 0.9177788496017456} +03/05/2022 18:12:18 - INFO - codeparrot_training - Skipping example with length 224 (seq_length=1024) +03/05/2022 18:12:23 - INFO - codeparrot_training - Step 44860: {'lr': 0.0004034782281090603, 'samples': 22968832, 'steps': 44860, 'loss/train': 1.7191507816314697} +03/05/2022 18:12:27 - INFO - codeparrot_training - Step 44861: {'lr': 0.00040347403906973445, 'samples': 22969344, 'steps': 44861, 'loss/train': 1.8175432682037354} +03/05/2022 18:12:27 - INFO - codeparrot_training - Skipping example with length 907 (seq_length=1024) +03/05/2022 18:12:32 - INFO - codeparrot_training - Step 44862: {'lr': 0.0004034698499612555, 'samples': 22969856, 'steps': 44862, 'loss/train': 1.5070908069610596} +03/05/2022 18:12:35 - INFO - codeparrot_training - Step 44863: {'lr': 0.00040346566078362545, 'samples': 22970368, 'steps': 44863, 'loss/train': 1.8992211818695068} +03/05/2022 18:12:35 - INFO - codeparrot_training - Skipping example with length 472 (seq_length=1024) +03/05/2022 18:12:40 - INFO - codeparrot_training - Step 44864: {'lr': 0.0004034614715368461, 'samples': 22970880, 'steps': 44864, 'loss/train': 2.2642335891723633} +03/05/2022 18:12:44 - INFO - codeparrot_training - Step 44865: {'lr': 0.0004034572822209194, 'samples': 22971392, 'steps': 44865, 'loss/train': 1.288116455078125} +03/05/2022 18:12:44 - INFO - codeparrot_training - Skipping example with length 784 (seq_length=1024) +03/05/2022 18:12:49 - INFO - codeparrot_training - Step 44866: {'lr': 0.00040345309283584726, 'samples': 22971904, 'steps': 44866, 'loss/train': 1.4501556158065796} +03/05/2022 18:12:52 - INFO - codeparrot_training - Step 44867: {'lr': 0.0004034489033816314, 'samples': 22972416, 'steps': 44867, 'loss/train': 1.7057571411132812} +03/05/2022 18:12:52 - INFO - codeparrot_training - Skipping example with length 164 (seq_length=1024) +03/05/2022 18:12:57 - INFO - codeparrot_training - Step 44868: {'lr': 0.00040344471385827396, 'samples': 22972928, 'steps': 44868, 'loss/train': 0.5685482621192932} +03/05/2022 18:13:00 - INFO - codeparrot_training - Step 44869: {'lr': 0.00040344052426577665, 'samples': 22973440, 'steps': 44869, 'loss/train': 1.2279940843582153} +03/05/2022 18:13:01 - INFO - codeparrot_training - Skipping example with length 457 (seq_length=1024) +03/05/2022 18:13:06 - INFO - codeparrot_training - Step 44870: {'lr': 0.0004034363346041414, 'samples': 22973952, 'steps': 44870, 'loss/train': 1.0874946117401123} +03/05/2022 18:13:09 - INFO - codeparrot_training - Step 44871: {'lr': 0.0004034321448733701, 'samples': 22974464, 'steps': 44871, 'loss/train': 1.900829792022705} +03/05/2022 18:13:09 - INFO - codeparrot_training - Skipping example with length 180 (seq_length=1024) +03/05/2022 18:13:14 - INFO - codeparrot_training - Step 44872: {'lr': 0.00040342795507346464, 'samples': 22974976, 'steps': 44872, 'loss/train': 1.5672444105148315} +03/05/2022 18:13:17 - INFO - codeparrot_training - Step 44873: {'lr': 0.000403423765204427, 'samples': 22975488, 'steps': 44873, 'loss/train': 1.2006477117538452} +03/05/2022 18:13:19 - INFO - codeparrot_training - Skipping example with length 252 (seq_length=1024) +03/05/2022 18:13:23 - INFO - codeparrot_training - Step 44874: {'lr': 0.0004034195752662589, 'samples': 22976000, 'steps': 44874, 'loss/train': 1.7386012077331543} +03/05/2022 18:13:26 - INFO - codeparrot_training - Step 44875: {'lr': 0.00040341538525896233, 'samples': 22976512, 'steps': 44875, 'loss/train': 1.7685891389846802} +03/05/2022 18:13:29 - INFO - codeparrot_training - Skipping example with length 677 (seq_length=1024) +03/05/2022 18:13:31 - INFO - codeparrot_training - Step 44876: {'lr': 0.0004034111951825391, 'samples': 22977024, 'steps': 44876, 'loss/train': 1.4942201375961304} +03/05/2022 18:13:35 - INFO - codeparrot_training - Step 44877: {'lr': 0.00040340700503699116, 'samples': 22977536, 'steps': 44877, 'loss/train': 0.6549243927001953} +03/05/2022 18:13:37 - INFO - codeparrot_training - Skipping example with length 517 (seq_length=1024) +03/05/2022 18:13:40 - INFO - codeparrot_training - Step 44878: {'lr': 0.0004034028148223204, 'samples': 22978048, 'steps': 44878, 'loss/train': 1.978482961654663} +03/05/2022 18:13:43 - INFO - codeparrot_training - Step 44879: {'lr': 0.0004033986245385288, 'samples': 22978560, 'steps': 44879, 'loss/train': 0.42425018548965454} +03/05/2022 18:13:46 - INFO - codeparrot_training - Step 44880: {'lr': 0.0004033944341856181, 'samples': 22979072, 'steps': 44880, 'loss/train': 2.043513774871826} +03/05/2022 18:13:47 - INFO - codeparrot_training - Skipping example with length 221 (seq_length=1024) +03/05/2022 18:13:52 - INFO - codeparrot_training - Step 44881: {'lr': 0.00040339024376359015, 'samples': 22979584, 'steps': 44881, 'loss/train': 2.2219607830047607} +03/05/2022 18:13:55 - INFO - codeparrot_training - Step 44882: {'lr': 0.000403386053272447, 'samples': 22980096, 'steps': 44882, 'loss/train': 0.967627227306366} +03/05/2022 18:13:55 - INFO - codeparrot_training - Skipping example with length 679 (seq_length=1024) +03/05/2022 18:14:00 - INFO - codeparrot_training - Step 44883: {'lr': 0.0004033818627121904, 'samples': 22980608, 'steps': 44883, 'loss/train': 1.7280353307724} +03/05/2022 18:14:03 - INFO - codeparrot_training - Step 44884: {'lr': 0.00040337767208282235, 'samples': 22981120, 'steps': 44884, 'loss/train': 2.123446226119995} +03/05/2022 18:14:03 - INFO - codeparrot_training - Skipping example with length 209 (seq_length=1024) +03/05/2022 18:14:09 - INFO - codeparrot_training - Step 44885: {'lr': 0.00040337348138434466, 'samples': 22981632, 'steps': 44885, 'loss/train': 0.9136842489242554} +03/05/2022 18:14:12 - INFO - codeparrot_training - Step 44886: {'lr': 0.00040336929061675933, 'samples': 22982144, 'steps': 44886, 'loss/train': 1.9451669454574585} +03/05/2022 18:14:13 - INFO - codeparrot_training - Skipping example with length 1003 (seq_length=1024) +03/05/2022 18:14:17 - INFO - codeparrot_training - Step 44887: {'lr': 0.0004033650997800681, 'samples': 22982656, 'steps': 44887, 'loss/train': 1.790920615196228} +03/05/2022 18:14:20 - INFO - codeparrot_training - Step 44888: {'lr': 0.00040336090887427284, 'samples': 22983168, 'steps': 44888, 'loss/train': 2.058544158935547} +03/05/2022 18:14:21 - INFO - codeparrot_training - Skipping example with length 314 (seq_length=1024) +03/05/2022 18:14:26 - INFO - codeparrot_training - Step 44889: {'lr': 0.00040335671789937564, 'samples': 22983680, 'steps': 44889, 'loss/train': 1.4013477563858032} +03/05/2022 18:14:29 - INFO - codeparrot_training - Step 44890: {'lr': 0.00040335252685537817, 'samples': 22984192, 'steps': 44890, 'loss/train': 1.1435911655426025} +03/05/2022 18:14:30 - INFO - codeparrot_training - Skipping example with length 381 (seq_length=1024) +03/05/2022 18:14:34 - INFO - codeparrot_training - Step 44891: {'lr': 0.0004033483357422825, 'samples': 22984704, 'steps': 44891, 'loss/train': 1.3723818063735962} +03/05/2022 18:14:37 - INFO - codeparrot_training - Step 44892: {'lr': 0.0004033441445600904, 'samples': 22985216, 'steps': 44892, 'loss/train': 1.0748956203460693} +03/05/2022 18:14:38 - INFO - codeparrot_training - Skipping example with length 820 (seq_length=1024) +03/05/2022 18:14:42 - INFO - codeparrot_training - Step 44893: {'lr': 0.0004033399533088038, 'samples': 22985728, 'steps': 44893, 'loss/train': 1.7123327255249023} +03/05/2022 18:14:46 - INFO - codeparrot_training - Step 44894: {'lr': 0.00040333576198842456, 'samples': 22986240, 'steps': 44894, 'loss/train': 2.081317901611328} +03/05/2022 18:14:46 - INFO - codeparrot_training - Skipping example with length 661 (seq_length=1024) +03/05/2022 18:14:51 - INFO - codeparrot_training - Step 44895: {'lr': 0.00040333157059895463, 'samples': 22986752, 'steps': 44895, 'loss/train': 2.1863160133361816} +03/05/2022 18:14:54 - INFO - codeparrot_training - Step 44896: {'lr': 0.0004033273791403959, 'samples': 22987264, 'steps': 44896, 'loss/train': 0.7213594317436218} +03/05/2022 18:14:55 - INFO - codeparrot_training - Skipping example with length 964 (seq_length=1024) +03/05/2022 18:14:59 - INFO - codeparrot_training - Step 44897: {'lr': 0.0004033231876127501, 'samples': 22987776, 'steps': 44897, 'loss/train': 1.5745676755905151} +03/05/2022 18:15:02 - INFO - codeparrot_training - Step 44898: {'lr': 0.00040331899601601934, 'samples': 22988288, 'steps': 44898, 'loss/train': 1.4205262660980225} +03/05/2022 18:15:03 - INFO - codeparrot_training - Skipping example with length 856 (seq_length=1024) +03/05/2022 18:15:08 - INFO - codeparrot_training - Step 44899: {'lr': 0.0004033148043502054, 'samples': 22988800, 'steps': 44899, 'loss/train': 1.2586705684661865} +03/05/2022 18:15:11 - INFO - codeparrot_training - Step 44900: {'lr': 0.00040331061261531014, 'samples': 22989312, 'steps': 44900, 'loss/train': 1.2307225465774536} +03/05/2022 18:15:11 - INFO - codeparrot_training - Skipping example with length 522 (seq_length=1024) +03/05/2022 18:15:16 - INFO - codeparrot_training - Step 44901: {'lr': 0.0004033064208113355, 'samples': 22989824, 'steps': 44901, 'loss/train': 1.616256594657898} +03/05/2022 18:15:19 - INFO - codeparrot_training - Step 44902: {'lr': 0.00040330222893828334, 'samples': 22990336, 'steps': 44902, 'loss/train': 1.0946621894836426} +03/05/2022 18:15:20 - INFO - codeparrot_training - Skipping example with length 29 (seq_length=1024) +03/05/2022 18:15:25 - INFO - codeparrot_training - Step 44903: {'lr': 0.0004032980369961555, 'samples': 22990848, 'steps': 44903, 'loss/train': 1.6257131099700928} +03/05/2022 18:15:28 - INFO - codeparrot_training - Step 44904: {'lr': 0.000403293844984954, 'samples': 22991360, 'steps': 44904, 'loss/train': 2.135741710662842} +03/05/2022 18:15:28 - INFO - codeparrot_training - Skipping example with length 215 (seq_length=1024) +03/05/2022 18:15:33 - INFO - codeparrot_training - Step 44905: {'lr': 0.00040328965290468066, 'samples': 22991872, 'steps': 44905, 'loss/train': 1.345975637435913} +03/05/2022 18:15:36 - INFO - codeparrot_training - Step 44906: {'lr': 0.00040328546075533745, 'samples': 22992384, 'steps': 44906, 'loss/train': 1.7020704746246338} +03/05/2022 18:15:37 - INFO - codeparrot_training - Skipping example with length 949 (seq_length=1024) +03/05/2022 18:15:42 - INFO - codeparrot_training - Step 44907: {'lr': 0.00040328126853692606, 'samples': 22992896, 'steps': 44907, 'loss/train': 1.6809134483337402} +03/05/2022 18:15:45 - INFO - codeparrot_training - Step 44908: {'lr': 0.00040327707624944855, 'samples': 22993408, 'steps': 44908, 'loss/train': 1.8412874937057495} +03/05/2022 18:15:45 - INFO - codeparrot_training - Skipping example with length 978 (seq_length=1024) +03/05/2022 18:15:50 - INFO - codeparrot_training - Step 44909: {'lr': 0.0004032728838929067, 'samples': 22993920, 'steps': 44909, 'loss/train': 1.8732070922851562} +03/05/2022 18:15:53 - INFO - codeparrot_training - Step 44910: {'lr': 0.0004032686914673025, 'samples': 22994432, 'steps': 44910, 'loss/train': 2.2097203731536865} +03/05/2022 18:15:53 - INFO - codeparrot_training - Skipping example with length 267 (seq_length=1024) +03/05/2022 18:15:58 - INFO - codeparrot_training - Step 44911: {'lr': 0.00040326449897263775, 'samples': 22994944, 'steps': 44911, 'loss/train': 1.767197847366333} +03/05/2022 18:16:02 - INFO - codeparrot_training - Step 44912: {'lr': 0.0004032603064089144, 'samples': 22995456, 'steps': 44912, 'loss/train': 1.4624030590057373} +03/05/2022 18:16:02 - INFO - codeparrot_training - Skipping example with length 856 (seq_length=1024) +03/05/2022 18:16:07 - INFO - codeparrot_training - Step 44913: {'lr': 0.00040325611377613435, 'samples': 22995968, 'steps': 44913, 'loss/train': 2.354966163635254} +03/05/2022 18:16:10 - INFO - codeparrot_training - Step 44914: {'lr': 0.00040325192107429944, 'samples': 22996480, 'steps': 44914, 'loss/train': 0.42661499977111816} +03/05/2022 18:16:10 - INFO - codeparrot_training - Skipping example with length 53 (seq_length=1024) +03/05/2022 18:16:15 - INFO - codeparrot_training - Step 44915: {'lr': 0.00040324772830341163, 'samples': 22996992, 'steps': 44915, 'loss/train': 1.8519648313522339} +03/05/2022 18:16:19 - INFO - codeparrot_training - Step 44916: {'lr': 0.0004032435354634726, 'samples': 22997504, 'steps': 44916, 'loss/train': 1.7476165294647217} +03/05/2022 18:16:19 - INFO - codeparrot_training - Skipping example with length 829 (seq_length=1024) +03/05/2022 18:16:24 - INFO - codeparrot_training - Step 44917: {'lr': 0.00040323934255448457, 'samples': 22998016, 'steps': 44917, 'loss/train': 1.595212459564209} +03/05/2022 18:16:27 - INFO - codeparrot_training - Step 44918: {'lr': 0.00040323514957644915, 'samples': 22998528, 'steps': 44918, 'loss/train': 1.0353597402572632} +03/05/2022 18:16:27 - INFO - codeparrot_training - Skipping example with length 42 (seq_length=1024) +03/05/2022 18:16:32 - INFO - codeparrot_training - Step 44919: {'lr': 0.00040323095652936843, 'samples': 22999040, 'steps': 44919, 'loss/train': 1.5653719902038574} +03/05/2022 18:16:36 - INFO - codeparrot_training - Step 44920: {'lr': 0.00040322676341324415, 'samples': 22999552, 'steps': 44920, 'loss/train': 2.8286185264587402} +03/05/2022 18:16:36 - INFO - codeparrot_training - Skipping example with length 123 (seq_length=1024) +03/05/2022 18:16:41 - INFO - codeparrot_training - Step 44921: {'lr': 0.0004032225702280783, 'samples': 23000064, 'steps': 44921, 'loss/train': 1.5484157800674438} +03/05/2022 18:16:44 - INFO - codeparrot_training - Step 44922: {'lr': 0.00040321837697387264, 'samples': 23000576, 'steps': 44922, 'loss/train': 2.174879550933838} +03/05/2022 18:16:44 - INFO - codeparrot_training - Skipping example with length 749 (seq_length=1024) +03/05/2022 18:16:49 - INFO - codeparrot_training - Step 44923: {'lr': 0.00040321418365062915, 'samples': 23001088, 'steps': 44923, 'loss/train': 1.3169763088226318} +03/05/2022 18:16:53 - INFO - codeparrot_training - Step 44924: {'lr': 0.00040320999025834973, 'samples': 23001600, 'steps': 44924, 'loss/train': 1.6843597888946533} +03/05/2022 18:16:53 - INFO - codeparrot_training - Skipping example with length 444 (seq_length=1024) +03/05/2022 18:16:58 - INFO - codeparrot_training - Step 44925: {'lr': 0.0004032057967970363, 'samples': 23002112, 'steps': 44925, 'loss/train': 1.9349087476730347} +03/05/2022 18:17:01 - INFO - codeparrot_training - Step 44926: {'lr': 0.0004032016032666907, 'samples': 23002624, 'steps': 44926, 'loss/train': 2.3804214000701904} +03/05/2022 18:17:07 - INFO - codeparrot_training - Step 44927: {'lr': 0.00040319740966731477, 'samples': 23003136, 'steps': 44927, 'loss/train': 1.4779523611068726} +03/05/2022 18:17:10 - INFO - codeparrot_training - Step 44928: {'lr': 0.0004031932159989105, 'samples': 23003648, 'steps': 44928, 'loss/train': 1.8512465953826904} +03/05/2022 18:17:10 - INFO - codeparrot_training - Skipping example with length 530 (seq_length=1024) +03/05/2022 18:17:15 - INFO - codeparrot_training - Step 44929: {'lr': 0.0004031890222614797, 'samples': 23004160, 'steps': 44929, 'loss/train': 2.372429132461548} +03/05/2022 18:17:18 - INFO - codeparrot_training - Step 44930: {'lr': 0.0004031848284550243, 'samples': 23004672, 'steps': 44930, 'loss/train': 1.250178337097168} +03/05/2022 18:17:18 - INFO - codeparrot_training - Skipping example with length 249 (seq_length=1024) +03/05/2022 18:17:24 - INFO - codeparrot_training - Step 44931: {'lr': 0.0004031806345795462, 'samples': 23005184, 'steps': 44931, 'loss/train': 2.136544942855835} +03/05/2022 18:17:27 - INFO - codeparrot_training - Step 44932: {'lr': 0.0004031764406350472, 'samples': 23005696, 'steps': 44932, 'loss/train': 1.3886854648590088} +03/05/2022 18:17:27 - INFO - codeparrot_training - Skipping example with length 908 (seq_length=1024) +03/05/2022 18:17:32 - INFO - codeparrot_training - Step 44933: {'lr': 0.0004031722466215293, 'samples': 23006208, 'steps': 44933, 'loss/train': 1.5678693056106567} +03/05/2022 18:17:35 - INFO - codeparrot_training - Step 44934: {'lr': 0.00040316805253899434, 'samples': 23006720, 'steps': 44934, 'loss/train': 1.8112289905548096} +03/05/2022 18:17:36 - INFO - codeparrot_training - Skipping example with length 908 (seq_length=1024) +03/05/2022 18:17:40 - INFO - codeparrot_training - Step 44935: {'lr': 0.0004031638583874443, 'samples': 23007232, 'steps': 44935, 'loss/train': 1.5763739347457886} +03/05/2022 18:17:44 - INFO - codeparrot_training - Step 44936: {'lr': 0.0004031596641668809, 'samples': 23007744, 'steps': 44936, 'loss/train': 2.039832830429077} +03/05/2022 18:17:44 - INFO - codeparrot_training - Skipping example with length 785 (seq_length=1024) +03/05/2022 18:17:49 - INFO - codeparrot_training - Step 44937: {'lr': 0.0004031554698773061, 'samples': 23008256, 'steps': 44937, 'loss/train': 1.8764402866363525} +03/05/2022 18:17:52 - INFO - codeparrot_training - Step 44938: {'lr': 0.0004031512755187219, 'samples': 23008768, 'steps': 44938, 'loss/train': 0.6601639986038208} +03/05/2022 18:17:53 - INFO - codeparrot_training - Skipping example with length 922 (seq_length=1024) +03/05/2022 18:17:58 - INFO - codeparrot_training - Step 44939: {'lr': 0.00040314708109113003, 'samples': 23009280, 'steps': 44939, 'loss/train': 1.1560558080673218} +03/05/2022 18:18:01 - INFO - codeparrot_training - Step 44940: {'lr': 0.0004031428865945325, 'samples': 23009792, 'steps': 44940, 'loss/train': 2.061797618865967} +03/05/2022 18:18:02 - INFO - codeparrot_training - Skipping example with length 535 (seq_length=1024) +03/05/2022 18:18:06 - INFO - codeparrot_training - Step 44941: {'lr': 0.0004031386920289311, 'samples': 23010304, 'steps': 44941, 'loss/train': 2.099787712097168} +03/05/2022 18:18:09 - INFO - codeparrot_training - Step 44942: {'lr': 0.0004031344973943278, 'samples': 23010816, 'steps': 44942, 'loss/train': 1.4427982568740845} +03/05/2022 18:18:10 - INFO - codeparrot_training - Skipping example with length 254 (seq_length=1024) +03/05/2022 18:18:15 - INFO - codeparrot_training - Step 44943: {'lr': 0.00040313030269072445, 'samples': 23011328, 'steps': 44943, 'loss/train': 1.2495648860931396} +03/05/2022 18:18:18 - INFO - codeparrot_training - Step 44944: {'lr': 0.00040312610791812286, 'samples': 23011840, 'steps': 44944, 'loss/train': 1.8767129182815552} +03/05/2022 18:18:18 - INFO - codeparrot_training - Skipping example with length 997 (seq_length=1024) +03/05/2022 18:18:23 - INFO - codeparrot_training - Step 44945: {'lr': 0.00040312191307652513, 'samples': 23012352, 'steps': 44945, 'loss/train': 2.5527396202087402} +03/05/2022 18:18:26 - INFO - codeparrot_training - Step 44946: {'lr': 0.000403117718165933, 'samples': 23012864, 'steps': 44946, 'loss/train': 1.0728100538253784} +03/05/2022 18:18:27 - INFO - codeparrot_training - Skipping example with length 1013 (seq_length=1024) +03/05/2022 18:18:31 - INFO - codeparrot_training - Step 44947: {'lr': 0.00040311352318634844, 'samples': 23013376, 'steps': 44947, 'loss/train': 1.8301002979278564} +03/05/2022 18:18:34 - INFO - codeparrot_training - Step 44948: {'lr': 0.00040310932813777316, 'samples': 23013888, 'steps': 44948, 'loss/train': 1.7112009525299072} +03/05/2022 18:18:35 - INFO - codeparrot_training - Skipping example with length 28 (seq_length=1024) +03/05/2022 18:18:40 - INFO - codeparrot_training - Step 44949: {'lr': 0.0004031051330202092, 'samples': 23014400, 'steps': 44949, 'loss/train': 2.486818790435791} +03/05/2022 18:18:43 - INFO - codeparrot_training - Step 44950: {'lr': 0.00040310093783365854, 'samples': 23014912, 'steps': 44950, 'loss/train': 0.5539466142654419} +03/05/2022 18:18:43 - INFO - codeparrot_training - Skipping example with length 159 (seq_length=1024) +03/05/2022 18:18:48 - INFO - codeparrot_training - Step 44951: {'lr': 0.0004030967425781229, 'samples': 23015424, 'steps': 44951, 'loss/train': 1.3671976327896118} +03/05/2022 18:18:51 - INFO - codeparrot_training - Step 44952: {'lr': 0.0004030925472536042, 'samples': 23015936, 'steps': 44952, 'loss/train': 2.364353656768799} +03/05/2022 18:18:52 - INFO - codeparrot_training - Skipping example with length 630 (seq_length=1024) +03/05/2022 18:18:57 - INFO - codeparrot_training - Step 44953: {'lr': 0.0004030883518601044, 'samples': 23016448, 'steps': 44953, 'loss/train': 2.1105401515960693} +03/05/2022 18:19:00 - INFO - codeparrot_training - Step 44954: {'lr': 0.0004030841563976254, 'samples': 23016960, 'steps': 44954, 'loss/train': 1.2658382654190063} +03/05/2022 18:19:00 - INFO - codeparrot_training - Skipping example with length 731 (seq_length=1024) +03/05/2022 18:19:05 - INFO - codeparrot_training - Step 44955: {'lr': 0.00040307996086616895, 'samples': 23017472, 'steps': 44955, 'loss/train': 1.4302927255630493} +03/05/2022 18:19:08 - INFO - codeparrot_training - Step 44956: {'lr': 0.00040307576526573704, 'samples': 23017984, 'steps': 44956, 'loss/train': 1.6597983837127686} +03/05/2022 18:19:09 - INFO - codeparrot_training - Skipping example with length 326 (seq_length=1024) +03/05/2022 18:19:13 - INFO - codeparrot_training - Step 44957: {'lr': 0.00040307156959633154, 'samples': 23018496, 'steps': 44957, 'loss/train': 1.9302769899368286} +03/05/2022 18:19:17 - INFO - codeparrot_training - Step 44958: {'lr': 0.00040306737385795437, 'samples': 23019008, 'steps': 44958, 'loss/train': 1.6942694187164307} +03/05/2022 18:19:17 - INFO - codeparrot_training - Skipping example with length 231 (seq_length=1024) +03/05/2022 18:19:22 - INFO - codeparrot_training - Step 44959: {'lr': 0.00040306317805060746, 'samples': 23019520, 'steps': 44959, 'loss/train': 1.5243468284606934} +03/05/2022 18:19:25 - INFO - codeparrot_training - Step 44960: {'lr': 0.0004030589821742926, 'samples': 23020032, 'steps': 44960, 'loss/train': 1.9547573328018188} +03/05/2022 18:19:26 - INFO - codeparrot_training - Skipping example with length 128 (seq_length=1024) +03/05/2022 18:19:30 - INFO - codeparrot_training - Step 44961: {'lr': 0.00040305478622901177, 'samples': 23020544, 'steps': 44961, 'loss/train': 2.1968748569488525} +03/05/2022 18:19:34 - INFO - codeparrot_training - Step 44962: {'lr': 0.0004030505902147668, 'samples': 23021056, 'steps': 44962, 'loss/train': 1.7099828720092773} +03/05/2022 18:19:34 - INFO - codeparrot_training - Skipping example with length 48 (seq_length=1024) +03/05/2022 18:19:39 - INFO - codeparrot_training - Step 44963: {'lr': 0.00040304639413155953, 'samples': 23021568, 'steps': 44963, 'loss/train': 1.245296835899353} +03/05/2022 18:19:42 - INFO - codeparrot_training - Step 44964: {'lr': 0.0004030421979793919, 'samples': 23022080, 'steps': 44964, 'loss/train': 1.6791975498199463} +03/05/2022 18:19:43 - INFO - codeparrot_training - Skipping example with length 329 (seq_length=1024) +03/05/2022 18:19:47 - INFO - codeparrot_training - Step 44965: {'lr': 0.0004030380017582659, 'samples': 23022592, 'steps': 44965, 'loss/train': 1.2727630138397217} +03/05/2022 18:19:51 - INFO - codeparrot_training - Step 44966: {'lr': 0.0004030338054681833, 'samples': 23023104, 'steps': 44966, 'loss/train': 1.7665297985076904} +03/05/2022 18:19:52 - INFO - codeparrot_training - Skipping example with length 13 (seq_length=1024) +03/05/2022 18:19:56 - INFO - codeparrot_training - Step 44967: {'lr': 0.0004030296091091461, 'samples': 23023616, 'steps': 44967, 'loss/train': 1.162051796913147} +03/05/2022 18:19:59 - INFO - codeparrot_training - Step 44968: {'lr': 0.000403025412681156, 'samples': 23024128, 'steps': 44968, 'loss/train': 1.6649079322814941} +03/05/2022 18:20:01 - INFO - codeparrot_training - Skipping example with length 646 (seq_length=1024) +03/05/2022 18:20:04 - INFO - codeparrot_training - Step 44969: {'lr': 0.00040302121618421505, 'samples': 23024640, 'steps': 44969, 'loss/train': 1.437058925628662} +03/05/2022 18:20:08 - INFO - codeparrot_training - Step 44970: {'lr': 0.0004030170196183252, 'samples': 23025152, 'steps': 44970, 'loss/train': 1.713722825050354} +03/05/2022 18:20:09 - INFO - codeparrot_training - Skipping example with length 496 (seq_length=1024) +03/05/2022 18:20:13 - INFO - codeparrot_training - Step 44971: {'lr': 0.00040301282298348806, 'samples': 23025664, 'steps': 44971, 'loss/train': 2.3053159713745117} +03/05/2022 18:20:16 - INFO - codeparrot_training - Step 44972: {'lr': 0.0004030086262797058, 'samples': 23026176, 'steps': 44972, 'loss/train': 2.8190853595733643} +03/05/2022 18:20:17 - INFO - codeparrot_training - Skipping example with length 410 (seq_length=1024) +03/05/2022 18:20:21 - INFO - codeparrot_training - Step 44973: {'lr': 0.0004030044295069803, 'samples': 23026688, 'steps': 44973, 'loss/train': 2.0130844116210938} +03/05/2022 18:20:24 - INFO - codeparrot_training - Step 44974: {'lr': 0.00040300023266531327, 'samples': 23027200, 'steps': 44974, 'loss/train': 1.7837414741516113} +03/05/2022 18:20:26 - INFO - codeparrot_training - Skipping example with length 998 (seq_length=1024) +03/05/2022 18:20:30 - INFO - codeparrot_training - Step 44975: {'lr': 0.0004029960357547067, 'samples': 23027712, 'steps': 44975, 'loss/train': 2.274665594100952} +03/05/2022 18:20:33 - INFO - codeparrot_training - Step 44976: {'lr': 0.0004029918387751625, 'samples': 23028224, 'steps': 44976, 'loss/train': 1.8014600276947021} +03/05/2022 18:20:35 - INFO - codeparrot_training - Skipping example with length 337 (seq_length=1024) +03/05/2022 18:20:38 - INFO - codeparrot_training - Step 44977: {'lr': 0.00040298764172668253, 'samples': 23028736, 'steps': 44977, 'loss/train': 1.6265074014663696} +03/05/2022 18:20:41 - INFO - codeparrot_training - Step 44978: {'lr': 0.00040298344460926866, 'samples': 23029248, 'steps': 44978, 'loss/train': 1.7987114191055298} +03/05/2022 18:20:43 - INFO - codeparrot_training - Skipping example with length 946 (seq_length=1024) +03/05/2022 18:20:47 - INFO - codeparrot_training - Step 44979: {'lr': 0.0004029792474229228, 'samples': 23029760, 'steps': 44979, 'loss/train': 1.5913589000701904} +03/05/2022 18:20:50 - INFO - codeparrot_training - Step 44980: {'lr': 0.00040297505016764697, 'samples': 23030272, 'steps': 44980, 'loss/train': 1.769344687461853} +03/05/2022 18:20:51 - INFO - codeparrot_training - Skipping example with length 359 (seq_length=1024) +03/05/2022 18:20:55 - INFO - codeparrot_training - Step 44981: {'lr': 0.00040297085284344284, 'samples': 23030784, 'steps': 44981, 'loss/train': 0.8428802490234375} +03/05/2022 18:20:58 - INFO - codeparrot_training - Step 44982: {'lr': 0.0004029666554503124, 'samples': 23031296, 'steps': 44982, 'loss/train': 1.437956690788269} +03/05/2022 18:21:00 - INFO - codeparrot_training - Skipping example with length 92 (seq_length=1024) +03/05/2022 18:21:04 - INFO - codeparrot_training - Step 44983: {'lr': 0.0004029624579882576, 'samples': 23031808, 'steps': 44983, 'loss/train': 1.8152235746383667} +03/05/2022 18:21:07 - INFO - codeparrot_training - Step 44984: {'lr': 0.00040295826045728023, 'samples': 23032320, 'steps': 44984, 'loss/train': 1.6174103021621704} +03/05/2022 18:21:08 - INFO - codeparrot_training - Skipping example with length 88 (seq_length=1024) +03/05/2022 18:21:12 - INFO - codeparrot_training - Step 44985: {'lr': 0.00040295406285738224, 'samples': 23032832, 'steps': 44985, 'loss/train': 2.224730968475342} +03/05/2022 18:21:15 - INFO - codeparrot_training - Step 44986: {'lr': 0.00040294986518856553, 'samples': 23033344, 'steps': 44986, 'loss/train': 1.362060308456421} +03/05/2022 18:21:17 - INFO - codeparrot_training - Skipping example with length 692 (seq_length=1024) +03/05/2022 18:21:21 - INFO - codeparrot_training - Step 44987: {'lr': 0.00040294566745083195, 'samples': 23033856, 'steps': 44987, 'loss/train': 1.2531940937042236} +03/05/2022 18:21:24 - INFO - codeparrot_training - Step 44988: {'lr': 0.00040294146964418344, 'samples': 23034368, 'steps': 44988, 'loss/train': 1.5120103359222412} +03/05/2022 18:21:25 - INFO - codeparrot_training - Skipping example with length 558 (seq_length=1024) +03/05/2022 18:21:29 - INFO - codeparrot_training - Step 44989: {'lr': 0.00040293727176862184, 'samples': 23034880, 'steps': 44989, 'loss/train': 2.1658706665039062} +03/05/2022 18:21:32 - INFO - codeparrot_training - Step 44990: {'lr': 0.000402933073824149, 'samples': 23035392, 'steps': 44990, 'loss/train': 1.9054800271987915} +03/05/2022 18:21:35 - INFO - codeparrot_training - Skipping example with length 488 (seq_length=1024) +03/05/2022 18:21:38 - INFO - codeparrot_training - Step 44991: {'lr': 0.000402928875810767, 'samples': 23035904, 'steps': 44991, 'loss/train': 0.985665500164032} +03/05/2022 18:21:41 - INFO - codeparrot_training - Step 44992: {'lr': 0.00040292467772847754, 'samples': 23036416, 'steps': 44992, 'loss/train': 1.8608412742614746} +03/05/2022 18:21:43 - INFO - codeparrot_training - Skipping example with length 911 (seq_length=1024) +03/05/2022 18:21:46 - INFO - codeparrot_training - Step 44993: {'lr': 0.00040292047957728264, 'samples': 23036928, 'steps': 44993, 'loss/train': 1.44491708278656} +03/05/2022 18:21:50 - INFO - codeparrot_training - Step 44994: {'lr': 0.00040291628135718404, 'samples': 23037440, 'steps': 44994, 'loss/train': 2.1944782733917236} +03/05/2022 18:21:52 - INFO - codeparrot_training - Skipping example with length 905 (seq_length=1024) +03/05/2022 18:21:55 - INFO - codeparrot_training - Step 44995: {'lr': 0.0004029120830681838, 'samples': 23037952, 'steps': 44995, 'loss/train': 2.1066973209381104} +03/05/2022 18:21:58 - INFO - codeparrot_training - Step 44996: {'lr': 0.0004029078847102837, 'samples': 23038464, 'steps': 44996, 'loss/train': 1.686539649963379} +03/05/2022 18:22:01 - INFO - codeparrot_training - Skipping example with length 190 (seq_length=1024) +03/05/2022 18:22:03 - INFO - codeparrot_training - Step 44997: {'lr': 0.00040290368628348564, 'samples': 23038976, 'steps': 44997, 'loss/train': 1.7934890985488892} +03/05/2022 18:22:06 - INFO - codeparrot_training - Step 44998: {'lr': 0.00040289948778779157, 'samples': 23039488, 'steps': 44998, 'loss/train': 0.8123205900192261} +03/05/2022 18:22:09 - INFO - codeparrot_training - Skipping example with length 488 (seq_length=1024) +03/05/2022 18:22:12 - INFO - codeparrot_training - Step 44999: {'lr': 0.00040289528922320334, 'samples': 23040000, 'steps': 44999, 'loss/train': 1.7236690521240234} +03/05/2022 18:22:12 - INFO - codeparrot_training - Evaluating and saving model checkpoint