Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

flickr8k_checkpoints/checkpoint-1208/adapter.safetensors +1 -1
flickr8k_checkpoints/checkpoint-1208/eval_state.json +0 -0
flickr8k_checkpoints/checkpoint-1208/optimizer.pt +1 -1
flickr8k_checkpoints/checkpoint-1208/scheduler.pt +1 -1
flickr8k_checkpoints/checkpoint-1208/trainer_state.json +247 -247

flickr8k_checkpoints/checkpoint-1208/adapter.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8e99df632ba281149a567ab0f850dc3be5c51d9b63810eb68577d29603d14c0e
 size 17064856

 version https://git-lfs.github.com/spec/v1
+oid sha256:3c2baa719ba3d00319742871c6ccf4f3f8a64aa3dbd92a3923116006562f9ba9
 size 17064856

flickr8k_checkpoints/checkpoint-1208/eval_state.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

flickr8k_checkpoints/checkpoint-1208/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:067b5fde8b60af2370dac43b51a0eaaed1d55487d4847e177307de9f998befed
 size 8714492

 version https://git-lfs.github.com/spec/v1
+oid sha256:33cc226a0932968bbd20d757ed7c15ae79d5418ad0f04f6e722ba1f6f8445b7d
 size 8714492

flickr8k_checkpoints/checkpoint-1208/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6db6a4c086774bca871a88695029095eba8e64c70a4cdf4e980cb249f462eb44
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:229084bbb0e922d44dba4cc9206f54220a162a8229897c2f3d636ef81c5c5418
 size 1064

flickr8k_checkpoints/checkpoint-1208/trainer_state.json CHANGED Viewed

@@ -10,565 +10,565 @@
   "log_history": [
     {
       "epoch": 0.026490066225165563,
-      "grad_norm": 0.23711644113063812,
       "learning_rate": 0.0004324324324324325,
-      "loss": 4.5092,
       "step": 16
     },
     {
       "epoch": 0.052980132450331126,
-      "grad_norm": 0.285342812538147,
-      "learning_rate": 0.0007297297297297297,
-      "loss": 4.9762,
       "step": 32
     },
     {
       "epoch": 0.07947019867549669,
-      "grad_norm": 0.09107156842947006,
-      "learning_rate": 0.0009999352232600816,
-      "loss": 4.3898,
       "step": 48
     },
     {
       "epoch": 0.10596026490066225,
-      "grad_norm": 0.11629017442464828,
-      "learning_rate": 0.0009991293467102582,
-      "loss": 4.3831,
       "step": 64
     },
     {
       "epoch": 0.13245033112582782,
-      "grad_norm": 0.1688951849937439,
-      "learning_rate": 0.000997403926531526,
-      "loss": 4.3703,
       "step": 80
     },
     {
       "epoch": 0.15894039735099338,
-      "grad_norm": 0.11590718477964401,
-      "learning_rate": 0.0009947621414572996,
-      "loss": 4.4239,
       "step": 96
     },
     {
       "epoch": 0.18543046357615894,
-      "grad_norm": 0.1131146252155304,
-      "learning_rate": 0.0009912088584356955,
-      "loss": 4.3535,
       "step": 112
     },
     {
       "epoch": 0.2119205298013245,
-      "grad_norm": 0.136209174990654,
-      "learning_rate": 0.000986750623663177,
-      "loss": 4.3339,
       "step": 128
     },
     {
       "epoch": 0.23841059602649006,
-      "grad_norm": 0.18205471336841583,
-      "learning_rate": 0.000981395650524528,
-      "loss": 4.3282,
       "step": 144
     },
     {
       "epoch": 0.26490066225165565,
-      "grad_norm": 0.10476769506931305,
-      "learning_rate": 0.000975153804461372,
-      "loss": 4.3313,
       "step": 160
     },
     {
       "epoch": 0.2913907284768212,
-      "grad_norm": 0.0960649624466896,
-      "learning_rate": 0.0009680365847971162,
-      "loss": 4.3201,
       "step": 176
     },
     {
       "epoch": 0.31788079470198677,
-      "grad_norm": 0.13584400713443756,
-      "learning_rate": 0.0009600571035518034,
-      "loss": 4.3756,
       "step": 192
     },
     {
       "epoch": 0.3443708609271523,
-      "grad_norm": 0.10583332926034927,
-      "learning_rate": 0.000951230061285898,
-      "loss": 4.3302,
       "step": 208
     },
     {
       "epoch": 0.3708609271523179,
-      "grad_norm": 0.13830633461475372,
-      "learning_rate": 0.0009415717200175151,
-      "loss": 4.3383,
       "step": 224
     },
     {
       "epoch": 0.3973509933774834,
-      "grad_norm": 0.12482637912034988,
-      "learning_rate": 0.0009310998732629798,
-      "loss": 4.3083,
       "step": 240
     },
     {
       "epoch": 0.423841059602649,
-      "grad_norm": 0.11580520868301392,
-      "learning_rate": 0.0009198338132559177,
-      "loss": 4.3313,
       "step": 256
     },
     {
       "epoch": 0.4503311258278146,
-      "grad_norm": 0.15633529424667358,
-      "learning_rate": 0.0009077942954052636,
-      "loss": 4.3099,
       "step": 272
     },
     {
       "epoch": 0.4768211920529801,
-      "grad_norm": 0.12987282872200012,
-      "learning_rate": 0.0008950035000576705,
-      "loss": 4.2932,
       "step": 288
     },
     {
       "epoch": 0.5033112582781457,
-      "grad_norm": 0.1630668193101883,
-      "learning_rate": 0.000881484991634762,
-      "loss": 4.3073,
       "step": 304
     },
     {
       "epoch": 0.5298013245033113,
-      "grad_norm": 0.14686144888401031,
-      "learning_rate": 0.0008672636752205099,
-      "loss": 4.2848,
       "step": 320
     },
     {
       "epoch": 0.5562913907284768,
-      "grad_norm": 0.1868494600057602,
-      "learning_rate": 0.0008523657506787162,
-      "loss": 4.3405,
       "step": 336
     },
     {
       "epoch": 0.5827814569536424,
-      "grad_norm": 0.11744031310081482,
-      "learning_rate": 0.0008368186643851284,
-      "loss": 4.3128,
       "step": 352
     },
     {
       "epoch": 0.609271523178808,
-      "grad_norm": 0.14506162703037262,
-      "learning_rate": 0.0008206510586631114,
-      "loss": 4.3208,
       "step": 368
     },
     {
       "epoch": 0.6357615894039735,
-      "grad_norm": 0.12040343135595322,
-      "learning_rate": 0.0008038927190160309,
-      "loss": 4.2675,
       "step": 384
     },
     {
       "epoch": 0.6622516556291391,
-      "grad_norm": 0.11899886280298233,
-      "learning_rate": 0.000786574519253562,
-      "loss": 4.2686,
       "step": 400
     },
     {
       "epoch": 0.6887417218543046,
-      "grad_norm": 0.12363821268081665,
-      "learning_rate": 0.0007687283646130157,
-      "loss": 4.2989,
       "step": 416
     },
     {
       "epoch": 0.7152317880794702,
-      "grad_norm": 0.13222382962703705,
-      "learning_rate": 0.0007503871329804718,
-      "loss": 4.2919,
       "step": 432
     },
     {
       "epoch": 0.7417218543046358,
-      "grad_norm": 0.11946064233779907,
-      "learning_rate": 0.0007315846143200053,
-      "loss": 4.2956,
       "step": 448
     },
     {
       "epoch": 0.7682119205298014,
-      "grad_norm": 0.1136200875043869,
-      "learning_rate": 0.0007123554484225956,
-      "loss": 4.251,
       "step": 464
     },
     {
       "epoch": 0.7947019867549668,
-      "grad_norm": 0.13116462528705597,
-      "learning_rate": 0.0006927350610894034,
-      "loss": 4.3105,
       "step": 480
     },
     {
       "epoch": 0.8211920529801324,
-      "grad_norm": 0.10559297353029251,
-      "learning_rate": 0.0006727595988669864,
-      "loss": 4.262,
       "step": 496
     },
     {
       "epoch": 0.847682119205298,
-      "grad_norm": 0.11165904253721237,
-      "learning_rate": 0.0006524658624546864,
-      "loss": 4.3069,
       "step": 512
     },
     {
       "epoch": 0.8741721854304636,
-      "grad_norm": 0.10997848957777023,
-      "learning_rate": 0.0006318912389068766,
-      "loss": 4.2901,
       "step": 528
     },
     {
       "epoch": 0.9006622516556292,
-      "grad_norm": 0.11981035768985748,
-      "learning_rate": 0.0006110736327549683,
-      "loss": 4.2744,
       "step": 544
     },
     {
       "epoch": 0.9271523178807947,
-      "grad_norm": 0.11829685419797897,
-      "learning_rate": 0.0005900513961760718,
-      "loss": 4.3009,
       "step": 560
     },
     {
       "epoch": 0.9536423841059603,
-      "grad_norm": 0.09722407907247543,
-      "learning_rate": 0.0005688632583369634,
-      "loss": 4.2738,
       "step": 576
     },
     {
       "epoch": 0.9801324503311258,
-      "grad_norm": 0.12697675824165344,
-      "learning_rate": 0.0005475482540435239,
-      "loss": 4.3521,
       "step": 592
     },
     {
       "epoch": 1.0,
-      "eval_bleu": 0.10730040886167266,
-      "eval_cap_loss": 1.229803388481898,
-      "eval_con_loss": 1.8703024170256608,
-      "eval_loss": 3.100105801560231,
       "step": 604
     },
     {
       "epoch": 1.0,
-      "eval_bleu": 0.10730040886167266,
-      "eval_cap_loss": 1.229803388481898,
-      "eval_con_loss": 1.8703024170256608,
-      "eval_loss": 3.100105801560231,
-      "eval_runtime": 247.7711,
-      "eval_samples_per_second": 19.49,
-      "eval_steps_per_second": 2.438,
       "step": 604
     },
     {
       "epoch": 1.0066225165562914,
-      "grad_norm": 0.13247686624526978,
-      "learning_rate": 0.000526145651827102,
-      "loss": 4.2509,
       "step": 608
     },
     {
       "epoch": 1.033112582781457,
-      "grad_norm": 0.1049540564417839,
-      "learning_rate": 0.0005046948816002839,
-      "loss": 4.2821,
       "step": 624
     },
     {
       "epoch": 1.0596026490066226,
-      "grad_norm": 0.1295829862356186,
-      "learning_rate": 0.00048323546201535375,
-      "loss": 4.2916,
       "step": 640
     },
     {
       "epoch": 1.086092715231788,
-      "grad_norm": 0.14188766479492188,
-      "learning_rate": 0.0004618069276592665,
-      "loss": 4.2996,
       "step": 656
     },
     {
       "epoch": 1.1125827814569536,
-      "grad_norm": 0.12427990883588791,
-      "learning_rate": 0.0004404487562192665,
-      "loss": 4.3079,
       "step": 672
     },
     {
       "epoch": 1.1390728476821192,
-      "grad_norm": 0.10745778679847717,
-      "learning_rate": 0.0004192002957533321,
-      "loss": 4.284,
       "step": 688
     },
     {
       "epoch": 1.1655629139072847,
-      "grad_norm": 0.1536702811717987,
-      "learning_rate": 0.00039810069219943343,
-      "loss": 4.2841,
       "step": 704
     },
     {
       "epoch": 1.1920529801324504,
-      "grad_norm": 0.1520632952451706,
-      "learning_rate": 0.0003771888172571579,
-      "loss": 4.3065,
       "step": 720
     },
     {
       "epoch": 1.218543046357616,
-      "grad_norm": 0.1317160278558731,
-      "learning_rate": 0.0003565031967745614,
-      "loss": 4.2853,
       "step": 736
     },
     {
       "epoch": 1.2450331125827814,
-      "grad_norm": 0.1610012948513031,
-      "learning_rate": 0.00033608193977218185,
-      "loss": 4.3149,
       "step": 752
     },
     {
       "epoch": 1.271523178807947,
-      "grad_norm": 0.13628298044204712,
-      "learning_rate": 0.0003159626682349709,
-      "loss": 4.3005,
       "step": 768
     },
     {
       "epoch": 1.2980132450331126,
-      "grad_norm": 0.1552676558494568,
-      "learning_rate": 0.00029618244780148955,
-      "loss": 4.2843,
       "step": 784
     },
     {
       "epoch": 1.3245033112582782,
-      "grad_norm": 0.1701873242855072,
-      "learning_rate": 0.0002767777194780578,
-      "loss": 4.3283,
       "step": 800
     },
     {
       "epoch": 1.3509933774834437,
-      "grad_norm": 0.10417599231004715,
-      "learning_rate": 0.00025778423250366167,
-      "loss": 4.2768,
       "step": 816
     },
     {
       "epoch": 1.3774834437086092,
-      "grad_norm": 0.14864106476306915,
-      "learning_rate": 0.0002392369784893001,
-      "loss": 4.2471,
       "step": 832
     },
     {
       "epoch": 1.403973509933775,
-      "grad_norm": 0.14134187996387482,
-      "learning_rate": 0.00022117012695310468,
-      "loss": 4.2717,
       "step": 848
     },
     {
       "epoch": 1.4304635761589404,
-      "grad_norm": 0.12026234716176987,
-      "learning_rate": 0.0002036169623700001,
-      "loss": 4.282,
       "step": 864
     },
     {
       "epoch": 1.4569536423841059,
-      "grad_norm": 0.15808941423892975,
-      "learning_rate": 0.000186609822851872,
-      "loss": 4.2811,
       "step": 880
     },
     {
       "epoch": 1.4834437086092715,
-      "grad_norm": 0.11768582463264465,
-      "learning_rate": 0.00017018004057121894,
-      "loss": 4.2527,
       "step": 896
     },
     {
       "epoch": 1.5099337748344372,
-      "grad_norm": 0.10410495847463608,
-      "learning_rate": 0.00015435788403803702,
-      "loss": 4.3271,
       "step": 912
     },
     {
       "epoch": 1.5364238410596025,
-      "grad_norm": 0.1393006443977356,
-      "learning_rate": 0.00013917250233628969,
-      "loss": 4.2384,
       "step": 928
     },
     {
       "epoch": 1.5629139072847682,
-      "grad_norm": 0.11578142642974854,
-      "learning_rate": 0.00012465187142268687,
-      "loss": 4.2827,
       "step": 944
     },
     {
       "epoch": 1.589403973509934,
-      "grad_norm": 0.12900201976299286,
-      "learning_rate": 0.00011082274258671376,
-      "loss": 4.2757,
       "step": 960
     },
     {
       "epoch": 1.6158940397350994,
-      "grad_norm": 0.10685920715332031,
-      "learning_rate": 9.771059316685665e-05,
-      "loss": 4.2779,
       "step": 976
     },
     {
       "epoch": 1.6423841059602649,
-      "grad_norm": 0.13312238454818726,
-      "learning_rate": 8.533957961382238e-05,
-      "loss": 4.2988,
       "step": 992
     },
     {
       "epoch": 1.6688741721854305,
-      "grad_norm": 0.1473645716905594,
-      "learning_rate": 7.373249298722506e-05,
-      "loss": 4.2973,
       "step": 1008
     },
     {
       "epoch": 1.695364238410596,
-      "grad_norm": 0.10669790208339691,
-      "learning_rate": 6.29107169677236e-05,
-      "loss": 4.2533,
       "step": 1024
     },
     {
       "epoch": 1.7218543046357615,
-      "grad_norm": 0.13500191271305084,
-      "learning_rate": 5.28941884619693e-05,
-      "loss": 4.3073,
       "step": 1040
     },
     {
       "epoch": 1.7483443708609272,
-      "grad_norm": 0.1250208169221878,
-      "learning_rate": 4.370136087293658e-05,
-      "loss": 4.2093,
       "step": 1056
     },
     {
       "epoch": 1.7748344370860927,
-      "grad_norm": 0.1221160963177681,
-      "learning_rate": 3.534917010330652e-05,
-      "loss": 4.2625,
       "step": 1072
     },
     {
       "epoch": 1.8013245033112582,
-      "grad_norm": 0.11934095621109009,
-      "learning_rate": 2.7853003354533555e-05,
-      "loss": 4.3391,
       "step": 1088
     },
     {
       "epoch": 1.8278145695364238,
-      "grad_norm": 0.13353998959064484,
-      "learning_rate": 2.1226670779077306e-05,
-      "loss": 4.2848,
       "step": 1104
     },
     {
       "epoch": 1.8543046357615895,
-      "grad_norm": 0.10006093233823776,
-      "learning_rate": 1.5482380038023768e-05,
-      "loss": 4.2608,
       "step": 1120
     },
     {
       "epoch": 1.8807947019867548,
-      "grad_norm": 0.10387697070837021,
-      "learning_rate": 1.0630713810969639e-05,
-      "loss": 4.2986,
       "step": 1136
     },
     {
       "epoch": 1.9072847682119205,
-      "grad_norm": 0.13752028346061707,
-      "learning_rate": 6.680610299601708e-06,
-      "loss": 4.2593,
       "step": 1152
     },
     {
       "epoch": 1.9337748344370862,
-      "grad_norm": 0.1313001811504364,
-      "learning_rate": 3.639346760890283e-06,
-      "loss": 4.2522,
       "step": 1168
     },
     {
       "epoch": 1.9602649006622517,
-      "grad_norm": 0.11591313779354095,
-      "learning_rate": 1.5125261002330026e-06,
-      "loss": 4.2476,
       "step": 1184
     },
     {
       "epoch": 1.9867549668874172,
-      "grad_norm": 0.11690162122249603,
-      "learning_rate": 3.040665492491379e-07,
-      "loss": 4.3406,
       "step": 1200
     },
     {
       "epoch": 2.0,
-      "eval_bleu": 0.11121865675263325,
-      "eval_cap_loss": 1.2156698384032345,
-      "eval_con_loss": 1.8595664712372204,
-      "eval_loss": 3.0752363122062176,
       "step": 1208
     },
     {
       "epoch": 2.0,
-      "eval_bleu": 0.11121865675263325,
-      "eval_cap_loss": 1.2156698384032345,
-      "eval_con_loss": 1.8595664712372204,
-      "eval_loss": 3.0752363122062176,
-      "eval_runtime": 250.9492,
-      "eval_samples_per_second": 19.243,
-      "eval_steps_per_second": 2.407,
       "step": 1208
     }
   ],
@@ -593,5 +593,5 @@
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null,
-  "tau_value": 3.5545
 }

   "log_history": [
     {
       "epoch": 0.026490066225165563,
+      "grad_norm": 0.12706246972084045,
       "learning_rate": 0.0004324324324324325,
+      "loss": 4.2683,
       "step": 16
     },
     {
       "epoch": 0.052980132450331126,
+      "grad_norm": 0.1603141874074936,
+      "learning_rate": 0.000864864864864865,
+      "loss": 4.2333,
       "step": 32
     },
     {
       "epoch": 0.07947019867549669,
+      "grad_norm": 0.11292944103479385,
+      "learning_rate": 0.0009997822892796068,
+      "loss": 4.2693,
       "step": 48
     },
     {
       "epoch": 0.10596026490066225,
+      "grad_norm": 0.126583069562912,
+      "learning_rate": 0.000998688816161266,
+      "loss": 4.2745,
       "step": 64
     },
     {
       "epoch": 0.13245033112582782,
+      "grad_norm": 0.1791575849056244,
+      "learning_rate": 0.0009966766110013582,
+      "loss": 4.2707,
       "step": 80
     },
     {
       "epoch": 0.15894039735099338,
+      "grad_norm": 0.16615551710128784,
+      "learning_rate": 0.0009937493808759087,
+      "loss": 4.3332,
       "step": 96
     },
     {
       "epoch": 0.18543046357615894,
+      "grad_norm": 0.17333188652992249,
+      "learning_rate": 0.0009899125186070988,
+      "loss": 4.2671,
       "step": 112
     },
     {
       "epoch": 0.2119205298013245,
+      "grad_norm": 0.14814811944961548,
+      "learning_rate": 0.0009851730928280944,
+      "loss": 4.2474,
       "step": 128
     },
     {
       "epoch": 0.23841059602649006,
+      "grad_norm": 0.16701027750968933,
+      "learning_rate": 0.0009795398349605373,
+      "loss": 4.248,
       "step": 144
     },
     {
       "epoch": 0.26490066225165565,
+      "grad_norm": 0.11271824687719345,
+      "learning_rate": 0.0009730231231286876,
+      "loss": 4.2506,
       "step": 160
     },
     {
       "epoch": 0.2913907284768212,
+      "grad_norm": 0.10354705899953842,
+      "learning_rate": 0.0009656349630398554,
+      "loss": 4.2424,
       "step": 176
     },
     {
       "epoch": 0.31788079470198677,
+      "grad_norm": 0.1165025606751442,
+      "learning_rate": 0.0009573889658663424,
+      "loss": 4.2973,
       "step": 192
     },
     {
       "epoch": 0.3443708609271523,
+      "grad_norm": 0.1250074803829193,
+      "learning_rate": 0.0009483003231696446,
+      "loss": 4.2571,
       "step": 208
     },
     {
       "epoch": 0.3708609271523179,
+      "grad_norm": 0.13243281841278076,
+      "learning_rate": 0.0009383857789131097,
+      "loss": 4.2659,
       "step": 224
     },
     {
       "epoch": 0.3973509933774834,
+      "grad_norm": 0.10707055032253265,
+      "learning_rate": 0.0009276635986146136,
+      "loss": 4.2331,
       "step": 240
     },
     {
       "epoch": 0.423841059602649,
+      "grad_norm": 0.12676522135734558,
+      "learning_rate": 0.0009161535356960828,
+      "loss": 4.2585,
       "step": 256
     },
     {
       "epoch": 0.4503311258278146,
+      "grad_norm": 0.10248049348592758,
+      "learning_rate": 0.0009038767950918592,
+      "loss": 4.2375,
       "step": 272
     },
     {
       "epoch": 0.4768211920529801,
+      "grad_norm": 0.16809116303920746,
+      "learning_rate": 0.0008908559941829497,
+      "loss": 4.2249,
       "step": 288
     },
     {
       "epoch": 0.5033112582781457,
+      "grad_norm": 0.11593299359083176,
+      "learning_rate": 0.0008771151211291332,
+      "loss": 4.2411,
       "step": 304
     },
     {
       "epoch": 0.5298013245033113,
+      "grad_norm": 0.11452265083789825,
+      "learning_rate": 0.0008626794906756866,
+      "loss": 4.2158,
       "step": 320
     },
     {
       "epoch": 0.5562913907284768,
+      "grad_norm": 0.14648571610450745,
+      "learning_rate": 0.0008475756975161504,
+      "loss": 4.2718,
       "step": 336
     },
     {
       "epoch": 0.5827814569536424,
+      "grad_norm": 0.11873907595872879,
+      "learning_rate": 0.00083183156729705,
+      "loss": 4.2431,
       "step": 352
     },
     {
       "epoch": 0.609271523178808,
+      "grad_norm": 0.13245505094528198,
+      "learning_rate": 0.0008154761053548404,
+      "loss": 4.2546,
       "step": 368
     },
     {
       "epoch": 0.6357615894039735,
+      "grad_norm": 0.12086515128612518,
+      "learning_rate": 0.000798539443279511,
+      "loss": 4.2042,
       "step": 384
     },
     {
       "epoch": 0.6622516556291391,
+      "grad_norm": 0.09804444760084152,
+      "learning_rate": 0.0007810527834033009,
+      "loss": 4.2046,
       "step": 400
     },
     {
       "epoch": 0.6887417218543046,
+      "grad_norm": 0.12007380276918411,
+      "learning_rate": 0.00076304834131679,
+      "loss": 4.2311,
       "step": 416
     },
     {
       "epoch": 0.7152317880794702,
+      "grad_norm": 0.12332040816545486,
+      "learning_rate": 0.0007445592865182695,
+      "loss": 4.2304,
       "step": 432
     },
     {
       "epoch": 0.7417218543046358,
+      "grad_norm": 0.12303052097558975,
+      "learning_rate": 0.0007256196813057318,
+      "loss": 4.2351,
       "step": 448
     },
     {
       "epoch": 0.7682119205298014,
+      "grad_norm": 0.09435882419347763,
+      "learning_rate": 0.0007062644180240614,
+      "loss": 4.1903,
       "step": 464
     },
     {
       "epoch": 0.7947019867549668,
+      "grad_norm": 0.12024106830358505,
+      "learning_rate": 0.0006865291547830324,
+      "loss": 4.2468,
       "step": 480
     },
     {
       "epoch": 0.8211920529801324,
+      "grad_norm": 0.09838169813156128,
+      "learning_rate": 0.000666450249764542,
+      "loss": 4.1978,
       "step": 496
     },
     {
       "epoch": 0.847682119205298,
+      "grad_norm": 0.10983674973249435,
+      "learning_rate": 0.0006460646942401058,
+      "loss": 4.2443,
       "step": 512
     },
     {
       "epoch": 0.8741721854304636,
+      "grad_norm": 0.09954366832971573,
+      "learning_rate": 0.0006254100444220115,
+      "loss": 4.227,
       "step": 528
     },
     {
       "epoch": 0.9006622516556292,
+      "grad_norm": 0.12015046179294586,
+      "learning_rate": 0.0006045243522736885,
+      "loss": 4.2154,
       "step": 544
     },
     {
       "epoch": 0.9271523178807947,
+      "grad_norm": 0.10851255804300308,
+      "learning_rate": 0.0005834460954067559,
+      "loss": 4.242,
       "step": 560
     },
     {
       "epoch": 0.9536423841059603,
+      "grad_norm": 0.0975833460688591,
+      "learning_rate": 0.0005622141061939006,
+      "loss": 4.2135,
       "step": 576
     },
     {
       "epoch": 0.9801324503311258,
+      "grad_norm": 0.10621072351932526,
+      "learning_rate": 0.0005408675002281818,
+      "loss": 4.2932,
       "step": 592
     },
     {
       "epoch": 1.0,
+      "eval_bleu": 0.1135024238637086,
+      "eval_cap_loss": 1.2033403850351738,
+      "eval_con_loss": 1.786089795906812,
+      "eval_loss": 2.9894301812380353,
       "step": 604
     },
     {
       "epoch": 1.0,
+      "eval_bleu": 0.1135024238637086,
+      "eval_cap_loss": 1.2033403850351738,
+      "eval_con_loss": 1.786089795906812,
+      "eval_loss": 2.9894301812380353,
+      "eval_runtime": 416.7065,
+      "eval_samples_per_second": 11.588,
+      "eval_steps_per_second": 1.449,
       "step": 604
     },
     {
       "epoch": 1.0066225165562914,
+      "grad_norm": 0.13524451851844788,
+      "learning_rate": 0.0005194456042605587,
+      "loss": 4.1897,
       "step": 608
     },
     {
       "epoch": 1.033112582781457,
+      "grad_norm": 0.09372173994779587,
+      "learning_rate": 0.0004979878837484043,
+      "loss": 4.2234,
       "step": 624
     },
     {
       "epoch": 1.0596026490066226,
+      "grad_norm": 0.10636495053768158,
+      "learning_rate": 0.00047653387014848014,
+      "loss": 4.2304,
       "step": 640
     },
     {
       "epoch": 1.086092715231788,
+      "grad_norm": 0.1295643150806427,
+      "learning_rate": 0.0004551230880883208,
+      "loss": 4.2388,
       "step": 656
     },
     {
       "epoch": 1.1125827814569536,
+      "grad_norm": 0.10932028293609619,
+      "learning_rate": 0.00043379498255020037,
+      "loss": 4.2505,
       "step": 672
     },
     {
       "epoch": 1.1390728476821192,
+      "grad_norm": 0.11317116022109985,
+      "learning_rate": 0.00041258884620182804,
+      "loss": 4.2256,
       "step": 688
     },
     {
       "epoch": 1.1655629139072847,
+      "grad_norm": 0.15960782766342163,
+      "learning_rate": 0.00039154374700765316,
+      "loss": 4.2248,
       "step": 704
     },
     {
       "epoch": 1.1920529801324504,
+      "grad_norm": 0.14448010921478271,
+      "learning_rate": 0.00037069845625413954,
+      "loss": 4.248,
       "step": 720
     },
     {
       "epoch": 1.218543046357616,
+      "grad_norm": 0.12560078501701355,
+      "learning_rate": 0.0003500913771216081,
+      "loss": 4.2264,
       "step": 736
     },
     {
       "epoch": 1.2450331125827814,
+      "grad_norm": 0.15087343752384186,
+      "learning_rate": 0.0003297604739342396,
+      "loss": 4.2582,
       "step": 752
     },
     {
       "epoch": 1.271523178807947,
+      "grad_norm": 0.14385798573493958,
+      "learning_rate": 0.00030974320221858066,
+      "loss": 4.2413,
       "step": 768
     },
     {
       "epoch": 1.2980132450331126,
+      "grad_norm": 0.12157344818115234,
+      "learning_rate": 0.0002900764396994049,
+      "loss": 4.2285,
       "step": 784
     },
     {
       "epoch": 1.3245033112582782,
+      "grad_norm": 0.14533455669879913,
+      "learning_rate": 0.00027079641836005473,
+      "loss": 4.2719,
       "step": 800
     },
     {
       "epoch": 1.3509933774834437,
+      "grad_norm": 0.11581210792064667,
+      "learning_rate": 0.0002519386576924303,
+      "loss": 4.2191,
       "step": 816
     },
     {
       "epoch": 1.3774834437086092,
+      "grad_norm": 0.11933822929859161,
+      "learning_rate": 0.0002335378992595995,
+      "loss": 4.1929,
       "step": 832
     },
     {
       "epoch": 1.403973509933775,
+      "grad_norm": 0.13587485253810883,
+      "learning_rate": 0.0002156280426915786,
+      "loss": 4.2147,
       "step": 848
     },
     {
       "epoch": 1.4304635761589404,
+      "grad_norm": 0.11555938422679901,
+      "learning_rate": 0.00019824208323220656,
+      "loss": 4.2268,
       "step": 864
     },
     {
       "epoch": 1.4569536423841059,
+      "grad_norm": 0.12277619540691376,
+      "learning_rate": 0.00018141205095216294,
+      "loss": 4.2261,
       "step": 880
     },
     {
       "epoch": 1.4834437086092715,
+      "grad_norm": 0.11538510024547577,
+      "learning_rate": 0.00016516895174012043,
+      "loss": 4.1956,
       "step": 896
     },
     {
       "epoch": 1.5099337748344372,
+      "grad_norm": 0.10595931112766266,
+      "learning_rate": 0.00014954271018074368,
+      "loss": 4.2703,
       "step": 912
     },
     {
       "epoch": 1.5364238410596025,
+      "grad_norm": 0.12333059310913086,
+      "learning_rate": 0.00013456211442476813,
+      "loss": 4.1822,
       "step": 928
     },
     {
       "epoch": 1.5629139072847682,
+      "grad_norm": 0.10703016817569733,
+      "learning_rate": 0.00012025476315272743,
+      "loss": 4.2289,
       "step": 944
     },
     {
       "epoch": 1.589403973509934,
+      "grad_norm": 0.11507616937160492,
+      "learning_rate": 0.00010664701473003396,
+      "loss": 4.2223,
       "step": 960
     },
     {
       "epoch": 1.6158940397350994,
+      "grad_norm": 0.09708770364522934,
+      "learning_rate": 9.376393864708821e-05,
+      "loss": 4.2223,
       "step": 976
     },
     {
       "epoch": 1.6423841059602649,
+      "grad_norm": 0.11372318863868713,
+      "learning_rate": 8.162926933387499e-05,
+      "loss": 4.2433,
       "step": 992
     },
     {
       "epoch": 1.6688741721854305,
+      "grad_norm": 0.16325490176677704,
+      "learning_rate": 7.026536243413539e-05,
+      "loss": 4.2427,
       "step": 1008
     },
     {
       "epoch": 1.695364238410596,
+      "grad_norm": 0.10167232900857925,
+      "learning_rate": 5.969315361967087e-05,
+      "loss": 4.1995,
       "step": 1024
     },
     {
       "epoch": 1.7218543046357615,
+      "grad_norm": 0.11737249046564102,
+      "learning_rate": 4.9932120020654116e-05,
+      "loss": 4.2522,
       "step": 1040
     },
     {
       "epoch": 1.7483443708609272,
+      "grad_norm": 0.10857795923948288,
+      "learning_rate": 4.100024434300437e-05,
+      "loss": 4.1555,
       "step": 1056
     },
     {
       "epoch": 1.7748344370860927,
+      "grad_norm": 0.11734642088413239,
+      "learning_rate": 3.2913981738933395e-05,
+      "loss": 4.208,
       "step": 1072
     },
     {
       "epoch": 1.8013245033112582,
+      "grad_norm": 0.10702993720769882,
+      "learning_rate": 2.5688229491697356e-05,
+      "loss": 4.2835,
       "step": 1088
     },
     {
       "epoch": 1.8278145695364238,
+      "grad_norm": 0.12669378519058228,
+      "learning_rate": 1.9336299570401396e-05,
+      "loss": 4.2288,
       "step": 1104
     },
     {
       "epoch": 1.8543046357615895,
+      "grad_norm": 0.0894075259566307,
+      "learning_rate": 1.3869894105423109e-05,
+      "loss": 4.2051,
       "step": 1120
     },
     {
       "epoch": 1.8807947019867548,
+      "grad_norm": 0.09530144929885864,
+      "learning_rate": 9.299083829632516e-06,
+      "loss": 4.2449,
       "step": 1136
     },
     {
       "epoch": 1.9072847682119205,
+      "grad_norm": 0.13094228506088257,
+      "learning_rate": 5.632289525129064e-06,
+      "loss": 4.2017,
       "step": 1152
     },
     {
       "epoch": 1.9337748344370862,
+      "grad_norm": 0.12416987866163254,
+      "learning_rate": 2.8762665096744854e-06,
+      "loss": 4.1943,
       "step": 1168
     },
     {
       "epoch": 1.9602649006622517,
+      "grad_norm": 0.10805880278348923,
+      "learning_rate": 1.036092191402882e-06,
+      "loss": 4.1936,
       "step": 1184
     },
     {
       "epoch": 1.9867549668874172,
+      "grad_norm": 0.11822624504566193,
+      "learning_rate": 1.1515671473599775e-07,
+      "loss": 4.2833,
       "step": 1200
     },
     {
       "epoch": 2.0,
+      "eval_bleu": 0.1159956729511227,
+      "eval_cap_loss": 1.1952345237037203,
+      "eval_con_loss": 1.7797789394066035,
+      "eval_loss": 2.975013460544561,
       "step": 1208
     },
     {
       "epoch": 2.0,
+      "eval_bleu": 0.1159956729511227,
+      "eval_cap_loss": 1.1952345237037203,
+      "eval_con_loss": 1.7797789394066035,
+      "eval_loss": 2.975013460544561,
+      "eval_runtime": 297.535,
+      "eval_samples_per_second": 16.23,
+      "eval_steps_per_second": 2.03,
       "step": 1208
     }
   ],
   "train_batch_size": 32,
   "trial_name": null,
   "trial_params": null,
+  "tau_value": 5.2037
 }