Training in progress, step 50, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/README.md +1 -1
last-checkpoint/adapter_config.json +4 -4
last-checkpoint/adapter_model.safetensors +2 -2
last-checkpoint/optimizer.pt +2 -2
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +222 -954
last-checkpoint/training_args.bin +1 -1

last-checkpoint/README.md CHANGED Viewed

@@ -1,5 +1,5 @@
 ---
-base_model: unsloth/Llama-3.2-1B-Instruct
 library_name: peft
 ---

 ---
+base_model: unsloth/Llama-3.2-3B-Instruct
 library_name: peft
 ---

last-checkpoint/adapter_config.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": "unsloth/Llama-3.2-1B-Instruct",
   "bias": "none",
   "fan_in_fan_out": null,
   "inference_mode": true,
@@ -20,12 +20,12 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "q_proj",
-    "v_proj",
-    "gate_proj",
     "k_proj",
     "up_proj",
     "o_proj",
     "down_proj"
   ],
   "task_type": "CAUSAL_LM",

 {
   "alpha_pattern": {},
   "auto_mapping": null,
+  "base_model_name_or_path": "unsloth/Llama-3.2-3B-Instruct",
   "bias": "none",
   "fan_in_fan_out": null,
   "inference_mode": true,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "k_proj",
+    "gate_proj",
     "up_proj",
     "o_proj",
+    "v_proj",
+    "q_proj",
     "down_proj"
   ],
   "task_type": "CAUSAL_LM",

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1c3f769ff1e4ba4c98c03a624ed01a33e854004d0f07165933ad20c79838391d
-size 45118424

 version https://git-lfs.github.com/spec/v1
+oid sha256:ef850e00f6ddccf7a92baced38593af32bacbc7deb92d8d8407aa4aa0408de1a
+size 97307544

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c89e8e9eeced7aa8ba2c881e43808e892dc2bfda03265131865bd0d70d2f6ac2
-size 90365754

 version https://git-lfs.github.com/spec/v1
+oid sha256:c1bc63593d20de9621903470fe84da0c3845eac9f62fcf47fa7fc53c31b55e0a
+size 194840426

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:15bd35e71399de8fb4118025c15f8deb2ca06276b3bf83802724255e61131eb7
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:9985d87c9fe1230092403fc0c5f05d287ced3986c4900ee14ddc69775bac2f24
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:340e82764c0efd5e9e8cadc3d39d5cf7050cd6012718448008af049ab8a6b827
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:6a3057cc3055b088ac745445a3a938968f2d198c20a2487c1a93787006959980
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,1122 +1,390 @@
 {
-  "best_metric": 2.570774555206299,
-  "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.037460198539052254,
   "eval_steps": 25,
-  "global_step": 150,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.00024973465692701506,
-      "grad_norm": 1.0262023210525513,
-      "learning_rate": 2.8571428571428573e-06,
-      "loss": 3.2239,
       "step": 1
     },
     {
-      "epoch": 0.00024973465692701506,
-      "eval_loss": 3.366023302078247,
-      "eval_runtime": 2.886,
-      "eval_samples_per_second": 17.325,
-      "eval_steps_per_second": 2.772,
       "step": 1
     },
     {
-      "epoch": 0.0004994693138540301,
-      "grad_norm": 0.8814900517463684,
-      "learning_rate": 5.7142857142857145e-06,
-      "loss": 3.2252,
       "step": 2
     },
     {
-      "epoch": 0.0007492039707810451,
-      "grad_norm": 1.10072660446167,
-      "learning_rate": 8.571428571428573e-06,
-      "loss": 3.339,
       "step": 3
     },
     {
-      "epoch": 0.0009989386277080602,
-      "grad_norm": 1.023310661315918,
-      "learning_rate": 1.1428571428571429e-05,
-      "loss": 3.2591,
       "step": 4
     },
     {
-      "epoch": 0.0012486732846350753,
-      "grad_norm": 1.0511233806610107,
-      "learning_rate": 1.4285714285714285e-05,
-      "loss": 3.3003,
       "step": 5
     },
     {
-      "epoch": 0.0014984079415620902,
-      "grad_norm": 0.9858811497688293,
-      "learning_rate": 1.7142857142857145e-05,
-      "loss": 3.2781,
       "step": 6
     },
     {
-      "epoch": 0.0017481425984891053,
-      "grad_norm": 1.1659033298492432,
-      "learning_rate": 2e-05,
-      "loss": 3.2301,
       "step": 7
     },
     {
-      "epoch": 0.0019978772554161204,
-      "grad_norm": 1.0043777227401733,
-      "learning_rate": 2.2857142857142858e-05,
-      "loss": 3.1818,
       "step": 8
     },
     {
-      "epoch": 0.0022476119123431356,
-      "grad_norm": 1.0616182088851929,
-      "learning_rate": 2.5714285714285714e-05,
-      "loss": 3.2333,
       "step": 9
     },
     {
-      "epoch": 0.0024973465692701507,
-      "grad_norm": 1.107161521911621,
-      "learning_rate": 2.857142857142857e-05,
-      "loss": 3.3466,
       "step": 10
     },
     {
-      "epoch": 0.0027470812261971653,
-      "grad_norm": 1.209277868270874,
-      "learning_rate": 3.142857142857143e-05,
-      "loss": 3.2744,
       "step": 11
     },
     {
-      "epoch": 0.0029968158831241805,
-      "grad_norm": 1.04523766040802,
-      "learning_rate": 3.428571428571429e-05,
-      "loss": 3.1888,
       "step": 12
     },
     {
-      "epoch": 0.0032465505400511956,
-      "grad_norm": 1.0623782873153687,
-      "learning_rate": 3.7142857142857143e-05,
-      "loss": 3.1969,
       "step": 13
     },
     {
-      "epoch": 0.0034962851969782107,
-      "grad_norm": 1.007890224456787,
-      "learning_rate": 4e-05,
-      "loss": 3.2903,
       "step": 14
     },
     {
-      "epoch": 0.0037460198539052258,
-      "grad_norm": 0.8465790152549744,
-      "learning_rate": 4.2857142857142856e-05,
-      "loss": 3.0649,
       "step": 15
     },
     {
-      "epoch": 0.003995754510832241,
-      "grad_norm": 0.8478276133537292,
-      "learning_rate": 4.5714285714285716e-05,
-      "loss": 3.1391,
       "step": 16
     },
     {
-      "epoch": 0.004245489167759256,
-      "grad_norm": 0.7651631236076355,
-      "learning_rate": 4.8571428571428576e-05,
-      "loss": 3.1899,
       "step": 17
     },
     {
-      "epoch": 0.004495223824686271,
-      "grad_norm": 0.7943688631057739,
-      "learning_rate": 5.142857142857143e-05,
-      "loss": 3.051,
       "step": 18
     },
     {
-      "epoch": 0.004744958481613286,
-      "grad_norm": 0.7817432284355164,
-      "learning_rate": 5.428571428571428e-05,
-      "loss": 3.0732,
       "step": 19
     },
     {
-      "epoch": 0.004994693138540301,
-      "grad_norm": 0.9154627919197083,
-      "learning_rate": 5.714285714285714e-05,
-      "loss": 3.1392,
       "step": 20
     },
     {
-      "epoch": 0.005244427795467316,
-      "grad_norm": 0.9029829502105713,
-      "learning_rate": 6e-05,
-      "loss": 3.21,
       "step": 21
     },
     {
-      "epoch": 0.005494162452394331,
-      "grad_norm": 0.8615948557853699,
-      "learning_rate": 6.285714285714286e-05,
-      "loss": 2.9363,
       "step": 22
     },
     {
-      "epoch": 0.005743897109321346,
-      "grad_norm": 0.8935572504997253,
-      "learning_rate": 6.571428571428571e-05,
-      "loss": 2.8907,
       "step": 23
     },
     {
-      "epoch": 0.005993631766248361,
-      "grad_norm": 0.7808010578155518,
-      "learning_rate": 6.857142857142858e-05,
-      "loss": 3.0218,
       "step": 24
     },
     {
-      "epoch": 0.006243366423175376,
-      "grad_norm": 0.7767881751060486,
-      "learning_rate": 7.142857142857143e-05,
-      "loss": 2.981,
       "step": 25
     },
     {
-      "epoch": 0.006243366423175376,
-      "eval_loss": 3.0071640014648438,
-      "eval_runtime": 2.6827,
-      "eval_samples_per_second": 18.638,
-      "eval_steps_per_second": 2.982,
       "step": 25
     },
     {
-      "epoch": 0.006493101080102391,
-      "grad_norm": 0.9288599491119385,
-      "learning_rate": 7.428571428571429e-05,
-      "loss": 2.9497,
       "step": 26
     },
     {
-      "epoch": 0.006742835737029406,
-      "grad_norm": 0.7947930097579956,
-      "learning_rate": 7.714285714285715e-05,
-      "loss": 2.8892,
       "step": 27
     },
     {
-      "epoch": 0.006992570393956421,
-      "grad_norm": 0.9469573497772217,
-      "learning_rate": 8e-05,
-      "loss": 3.2229,
       "step": 28
     },
     {
-      "epoch": 0.0072423050508834364,
-      "grad_norm": 0.820366382598877,
-      "learning_rate": 8.285714285714287e-05,
-      "loss": 2.8336,
       "step": 29
     },
     {
-      "epoch": 0.0074920397078104516,
-      "grad_norm": 0.7744409441947937,
-      "learning_rate": 8.571428571428571e-05,
-      "loss": 2.7295,
       "step": 30
     },
     {
-      "epoch": 0.007741774364737467,
-      "grad_norm": 0.890326201915741,
-      "learning_rate": 8.857142857142857e-05,
-      "loss": 2.8798,
       "step": 31
     },
     {
-      "epoch": 0.007991509021664482,
-      "grad_norm": 0.9005088210105896,
-      "learning_rate": 9.142857142857143e-05,
-      "loss": 2.9725,
       "step": 32
     },
     {
-      "epoch": 0.008241243678591496,
-      "grad_norm": 0.9345659017562866,
-      "learning_rate": 9.428571428571429e-05,
-      "loss": 2.9779,
       "step": 33
     },
     {
-      "epoch": 0.008490978335518512,
-      "grad_norm": 0.8667367100715637,
-      "learning_rate": 9.714285714285715e-05,
-      "loss": 2.7893,
       "step": 34
     },
     {
-      "epoch": 0.008740712992445526,
-      "grad_norm": 0.8754308819770813,
-      "learning_rate": 0.0001,
-      "loss": 2.9642,
       "step": 35
     },
     {
-      "epoch": 0.008990447649372542,
-      "grad_norm": 0.9180012941360474,
-      "learning_rate": 0.00010285714285714286,
-      "loss": 2.7238,
       "step": 36
     },
     {
-      "epoch": 0.009240182306299556,
-      "grad_norm": 0.8926249146461487,
-      "learning_rate": 0.00010571428571428572,
-      "loss": 2.7661,
       "step": 37
     },
     {
-      "epoch": 0.009489916963226572,
-      "grad_norm": 0.8704882264137268,
-      "learning_rate": 0.00010857142857142856,
-      "loss": 2.9965,
       "step": 38
     },
     {
-      "epoch": 0.009739651620153587,
-      "grad_norm": 0.8835905194282532,
-      "learning_rate": 0.00011142857142857144,
-      "loss": 2.7978,
       "step": 39
     },
     {
-      "epoch": 0.009989386277080603,
-      "grad_norm": 0.8821542263031006,
-      "learning_rate": 0.00011428571428571428,
-      "loss": 2.8393,
       "step": 40
     },
     {
-      "epoch": 0.010239120934007617,
-      "grad_norm": 1.0209946632385254,
-      "learning_rate": 0.00011714285714285715,
-      "loss": 2.9775,
       "step": 41
     },
     {
-      "epoch": 0.010488855590934631,
-      "grad_norm": 0.8903468251228333,
-      "learning_rate": 0.00012,
-      "loss": 2.8332,
       "step": 42
     },
     {
-      "epoch": 0.010738590247861647,
-      "grad_norm": 1.0161865949630737,
-      "learning_rate": 0.00012285714285714287,
-      "loss": 2.9317,
       "step": 43
     },
     {
-      "epoch": 0.010988324904788661,
-      "grad_norm": 0.9254084229469299,
-      "learning_rate": 0.00012571428571428572,
-      "loss": 2.8001,
       "step": 44
     },
     {
-      "epoch": 0.011238059561715677,
-      "grad_norm": 0.8981170654296875,
-      "learning_rate": 0.00012857142857142858,
-      "loss": 3.0302,
       "step": 45
     },
     {
-      "epoch": 0.011487794218642692,
-      "grad_norm": 0.9837843775749207,
-      "learning_rate": 0.00013142857142857143,
-      "loss": 2.6885,
       "step": 46
     },
     {
-      "epoch": 0.011737528875569708,
-      "grad_norm": 0.9914678335189819,
-      "learning_rate": 0.00013428571428571428,
-      "loss": 2.9183,
       "step": 47
     },
     {
-      "epoch": 0.011987263532496722,
-      "grad_norm": 1.0084929466247559,
-      "learning_rate": 0.00013714285714285716,
-      "loss": 2.8724,
       "step": 48
     },
     {
-      "epoch": 0.012236998189423738,
-      "grad_norm": 1.538197636604309,
-      "learning_rate": 0.00014,
-      "loss": 2.7356,
       "step": 49
     },
     {
-      "epoch": 0.012486732846350752,
-      "grad_norm": 2.431025743484497,
-      "learning_rate": 0.00014285714285714287,
-      "loss": 2.8119,
       "step": 50
     },
     {
-      "epoch": 0.012486732846350752,
-      "eval_loss": 2.811203956604004,
-      "eval_runtime": 2.6823,
-      "eval_samples_per_second": 18.641,
-      "eval_steps_per_second": 2.983,
       "step": 50
-    },
-    {
-      "epoch": 0.012736467503277768,
-      "grad_norm": 1.0722112655639648,
-      "learning_rate": 0.00014571428571428572,
-      "loss": 3.1445,
-      "step": 51
-    },
-    {
-      "epoch": 0.012986202160204782,
-      "grad_norm": 1.0569753646850586,
-      "learning_rate": 0.00014857142857142857,
-      "loss": 2.9575,
-      "step": 52
-    },
-    {
-      "epoch": 0.013235936817131798,
-      "grad_norm": 0.8477814197540283,
-      "learning_rate": 0.00015142857142857143,
-      "loss": 2.9096,
-      "step": 53
-    },
-    {
-      "epoch": 0.013485671474058812,
-      "grad_norm": 0.7475413084030151,
-      "learning_rate": 0.0001542857142857143,
-      "loss": 2.7139,
-      "step": 54
-    },
-    {
-      "epoch": 0.013735406130985827,
-      "grad_norm": 0.6939762234687805,
-      "learning_rate": 0.00015714285714285716,
-      "loss": 2.9446,
-      "step": 55
-    },
-    {
-      "epoch": 0.013985140787912843,
-      "grad_norm": 0.772847056388855,
-      "learning_rate": 0.00016,
-      "loss": 2.7692,
-      "step": 56
-    },
-    {
-      "epoch": 0.014234875444839857,
-      "grad_norm": 0.7479884028434753,
-      "learning_rate": 0.00016285714285714287,
-      "loss": 2.7092,
-      "step": 57
-    },
-    {
-      "epoch": 0.014484610101766873,
-      "grad_norm": 0.7816758155822754,
-      "learning_rate": 0.00016571428571428575,
-      "loss": 2.8473,
-      "step": 58
-    },
-    {
-      "epoch": 0.014734344758693887,
-      "grad_norm": 0.7092522382736206,
-      "learning_rate": 0.00016857142857142857,
-      "loss": 2.706,
-      "step": 59
-    },
-    {
-      "epoch": 0.014984079415620903,
-      "grad_norm": 0.6997970938682556,
-      "learning_rate": 0.00017142857142857143,
-      "loss": 2.8594,
-      "step": 60
-    },
-    {
-      "epoch": 0.015233814072547917,
-      "grad_norm": 0.654046356678009,
-      "learning_rate": 0.0001742857142857143,
-      "loss": 2.7968,
-      "step": 61
-    },
-    {
-      "epoch": 0.015483548729474933,
-      "grad_norm": 0.6976904273033142,
-      "learning_rate": 0.00017714285714285713,
-      "loss": 2.8346,
-      "step": 62
-    },
-    {
-      "epoch": 0.01573328338640195,
-      "grad_norm": 0.7366370558738708,
-      "learning_rate": 0.00018,
-      "loss": 2.8191,
-      "step": 63
-    },
-    {
-      "epoch": 0.015983018043328964,
-      "grad_norm": 0.7815442085266113,
-      "learning_rate": 0.00018285714285714286,
-      "loss": 2.8074,
-      "step": 64
-    },
-    {
-      "epoch": 0.016232752700255978,
-      "grad_norm": 0.7780041694641113,
-      "learning_rate": 0.00018571428571428572,
-      "loss": 2.7443,
-      "step": 65
-    },
-    {
-      "epoch": 0.016482487357182992,
-      "grad_norm": 0.6779807806015015,
-      "learning_rate": 0.00018857142857142857,
-      "loss": 2.6378,
-      "step": 66
-    },
-    {
-      "epoch": 0.01673222201411001,
-      "grad_norm": 0.6851223111152649,
-      "learning_rate": 0.00019142857142857145,
-      "loss": 2.4737,
-      "step": 67
-    },
-    {
-      "epoch": 0.016981956671037024,
-      "grad_norm": 0.741107702255249,
-      "learning_rate": 0.0001942857142857143,
-      "loss": 2.708,
-      "step": 68
-    },
-    {
-      "epoch": 0.017231691327964038,
-      "grad_norm": 0.7790065407752991,
-      "learning_rate": 0.00019714285714285716,
-      "loss": 2.7973,
-      "step": 69
-    },
-    {
-      "epoch": 0.017481425984891052,
-      "grad_norm": 0.8207802176475525,
-      "learning_rate": 0.0002,
-      "loss": 2.7328,
-      "step": 70
-    },
-    {
-      "epoch": 0.017731160641818067,
-      "grad_norm": 0.8045083284378052,
-      "learning_rate": 0.00019999984264028182,
-      "loss": 2.6503,
-      "step": 71
-    },
-    {
-      "epoch": 0.017980895298745084,
-      "grad_norm": 0.8912211060523987,
-      "learning_rate": 0.0001999993705616775,
-      "loss": 2.6638,
-      "step": 72
-    },
-    {
-      "epoch": 0.0182306299556721,
-      "grad_norm": 0.8193833231925964,
-      "learning_rate": 0.0001999985837658379,
-      "loss": 2.3789,
-      "step": 73
-    },
-    {
-      "epoch": 0.018480364612599113,
-      "grad_norm": 0.8749593496322632,
-      "learning_rate": 0.0001999974822555143,
-      "loss": 2.6006,
-      "step": 74
-    },
-    {
-      "epoch": 0.018730099269526127,
-      "grad_norm": 0.911247730255127,
-      "learning_rate": 0.00019999606603455857,
-      "loss": 2.6721,
-      "step": 75
-    },
-    {
-      "epoch": 0.018730099269526127,
-      "eval_loss": 2.6788198947906494,
-      "eval_runtime": 2.6857,
-      "eval_samples_per_second": 18.617,
-      "eval_steps_per_second": 2.979,
-      "step": 75
-    },
-    {
-      "epoch": 0.018979833926453145,
-      "grad_norm": 0.9761448502540588,
-      "learning_rate": 0.00019999433510792307,
-      "loss": 2.7212,
-      "step": 76
-    },
-    {
-      "epoch": 0.01922956858338016,
-      "grad_norm": 0.7784520983695984,
-      "learning_rate": 0.00019999228948166064,
-      "loss": 2.7151,
-      "step": 77
-    },
-    {
-      "epoch": 0.019479303240307173,
-      "grad_norm": 0.8097946047782898,
-      "learning_rate": 0.00019998992916292463,
-      "loss": 2.4256,
-      "step": 78
-    },
-    {
-      "epoch": 0.019729037897234188,
-      "grad_norm": 0.8229548931121826,
-      "learning_rate": 0.00019998725415996875,
-      "loss": 2.482,
-      "step": 79
-    },
-    {
-      "epoch": 0.019978772554161205,
-      "grad_norm": 0.8396543860435486,
-      "learning_rate": 0.00019998426448214718,
-      "loss": 2.5676,
-      "step": 80
-    },
-    {
-      "epoch": 0.02022850721108822,
-      "grad_norm": 0.84672611951828,
-      "learning_rate": 0.0001999809601399145,
-      "loss": 2.4699,
-      "step": 81
-    },
-    {
-      "epoch": 0.020478241868015234,
-      "grad_norm": 0.8323167562484741,
-      "learning_rate": 0.0001999773411448256,
-      "loss": 2.6894,
-      "step": 82
-    },
-    {
-      "epoch": 0.020727976524942248,
-      "grad_norm": 0.9671497344970703,
-      "learning_rate": 0.00019997340750953566,
-      "loss": 2.6653,
-      "step": 83
-    },
-    {
-      "epoch": 0.020977711181869262,
-      "grad_norm": 1.0757114887237549,
-      "learning_rate": 0.00019996915924780015,
-      "loss": 2.6165,
-      "step": 84
-    },
-    {
-      "epoch": 0.02122744583879628,
-      "grad_norm": 0.8748140335083008,
-      "learning_rate": 0.00019996459637447477,
-      "loss": 2.582,
-      "step": 85
-    },
-    {
-      "epoch": 0.021477180495723294,
-      "grad_norm": 0.8593396544456482,
-      "learning_rate": 0.0001999597189055153,
-      "loss": 2.4936,
-      "step": 86
-    },
-    {
-      "epoch": 0.02172691515265031,
-      "grad_norm": 0.933760941028595,
-      "learning_rate": 0.00019995452685797773,
-      "loss": 2.8435,
-      "step": 87
-    },
-    {
-      "epoch": 0.021976649809577323,
-      "grad_norm": 0.8785303235054016,
-      "learning_rate": 0.00019994902025001802,
-      "loss": 2.4451,
-      "step": 88
-    },
-    {
-      "epoch": 0.02222638446650434,
-      "grad_norm": 0.8894078731536865,
-      "learning_rate": 0.0001999431991008921,
-      "loss": 2.5532,
-      "step": 89
-    },
-    {
-      "epoch": 0.022476119123431355,
-      "grad_norm": 0.9705137610435486,
-      "learning_rate": 0.00019993706343095588,
-      "loss": 2.7539,
-      "step": 90
-    },
-    {
-      "epoch": 0.02272585378035837,
-      "grad_norm": 1.0564568042755127,
-      "learning_rate": 0.0001999306132616651,
-      "loss": 2.7694,
-      "step": 91
-    },
-    {
-      "epoch": 0.022975588437285383,
-      "grad_norm": 1.0100560188293457,
-      "learning_rate": 0.00019992384861557515,
-      "loss": 2.6056,
-      "step": 92
-    },
-    {
-      "epoch": 0.0232253230942124,
-      "grad_norm": 1.0753816366195679,
-      "learning_rate": 0.00019991676951634132,
-      "loss": 2.4921,
-      "step": 93
-    },
-    {
-      "epoch": 0.023475057751139415,
-      "grad_norm": 0.9264618158340454,
-      "learning_rate": 0.00019990937598871834,
-      "loss": 2.6055,
-      "step": 94
-    },
-    {
-      "epoch": 0.02372479240806643,
-      "grad_norm": 1.0126386880874634,
-      "learning_rate": 0.00019990166805856048,
-      "loss": 2.4358,
-      "step": 95
-    },
-    {
-      "epoch": 0.023974527064993444,
-      "grad_norm": 0.9737821817398071,
-      "learning_rate": 0.0001998936457528215,
-      "loss": 2.7028,
-      "step": 96
-    },
-    {
-      "epoch": 0.024224261721920458,
-      "grad_norm": 1.0489351749420166,
-      "learning_rate": 0.00019988530909955448,
-      "loss": 2.6114,
-      "step": 97
-    },
-    {
-      "epoch": 0.024473996378847476,
-      "grad_norm": 1.1334600448608398,
-      "learning_rate": 0.00019987665812791166,
-      "loss": 2.8804,
-      "step": 98
-    },
-    {
-      "epoch": 0.02472373103577449,
-      "grad_norm": 1.2668412923812866,
-      "learning_rate": 0.0001998676928681445,
-      "loss": 2.9214,
-      "step": 99
-    },
-    {
-      "epoch": 0.024973465692701504,
-      "grad_norm": 2.203848361968994,
-      "learning_rate": 0.0001998584133516035,
-      "loss": 2.9326,
-      "step": 100
-    },
-    {
-      "epoch": 0.024973465692701504,
-      "eval_loss": 2.6431591510772705,
-      "eval_runtime": 2.6774,
-      "eval_samples_per_second": 18.675,
-      "eval_steps_per_second": 2.988,
-      "step": 100
-    },
-    {
-      "epoch": 0.02522320034962852,
-      "grad_norm": 0.9147284030914307,
-      "learning_rate": 0.00019984881961073798,
-      "loss": 2.6948,
-      "step": 101
-    },
-    {
-      "epoch": 0.025472935006555536,
-      "grad_norm": 0.8852336406707764,
-      "learning_rate": 0.00019983891167909616,
-      "loss": 2.5529,
-      "step": 102
-    },
-    {
-      "epoch": 0.02572266966348255,
-      "grad_norm": 0.7818748354911804,
-      "learning_rate": 0.00019982868959132492,
-      "loss": 2.7862,
-      "step": 103
-    },
-    {
-      "epoch": 0.025972404320409564,
-      "grad_norm": 0.6491966247558594,
-      "learning_rate": 0.00019981815338316968,
-      "loss": 2.6352,
-      "step": 104
-    },
-    {
-      "epoch": 0.02622213897733658,
-      "grad_norm": 0.6427002549171448,
-      "learning_rate": 0.00019980730309147434,
-      "loss": 2.7408,
-      "step": 105
-    },
-    {
-      "epoch": 0.026471873634263596,
-      "grad_norm": 0.6891926527023315,
-      "learning_rate": 0.00019979613875418107,
-      "loss": 2.6471,
-      "step": 106
-    },
-    {
-      "epoch": 0.02672160829119061,
-      "grad_norm": 0.8082403540611267,
-      "learning_rate": 0.00019978466041033026,
-      "loss": 2.5825,
-      "step": 107
-    },
-    {
-      "epoch": 0.026971342948117625,
-      "grad_norm": 0.7740591764450073,
-      "learning_rate": 0.00019977286810006034,
-      "loss": 2.9007,
-      "step": 108
-    },
-    {
-      "epoch": 0.02722107760504464,
-      "grad_norm": 0.7396791577339172,
-      "learning_rate": 0.00019976076186460764,
-      "loss": 2.6945,
-      "step": 109
-    },
-    {
-      "epoch": 0.027470812261971653,
-      "grad_norm": 0.6720578670501709,
-      "learning_rate": 0.00019974834174630622,
-      "loss": 2.7139,
-      "step": 110
-    },
-    {
-      "epoch": 0.02772054691889867,
-      "grad_norm": 0.8105450868606567,
-      "learning_rate": 0.0001997356077885878,
-      "loss": 2.7112,
-      "step": 111
-    },
-    {
-      "epoch": 0.027970281575825685,
-      "grad_norm": 0.7144357562065125,
-      "learning_rate": 0.00019972256003598153,
-      "loss": 2.3851,
-      "step": 112
-    },
-    {
-      "epoch": 0.0282200162327527,
-      "grad_norm": 0.7595510482788086,
-      "learning_rate": 0.00019970919853411385,
-      "loss": 2.6396,
-      "step": 113
-    },
-    {
-      "epoch": 0.028469750889679714,
-      "grad_norm": 0.7179970145225525,
-      "learning_rate": 0.0001996955233297084,
-      "loss": 2.3639,
-      "step": 114
-    },
-    {
-      "epoch": 0.02871948554660673,
-      "grad_norm": 0.7306939959526062,
-      "learning_rate": 0.00019968153447058576,
-      "loss": 2.5145,
-      "step": 115
-    },
-    {
-      "epoch": 0.028969220203533746,
-      "grad_norm": 0.7519063353538513,
-      "learning_rate": 0.00019966723200566327,
-      "loss": 2.4479,
-      "step": 116
-    },
-    {
-      "epoch": 0.02921895486046076,
-      "grad_norm": 0.8461882472038269,
-      "learning_rate": 0.00019965261598495502,
-      "loss": 2.5828,
-      "step": 117
-    },
-    {
-      "epoch": 0.029468689517387774,
-      "grad_norm": 0.7643318772315979,
-      "learning_rate": 0.00019963768645957152,
-      "loss": 2.5821,
-      "step": 118
-    },
-    {
-      "epoch": 0.029718424174314792,
-      "grad_norm": 0.7931061387062073,
-      "learning_rate": 0.00019962244348171958,
-      "loss": 2.5467,
-      "step": 119
-    },
-    {
-      "epoch": 0.029968158831241806,
-      "grad_norm": 0.7353018522262573,
-      "learning_rate": 0.00019960688710470205,
-      "loss": 2.5064,
-      "step": 120
-    },
-    {
-      "epoch": 0.03021789348816882,
-      "grad_norm": 0.7961385846138,
-      "learning_rate": 0.0001995910173829178,
-      "loss": 2.2525,
-      "step": 121
-    },
-    {
-      "epoch": 0.030467628145095835,
-      "grad_norm": 0.8227445483207703,
-      "learning_rate": 0.00019957483437186137,
-      "loss": 2.3514,
-      "step": 122
-    },
-    {
-      "epoch": 0.030717362802022852,
-      "grad_norm": 0.8792641758918762,
-      "learning_rate": 0.0001995583381281229,
-      "loss": 2.6704,
-      "step": 123
-    },
-    {
-      "epoch": 0.030967097458949867,
-      "grad_norm": 0.8991889357566833,
-      "learning_rate": 0.0001995415287093877,
-      "loss": 2.7915,
-      "step": 124
-    },
-    {
-      "epoch": 0.03121683211587688,
-      "grad_norm": 0.8456166982650757,
-      "learning_rate": 0.00019952440617443647,
-      "loss": 2.5482,
-      "step": 125
-    },
-    {
-      "epoch": 0.03121683211587688,
-      "eval_loss": 2.5769076347351074,
-      "eval_runtime": 2.6825,
-      "eval_samples_per_second": 18.639,
-      "eval_steps_per_second": 2.982,
-      "step": 125
-    },
-    {
-      "epoch": 0.0314665667728039,
-      "grad_norm": 0.9480443000793457,
-      "learning_rate": 0.0001995069705831446,
-      "loss": 2.6215,
-      "step": 126
-    },
-    {
-      "epoch": 0.03171630142973091,
-      "grad_norm": 0.8263009786605835,
-      "learning_rate": 0.00019948922199648235,
-      "loss": 2.5919,
-      "step": 127
-    },
-    {
-      "epoch": 0.03196603608665793,
-      "grad_norm": 0.7829933166503906,
-      "learning_rate": 0.00019947116047651448,
-      "loss": 2.4247,
-      "step": 128
-    },
-    {
-      "epoch": 0.03221577074358494,
-      "grad_norm": 0.7913976311683655,
-      "learning_rate": 0.00019945278608639994,
-      "loss": 2.5516,
-      "step": 129
-    },
-    {
-      "epoch": 0.032465505400511956,
-      "grad_norm": 0.8564252257347107,
-      "learning_rate": 0.00019943409889039188,
-      "loss": 2.3555,
-      "step": 130
-    },
-    {
-      "epoch": 0.03271524005743897,
-      "grad_norm": 0.809696614742279,
-      "learning_rate": 0.0001994150989538371,
-      "loss": 2.4586,
-      "step": 131
-    },
-    {
-      "epoch": 0.032964974714365984,
-      "grad_norm": 0.8882852792739868,
-      "learning_rate": 0.0001993957863431763,
-      "loss": 2.459,
-      "step": 132
-    },
-    {
-      "epoch": 0.033214709371293,
-      "grad_norm": 0.9187607765197754,
-      "learning_rate": 0.0001993761611259434,
-      "loss": 2.6238,
-      "step": 133
-    },
-    {
-      "epoch": 0.03346444402822002,
-      "grad_norm": 0.8577990531921387,
-      "learning_rate": 0.00019935622337076536,
-      "loss": 2.5431,
-      "step": 134
-    },
-    {
-      "epoch": 0.033714178685147034,
-      "grad_norm": 0.8696761727333069,
-      "learning_rate": 0.00019933597314736228,
-      "loss": 2.5734,
-      "step": 135
-    },
-    {
-      "epoch": 0.03396391334207405,
-      "grad_norm": 0.828972578048706,
-      "learning_rate": 0.0001993154105265468,
-      "loss": 2.6162,
-      "step": 136
-    },
-    {
-      "epoch": 0.03421364799900106,
-      "grad_norm": 0.9171616435050964,
-      "learning_rate": 0.00019929453558022392,
-      "loss": 2.4986,
-      "step": 137
-    },
-    {
-      "epoch": 0.034463382655928076,
-      "grad_norm": 0.8592952489852905,
-      "learning_rate": 0.00019927334838139098,
-      "loss": 2.4374,
-      "step": 138
-    },
-    {
-      "epoch": 0.03471311731285509,
-      "grad_norm": 0.969752311706543,
-      "learning_rate": 0.00019925184900413705,
-      "loss": 2.3063,
-      "step": 139
-    },
-    {
-      "epoch": 0.034962851969782105,
-      "grad_norm": 0.863645076751709,
-      "learning_rate": 0.00019923003752364297,
-      "loss": 2.3734,
-      "step": 140
-    },
-    {
-      "epoch": 0.03521258662670912,
-      "grad_norm": 1.038589596748352,
-      "learning_rate": 0.00019920791401618088,
-      "loss": 2.6056,
-      "step": 141
-    },
-    {
-      "epoch": 0.03546232128363613,
-      "grad_norm": 1.0344328880310059,
-      "learning_rate": 0.00019918547855911413,
-      "loss": 2.5048,
-      "step": 142
-    },
-    {
-      "epoch": 0.035712055940563155,
-      "grad_norm": 0.883296012878418,
-      "learning_rate": 0.0001991627312308969,
-      "loss": 2.4579,
-      "step": 143
-    },
-    {
-      "epoch": 0.03596179059749017,
-      "grad_norm": 0.9329782724380493,
-      "learning_rate": 0.00019913967211107386,
-      "loss": 2.4917,
-      "step": 144
-    },
-    {
-      "epoch": 0.03621152525441718,
-      "grad_norm": 0.9287551641464233,
-      "learning_rate": 0.00019911630128028012,
-      "loss": 2.648,
-      "step": 145
-    },
-    {
-      "epoch": 0.0364612599113442,
-      "grad_norm": 0.9368601441383362,
-      "learning_rate": 0.00019909261882024065,
-      "loss": 2.4704,
-      "step": 146
-    },
-    {
-      "epoch": 0.03671099456827121,
-      "grad_norm": 0.9918820261955261,
-      "learning_rate": 0.00019906862481377033,
-      "loss": 2.3632,
-      "step": 147
-    },
-    {
-      "epoch": 0.036960729225198226,
-      "grad_norm": 1.0322192907333374,
-      "learning_rate": 0.0001990443193447733,
-      "loss": 2.5039,
-      "step": 148
-    },
-    {
-      "epoch": 0.03721046388212524,
-      "grad_norm": 1.307169795036316,
-      "learning_rate": 0.000199019702498243,
-      "loss": 2.483,
-      "step": 149
-    },
-    {
-      "epoch": 0.037460198539052254,
-      "grad_norm": 2.3513553142547607,
-      "learning_rate": 0.00019899477436026157,
-      "loss": 2.3276,
-      "step": 150
-    },
-    {
-      "epoch": 0.037460198539052254,
-      "eval_loss": 2.570774555206299,
-      "eval_runtime": 2.6997,
-      "eval_samples_per_second": 18.521,
-      "eval_steps_per_second": 2.963,
-      "step": 150
     }
   ],
   "logging_steps": 1,
-  "max_steps": 1750,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 50,
@@ -1141,8 +409,8 @@
       "attributes": {}
     }
   },
-  "total_flos": 5.089011062695526e+16,
-  "train_batch_size": 7,
   "trial_name": null,
   "trial_params": null
 }

 {
+  "best_metric": 2.556696653366089,
+  "best_model_checkpoint": "miner_id_24/checkpoint-50",
+  "epoch": 0.014271060964188556,
   "eval_steps": 25,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
+      "epoch": 0.0002854212192837711,
+      "grad_norm": 0.9852302670478821,
+      "learning_rate": 5.7142857142857145e-06,
+      "loss": 3.0904,
       "step": 1
     },
     {
+      "epoch": 0.0002854212192837711,
+      "eval_loss": 3.2887184619903564,
+      "eval_runtime": 6.5686,
+      "eval_samples_per_second": 7.612,
+      "eval_steps_per_second": 7.612,
       "step": 1
     },
     {
+      "epoch": 0.0005708424385675422,
+      "grad_norm": 0.8380288481712341,
+      "learning_rate": 1.1428571428571429e-05,
+      "loss": 3.1387,
       "step": 2
     },
     {
+      "epoch": 0.0008562636578513134,
+      "grad_norm": 0.8498082160949707,
+      "learning_rate": 1.7142857142857145e-05,
+      "loss": 3.1155,
       "step": 3
     },
     {
+      "epoch": 0.0011416848771350844,
+      "grad_norm": 0.932189404964447,
+      "learning_rate": 2.2857142857142858e-05,
+      "loss": 3.1839,
       "step": 4
     },
     {
+      "epoch": 0.0014271060964188556,
+      "grad_norm": 0.8401004076004028,
+      "learning_rate": 2.857142857142857e-05,
+      "loss": 3.149,
       "step": 5
     },
     {
+      "epoch": 0.0017125273157026267,
+      "grad_norm": 0.8718746900558472,
+      "learning_rate": 3.428571428571429e-05,
+      "loss": 3.1855,
       "step": 6
     },
     {
+      "epoch": 0.001997948534986398,
+      "grad_norm": 0.8892223238945007,
+      "learning_rate": 4e-05,
+      "loss": 3.0531,
       "step": 7
     },
     {
+      "epoch": 0.002283369754270169,
+      "grad_norm": 0.8379232287406921,
+      "learning_rate": 4.5714285714285716e-05,
+      "loss": 3.042,
       "step": 8
     },
     {
+      "epoch": 0.00256879097355394,
+      "grad_norm": 0.8372194170951843,
+      "learning_rate": 5.142857142857143e-05,
+      "loss": 2.9784,
       "step": 9
     },
     {
+      "epoch": 0.002854212192837711,
+      "grad_norm": 0.7960942983627319,
+      "learning_rate": 5.714285714285714e-05,
+      "loss": 3.1879,
       "step": 10
     },
     {
+      "epoch": 0.0031396334121214825,
+      "grad_norm": 0.8907091021537781,
+      "learning_rate": 6.285714285714286e-05,
+      "loss": 3.0808,
       "step": 11
     },
     {
+      "epoch": 0.0034250546314052535,
+      "grad_norm": 0.6043815016746521,
+      "learning_rate": 6.857142857142858e-05,
+      "loss": 2.9428,
       "step": 12
     },
     {
+      "epoch": 0.003710475850689025,
+      "grad_norm": 0.564944863319397,
+      "learning_rate": 7.428571428571429e-05,
+      "loss": 3.0157,
       "step": 13
     },
     {
+      "epoch": 0.003995897069972796,
+      "grad_norm": 0.5410389304161072,
+      "learning_rate": 8e-05,
+      "loss": 2.9157,
       "step": 14
     },
     {
+      "epoch": 0.004281318289256567,
+      "grad_norm": 0.5174809098243713,
+      "learning_rate": 8.571428571428571e-05,
+      "loss": 2.7908,
       "step": 15
     },
     {
+      "epoch": 0.004566739508540338,
+      "grad_norm": 0.5930260419845581,
+      "learning_rate": 9.142857142857143e-05,
+      "loss": 2.8472,
       "step": 16
     },
     {
+      "epoch": 0.0048521607278241095,
+      "grad_norm": 0.6121829152107239,
+      "learning_rate": 9.714285714285715e-05,
+      "loss": 2.8535,
       "step": 17
     },
     {
+      "epoch": 0.00513758194710788,
+      "grad_norm": 0.5915908813476562,
+      "learning_rate": 0.00010285714285714286,
+      "loss": 2.6747,
       "step": 18
     },
     {
+      "epoch": 0.005423003166391651,
+      "grad_norm": 0.5632700324058533,
+      "learning_rate": 0.00010857142857142856,
+      "loss": 2.6425,
       "step": 19
     },
     {
+      "epoch": 0.005708424385675422,
+      "grad_norm": 0.5865013599395752,
+      "learning_rate": 0.00011428571428571428,
+      "loss": 2.8892,
       "step": 20
     },
     {
+      "epoch": 0.005993845604959194,
+      "grad_norm": 0.6056029796600342,
+      "learning_rate": 0.00012,
+      "loss": 2.9813,
       "step": 21
     },
     {
+      "epoch": 0.006279266824242965,
+      "grad_norm": 0.5435022711753845,
+      "learning_rate": 0.00012571428571428572,
+      "loss": 2.6502,
       "step": 22
     },
     {
+      "epoch": 0.006564688043526736,
+      "grad_norm": 0.5630282163619995,
+      "learning_rate": 0.00013142857142857143,
+      "loss": 2.6238,
       "step": 23
     },
     {
+      "epoch": 0.006850109262810507,
+      "grad_norm": 0.6111990213394165,
+      "learning_rate": 0.00013714285714285716,
+      "loss": 2.7437,
       "step": 24
     },
     {
+      "epoch": 0.007135530482094278,
+      "grad_norm": 0.5657336115837097,
+      "learning_rate": 0.00014285714285714287,
+      "loss": 2.6898,
       "step": 25
     },
     {
+      "epoch": 0.007135530482094278,
+      "eval_loss": 2.7006003856658936,
+      "eval_runtime": 6.6882,
+      "eval_samples_per_second": 7.476,
+      "eval_steps_per_second": 7.476,
       "step": 25
     },
     {
+      "epoch": 0.00742095170137805,
+      "grad_norm": 0.5724318623542786,
+      "learning_rate": 0.00014857142857142857,
+      "loss": 2.6936,
       "step": 26
     },
     {
+      "epoch": 0.007706372920661821,
+      "grad_norm": 0.47816792130470276,
+      "learning_rate": 0.0001542857142857143,
+      "loss": 2.6312,
       "step": 27
     },
     {
+      "epoch": 0.007991794139945592,
+      "grad_norm": 0.5189191102981567,
+      "learning_rate": 0.00016,
+      "loss": 2.8591,
       "step": 28
     },
     {
+      "epoch": 0.008277215359229362,
+      "grad_norm": 0.5170201063156128,
+      "learning_rate": 0.00016571428571428575,
+      "loss": 2.5753,
       "step": 29
     },
     {
+      "epoch": 0.008562636578513133,
+      "grad_norm": 0.4780126214027405,
+      "learning_rate": 0.00017142857142857143,
+      "loss": 2.5154,
       "step": 30
     },
     {
+      "epoch": 0.008848057797796904,
+      "grad_norm": 0.46953848004341125,
+      "learning_rate": 0.00017714285714285713,
+      "loss": 2.5638,
       "step": 31
     },
     {
+      "epoch": 0.009133479017080675,
+      "grad_norm": 0.5048589706420898,
+      "learning_rate": 0.00018285714285714286,
+      "loss": 2.6244,
       "step": 32
     },
     {
+      "epoch": 0.009418900236364448,
+      "grad_norm": 0.5477012395858765,
+      "learning_rate": 0.00018857142857142857,
+      "loss": 2.7039,
       "step": 33
     },
     {
+      "epoch": 0.009704321455648219,
+      "grad_norm": 0.510737955570221,
+      "learning_rate": 0.0001942857142857143,
+      "loss": 2.6319,
       "step": 34
     },
     {
+      "epoch": 0.00998974267493199,
+      "grad_norm": 0.5701810121536255,
+      "learning_rate": 0.0002,
+      "loss": 2.6441,
       "step": 35
     },
     {
+      "epoch": 0.01027516389421576,
+      "grad_norm": 0.49409958720207214,
+      "learning_rate": 0.00019999938960115114,
+      "loss": 2.5378,
       "step": 36
     },
     {
+      "epoch": 0.010560585113499532,
+      "grad_norm": 0.5363864302635193,
+      "learning_rate": 0.0001999975584128843,
+      "loss": 2.4543,
       "step": 37
     },
     {
+      "epoch": 0.010846006332783303,
+      "grad_norm": 0.511552095413208,
+      "learning_rate": 0.00019999450646003843,
+      "loss": 2.6913,
       "step": 38
     },
     {
+      "epoch": 0.011131427552067074,
+      "grad_norm": 0.568918764591217,
+      "learning_rate": 0.0001999902337840116,
+      "loss": 2.5901,
       "step": 39
     },
     {
+      "epoch": 0.011416848771350845,
+      "grad_norm": 0.5946168899536133,
+      "learning_rate": 0.00019998474044276,
+      "loss": 2.6054,
       "step": 40
     },
     {
+      "epoch": 0.011702269990634616,
+      "grad_norm": 0.6025540232658386,
+      "learning_rate": 0.00019997802651079778,
+      "loss": 2.7296,
       "step": 41
     },
     {
+      "epoch": 0.011987691209918388,
+      "grad_norm": 0.589484453201294,
+      "learning_rate": 0.00019997009207919545,
+      "loss": 2.6412,
       "step": 42
     },
     {
+      "epoch": 0.01227311242920216,
+      "grad_norm": 0.6304107308387756,
+      "learning_rate": 0.00019996093725557898,
+      "loss": 2.6191,
       "step": 43
     },
     {
+      "epoch": 0.01255853364848593,
+      "grad_norm": 0.6326958537101746,
+      "learning_rate": 0.00019995056216412824,
+      "loss": 2.5749,
       "step": 44
     },
     {
+      "epoch": 0.012843954867769701,
+      "grad_norm": 0.6734350919723511,
+      "learning_rate": 0.0001999389669455753,
+      "loss": 2.7463,
       "step": 45
     },
     {
+      "epoch": 0.013129376087053472,
+      "grad_norm": 0.6757857203483582,
+      "learning_rate": 0.00019992615175720257,
+      "loss": 2.4429,
       "step": 46
     },
     {
+      "epoch": 0.013414797306337243,
+      "grad_norm": 0.687453031539917,
+      "learning_rate": 0.00019991211677284062,
+      "loss": 2.595,
       "step": 47
     },
     {
+      "epoch": 0.013700218525621014,
+      "grad_norm": 0.7763611078262329,
+      "learning_rate": 0.00019989686218286587,
+      "loss": 2.7578,
       "step": 48
     },
     {
+      "epoch": 0.013985639744904785,
+      "grad_norm": 0.9888719916343689,
+      "learning_rate": 0.00019988038819419797,
+      "loss": 2.5648,
       "step": 49
     },
     {
+      "epoch": 0.014271060964188556,
+      "grad_norm": 1.6425894498825073,
+      "learning_rate": 0.00019986269503029697,
+      "loss": 2.5132,
       "step": 50
     },
     {
+      "epoch": 0.014271060964188556,
+      "eval_loss": 2.556696653366089,
+      "eval_runtime": 6.6833,
+      "eval_samples_per_second": 7.481,
+      "eval_steps_per_second": 7.481,
       "step": 50
     }
   ],
   "logging_steps": 1,
+  "max_steps": 888,
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 50,
       "attributes": {}
     }
   },
+  "total_flos": 5.593179412129382e+16,
+  "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null
 }

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:149f14fb729cab896091dbb95460bb98759373752ba272a6ae1f5cde47ccec9e
 size 6776

 version https://git-lfs.github.com/spec/v1
+oid sha256:2c10884f4be4e91add9795afd80f7b25e35f322adac12d6b37b6d44aa702d50c
 size 6776