Training in progress, step 166, checkpoint

Browse files

Files changed (6) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +592 -3

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:183bec216f162855d8196fd7cf94fded4640ff06d48effb208c6796a7c31450f
 size 216151256

 version https://git-lfs.github.com/spec/v1
+oid sha256:f9e7aff42b36fe14e95ece06193160112474b8a29fc3680ce273c922ca5686f6
 size 216151256

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:37f0da2f48deae95a8eac8e2ea0c5f79ebb6089ae60fae8a45e2a45959193cab
 size 432640054

 version https://git-lfs.github.com/spec/v1
+oid sha256:2e21f976a284dd81c64396ec6b6206079943029f7c09ac486e503562b06e47e6
 size 432640054

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:da219fe1bf032ad9359b76003d71096a223611f94927e798bf577253282a8180
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:bb3607b5839cda7054779e8f957cbf2db3456879873cc4e34eac04cbf33f5db8
 size 14512

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:23f39055b0ed21e2804d587dafb8a5710bb91a89aeb68f9ee9a9bdecb4f6c223
 size 14512

 version https://git-lfs.github.com/spec/v1
+oid sha256:fd635f6ad590a43a7a075b3fb4377adaa95cf2d835f115014607cf181d2b6449
 size 14512

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f2300eb0a85a826f84a38ebe148c80a476986dcda0381a18dc5644264e1ec5bb
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e3070c5337425657c2fec031251a5e4e8042c43dd7a5d4d7f77fa453b02282be
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.25009416195856876,
   "eval_steps": 83,
-  "global_step": 83,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -604,6 +604,595 @@
       "eval_samples_per_second": 6.015,
       "eval_steps_per_second": 1.506,
       "step": 83
     }
   ],
   "logging_steps": 1,
@@ -623,7 +1212,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.73439358382506e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null

 {
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.5001883239171375,
   "eval_steps": 83,
+  "global_step": 166,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 6.015,
       "eval_steps_per_second": 1.506,
       "step": 83
+    },
+    {
+      "epoch": 0.25310734463276835,
+      "grad_norm": 1.4990551471710205,
+      "learning_rate": 9.643097751841854e-05,
+      "loss": 0.5189,
+      "step": 84
+    },
+    {
+      "epoch": 0.256120527306968,
+      "grad_norm": 1.4080356359481812,
+      "learning_rate": 9.622067162048112e-05,
+      "loss": 0.5112,
+      "step": 85
+    },
+    {
+      "epoch": 0.2591337099811676,
+      "grad_norm": 1.3661057949066162,
+      "learning_rate": 9.600458850350588e-05,
+      "loss": 0.4688,
+      "step": 86
+    },
+    {
+      "epoch": 0.2621468926553672,
+      "grad_norm": 1.570552945137024,
+      "learning_rate": 9.578275517617645e-05,
+      "loss": 0.5058,
+      "step": 87
+    },
+    {
+      "epoch": 0.26516007532956687,
+      "grad_norm": 1.6037708520889282,
+      "learning_rate": 9.555519936590738e-05,
+      "loss": 0.5201,
+      "step": 88
+    },
+    {
+      "epoch": 0.26817325800376646,
+      "grad_norm": 1.5268930196762085,
+      "learning_rate": 9.532194951537838e-05,
+      "loss": 0.4661,
+      "step": 89
+    },
+    {
+      "epoch": 0.2711864406779661,
+      "grad_norm": 1.7837523221969604,
+      "learning_rate": 9.508303477897924e-05,
+      "loss": 0.5005,
+      "step": 90
+    },
+    {
+      "epoch": 0.27419962335216574,
+      "grad_norm": 1.3590326309204102,
+      "learning_rate": 9.483848501916578e-05,
+      "loss": 0.3866,
+      "step": 91
+    },
+    {
+      "epoch": 0.27721280602636533,
+      "grad_norm": 1.5031671524047852,
+      "learning_rate": 9.458833080272722e-05,
+      "loss": 0.3559,
+      "step": 92
+    },
+    {
+      "epoch": 0.280225988700565,
+      "grad_norm": 1.2212880849838257,
+      "learning_rate": 9.433260339696563e-05,
+      "loss": 0.3586,
+      "step": 93
+    },
+    {
+      "epoch": 0.2832391713747646,
+      "grad_norm": 1.8385019302368164,
+      "learning_rate": 9.407133476578778e-05,
+      "loss": 0.4775,
+      "step": 94
+    },
+    {
+      "epoch": 0.2862523540489642,
+      "grad_norm": 2.6899161338806152,
+      "learning_rate": 9.38045575657098e-05,
+      "loss": 0.6809,
+      "step": 95
+    },
+    {
+      "epoch": 0.28926553672316385,
+      "grad_norm": 3.9981398582458496,
+      "learning_rate": 9.353230514177552e-05,
+      "loss": 0.8967,
+      "step": 96
+    },
+    {
+      "epoch": 0.29227871939736344,
+      "grad_norm": 3.7616143226623535,
+      "learning_rate": 9.325461152338846e-05,
+      "loss": 0.9173,
+      "step": 97
+    },
+    {
+      "epoch": 0.2952919020715631,
+      "grad_norm": 3.3938989639282227,
+      "learning_rate": 9.297151142005851e-05,
+      "loss": 0.7849,
+      "step": 98
+    },
+    {
+      "epoch": 0.2983050847457627,
+      "grad_norm": 3.3373446464538574,
+      "learning_rate": 9.268304021706349e-05,
+      "loss": 0.6619,
+      "step": 99
+    },
+    {
+      "epoch": 0.3013182674199623,
+      "grad_norm": 4.476459503173828,
+      "learning_rate": 9.23892339710263e-05,
+      "loss": 0.7758,
+      "step": 100
+    },
+    {
+      "epoch": 0.30433145009416196,
+      "grad_norm": 2.75358510017395,
+      "learning_rate": 9.209012940540805e-05,
+      "loss": 0.7565,
+      "step": 101
+    },
+    {
+      "epoch": 0.3073446327683616,
+      "grad_norm": 2.192662000656128,
+      "learning_rate": 9.178576390591802e-05,
+      "loss": 0.6634,
+      "step": 102
+    },
+    {
+      "epoch": 0.3103578154425612,
+      "grad_norm": 2.3334836959838867,
+      "learning_rate": 9.147617551584066e-05,
+      "loss": 0.6961,
+      "step": 103
+    },
+    {
+      "epoch": 0.31337099811676083,
+      "grad_norm": 1.9057625532150269,
+      "learning_rate": 9.116140293128051e-05,
+      "loss": 0.5762,
+      "step": 104
+    },
+    {
+      "epoch": 0.3163841807909605,
+      "grad_norm": 1.5543274879455566,
+      "learning_rate": 9.084148549632547e-05,
+      "loss": 0.5249,
+      "step": 105
+    },
+    {
+      "epoch": 0.31939736346516007,
+      "grad_norm": 1.3116902112960815,
+      "learning_rate": 9.051646319812918e-05,
+      "loss": 0.4895,
+      "step": 106
+    },
+    {
+      "epoch": 0.3224105461393597,
+      "grad_norm": 1.6137094497680664,
+      "learning_rate": 9.018637666191283e-05,
+      "loss": 0.5036,
+      "step": 107
+    },
+    {
+      "epoch": 0.3254237288135593,
+      "grad_norm": 1.4955766201019287,
+      "learning_rate": 8.985126714588738e-05,
+      "loss": 0.4571,
+      "step": 108
+    },
+    {
+      "epoch": 0.32843691148775894,
+      "grad_norm": 1.5371748208999634,
+      "learning_rate": 8.951117653609666e-05,
+      "loss": 0.4958,
+      "step": 109
+    },
+    {
+      "epoch": 0.3314500941619586,
+      "grad_norm": 1.2266839742660522,
+      "learning_rate": 8.916614734118184e-05,
+      "loss": 0.4171,
+      "step": 110
+    },
+    {
+      "epoch": 0.3344632768361582,
+      "grad_norm": 1.21657133102417,
+      "learning_rate": 8.881622268706825e-05,
+      "loss": 0.421,
+      "step": 111
+    },
+    {
+      "epoch": 0.3374764595103578,
+      "grad_norm": 1.2184901237487793,
+      "learning_rate": 8.8461446311575e-05,
+      "loss": 0.4307,
+      "step": 112
+    },
+    {
+      "epoch": 0.34048964218455746,
+      "grad_norm": 1.5124021768569946,
+      "learning_rate": 8.810186255894803e-05,
+      "loss": 0.4865,
+      "step": 113
+    },
+    {
+      "epoch": 0.34350282485875705,
+      "grad_norm": 1.078994870185852,
+      "learning_rate": 8.773751637431748e-05,
+      "loss": 0.3592,
+      "step": 114
+    },
+    {
+      "epoch": 0.3465160075329567,
+      "grad_norm": 1.2173560857772827,
+      "learning_rate": 8.736845329807993e-05,
+      "loss": 0.3757,
+      "step": 115
+    },
+    {
+      "epoch": 0.3495291902071563,
+      "grad_norm": 1.4223103523254395,
+      "learning_rate": 8.69947194602061e-05,
+      "loss": 0.4002,
+      "step": 116
+    },
+    {
+      "epoch": 0.3525423728813559,
+      "grad_norm": 1.2369580268859863,
+      "learning_rate": 8.66163615744751e-05,
+      "loss": 0.3891,
+      "step": 117
+    },
+    {
+      "epoch": 0.35555555555555557,
+      "grad_norm": 1.2306034564971924,
+      "learning_rate": 8.623342693263548e-05,
+      "loss": 0.3176,
+      "step": 118
+    },
+    {
+      "epoch": 0.35856873822975516,
+      "grad_norm": 1.20809805393219,
+      "learning_rate": 8.584596339849417e-05,
+      "loss": 0.3715,
+      "step": 119
+    },
+    {
+      "epoch": 0.3615819209039548,
+      "grad_norm": 1.59524405002594,
+      "learning_rate": 8.545401940193392e-05,
+      "loss": 0.4539,
+      "step": 120
+    },
+    {
+      "epoch": 0.36459510357815444,
+      "grad_norm": 2.4288361072540283,
+      "learning_rate": 8.505764393285984e-05,
+      "loss": 0.7094,
+      "step": 121
+    },
+    {
+      "epoch": 0.36760828625235403,
+      "grad_norm": 2.587125778198242,
+      "learning_rate": 8.46568865350762e-05,
+      "loss": 0.7052,
+      "step": 122
+    },
+    {
+      "epoch": 0.3706214689265537,
+      "grad_norm": 3.610764980316162,
+      "learning_rate": 8.425179730009368e-05,
+      "loss": 0.6835,
+      "step": 123
+    },
+    {
+      "epoch": 0.3736346516007533,
+      "grad_norm": 2.254451274871826,
+      "learning_rate": 8.384242686086848e-05,
+      "loss": 0.5733,
+      "step": 124
+    },
+    {
+      "epoch": 0.3766478342749529,
+      "grad_norm": 3.2182092666625977,
+      "learning_rate": 8.342882638547351e-05,
+      "loss": 0.7416,
+      "step": 125
+    },
+    {
+      "epoch": 0.37966101694915255,
+      "grad_norm": 2.0895962715148926,
+      "learning_rate": 8.301104757070274e-05,
+      "loss": 0.611,
+      "step": 126
+    },
+    {
+      "epoch": 0.38267419962335214,
+      "grad_norm": 1.9307582378387451,
+      "learning_rate": 8.258914263560971e-05,
+      "loss": 0.6099,
+      "step": 127
+    },
+    {
+      "epoch": 0.3856873822975518,
+      "grad_norm": 1.7885206937789917,
+      "learning_rate": 8.216316431498028e-05,
+      "loss": 0.4832,
+      "step": 128
+    },
+    {
+      "epoch": 0.3887005649717514,
+      "grad_norm": 1.2265185117721558,
+      "learning_rate": 8.173316585274145e-05,
+      "loss": 0.4042,
+      "step": 129
+    },
+    {
+      "epoch": 0.391713747645951,
+      "grad_norm": 1.369534969329834,
+      "learning_rate": 8.129920099530607e-05,
+      "loss": 0.4681,
+      "step": 130
+    },
+    {
+      "epoch": 0.39472693032015066,
+      "grad_norm": 1.340951681137085,
+      "learning_rate": 8.086132398485524e-05,
+      "loss": 0.4775,
+      "step": 131
+    },
+    {
+      "epoch": 0.3977401129943503,
+      "grad_norm": 1.1047234535217285,
+      "learning_rate": 8.041958955255814e-05,
+      "loss": 0.4508,
+      "step": 132
+    },
+    {
+      "epoch": 0.4007532956685499,
+      "grad_norm": 1.0403156280517578,
+      "learning_rate": 7.99740529117313e-05,
+      "loss": 0.4217,
+      "step": 133
+    },
+    {
+      "epoch": 0.40376647834274954,
+      "grad_norm": 0.9500618577003479,
+      "learning_rate": 7.952476975093729e-05,
+      "loss": 0.34,
+      "step": 134
+    },
+    {
+      "epoch": 0.4067796610169492,
+      "grad_norm": 1.1021428108215332,
+      "learning_rate": 7.907179622702408e-05,
+      "loss": 0.392,
+      "step": 135
+    },
+    {
+      "epoch": 0.40979284369114877,
+      "grad_norm": 1.2623156309127808,
+      "learning_rate": 7.861518895810596e-05,
+      "loss": 0.4238,
+      "step": 136
+    },
+    {
+      "epoch": 0.4128060263653484,
+      "grad_norm": 1.395652413368225,
+      "learning_rate": 7.815500501648653e-05,
+      "loss": 0.4211,
+      "step": 137
+    },
+    {
+      "epoch": 0.415819209039548,
+      "grad_norm": 1.3175368309020996,
+      "learning_rate": 7.769130192152538e-05,
+      "loss": 0.415,
+      "step": 138
+    },
+    {
+      "epoch": 0.41883239171374764,
+      "grad_norm": 1.3882197141647339,
+      "learning_rate": 7.722413763244838e-05,
+      "loss": 0.422,
+      "step": 139
+    },
+    {
+      "epoch": 0.4218455743879473,
+      "grad_norm": 1.396023154258728,
+      "learning_rate": 7.675357054110336e-05,
+      "loss": 0.466,
+      "step": 140
+    },
+    {
+      "epoch": 0.4248587570621469,
+      "grad_norm": 1.0779083967208862,
+      "learning_rate": 7.627965946466166e-05,
+      "loss": 0.3576,
+      "step": 141
+    },
+    {
+      "epoch": 0.4278719397363465,
+      "grad_norm": 1.2511008977890015,
+      "learning_rate": 7.580246363826621e-05,
+      "loss": 0.301,
+      "step": 142
+    },
+    {
+      "epoch": 0.43088512241054616,
+      "grad_norm": 1.13119375705719,
+      "learning_rate": 7.532204270762786e-05,
+      "loss": 0.3332,
+      "step": 143
+    },
+    {
+      "epoch": 0.43389830508474575,
+      "grad_norm": 2.0195682048797607,
+      "learning_rate": 7.483845672156998e-05,
+      "loss": 0.6475,
+      "step": 144
+    },
+    {
+      "epoch": 0.4369114877589454,
+      "grad_norm": 2.429945230484009,
+      "learning_rate": 7.435176612452286e-05,
+      "loss": 0.7177,
+      "step": 145
+    },
+    {
+      "epoch": 0.439924670433145,
+      "grad_norm": 3.0756828784942627,
+      "learning_rate": 7.386203174896872e-05,
+      "loss": 0.741,
+      "step": 146
+    },
+    {
+      "epoch": 0.4429378531073446,
+      "grad_norm": 3.7236998081207275,
+      "learning_rate": 7.336931480783801e-05,
+      "loss": 0.7999,
+      "step": 147
+    },
+    {
+      "epoch": 0.44595103578154427,
+      "grad_norm": 2.7121517658233643,
+      "learning_rate": 7.287367688685835e-05,
+      "loss": 0.6044,
+      "step": 148
+    },
+    {
+      "epoch": 0.44896421845574386,
+      "grad_norm": 3.661588668823242,
+      "learning_rate": 7.237517993685678e-05,
+      "loss": 0.5553,
+      "step": 149
+    },
+    {
+      "epoch": 0.4519774011299435,
+      "grad_norm": 4.68520975112915,
+      "learning_rate": 7.187388626601637e-05,
+      "loss": 0.411,
+      "step": 150
+    },
+    {
+      "epoch": 0.45499058380414314,
+      "grad_norm": 1.866217017173767,
+      "learning_rate": 7.136985853208824e-05,
+      "loss": 0.5442,
+      "step": 151
+    },
+    {
+      "epoch": 0.45800376647834273,
+      "grad_norm": 1.6526014804840088,
+      "learning_rate": 7.086315973455981e-05,
+      "loss": 0.5071,
+      "step": 152
+    },
+    {
+      "epoch": 0.4610169491525424,
+      "grad_norm": 1.3213937282562256,
+      "learning_rate": 7.035385320678036e-05,
+      "loss": 0.4598,
+      "step": 153
+    },
+    {
+      "epoch": 0.464030131826742,
+      "grad_norm": 0.959452211856842,
+      "learning_rate": 6.984200260804484e-05,
+      "loss": 0.3485,
+      "step": 154
+    },
+    {
+      "epoch": 0.4670433145009416,
+      "grad_norm": 1.0355703830718994,
+      "learning_rate": 6.932767191563703e-05,
+      "loss": 0.3648,
+      "step": 155
+    },
+    {
+      "epoch": 0.47005649717514125,
+      "grad_norm": 0.9991386532783508,
+      "learning_rate": 6.881092541683278e-05,
+      "loss": 0.3535,
+      "step": 156
+    },
+    {
+      "epoch": 0.47306967984934084,
+      "grad_norm": 1.0915963649749756,
+      "learning_rate": 6.829182770086474e-05,
+      "loss": 0.3682,
+      "step": 157
+    },
+    {
+      "epoch": 0.4760828625235405,
+      "grad_norm": 0.9837580323219299,
+      "learning_rate": 6.777044365084907e-05,
+      "loss": 0.3703,
+      "step": 158
+    },
+    {
+      "epoch": 0.47909604519774013,
+      "grad_norm": 1.258581280708313,
+      "learning_rate": 6.724683843567568e-05,
+      "loss": 0.4104,
+      "step": 159
+    },
+    {
+      "epoch": 0.4821092278719397,
+      "grad_norm": 0.832224428653717,
+      "learning_rate": 6.672107750186255e-05,
+      "loss": 0.2934,
+      "step": 160
+    },
+    {
+      "epoch": 0.48512241054613936,
+      "grad_norm": 0.881106436252594,
+      "learning_rate": 6.619322656537552e-05,
+      "loss": 0.3127,
+      "step": 161
+    },
+    {
+      "epoch": 0.488135593220339,
+      "grad_norm": 1.257350206375122,
+      "learning_rate": 6.566335160341424e-05,
+      "loss": 0.3804,
+      "step": 162
+    },
+    {
+      "epoch": 0.4911487758945386,
+      "grad_norm": 1.6826411485671997,
+      "learning_rate": 6.513151884616556e-05,
+      "loss": 0.4807,
+      "step": 163
+    },
+    {
+      "epoch": 0.49416195856873824,
+      "grad_norm": 1.31766676902771,
+      "learning_rate": 6.459779476852528e-05,
+      "loss": 0.3872,
+      "step": 164
+    },
+    {
+      "epoch": 0.4971751412429379,
+      "grad_norm": 1.438594102859497,
+      "learning_rate": 6.406224608178932e-05,
+      "loss": 0.3868,
+      "step": 165
+    },
+    {
+      "epoch": 0.5001883239171375,
+      "grad_norm": 1.198364496231079,
+      "learning_rate": 6.352493972531534e-05,
+      "loss": 0.3361,
+      "step": 166
+    },
+    {
+      "epoch": 0.5001883239171375,
+      "eval_loss": NaN,
+      "eval_runtime": 93.1419,
+      "eval_samples_per_second": 6.002,
+      "eval_steps_per_second": 1.503,
+      "step": 166
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 5.46878716765012e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null