plip committed on
Commit
797cb10
1 Parent(s): cf7a449

Training in progress, step 120000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d58137113791019cd3d543973897cce79aa9a3f674e715923996321558c96b13
3
  size 202194449
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3290fe37a91479d6ab7f5943f720695a90f016b8ef2878f4eacc19dec412ccde
3
  size 202194449
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f59c2ef88b3bc3cd7bdeb23ee182242569e553f7337d36848967067fc05c479
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2afb485a315cb6d1cbb995c2e4e8a5bda0967bae5d1ca0bd5b33331f6f3244fc
3
  size 102501541
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:247f1776d14aa6bf405e1cbe9b3a20de5077a4212ea5f0a4e1ce51dbf43de0f2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4cf719b3415ea1dec9edabeceb77eb2a8f1be572542366e9bda15c1bceda980
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:247f1776d14aa6bf405e1cbe9b3a20de5077a4212ea5f0a4e1ce51dbf43de0f2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4cf719b3415ea1dec9edabeceb77eb2a8f1be572542366e9bda15c1bceda980
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:247f1776d14aa6bf405e1cbe9b3a20de5077a4212ea5f0a4e1ce51dbf43de0f2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4cf719b3415ea1dec9edabeceb77eb2a8f1be572542366e9bda15c1bceda980
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:247f1776d14aa6bf405e1cbe9b3a20de5077a4212ea5f0a4e1ce51dbf43de0f2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4cf719b3415ea1dec9edabeceb77eb2a8f1be572542366e9bda15c1bceda980
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:247f1776d14aa6bf405e1cbe9b3a20de5077a4212ea5f0a4e1ce51dbf43de0f2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4cf719b3415ea1dec9edabeceb77eb2a8f1be572542366e9bda15c1bceda980
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:247f1776d14aa6bf405e1cbe9b3a20de5077a4212ea5f0a4e1ce51dbf43de0f2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4cf719b3415ea1dec9edabeceb77eb2a8f1be572542366e9bda15c1bceda980
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:247f1776d14aa6bf405e1cbe9b3a20de5077a4212ea5f0a4e1ce51dbf43de0f2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4cf719b3415ea1dec9edabeceb77eb2a8f1be572542366e9bda15c1bceda980
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:247f1776d14aa6bf405e1cbe9b3a20de5077a4212ea5f0a4e1ce51dbf43de0f2
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4cf719b3415ea1dec9edabeceb77eb2a8f1be572542366e9bda15c1bceda980
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a36392859753540b5ff28a6690e0fb35c1157de322529d1ae210898db91ddda7
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60ddf5c33f60196860027b40e8443017b8ed479cc5dfb73f8a076fe07f546fd4
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.562967450313381,
5
- "global_step": 110000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -2206,11 +2206,211 @@
2206
  "eval_samples_per_second": 1534.54,
2207
  "eval_steps_per_second": 24.435,
2208
  "step": 110000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2209
  }
2210
  ],
2211
  "max_steps": 500000,
2212
  "num_train_epochs": 12,
2213
- "total_flos": 3.5143535325376865e+21,
2214
  "trial_name": null,
2215
  "trial_params": null
2216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.7959644912509614,
5
+ "global_step": 120000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
2206
  "eval_samples_per_second": 1534.54,
2207
  "eval_steps_per_second": 24.435,
2208
  "step": 110000
2209
+ },
2210
+ {
2211
+ "epoch": 2.57,
2212
+ "learning_rate": 0.0002774275491977922,
2213
+ "loss": 0.2993,
2214
+ "step": 110500
2215
+ },
2216
+ {
2217
+ "epoch": 2.59,
2218
+ "learning_rate": 0.0002771699479464853,
2219
+ "loss": 0.2991,
2220
+ "step": 111000
2221
+ },
2222
+ {
2223
+ "epoch": 2.59,
2224
+ "eval_loss": 0.28305351734161377,
2225
+ "eval_runtime": 1.4425,
2226
+ "eval_samples_per_second": 1523.692,
2227
+ "eval_steps_per_second": 24.263,
2228
+ "step": 111000
2229
+ },
2230
+ {
2231
+ "epoch": 2.6,
2232
+ "learning_rate": 0.00027691101066420104,
2233
+ "loss": 0.2993,
2234
+ "step": 111500
2235
+ },
2236
+ {
2237
+ "epoch": 2.61,
2238
+ "learning_rate": 0.0002766507401826361,
2239
+ "loss": 0.2987,
2240
+ "step": 112000
2241
+ },
2242
+ {
2243
+ "epoch": 2.61,
2244
+ "eval_loss": 0.2818717956542969,
2245
+ "eval_runtime": 1.4177,
2246
+ "eval_samples_per_second": 1550.356,
2247
+ "eval_steps_per_second": 24.687,
2248
+ "step": 112000
2249
+ },
2250
+ {
2251
+ "epoch": 2.62,
2252
+ "learning_rate": 0.0002763891393480666,
2253
+ "loss": 0.2989,
2254
+ "step": 112500
2255
+ },
2256
+ {
2257
+ "epoch": 2.63,
2258
+ "learning_rate": 0.0002761262110213175,
2259
+ "loss": 0.2985,
2260
+ "step": 113000
2261
+ },
2262
+ {
2263
+ "epoch": 2.63,
2264
+ "eval_loss": 0.283016562461853,
2265
+ "eval_runtime": 1.4225,
2266
+ "eval_samples_per_second": 1545.128,
2267
+ "eval_steps_per_second": 24.604,
2268
+ "step": 113000
2269
+ },
2270
+ {
2271
+ "epoch": 2.64,
2272
+ "learning_rate": 0.00027586195807773083,
2273
+ "loss": 0.2989,
2274
+ "step": 113500
2275
+ },
2276
+ {
2277
+ "epoch": 2.66,
2278
+ "learning_rate": 0.00027559638340713435,
2279
+ "loss": 0.2981,
2280
+ "step": 114000
2281
+ },
2282
+ {
2283
+ "epoch": 2.66,
2284
+ "eval_loss": 0.2824258804321289,
2285
+ "eval_runtime": 1.412,
2286
+ "eval_samples_per_second": 1556.637,
2287
+ "eval_steps_per_second": 24.787,
2288
+ "step": 114000
2289
+ },
2290
+ {
2291
+ "epoch": 2.67,
2292
+ "learning_rate": 0.00027532948991381025,
2293
+ "loss": 0.2976,
2294
+ "step": 114500
2295
+ },
2296
+ {
2297
+ "epoch": 2.68,
2298
+ "learning_rate": 0.00027506128051646287,
2299
+ "loss": 0.2978,
2300
+ "step": 115000
2301
+ },
2302
+ {
2303
+ "epoch": 2.68,
2304
+ "eval_loss": 0.2790142595767975,
2305
+ "eval_runtime": 1.436,
2306
+ "eval_samples_per_second": 1530.608,
2307
+ "eval_steps_per_second": 24.373,
2308
+ "step": 115000
2309
+ },
2310
+ {
2311
+ "epoch": 2.69,
2312
+ "learning_rate": 0.00027479175814818733,
2313
+ "loss": 0.2982,
2314
+ "step": 115500
2315
+ },
2316
+ {
2317
+ "epoch": 2.7,
2318
+ "learning_rate": 0.000274520925756437,
2319
+ "loss": 0.2973,
2320
+ "step": 116000
2321
+ },
2322
+ {
2323
+ "epoch": 2.7,
2324
+ "eval_loss": 0.27925553917884827,
2325
+ "eval_runtime": 1.4557,
2326
+ "eval_samples_per_second": 1509.89,
2327
+ "eval_steps_per_second": 24.043,
2328
+ "step": 116000
2329
+ },
2330
+ {
2331
+ "epoch": 2.71,
2332
+ "learning_rate": 0.00027424878630299157,
2333
+ "loss": 0.2971,
2334
+ "step": 116500
2335
+ },
2336
+ {
2337
+ "epoch": 2.73,
2338
+ "learning_rate": 0.0002739753427639244,
2339
+ "loss": 0.2977,
2340
+ "step": 117000
2341
+ },
2342
+ {
2343
+ "epoch": 2.73,
2344
+ "eval_loss": 0.2807333171367645,
2345
+ "eval_runtime": 1.4375,
2346
+ "eval_samples_per_second": 1529.048,
2347
+ "eval_steps_per_second": 24.348,
2348
+ "step": 117000
2349
+ },
2350
+ {
2351
+ "epoch": 2.74,
2352
+ "learning_rate": 0.0002737005981295704,
2353
+ "loss": 0.2973,
2354
+ "step": 117500
2355
+ },
2356
+ {
2357
+ "epoch": 2.75,
2358
+ "learning_rate": 0.0002734245554044927,
2359
+ "loss": 0.2969,
2360
+ "step": 118000
2361
+ },
2362
+ {
2363
+ "epoch": 2.75,
2364
+ "eval_loss": 0.28166475892066956,
2365
+ "eval_runtime": 1.4453,
2366
+ "eval_samples_per_second": 1520.838,
2367
+ "eval_steps_per_second": 24.217,
2368
+ "step": 118000
2369
+ },
2370
+ {
2371
+ "epoch": 2.76,
2372
+ "learning_rate": 0.0002731472176074504,
2373
+ "loss": 0.2967,
2374
+ "step": 118500
2375
+ },
2376
+ {
2377
+ "epoch": 2.77,
2378
+ "learning_rate": 0.0002728685877713653,
2379
+ "loss": 0.2967,
2380
+ "step": 119000
2381
+ },
2382
+ {
2383
+ "epoch": 2.77,
2384
+ "eval_loss": 0.2808319330215454,
2385
+ "eval_runtime": 1.4174,
2386
+ "eval_samples_per_second": 1550.705,
2387
+ "eval_steps_per_second": 24.693,
2388
+ "step": 119000
2389
+ },
2390
+ {
2391
+ "epoch": 2.78,
2392
+ "learning_rate": 0.0002725886689432884,
2393
+ "loss": 0.2965,
2394
+ "step": 119500
2395
+ },
2396
+ {
2397
+ "epoch": 2.8,
2398
+ "learning_rate": 0.0002723074641843674,
2399
+ "loss": 0.2965,
2400
+ "step": 120000
2401
+ },
2402
+ {
2403
+ "epoch": 2.8,
2404
+ "eval_loss": 0.27865585684776306,
2405
+ "eval_runtime": 1.4415,
2406
+ "eval_samples_per_second": 1524.849,
2407
+ "eval_steps_per_second": 24.281,
2408
+ "step": 120000
2409
  }
2410
  ],
2411
  "max_steps": 500000,
2412
  "num_train_epochs": 12,
2413
+ "total_flos": 3.8338406713121864e+21,
2414
  "trial_name": null,
2415
  "trial_params": null
2416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f59c2ef88b3bc3cd7bdeb23ee182242569e553f7337d36848967067fc05c479
3
  size 102501541
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2afb485a315cb6d1cbb995c2e4e8a5bda0967bae5d1ca0bd5b33331f6f3244fc
3
  size 102501541