mtzig committed
Commit 8745c83 · verified · 1 Parent(s): 1b43884

Training in progress, step 400, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3089ee5752591a99a34512cd610e5fd1c9cbde6f2e5e96052322709db58f3c20
+ oid sha256:58970c7b1499a51e52f13d34fc84f1be155b729c5dc01535b4fd3b471893cc7c
  size 13648688
last-checkpoint/global_step400/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b15663e9002bfadcd2118544805c26092cb6ec0f666d9bc6587d6401abd21bd5
+ size 20450800
last-checkpoint/global_step400/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ef866156013d48c080d2301148283ca8543927968bc28db94f0b4473bdc8acc6
+ size 20450800
last-checkpoint/global_step400/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ad9b9bb4dfcce0be4cee272860f5b8be7d9a8be61239e5282130fe726f4adae2
+ size 20450800
last-checkpoint/global_step400/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b5910c9d8c18991184c3eebf90eac60e2ca4a073d9aefb2105dd96e9b2836ac1
+ size 20450800
last-checkpoint/global_step400/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d5625e40f0bab8897b6c022d047ec4e1b309f6c5683c29a5d18f3f6b49d2fc2f
+ size 152238
last-checkpoint/global_step400/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f1e31e7163b707480ad478f8e6c8378875c9442dd275ed39a38db3f4141626ed
+ size 152238
last-checkpoint/global_step400/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0e01df639f8ed07332533c55b3ba24300b71cc1b94368675bb8537e39c0bcbf7
+ size 152238
last-checkpoint/global_step400/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d3c88b6a29d249139ed50a0ee329aa72d539d54ce570568131b5fd790a11a890
+ size 152238
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step300
+ global_step400
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:680c0f067459bb4efdac849ce093e2226bf3c2332330a52eb68acec721890eea
+ oid sha256:abaf0e70a6661521eb40188cdadbb09fcda9f6e1ac539eef99db2b1bc5a7ba52
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a8261bb0773aaaaaf837917ac2d74751a1b07817c980444e7109f977082d4d80
+ oid sha256:376c466d70aa79c6b0bb9fc6cc87d2e449a16493d5d1155107e37872dcdc22dc
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7ba4d6439beb986cf1f95fd682e03fa5844ac212a382301bdd1a868bcc67c311
+ oid sha256:865c5a6a69a0b6acfd26560edcb10f0694871429483ae64bee81aba12e73a0b0
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c17d861750a27e832ec9cf9a840f42cdc22319da36842441a78feca72092cef2
+ oid sha256:8848755180edfd38eee9486edbe1a58572435a9c200f8a462726bb43540dcbf5
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5c0f31e0dd8f0cd1067395334590c80ca29a3a9a42118ffbf479961406c7bb0b
+ oid sha256:b217a5016949cb5cd10bb5c4b090e10b845f27963a84cf5bdc1f1d94facb5b3c
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.3688902551490931,
+ "epoch": 0.49185367353212417,
  "eval_steps": 40,
- "global_step": 300,
+ "global_step": 400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -2203,6 +2203,742 @@
  "learning_rate": 1.5922960037532057e-05,
  "loss": 0.281,
  "step": 300
2206
+ },
2207
+ {
2208
+ "epoch": 0.37011988933292345,
2209
+ "grad_norm": 0.36134533123850165,
2210
+ "learning_rate": 1.588827828521133e-05,
2211
+ "loss": 0.1997,
2212
+ "step": 301
2213
+ },
2214
+ {
2215
+ "epoch": 0.37134952351675377,
2216
+ "grad_norm": 0.40175135868371165,
2217
+ "learning_rate": 1.585348777703486e-05,
2218
+ "loss": 0.2047,
2219
+ "step": 302
2220
+ },
2221
+ {
2222
+ "epoch": 0.3725791577005841,
2223
+ "grad_norm": 0.3610033083703328,
2224
+ "learning_rate": 1.581858915557953e-05,
2225
+ "loss": 0.2037,
2226
+ "step": 303
2227
+ },
2228
+ {
2229
+ "epoch": 0.3738087918844144,
2230
+ "grad_norm": 0.42325801209258107,
2231
+ "learning_rate": 1.5783583065419054e-05,
2232
+ "loss": 0.1871,
2233
+ "step": 304
2234
+ },
2235
+ {
2236
+ "epoch": 0.3750384260682447,
2237
+ "grad_norm": 0.28821395654955023,
2238
+ "learning_rate": 1.5748470153112093e-05,
2239
+ "loss": 0.1934,
2240
+ "step": 305
2241
+ },
2242
+ {
2243
+ "epoch": 0.376268060252075,
2244
+ "grad_norm": 0.4052634070520442,
2245
+ "learning_rate": 1.57132510671903e-05,
2246
+ "loss": 0.2351,
2247
+ "step": 306
2248
+ },
2249
+ {
2250
+ "epoch": 0.3774976944359053,
2251
+ "grad_norm": 0.48496008691724585,
2252
+ "learning_rate": 1.5677926458146327e-05,
2253
+ "loss": 0.2527,
2254
+ "step": 307
2255
+ },
2256
+ {
2257
+ "epoch": 0.3787273286197356,
2258
+ "grad_norm": 0.4026838585087856,
2259
+ "learning_rate": 1.5642496978421842e-05,
2260
+ "loss": 0.2554,
2261
+ "step": 308
2262
+ },
2263
+ {
2264
+ "epoch": 0.37995696280356595,
2265
+ "grad_norm": 0.34534876497819145,
2266
+ "learning_rate": 1.560696328239547e-05,
2267
+ "loss": 0.1656,
2268
+ "step": 309
2269
+ },
2270
+ {
2271
+ "epoch": 0.38118659698739626,
2272
+ "grad_norm": 0.2751030687579328,
2273
+ "learning_rate": 1.5571326026370676e-05,
2274
+ "loss": 0.162,
2275
+ "step": 310
2276
+ },
2277
+ {
2278
+ "epoch": 0.3824162311712266,
2279
+ "grad_norm": 0.38621022206094485,
2280
+ "learning_rate": 1.5535585868563688e-05,
2281
+ "loss": 0.2212,
2282
+ "step": 311
2283
+ },
2284
+ {
2285
+ "epoch": 0.38364586535505685,
2286
+ "grad_norm": 0.3039979678378701,
2287
+ "learning_rate": 1.5499743469091303e-05,
2288
+ "loss": 0.2413,
2289
+ "step": 312
2290
+ },
2291
+ {
2292
+ "epoch": 0.38487549953888717,
2293
+ "grad_norm": 0.31620852109871317,
2294
+ "learning_rate": 1.5463799489958727e-05,
2295
+ "loss": 0.1701,
2296
+ "step": 313
2297
+ },
2298
+ {
2299
+ "epoch": 0.3861051337227175,
2300
+ "grad_norm": 0.3036007341488487,
2301
+ "learning_rate": 1.542775459504732e-05,
2302
+ "loss": 0.1718,
2303
+ "step": 314
2304
+ },
2305
+ {
2306
+ "epoch": 0.3873347679065478,
2307
+ "grad_norm": 0.4288199829474487,
2308
+ "learning_rate": 1.5391609450102346e-05,
2309
+ "loss": 0.2237,
2310
+ "step": 315
2311
+ },
2312
+ {
2313
+ "epoch": 0.3885644020903781,
2314
+ "grad_norm": 0.3798815943379263,
2315
+ "learning_rate": 1.5355364722720674e-05,
2316
+ "loss": 0.2177,
2317
+ "step": 316
2318
+ },
2319
+ {
2320
+ "epoch": 0.38979403627420844,
2321
+ "grad_norm": 0.30645321053750946,
2322
+ "learning_rate": 1.5319021082338458e-05,
2323
+ "loss": 0.2356,
2324
+ "step": 317
2325
+ },
2326
+ {
2327
+ "epoch": 0.3910236704580387,
2328
+ "grad_norm": 0.3377360946626629,
2329
+ "learning_rate": 1.5282579200218762e-05,
2330
+ "loss": 0.21,
2331
+ "step": 318
2332
+ },
2333
+ {
2334
+ "epoch": 0.392253304641869,
2335
+ "grad_norm": 0.3645715594028636,
2336
+ "learning_rate": 1.5246039749439159e-05,
2337
+ "loss": 0.1733,
2338
+ "step": 319
2339
+ },
2340
+ {
2341
+ "epoch": 0.39348293882569935,
2342
+ "grad_norm": 0.29596291916382467,
2343
+ "learning_rate": 1.5209403404879305e-05,
2344
+ "loss": 0.1505,
2345
+ "step": 320
2346
+ },
2347
+ {
2348
+ "epoch": 0.39348293882569935,
2349
+ "eval_accuracy": 0.8181818181818182,
2350
+ "eval_f1": 0.5853658536585366,
2351
+ "eval_loss": 0.40882813930511475,
2352
+ "eval_precision": 0.75,
2353
+ "eval_recall": 0.48,
2354
+ "eval_runtime": 23.2002,
2355
+ "eval_samples_per_second": 2.155,
2356
+ "eval_steps_per_second": 0.172,
2357
+ "step": 320
2358
+ },
2359
+ {
2360
+ "epoch": 0.39471257300952967,
2361
+ "grad_norm": 0.4078103453153395,
2362
+ "learning_rate": 1.5172670843208477e-05,
2363
+ "loss": 0.2415,
2364
+ "step": 321
2365
+ },
2366
+ {
2367
+ "epoch": 0.39594220719336,
2368
+ "grad_norm": 0.3270612364847978,
2369
+ "learning_rate": 1.5135842742873077e-05,
2370
+ "loss": 0.2059,
2371
+ "step": 322
2372
+ },
2373
+ {
2374
+ "epoch": 0.3971718413771903,
2375
+ "grad_norm": 0.3309279944550533,
2376
+ "learning_rate": 1.5098919784084083e-05,
2377
+ "loss": 0.1569,
2378
+ "step": 323
2379
+ },
2380
+ {
2381
+ "epoch": 0.3984014755610206,
2382
+ "grad_norm": 0.49875353395381333,
2383
+ "learning_rate": 1.5061902648804503e-05,
2384
+ "loss": 0.2477,
2385
+ "step": 324
2386
+ },
2387
+ {
2388
+ "epoch": 0.3996311097448509,
2389
+ "grad_norm": 0.3787412609953064,
2390
+ "learning_rate": 1.502479202073678e-05,
2391
+ "loss": 0.179,
2392
+ "step": 325
2393
+ },
2394
+ {
2395
+ "epoch": 0.4008607439286812,
2396
+ "grad_norm": 0.3919230580283582,
2397
+ "learning_rate": 1.4987588585310154e-05,
2398
+ "loss": 0.2249,
2399
+ "step": 326
2400
+ },
2401
+ {
2402
+ "epoch": 0.4020903781125115,
2403
+ "grad_norm": 0.40414791953417883,
2404
+ "learning_rate": 1.4950293029668004e-05,
2405
+ "loss": 0.1772,
2406
+ "step": 327
2407
+ },
2408
+ {
2409
+ "epoch": 0.40332001229634185,
2410
+ "grad_norm": 0.3778228440356831,
2411
+ "learning_rate": 1.4912906042655164e-05,
2412
+ "loss": 0.208,
2413
+ "step": 328
2414
+ },
2415
+ {
2416
+ "epoch": 0.40454964648017216,
2417
+ "grad_norm": 0.4107607344666035,
2418
+ "learning_rate": 1.4875428314805195e-05,
2419
+ "loss": 0.1716,
2420
+ "step": 329
2421
+ },
2422
+ {
2423
+ "epoch": 0.4057792806640025,
2424
+ "grad_norm": 0.3671542265326293,
2425
+ "learning_rate": 1.483786053832763e-05,
2426
+ "loss": 0.1805,
2427
+ "step": 330
2428
+ },
2429
+ {
2430
+ "epoch": 0.40700891484783275,
2431
+ "grad_norm": 0.404957322338137,
2432
+ "learning_rate": 1.4800203407095194e-05,
2433
+ "loss": 0.1842,
2434
+ "step": 331
2435
+ },
2436
+ {
2437
+ "epoch": 0.40823854903166307,
2438
+ "grad_norm": 0.48189780697782403,
2439
+ "learning_rate": 1.4762457616630972e-05,
2440
+ "loss": 0.2277,
2441
+ "step": 332
2442
+ },
2443
+ {
2444
+ "epoch": 0.4094681832154934,
2445
+ "grad_norm": 0.30773052153479974,
2446
+ "learning_rate": 1.4724623864095595e-05,
2447
+ "loss": 0.1833,
2448
+ "step": 333
2449
+ },
2450
+ {
2451
+ "epoch": 0.4106978173993237,
2452
+ "grad_norm": 0.4223729302065043,
2453
+ "learning_rate": 1.4686702848274328e-05,
2454
+ "loss": 0.2219,
2455
+ "step": 334
2456
+ },
2457
+ {
2458
+ "epoch": 0.411927451583154,
2459
+ "grad_norm": 0.2902399210528883,
2460
+ "learning_rate": 1.4648695269564182e-05,
2461
+ "loss": 0.1785,
2462
+ "step": 335
2463
+ },
2464
+ {
2465
+ "epoch": 0.41315708576698434,
2466
+ "grad_norm": 0.297435572010037,
2467
+ "learning_rate": 1.461060182996098e-05,
2468
+ "loss": 0.2441,
2469
+ "step": 336
2470
+ },
2471
+ {
2472
+ "epoch": 0.4143867199508146,
2473
+ "grad_norm": 0.4498936613858582,
2474
+ "learning_rate": 1.4572423233046386e-05,
2475
+ "loss": 0.1765,
2476
+ "step": 337
2477
+ },
2478
+ {
2479
+ "epoch": 0.4156163541346449,
2480
+ "grad_norm": 0.476353253435308,
2481
+ "learning_rate": 1.4534160183974908e-05,
2482
+ "loss": 0.1711,
2483
+ "step": 338
2484
+ },
2485
+ {
2486
+ "epoch": 0.41684598831847525,
2487
+ "grad_norm": 0.4136384364300188,
2488
+ "learning_rate": 1.4495813389460875e-05,
2489
+ "loss": 0.213,
2490
+ "step": 339
2491
+ },
2492
+ {
2493
+ "epoch": 0.41807562250230557,
2494
+ "grad_norm": 0.457702192015087,
2495
+ "learning_rate": 1.4457383557765385e-05,
2496
+ "loss": 0.2056,
2497
+ "step": 340
2498
+ },
2499
+ {
2500
+ "epoch": 0.4193052566861359,
2501
+ "grad_norm": 0.32316243545279294,
2502
+ "learning_rate": 1.4418871398683227e-05,
2503
+ "loss": 0.2445,
2504
+ "step": 341
2505
+ },
2506
+ {
2507
+ "epoch": 0.4205348908699662,
2508
+ "grad_norm": 0.3952237252439469,
2509
+ "learning_rate": 1.4380277623529766e-05,
2510
+ "loss": 0.1789,
2511
+ "step": 342
2512
+ },
2513
+ {
2514
+ "epoch": 0.42176452505379647,
2515
+ "grad_norm": 0.45979858964325293,
2516
+ "learning_rate": 1.4341602945127806e-05,
2517
+ "loss": 0.225,
2518
+ "step": 343
2519
+ },
2520
+ {
2521
+ "epoch": 0.4229941592376268,
2522
+ "grad_norm": 0.456535265546009,
2523
+ "learning_rate": 1.4302848077794427e-05,
2524
+ "loss": 0.2245,
2525
+ "step": 344
2526
+ },
2527
+ {
2528
+ "epoch": 0.4242237934214571,
2529
+ "grad_norm": 0.3244747375904321,
2530
+ "learning_rate": 1.426401373732779e-05,
2531
+ "loss": 0.1801,
2532
+ "step": 345
2533
+ },
2534
+ {
2535
+ "epoch": 0.4254534276052874,
2536
+ "grad_norm": 0.40300301462416604,
2537
+ "learning_rate": 1.422510064099391e-05,
2538
+ "loss": 0.2212,
2539
+ "step": 346
2540
+ },
2541
+ {
2542
+ "epoch": 0.42668306178911775,
2543
+ "grad_norm": 0.5264557343629197,
2544
+ "learning_rate": 1.4186109507513425e-05,
2545
+ "loss": 0.2202,
2546
+ "step": 347
2547
+ },
2548
+ {
2549
+ "epoch": 0.42791269597294807,
2550
+ "grad_norm": 0.4350217170052354,
2551
+ "learning_rate": 1.4147041057048303e-05,
2552
+ "loss": 0.2061,
2553
+ "step": 348
2554
+ },
2555
+ {
2556
+ "epoch": 0.4291423301567784,
2557
+ "grad_norm": 0.6810095448654682,
2558
+ "learning_rate": 1.4107896011188546e-05,
2559
+ "loss": 0.1782,
2560
+ "step": 349
2561
+ },
2562
+ {
2563
+ "epoch": 0.43037196434060865,
2564
+ "grad_norm": 0.25807154769833757,
2565
+ "learning_rate": 1.4068675092938872e-05,
2566
+ "loss": 0.156,
2567
+ "step": 350
2568
+ },
2569
+ {
2570
+ "epoch": 0.43160159852443897,
2571
+ "grad_norm": 0.29519781987132376,
2572
+ "learning_rate": 1.4029379026705352e-05,
2573
+ "loss": 0.2078,
2574
+ "step": 351
2575
+ },
2576
+ {
2577
+ "epoch": 0.4328312327082693,
2578
+ "grad_norm": 0.4580606963993864,
2579
+ "learning_rate": 1.3990008538282027e-05,
2580
+ "loss": 0.2024,
2581
+ "step": 352
2582
+ },
2583
+ {
2584
+ "epoch": 0.4340608668920996,
2585
+ "grad_norm": 0.4014942247225734,
2586
+ "learning_rate": 1.3950564354837512e-05,
2587
+ "loss": 0.1801,
2588
+ "step": 353
2589
+ },
2590
+ {
2591
+ "epoch": 0.4352905010759299,
2592
+ "grad_norm": 0.40131332657814234,
2593
+ "learning_rate": 1.391104720490156e-05,
2594
+ "loss": 0.214,
2595
+ "step": 354
2596
+ },
2597
+ {
2598
+ "epoch": 0.43652013525976024,
2599
+ "grad_norm": 0.3560030996515948,
2600
+ "learning_rate": 1.387145781835161e-05,
2601
+ "loss": 0.2126,
2602
+ "step": 355
2603
+ },
2604
+ {
2605
+ "epoch": 0.4377497694435905,
2606
+ "grad_norm": 0.3729681090326996,
2607
+ "learning_rate": 1.3831796926399295e-05,
2608
+ "loss": 0.2055,
2609
+ "step": 356
2610
+ },
2611
+ {
2612
+ "epoch": 0.43897940362742083,
2613
+ "grad_norm": 0.4172829333159122,
2614
+ "learning_rate": 1.3792065261576953e-05,
2615
+ "loss": 0.2326,
2616
+ "step": 357
2617
+ },
2618
+ {
2619
+ "epoch": 0.44020903781125115,
2620
+ "grad_norm": 0.30998074509681783,
2621
+ "learning_rate": 1.3752263557724088e-05,
2622
+ "loss": 0.1633,
2623
+ "step": 358
2624
+ },
2625
+ {
2626
+ "epoch": 0.44143867199508147,
2627
+ "grad_norm": 0.345268920537541,
2628
+ "learning_rate": 1.3712392549973814e-05,
2629
+ "loss": 0.1765,
2630
+ "step": 359
2631
+ },
2632
+ {
2633
+ "epoch": 0.4426683061789118,
2634
+ "grad_norm": 0.3508481090946454,
2635
+ "learning_rate": 1.3672452974739278e-05,
2636
+ "loss": 0.1752,
2637
+ "step": 360
2638
+ },
2639
+ {
2640
+ "epoch": 0.4426683061789118,
2641
+ "eval_accuracy": 0.786096256684492,
2642
+ "eval_f1": 0.42857142857142855,
2643
+ "eval_loss": 0.4385937452316284,
2644
+ "eval_precision": 0.75,
2645
+ "eval_recall": 0.3,
2646
+ "eval_runtime": 23.4097,
2647
+ "eval_samples_per_second": 2.136,
2648
+ "eval_steps_per_second": 0.171,
2649
+ "step": 360
2650
+ },
2651
+ {
2652
+ "epoch": 0.4438979403627421,
2653
+ "grad_norm": 0.41404798323100117,
2654
+ "learning_rate": 1.3632445569700078e-05,
2655
+ "loss": 0.1745,
2656
+ "step": 361
2657
+ },
2658
+ {
2659
+ "epoch": 0.44512757454657237,
2660
+ "grad_norm": 0.39572877694784087,
2661
+ "learning_rate": 1.3592371073788595e-05,
2662
+ "loss": 0.216,
2663
+ "step": 362
2664
+ },
2665
+ {
2666
+ "epoch": 0.4463572087304027,
2667
+ "grad_norm": 0.3927819000312662,
2668
+ "learning_rate": 1.355223022717639e-05,
2669
+ "loss": 0.164,
2670
+ "step": 363
2671
+ },
2672
+ {
2673
+ "epoch": 0.447586842914233,
2674
+ "grad_norm": 0.36355749807156507,
2675
+ "learning_rate": 1.3512023771260507e-05,
2676
+ "loss": 0.2439,
2677
+ "step": 364
2678
+ },
2679
+ {
2680
+ "epoch": 0.4488164770980633,
2681
+ "grad_norm": 0.37694180103294717,
2682
+ "learning_rate": 1.347175244864979e-05,
2683
+ "loss": 0.2009,
2684
+ "step": 365
2685
+ },
2686
+ {
2687
+ "epoch": 0.45004611128189365,
2688
+ "grad_norm": 0.3566619922589067,
2689
+ "learning_rate": 1.3431417003151162e-05,
2690
+ "loss": 0.2045,
2691
+ "step": 366
2692
+ },
2693
+ {
2694
+ "epoch": 0.45127574546572397,
2695
+ "grad_norm": 0.36473757399388024,
2696
+ "learning_rate": 1.3391018179755886e-05,
2697
+ "loss": 0.1711,
2698
+ "step": 367
2699
+ },
2700
+ {
2701
+ "epoch": 0.4525053796495543,
2702
+ "grad_norm": 0.3559479590045658,
2703
+ "learning_rate": 1.3350556724625809e-05,
2704
+ "loss": 0.2061,
2705
+ "step": 368
2706
+ },
2707
+ {
2708
+ "epoch": 0.45373501383338455,
2709
+ "grad_norm": 0.34195723156033303,
2710
+ "learning_rate": 1.3310033385079589e-05,
2711
+ "loss": 0.1761,
2712
+ "step": 369
2713
+ },
2714
+ {
2715
+ "epoch": 0.45496464801721487,
2716
+ "grad_norm": 0.5770960005405298,
2717
+ "learning_rate": 1.3269448909578866e-05,
2718
+ "loss": 0.227,
2719
+ "step": 370
2720
+ },
2721
+ {
2722
+ "epoch": 0.4561942822010452,
2723
+ "grad_norm": 0.6125705002122824,
2724
+ "learning_rate": 1.3228804047714462e-05,
2725
+ "loss": 0.2351,
2726
+ "step": 371
2727
+ },
2728
+ {
2729
+ "epoch": 0.4574239163848755,
2730
+ "grad_norm": 0.48363918457013794,
2731
+ "learning_rate": 1.3188099550192537e-05,
2732
+ "loss": 0.1847,
2733
+ "step": 372
2734
+ },
2735
+ {
2736
+ "epoch": 0.4586535505687058,
2737
+ "grad_norm": 0.6610021081352014,
2738
+ "learning_rate": 1.31473361688207e-05,
2739
+ "loss": 0.2129,
2740
+ "step": 373
2741
+ },
2742
+ {
2743
+ "epoch": 0.45988318475253614,
2744
+ "grad_norm": 0.4341786311855446,
2745
+ "learning_rate": 1.3106514656494147e-05,
2746
+ "loss": 0.2426,
2747
+ "step": 374
2748
+ },
2749
+ {
2750
+ "epoch": 0.4611128189363664,
2751
+ "grad_norm": 0.3013643837535933,
2752
+ "learning_rate": 1.3065635767181748e-05,
2753
+ "loss": 0.1596,
2754
+ "step": 375
2755
+ },
2756
+ {
2757
+ "epoch": 0.46234245312019673,
2758
+ "grad_norm": 0.29963722757790967,
2759
+ "learning_rate": 1.302470025591211e-05,
2760
+ "loss": 0.1821,
2761
+ "step": 376
2762
+ },
2763
+ {
2764
+ "epoch": 0.46357208730402705,
2765
+ "grad_norm": 0.5856707955354147,
2766
+ "learning_rate": 1.2983708878759655e-05,
2767
+ "loss": 0.2024,
2768
+ "step": 377
2769
+ },
2770
+ {
2771
+ "epoch": 0.46480172148785737,
2772
+ "grad_norm": 0.37812820220102683,
2773
+ "learning_rate": 1.2942662392830632e-05,
2774
+ "loss": 0.2049,
2775
+ "step": 378
2776
+ },
2777
+ {
2778
+ "epoch": 0.4660313556716877,
2779
+ "grad_norm": 0.41966854124526104,
2780
+ "learning_rate": 1.290156155624914e-05,
2781
+ "loss": 0.227,
2782
+ "step": 379
2783
+ },
2784
+ {
2785
+ "epoch": 0.467260989855518,
2786
+ "grad_norm": 0.39476761658009546,
2787
+ "learning_rate": 1.286040712814314e-05,
2788
+ "loss": 0.1552,
2789
+ "step": 380
2790
+ },
2791
+ {
2792
+ "epoch": 0.46849062403934827,
2793
+ "grad_norm": 0.4381036125394883,
2794
+ "learning_rate": 1.2819199868630419e-05,
2795
+ "loss": 0.1686,
2796
+ "step": 381
2797
+ },
2798
+ {
2799
+ "epoch": 0.4697202582231786,
2800
+ "grad_norm": 0.2834280308233097,
2801
+ "learning_rate": 1.2777940538804545e-05,
2802
+ "loss": 0.1292,
2803
+ "step": 382
2804
+ },
2805
+ {
2806
+ "epoch": 0.4709498924070089,
2807
+ "grad_norm": 0.3708781081449464,
2808
+ "learning_rate": 1.2736629900720832e-05,
2809
+ "loss": 0.1575,
2810
+ "step": 383
2811
+ },
2812
+ {
2813
+ "epoch": 0.4721795265908392,
2814
+ "grad_norm": 0.37089620983594307,
2815
+ "learning_rate": 1.2695268717382242e-05,
2816
+ "loss": 0.1923,
2817
+ "step": 384
2818
+ },
2819
+ {
2820
+ "epoch": 0.47340916077466955,
2821
+ "grad_norm": 0.37850202105410397,
2822
+ "learning_rate": 1.2653857752725305e-05,
2823
+ "loss": 0.1751,
2824
+ "step": 385
2825
+ },
2826
+ {
2827
+ "epoch": 0.47463879495849987,
2828
+ "grad_norm": 0.29465463630363,
2829
+ "learning_rate": 1.2612397771606015e-05,
2830
+ "loss": 0.1792,
2831
+ "step": 386
2832
+ },
2833
+ {
2834
+ "epoch": 0.47586842914233013,
2835
+ "grad_norm": 0.35698339352314057,
2836
+ "learning_rate": 1.2570889539785683e-05,
2837
+ "loss": 0.1325,
2838
+ "step": 387
2839
+ },
2840
+ {
2841
+ "epoch": 0.47709806332616045,
2842
+ "grad_norm": 0.43664575464965527,
2843
+ "learning_rate": 1.2529333823916807e-05,
2844
+ "loss": 0.1764,
2845
+ "step": 388
2846
+ },
2847
+ {
2848
+ "epoch": 0.47832769750999077,
2849
+ "grad_norm": 0.4130236851643136,
2850
+ "learning_rate": 1.2487731391528919e-05,
2851
+ "loss": 0.2261,
2852
+ "step": 389
2853
+ },
2854
+ {
2855
+ "epoch": 0.4795573316938211,
2856
+ "grad_norm": 0.37099838030443905,
2857
+ "learning_rate": 1.2446083011014389e-05,
2858
+ "loss": 0.187,
2859
+ "step": 390
2860
+ },
2861
+ {
2862
+ "epoch": 0.4807869658776514,
2863
+ "grad_norm": 0.241662388149488,
2864
+ "learning_rate": 1.2404389451614253e-05,
2865
+ "loss": 0.1671,
2866
+ "step": 391
2867
+ },
2868
+ {
2869
+ "epoch": 0.4820166000614817,
2870
+ "grad_norm": 0.40938373429888714,
2871
+ "learning_rate": 1.2362651483403985e-05,
2872
+ "loss": 0.1861,
2873
+ "step": 392
2874
+ },
2875
+ {
2876
+ "epoch": 0.48324623424531205,
2877
+ "grad_norm": 0.3623538192567374,
2878
+ "learning_rate": 1.2320869877279297e-05,
2879
+ "loss": 0.1743,
2880
+ "step": 393
2881
+ },
2882
+ {
2883
+ "epoch": 0.4844758684291423,
2884
+ "grad_norm": 0.4437845258381219,
2885
+ "learning_rate": 1.2279045404941883e-05,
2886
+ "loss": 0.2337,
2887
+ "step": 394
2888
+ },
2889
+ {
2890
+ "epoch": 0.48570550261297263,
2891
+ "grad_norm": 0.3749955717955905,
2892
+ "learning_rate": 1.2237178838885168e-05,
2893
+ "loss": 0.2027,
2894
+ "step": 395
2895
+ },
2896
+ {
2897
+ "epoch": 0.48693513679680295,
2898
+ "grad_norm": 0.3505048992054775,
2899
+ "learning_rate": 1.2195270952380052e-05,
2900
+ "loss": 0.1571,
2901
+ "step": 396
2902
+ },
2903
+ {
2904
+ "epoch": 0.48816477098063327,
2905
+ "grad_norm": 0.3713688621968288,
2906
+ "learning_rate": 1.215332251946061e-05,
2907
+ "loss": 0.1915,
2908
+ "step": 397
2909
+ },
2910
+ {
2911
+ "epoch": 0.4893944051644636,
2912
+ "grad_norm": 0.4834162644214324,
2913
+ "learning_rate": 1.2111334314909811e-05,
2914
+ "loss": 0.2389,
2915
+ "step": 398
2916
+ },
2917
+ {
2918
+ "epoch": 0.4906240393482939,
2919
+ "grad_norm": 0.38954625041703195,
2920
+ "learning_rate": 1.2069307114245197e-05,
2921
+ "loss": 0.1682,
2922
+ "step": 399
2923
+ },
2924
+ {
2925
+ "epoch": 0.49185367353212417,
2926
+ "grad_norm": 0.46232563297646273,
2927
+ "learning_rate": 1.2027241693704567e-05,
2928
+ "loss": 0.2382,
2929
+ "step": 400
2930
+ },
2931
+ {
2932
+ "epoch": 0.49185367353212417,
2933
+ "eval_accuracy": 0.8128342245989305,
2934
+ "eval_f1": 0.5454545454545454,
2935
+ "eval_loss": 0.41859376430511475,
2936
+ "eval_precision": 0.7777777777777778,
2937
+ "eval_recall": 0.42,
2938
+ "eval_runtime": 22.7158,
2939
+ "eval_samples_per_second": 2.201,
2940
+ "eval_steps_per_second": 0.176,
2941
+ "step": 400
2942
  }
2943
  ],
2944
  "logging_steps": 1,
 
@@ -2222,7 +2958,7 @@
  "attributes": {}
  }
  },
- "total_flos": 295277918322688.0,
+ "total_flos": 393623730028544.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null