mika5883 commited on
Commit
f258ec4
1 Parent(s): 4af1b53

Training in progress, step 178500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d586d26b3bc11753f90bfdd44b6176d6e5f7c73010c731f933ccbacbe1da0d18
3
  size 891644712
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f576c297c0eae7ededb3822e0fd97b204d6a332ff9c3b8d7ccd818ebf391761
3
  size 891644712
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb31ef9b4e992bea78c8e73acaf0ac0b55d421befd26f3a71b566a864a42e688
3
  size 1783444357
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e4dd6b31039b01a958cd2e90946071f91e4689e9d9aca8c7e434cc905cde9ca
3
  size 1783444357
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9c578aa1d7020e4e5db26d4f55be115b74b2ac40930ca1256424dc52199113f
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9e95bfd6b2f51d2614c1dceffb80aed647095f9f81402665bc8a637aab51a24
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4f9081a0528121361fc8797ecc1e7c82e2cf08ea59233116797e8af7b086270
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:207898f141fbde837147332552977457f374b23313c4b963d8a62a4e111187ed
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.1072,
5
  "eval_steps": 500,
6
- "global_step": 173000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2429,6 +2429,83 @@
2429
  "learning_rate": 2.233024e-05,
2430
  "loss": 0.2686,
2431
  "step": 173000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2432
  }
2433
  ],
2434
  "logging_steps": 500,
@@ -2448,7 +2525,7 @@
2448
  "attributes": {}
2449
  }
2450
  },
2451
- "total_flos": 8.4279772053504e+17,
2452
  "train_batch_size": 64,
2453
  "trial_name": null,
2454
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.1424,
5
  "eval_steps": 500,
6
+ "global_step": 178500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2429
  "learning_rate": 2.233024e-05,
2430
  "loss": 0.2686,
2431
  "step": 173000
2432
+ },
2433
+ {
2434
+ "epoch": 1.1104,
2435
+ "grad_norm": 0.6357282996177673,
2436
+ "learning_rate": 2.225024e-05,
2437
+ "loss": 0.2639,
2438
+ "step": 173500
2439
+ },
2440
+ {
2441
+ "epoch": 1.1136,
2442
+ "grad_norm": 0.5262208580970764,
2443
+ "learning_rate": 2.2170400000000004e-05,
2444
+ "loss": 0.2641,
2445
+ "step": 174000
2446
+ },
2447
+ {
2448
+ "epoch": 1.1168,
2449
+ "grad_norm": 0.6878075003623962,
2450
+ "learning_rate": 2.20904e-05,
2451
+ "loss": 0.2654,
2452
+ "step": 174500
2453
+ },
2454
+ {
2455
+ "epoch": 1.12,
2456
+ "grad_norm": 0.5332186222076416,
2457
+ "learning_rate": 2.2010400000000002e-05,
2458
+ "loss": 0.2638,
2459
+ "step": 175000
2460
+ },
2461
+ {
2462
+ "epoch": 1.1232,
2463
+ "grad_norm": 0.5562476515769958,
2464
+ "learning_rate": 2.19304e-05,
2465
+ "loss": 0.2648,
2466
+ "step": 175500
2467
+ },
2468
+ {
2469
+ "epoch": 1.1264,
2470
+ "grad_norm": 0.5924221277236938,
2471
+ "learning_rate": 2.18504e-05,
2472
+ "loss": 0.2627,
2473
+ "step": 176000
2474
+ },
2475
+ {
2476
+ "epoch": 1.1296,
2477
+ "grad_norm": 0.5250386595726013,
2478
+ "learning_rate": 2.17704e-05,
2479
+ "loss": 0.2619,
2480
+ "step": 176500
2481
+ },
2482
+ {
2483
+ "epoch": 1.1328,
2484
+ "grad_norm": 0.7426069378852844,
2485
+ "learning_rate": 2.16904e-05,
2486
+ "loss": 0.2628,
2487
+ "step": 177000
2488
+ },
2489
+ {
2490
+ "epoch": 1.1360000000000001,
2491
+ "grad_norm": 0.4925951063632965,
2492
+ "learning_rate": 2.16104e-05,
2493
+ "loss": 0.2661,
2494
+ "step": 177500
2495
+ },
2496
+ {
2497
+ "epoch": 1.1392,
2498
+ "grad_norm": 0.5707270503044128,
2499
+ "learning_rate": 2.15304e-05,
2500
+ "loss": 0.2622,
2501
+ "step": 178000
2502
+ },
2503
+ {
2504
+ "epoch": 1.1424,
2505
+ "grad_norm": 0.5793021321296692,
2506
+ "learning_rate": 2.14504e-05,
2507
+ "loss": 0.2671,
2508
+ "step": 178500
2509
  }
2510
  ],
2511
  "logging_steps": 500,
 
2525
  "attributes": {}
2526
  }
2527
  },
2528
+ "total_flos": 8.6959186771968e+17,
2529
  "train_batch_size": 64,
2530
  "trial_name": null,
2531
  "trial_params": null