mika5883 committed
Commit 99ba450
1 Parent(s): b13acb6

Training in progress, step 185500, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e2baaeb2b23d09e31adf3ef5585f3e6639b56c07f476dcaafc27f4ae5d87b820
+oid sha256:eb364ce891de5f4a56d29465d8827aee9763f63c827eed28e9f08a6879a4e0e8
 size 891644712
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:31c9234b22f8508a6d0d4122d44d2c6d1d2e6e2fee5b0b2e55791454aea923e8
+oid sha256:13bfc74584d67284695477358d19a44d719880c08f366f3d9f3dd6557e56e613
 size 1783444357
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:823c43fb8cb66b96b5deeb2005d1b02ad22bf7e9aa812aa31a158370b966082c
+oid sha256:3abb4906081313365fe9d8e0e48faa531b110753b43df2422beeff510f4869fe
 size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:82e2c4a62066e0491348b34524e07a98c2175669eb301b7705f9a7c6d9af8189
+oid sha256:c743dd89240c66051c085e23b5c9372172e564216f22be729c3e3e69fdfbbada
 size 627
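
The four binary files above (model.safetensors, optimizer.pt, rng_state.pth, scheduler.pt) are tracked with Git LFS, so the commit only rewrites their pointer files: each pointer records the blob's SHA-256 oid and byte size, and here every oid changes while the sizes stay identical. A minimal sketch for checking that a locally downloaded file matches the oid in its pointer; the local path is illustrative, and the expected digest is the new model.safetensors oid from the diff above:

import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    # Stream in 1 MiB chunks so the ~892 MB checkpoint never has to fit in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Expected value: the "+oid sha256:..." line for model.safetensors in this commit.
expected = "eb364ce891de5f4a56d29465d8827aee9763f63c827eed28e9f08a6879a4e0e8"
actual = sha256_of("last-checkpoint/model.safetensors")  # illustrative local path
print("OK" if actual == expected else f"mismatch: {actual}")
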
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 1.168,
+  "epoch": 1.1872,
   "eval_steps": 500,
-  "global_step": 182500,
+  "global_step": 185500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2562,6 +2562,48 @@
       "learning_rate": 2.081056e-05,
       "loss": 0.2611,
       "step": 182500
+    },
+    {
+      "epoch": 1.1712,
+      "grad_norm": 0.5887815356254578,
+      "learning_rate": 2.073056e-05,
+      "loss": 0.2632,
+      "step": 183000
+    },
+    {
+      "epoch": 1.1743999999999999,
+      "grad_norm": 0.6037270426750183,
+      "learning_rate": 2.0650560000000002e-05,
+      "loss": 0.2645,
+      "step": 183500
+    },
+    {
+      "epoch": 1.1776,
+      "grad_norm": 0.636946439743042,
+      "learning_rate": 2.057072e-05,
+      "loss": 0.2628,
+      "step": 184000
+    },
+    {
+      "epoch": 1.1808,
+      "grad_norm": 0.5285276770591736,
+      "learning_rate": 2.049072e-05,
+      "loss": 0.2629,
+      "step": 184500
+    },
+    {
+      "epoch": 1.184,
+      "grad_norm": 0.4634397625923157,
+      "learning_rate": 2.041072e-05,
+      "loss": 0.2615,
+      "step": 185000
+    },
+    {
+      "epoch": 1.1872,
+      "grad_norm": 0.5693604946136475,
+      "learning_rate": 2.033072e-05,
+      "loss": 0.2619,
+      "step": 185500
     }
   ],
   "logging_steps": 500,
@@ -2581,7 +2623,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 8.890785202176e+17,
+  "total_flos": 9.0369350959104e+17,
   "train_batch_size": 64,
   "trial_name": null,
   "trial_params": null