mika5883 commited on
Commit
7593598
1 Parent(s): 5a8ddb3

Training in progress, step 36000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0931cfc0502a489316d773e7e3fbc87d1bf87d22980010d8e5debce1a389ab5a
3
  size 891644712
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94ce7d00b3b1ef166fe747fd7b6c23182e77237f5b57569ed0937385e73e4eb0
3
  size 891644712
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68488d7f6b13bf4345eed8b0bddb03bc654697dc1a0ce2e2c44855609a96ee79
3
  size 1783444357
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55b22c65c93511f0da0294e2300deedeb3c5ffabff2881e99b8673baaf86ca81
3
  size 1783444357
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2668009309a225b4528d1c2be158a46d8643edce60db33568885f84d9153d0f5
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9aeb790f34b48d9bf3b86bc7bfcbcd5e61233f6de9cf9a2d4634826b6f96d5f0
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c659c1c793579d80447241ae65c0f0bd61aec30b0f2e6c845263cb4857f12c85
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37bed98491ff3050ce572a36a899b02a6ec7498a5e4d6d63b97b9516bfd16419
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2208,
5
  "eval_steps": 500,
6
- "global_step": 34500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -490,6 +490,27 @@
490
  "learning_rate": 4.4481920000000007e-05,
491
  "loss": 0.3637,
492
  "step": 34500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
493
  }
494
  ],
495
  "logging_steps": 500,
@@ -509,7 +530,7 @@
509
  "attributes": {}
510
  }
511
  },
512
- "total_flos": 1.6807237779456e+17,
513
  "train_batch_size": 64,
514
  "trial_name": null,
515
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.2304,
5
  "eval_steps": 500,
6
+ "global_step": 36000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
490
  "learning_rate": 4.4481920000000007e-05,
491
  "loss": 0.3637,
492
  "step": 34500
493
+ },
494
+ {
495
+ "epoch": 0.224,
496
+ "grad_norm": 0.7771899700164795,
497
+ "learning_rate": 4.440192e-05,
498
+ "loss": 0.3648,
499
+ "step": 35000
500
+ },
501
+ {
502
+ "epoch": 0.2272,
503
+ "grad_norm": 0.6887528300285339,
504
+ "learning_rate": 4.432192e-05,
505
+ "loss": 0.3562,
506
+ "step": 35500
507
+ },
508
+ {
509
+ "epoch": 0.2304,
510
+ "grad_norm": 0.7471407055854797,
511
+ "learning_rate": 4.424192e-05,
512
+ "loss": 0.3639,
513
+ "step": 36000
514
  }
515
  ],
516
  "logging_steps": 500,
 
530
  "attributes": {}
531
  }
532
  },
533
+ "total_flos": 1.7537987248128e+17,
534
  "train_batch_size": 64,
535
  "trial_name": null,
536
  "trial_params": null