ashanhr commited on
Commit
7008d8f
·
verified ·
1 Parent(s): 1b9068c

Training in progress, step 53300, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:633d033e0feaf0bf4322349a4c2c21f2d9cac42e95486f6edc465534c3e94d22
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00ef485d074352aca5556b0497b480b64f5a597428f78f4530b27b6743398719
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f396277c9eced74d208c1a0a4c9b4fc9e72369b2bd0928d8da6ef34b6dce864c
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43478402ec9b98f774cd2d2f4bfd0486130906c01dad84a75d6574a44e13c030
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba3188b916823703b8f1761ccc19c30690dec44a4be3f89bd70b12f8dabc77e8
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b1c06a4215a8a7ee627724ddb5dc01d00b82023b0098d64dc5a28a3f3f9a399
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db752bd188273ee158b3e145391265b4293483bb7f634f864afe75f2821bf9a1
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b75e5a022877b84c4104618a8d23ec7b90fdc7aadaa0cd12f67cfded1c78a671
3
+ size 14631
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed67a7cf08915cbe3b030bce4e92226fc3b82cf21f5936d3ab71491cd41ba3f4
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23380186d0e4c7eec5667700cc9af150fab32a4e15bb4b1323f659a444bfb4c0
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 22.353188802357398,
5
  "eval_steps": 100,
6
- "global_step": 53100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -8503,6 +8503,38 @@
8503
  "eval_samples_per_second": 25.328,
8504
  "eval_steps_per_second": 3.167,
8505
  "step": 53100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8506
  }
8507
  ],
8508
  "logging_steps": 100,
@@ -8510,7 +8542,7 @@
8510
  "num_input_tokens_seen": 0,
8511
  "num_train_epochs": 30,
8512
  "save_steps": 100,
8513
- "total_flos": 5.818817544770568e+20,
8514
  "train_batch_size": 8,
8515
  "trial_name": null,
8516
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 22.43738160387287,
5
  "eval_steps": 100,
6
+ "global_step": 53300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
8503
  "eval_samples_per_second": 25.328,
8504
  "eval_steps_per_second": 3.167,
8505
  "step": 53100
8506
+ },
8507
+ {
8508
+ "epoch": 22.4,
8509
+ "grad_norm": 1.3863327503204346,
8510
+ "learning_rate": 1.2773144876325088e-05,
8511
+ "loss": 0.2997,
8512
+ "step": 53200
8513
+ },
8514
+ {
8515
+ "epoch": 22.4,
8516
+ "eval_cer": 0.33839359039536976,
8517
+ "eval_loss": 2.4660918712615967,
8518
+ "eval_runtime": 423.5986,
8519
+ "eval_samples_per_second": 22.375,
8520
+ "eval_steps_per_second": 2.797,
8521
+ "step": 53200
8522
+ },
8523
+ {
8524
+ "epoch": 22.44,
8525
+ "grad_norm": 1.9845781326293945,
8526
+ "learning_rate": 1.2702473498233216e-05,
8527
+ "loss": 0.2999,
8528
+ "step": 53300
8529
+ },
8530
+ {
8531
+ "epoch": 22.44,
8532
+ "eval_cer": 0.3362524930585429,
8533
+ "eval_loss": 2.4160196781158447,
8534
+ "eval_runtime": 387.3168,
8535
+ "eval_samples_per_second": 24.471,
8536
+ "eval_steps_per_second": 3.06,
8537
+ "step": 53300
8538
  }
8539
  ],
8540
  "logging_steps": 100,
 
8542
  "num_input_tokens_seen": 0,
8543
  "num_train_epochs": 30,
8544
  "save_steps": 100,
8545
+ "total_flos": 5.840653170107694e+20,
8546
  "train_batch_size": 8,
8547
  "trial_name": null,
8548
  "trial_params": null