ashanhr committed on
Commit
a8aa7f8
1 Parent(s): 9526260

Training in progress, step 40800, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b03dc9e0469a4511d3960e8ccc32ec9be77fb87dc6998ca5230f5d24b80dd574
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75f6409a44328ccff19c64caff488439da9273da3c101449fa3123336a4468ea
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8ef085b7b6748391bb90ac62d5548dc82db571deca605fc3e13a91144e2ddcb
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dcbe4d068720daaaee2a8da1dbc2bbe9c056e9518e3461c7459f79006631080
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dfa7234bcf7f52f0002950ee4c95f584023eaebde1bd2b931543505530d4bf62
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33d5708694aaf8845bf750d7154f0271164aa685b76c098f6dd40029eeb5662f
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c2dc3633beab3a54043bc13682c039467daffe3d3f1de119b8bf092c5378995
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72f3d277caa97e791c619eba8f2b402ab01caedcd3e1225af2b74b9180ced546
3
+ size 14631
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e15e995c71854cd5ff2c0e03c1bb6ee2b6ba36e7be4d986833bffdd5d14ed7fc
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34c4ef4e4f4492564f8cc89617cc7f93e8c2f6998eb6eef00126c81220e404a6
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 17.091138707640496,
5
  "eval_steps": 100,
6
- "global_step": 40600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -6503,6 +6503,38 @@
6503
  "eval_samples_per_second": 25.091,
6504
  "eval_steps_per_second": 3.137,
6505
  "step": 40600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6506
  }
6507
  ],
6508
  "logging_steps": 100,
@@ -6510,7 +6542,7 @@
6510
  "num_input_tokens_seen": 0,
6511
  "num_train_epochs": 30,
6512
  "save_steps": 100,
6513
- "total_flos": 4.449729914752364e+20,
6514
  "train_batch_size": 8,
6515
  "trial_name": null,
6516
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 17.175331509155967,
5
  "eval_steps": 100,
6
+ "global_step": 40800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
6503
  "eval_samples_per_second": 25.091,
6504
  "eval_steps_per_second": 3.137,
6505
  "step": 40600
6506
+ },
6507
+ {
6508
+ "epoch": 17.13,
6509
+ "grad_norm": 7.590035915374756,
6510
+ "learning_rate": 2.1603533568904594e-05,
6511
+ "loss": 0.6376,
6512
+ "step": 40700
6513
+ },
6514
+ {
6515
+ "epoch": 17.13,
6516
+ "eval_cer": 0.36229762230651913,
6517
+ "eval_loss": 2.3001914024353027,
6518
+ "eval_runtime": 393.9708,
6519
+ "eval_samples_per_second": 24.058,
6520
+ "eval_steps_per_second": 3.008,
6521
+ "step": 40700
6522
+ },
6523
+ {
6524
+ "epoch": 17.18,
6525
+ "grad_norm": 3.8723325729370117,
6526
+ "learning_rate": 2.1532862190812723e-05,
6527
+ "loss": 0.6424,
6528
+ "step": 40800
6529
+ },
6530
+ {
6531
+ "epoch": 17.18,
6532
+ "eval_cer": 0.3627106878886238,
6533
+ "eval_loss": 1.9817591905593872,
6534
+ "eval_runtime": 380.3308,
6535
+ "eval_samples_per_second": 24.92,
6536
+ "eval_steps_per_second": 3.116,
6537
+ "step": 40800
6538
  }
6539
  ],
6540
  "logging_steps": 100,
 
6542
  "num_input_tokens_seen": 0,
6543
  "num_train_epochs": 30,
6544
  "save_steps": 100,
6545
+ "total_flos": 4.4722373414464927e+20,
6546
  "train_batch_size": 8,
6547
  "trial_name": null,
6548
  "trial_params": null