ashanhr commited on
Commit
7bd87f6
1 Parent(s): 5d8e529

Training in progress, step 56300, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ab4065c03764effc8670cef485e7172138b847944296ddc1d4ac63592bb57b3
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1932436298b8fb538e4cf80bed7163defd092b6328eb4bbc212e7fb308846d2
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18c6c84b58be65267870df3ab069b46c0ec0719004b480fb2a5a41f9b2c0d5b5
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc9f7d35cf87748eddf9bf58f61c5ba700e389c31489d78a290697c06c313346
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83b49b95053c311733c21b0f1a9bbdd3669c30e7fc3caf207b5862e66e32be6c
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ec88f79f8a5c3ad18d543cea65ddcfd2fe9aa517c23d778574b571f8236405d
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00cb33b6fce6dba04a33d05c1cb2d3d6d099ac011324c13a509e8848d1669b14
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a56a788fea959cad8157c6c6dc5342a179f5cfb4ced637d221d4c0e4c02d4c3c
3
+ size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1305d66ef8f2afa8554deae5dd51afcd829f2485cde50c618b4fe4df3ce6276
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b6a938385f789102516706256ee22f9b98b3e9613d4f67c8346e35dc285e21c
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 23.616080825089455,
5
  "eval_steps": 100,
6
- "global_step": 56100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -8983,6 +8983,38 @@
8983
  "eval_samples_per_second": 23.624,
8984
  "eval_steps_per_second": 2.954,
8985
  "step": 56100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8986
  }
8987
  ],
8988
  "logging_steps": 100,
@@ -8990,7 +9022,7 @@
8990
  "num_input_tokens_seen": 0,
8991
  "num_train_epochs": 30,
8992
  "save_steps": 100,
8993
- "total_flos": 6.14754805865386e+20,
8994
  "train_batch_size": 8,
8995
  "trial_name": null,
8996
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 23.700273626604925,
5
  "eval_steps": 100,
6
+ "global_step": 56300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
8983
  "eval_samples_per_second": 23.624,
8984
  "eval_steps_per_second": 2.954,
8985
  "step": 56100
8986
+ },
8987
+ {
8988
+ "epoch": 23.66,
8989
+ "grad_norm": 1.4341765642166138,
8990
+ "learning_rate": 1.0653003533568904e-05,
8991
+ "loss": 0.26,
8992
+ "step": 56200
8993
+ },
8994
+ {
8995
+ "epoch": 23.66,
8996
+ "eval_cer": 0.33526260216651677,
8997
+ "eval_loss": 2.6399149894714355,
8998
+ "eval_runtime": 433.4463,
8999
+ "eval_samples_per_second": 21.867,
9000
+ "eval_steps_per_second": 2.734,
9001
+ "step": 56200
9002
+ },
9003
+ {
9004
+ "epoch": 23.7,
9005
+ "grad_norm": 1.7979743480682373,
9006
+ "learning_rate": 1.0582332155477032e-05,
9007
+ "loss": 0.2688,
9008
+ "step": 56300
9009
+ },
9010
+ {
9011
+ "epoch": 23.7,
9012
+ "eval_cer": 0.3343387040006257,
9013
+ "eval_loss": 2.507894515991211,
9014
+ "eval_runtime": 402.3559,
9015
+ "eval_samples_per_second": 23.556,
9016
+ "eval_steps_per_second": 2.945,
9017
+ "step": 56300
9018
  }
9019
  ],
9020
  "logging_steps": 100,
 
9022
  "num_input_tokens_seen": 0,
9023
  "num_train_epochs": 30,
9024
  "save_steps": 100,
9025
+ "total_flos": 6.169297958480331e+20,
9026
  "train_batch_size": 8,
9027
  "trial_name": null,
9028
  "trial_params": null