ashanhr commited on
Commit
0450d52
1 Parent(s): b7b0455

Training in progress, step 50500, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:720bc977d62e7816011389161c4eeae7e71a5e15fa6414fc84828a3a030a05e6
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16c8e88b32eee615dcdd83273bd0c194d60a246661903076fda75220e2d10c57
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f5d803a6ecbd25b4508f6830cb0d354f3164cf36de87bc4c942336f1d723990
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edd6f8a0eb8dcf9ba10c41e341f88389e7c3db368a255563153dee54ccc680c3
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:941eea433be3c1f9adb0f47751c327703c05efe14921d20365ff7c6534eecd09
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d8741d79b5be03765e57061f191fb50d4ed43fb5803cf2b764313f8dcdd7e48
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cbe5fab90a476cca692ca727115b096b7062ef579d4f32a4ad2921bb938c28b
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:382ea96905c8f14235ed215a03e93292273bd28d66f86e796f318632bcd7335e
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:08a28e7b27d1cecd73113f4062cdd78f7500a461c5bdb8769059e4729aaa72d3
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f01039206f45f4a9f82763ac9b1c4be8f2cba26114400c57368ed9f798a154b
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 21.17448958114081,
5
  "eval_steps": 100,
6
- "global_step": 50300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -8055,6 +8055,38 @@
8055
  "eval_samples_per_second": 24.292,
8056
  "eval_steps_per_second": 3.037,
8057
  "step": 50300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8058
  }
8059
  ],
8060
  "logging_steps": 100,
@@ -8062,7 +8094,7 @@
8062
  "num_input_tokens_seen": 0,
8063
  "num_train_epochs": 30,
8064
  "save_steps": 100,
8065
- "total_flos": 5.5126604494545053e+20,
8066
  "train_batch_size": 8,
8067
  "trial_name": null,
8068
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 21.258682382656282,
5
  "eval_steps": 100,
6
+ "global_step": 50500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
8055
  "eval_samples_per_second": 24.292,
8056
  "eval_steps_per_second": 3.037,
8057
  "step": 50300
8058
+ },
8059
+ {
8060
+ "epoch": 21.22,
8061
+ "grad_norm": 4.574779987335205,
8062
+ "learning_rate": 1.475123674911661e-05,
8063
+ "loss": 0.3408,
8064
+ "step": 50400
8065
+ },
8066
+ {
8067
+ "epoch": 21.22,
8068
+ "eval_cer": 0.34143414414766726,
8069
+ "eval_loss": 2.403733491897583,
8070
+ "eval_runtime": 415.1727,
8071
+ "eval_samples_per_second": 22.829,
8072
+ "eval_steps_per_second": 2.854,
8073
+ "step": 50400
8074
+ },
8075
+ {
8076
+ "epoch": 21.26,
8077
+ "grad_norm": 1.5709911584854126,
8078
+ "learning_rate": 1.4680565371024735e-05,
8079
+ "loss": 0.3363,
8080
+ "step": 50500
8081
+ },
8082
+ {
8083
+ "epoch": 21.26,
8084
+ "eval_cer": 0.3419229791560752,
8085
+ "eval_loss": 2.4089136123657227,
8086
+ "eval_runtime": 388.419,
8087
+ "eval_samples_per_second": 24.401,
8088
+ "eval_steps_per_second": 3.051,
8089
+ "step": 50500
8090
  }
8091
  ],
8092
  "logging_steps": 100,
 
8094
  "num_input_tokens_seen": 0,
8095
  "num_train_epochs": 30,
8096
  "save_steps": 100,
8097
+ "total_flos": 5.5342994893395676e+20,
8098
  "train_batch_size": 8,
8099
  "trial_name": null,
8100
  "trial_params": null