ashanhr committed on
Commit
b5f6529
1 Parent(s): 9c50d7b

Training in progress, step 57100, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb2c1b8e9be3ed4e5e58861e55bb2300fe594045ed5ac8701cb863f8c64b3b59
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e346f6031cce705b7a51a894f6f6378890db177912b7be7998e56d05733b2858
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a227592c640ebf19876714d2dee94144050fa04295abb4b60c4591e30937ca39
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c1028245122ba4f0c3074dff84fb7696439ffd310961493c9af148d1751f0d0
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:497ed74a4b175aaa80b7d9a67f8e0c9244db6631882ee06a6b637d0107236884
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1480bf7de8a85342639e631f5104ad44bcd60ccdd626b0e1def4e6b1302d49f4
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be286c7fb1c7bc11fa342e9bfd940e27a4b41b2d41eb275264375a0cad4252cb
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2354b0cf33dcdb8cc3fac9417682e1384f8242d5c4698533d6cd32a821a72d6
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff5227b1a7ff1352d31f4f09975564547391e6e8a0c885a0c20e698eab002a80
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4c15ce344ebb0358d0e2efa0d9ff824c7520b3efdb90c37d82fe6283bcb7100
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 23.952852031151338,
5
  "eval_steps": 100,
6
- "global_step": 56900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -9111,6 +9111,38 @@
9111
  "eval_samples_per_second": 23.654,
9112
  "eval_steps_per_second": 2.957,
9113
  "step": 56900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9114
  }
9115
  ],
9116
  "logging_steps": 100,
@@ -9118,7 +9150,7 @@
9118
  "num_input_tokens_seen": 0,
9119
  "num_train_epochs": 30,
9120
  "save_steps": 100,
9121
- "total_flos": 6.23499754043143e+20,
9122
  "train_batch_size": 8,
9123
  "trial_name": null,
9124
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 24.03704483266681,
5
  "eval_steps": 100,
6
+ "global_step": 57100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
9111
  "eval_samples_per_second": 23.654,
9112
  "eval_steps_per_second": 2.957,
9113
  "step": 56900
9114
+ },
9115
+ {
9116
+ "epoch": 23.99,
9117
+ "grad_norm": 1.9022639989852905,
9118
+ "learning_rate": 1.0087632508833924e-05,
9119
+ "loss": 0.2657,
9120
+ "step": 57000
9121
+ },
9122
+ {
9123
+ "epoch": 23.99,
9124
+ "eval_cer": 0.33296507762699934,
9125
+ "eval_loss": 2.5121145248413086,
9126
+ "eval_runtime": 423.803,
9127
+ "eval_samples_per_second": 22.364,
9128
+ "eval_steps_per_second": 2.796,
9129
+ "step": 57000
9130
+ },
9131
+ {
9132
+ "epoch": 24.04,
9133
+ "grad_norm": 1.854609727859497,
9134
+ "learning_rate": 1.001696113074205e-05,
9135
+ "loss": 0.2253,
9136
+ "step": 57100
9137
+ },
9138
+ {
9139
+ "epoch": 24.04,
9140
+ "eval_cer": 0.3338425364670916,
9141
+ "eval_loss": 2.654775381088257,
9142
+ "eval_runtime": 395.2094,
9143
+ "eval_samples_per_second": 23.982,
9144
+ "eval_steps_per_second": 2.998,
9145
+ "step": 57100
9146
  }
9147
  ],
9148
  "logging_steps": 100,
 
9150
  "num_input_tokens_seen": 0,
9151
  "num_train_epochs": 30,
9152
  "save_steps": 100,
9153
+ "total_flos": 6.257522860637754e+20,
9154
  "train_batch_size": 8,
9155
  "trial_name": null,
9156
  "trial_params": null