ashanhr commited on
Commit
d99fea2
1 Parent(s): a68fb13

Training in progress, step 800, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:653ef63911d6161b88b3f9f32df8efdb866ecf55d97fc3febdbed2c5450a3c6e
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87323aa246e5525c967b36e0a7119ff06b3213e7668c1f5846d8420c500de1a2
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e64cd2b2c62bdc327ea06a22049bd82a06e5ae8faac9465953e7b68978aad26
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d68b084be2ded48e2587a63e4b50b52268b9c95e406eb01dbcf32f79501fadf3
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67fcec210fd34c6bf199fbb322d43b9953bbb316718492511d8d57134ad24c1a
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2eebd57b0d4504fb08b3859d2e5922af006f7b80e276d54f3a75630a03f57cc0
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3be477733fd09e276311bb65c01459e6a102c492ba2de21b23224cea823214d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:573e592efc8bd12844dd0aa8efeae01c2194994d68d8419691db30231224e469
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2039042aec5b0843d31e4b45065f8954fece58ce2d7d372ebe3c6462b2a5b07c
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8cf4161a8d89b4d12d1aa80ad587062e7bdf9b94508fc5ae11c0dc8e85a1cabf
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2525784045464113,
5
  "eval_steps": 100,
6
- "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -103,6 +103,38 @@
103
  "eval_samples_per_second": 27.426,
104
  "eval_steps_per_second": 3.429,
105
  "step": 600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  }
107
  ],
108
  "logging_steps": 100,
@@ -110,7 +142,7 @@
110
  "num_input_tokens_seen": 0,
111
  "num_train_epochs": 30,
112
  "save_steps": 100,
113
- "total_flos": 6.597028028856881e+18,
114
  "train_batch_size": 8,
115
  "trial_name": null,
116
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.3367712060618817,
5
  "eval_steps": 100,
6
+ "global_step": 800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
103
  "eval_samples_per_second": 27.426,
104
  "eval_steps_per_second": 3.429,
105
  "step": 600
106
+ },
107
+ {
108
+ "epoch": 0.29,
109
+ "grad_norm": 4.292990207672119,
110
+ "learning_rate": 4.986289752650177e-05,
111
+ "loss": 4.1075,
112
+ "step": 700
113
+ },
114
+ {
115
+ "epoch": 0.29,
116
+ "eval_cer": 0.6181098705564898,
117
+ "eval_loss": 3.3849761486053467,
118
+ "eval_runtime": 353.3198,
119
+ "eval_samples_per_second": 26.826,
120
+ "eval_steps_per_second": 3.354,
121
+ "step": 700
122
+ },
123
+ {
124
+ "epoch": 0.34,
125
+ "grad_norm": 4.32611083984375,
126
+ "learning_rate": 4.97922261484099e-05,
127
+ "loss": 2.6907,
128
+ "step": 800
129
+ },
130
+ {
131
+ "epoch": 0.34,
132
+ "eval_cer": 0.6191510891243988,
133
+ "eval_loss": 2.5412750244140625,
134
+ "eval_runtime": 343.4967,
135
+ "eval_samples_per_second": 27.593,
136
+ "eval_steps_per_second": 3.45,
137
+ "step": 800
138
  }
139
  ],
140
  "logging_steps": 100,
 
142
  "num_input_tokens_seen": 0,
143
  "num_train_epochs": 30,
144
  "save_steps": 100,
145
+ "total_flos": 8.803126663244048e+18,
146
  "train_batch_size": 8,
147
  "trial_name": null,
148
  "trial_params": null