ashanhr commited on
Commit
ff56300
1 Parent(s): 4116b38

Training in progress, step 14300, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c687e1d18d37b98cc3685c02de3ed0c4572f0e8600102167b26d1c1780a6edaf
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5944066b223cfd00c404cf3a6499df6bf1156481f8e46db2244f54b83303cf94
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a83bc02a2443eeb53634fea4453c212f298686f5c1891b5c5cb4209798e8aaa5
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91b270efdfcaabe2c52fc6c456f6d62179665c124b6a42e9797a2f9dffe4b1c0
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e964c133b070afbed08f3ca32ca2670faf41bdc59e39b051dcf0fc33af0121a
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:702b99df31af42d1120ccb356b372e5108301018183cd956500f1cbd4ca79c15
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5416a7b3a6d3cb418f6138347bcbabc33a0662041a00f66de497a1c8ca331973
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ecbc500923ed650d6f32ad8c69af7efe7dabee68519bf7ac279027b01c64989
3
+ size 14631
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54e7a079f92a1bd85f80ccfb5ee98154d535f91421f908722bb451cef9b9d8b4
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:417d56dc8d90d6f81699627cbe27ea28c719a2a1f4ac895f35d5afcaf9387b09
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.935592506840665,
5
  "eval_steps": 100,
6
- "global_step": 14100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2263,6 +2263,38 @@
2263
  "eval_samples_per_second": 24.699,
2264
  "eval_steps_per_second": 3.088,
2265
  "step": 14100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2266
  }
2267
  ],
2268
  "logging_steps": 100,
@@ -2270,7 +2302,7 @@
2270
  "num_input_tokens_seen": 0,
2271
  "num_train_epochs": 30,
2272
  "save_steps": 100,
2273
- "total_flos": 1.5450260168151686e+20,
2274
  "train_batch_size": 8,
2275
  "trial_name": null,
2276
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.019785308356136,
5
  "eval_steps": 100,
6
+ "global_step": 14300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2263
  "eval_samples_per_second": 24.699,
2264
  "eval_steps_per_second": 3.088,
2265
  "step": 14100
2266
+ },
2267
+ {
2268
+ "epoch": 5.98,
2269
+ "grad_norm": 2.196765184402466,
2270
+ "learning_rate": 4.032579505300353e-05,
2271
+ "loss": 2.0249,
2272
+ "step": 14200
2273
+ },
2274
+ {
2275
+ "epoch": 5.98,
2276
+ "eval_cer": 0.4723050525986469,
2277
+ "eval_loss": 1.9896740913391113,
2278
+ "eval_runtime": 382.0154,
2279
+ "eval_samples_per_second": 24.811,
2280
+ "eval_steps_per_second": 3.102,
2281
+ "step": 14200
2282
+ },
2283
+ {
2284
+ "epoch": 6.02,
2285
+ "grad_norm": 2.4070873260498047,
2286
+ "learning_rate": 4.025512367491166e-05,
2287
+ "loss": 2.6354,
2288
+ "step": 14300
2289
+ },
2290
+ {
2291
+ "epoch": 6.02,
2292
+ "eval_cer": 0.46424660748504165,
2293
+ "eval_loss": 1.524404525756836,
2294
+ "eval_runtime": 361.2522,
2295
+ "eval_samples_per_second": 26.237,
2296
+ "eval_steps_per_second": 3.28,
2297
+ "step": 14300
2298
  }
2299
  ],
2300
  "logging_steps": 100,
 
2302
  "num_input_tokens_seen": 0,
2303
  "num_train_epochs": 30,
2304
  "save_steps": 100,
2305
+ "total_flos": 1.5667869779936743e+20,
2306
  "train_batch_size": 8,
2307
  "trial_name": null,
2308
  "trial_params": null