ashanhr commited on
Commit
c6e1f0d
·
verified ·
1 Parent(s): 67dd0ff

Training in progress, step 14700, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5944066b223cfd00c404cf3a6499df6bf1156481f8e46db2244f54b83303cf94
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8576ccb82b21bcc2b8addf4916f2f16bf5fac1bf334cb43ac6adf88efbcf106
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:91b270efdfcaabe2c52fc6c456f6d62179665c124b6a42e9797a2f9dffe4b1c0
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:650672fda6e5ee3003c98bce94c3d3b2333fe3a6cbc8b34be4d9defa333723aa
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:702b99df31af42d1120ccb356b372e5108301018183cd956500f1cbd4ca79c15
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23908ca485378ebcbd5221e3f9a4dfc2825be11cae4030a2167b67baabd7bf90
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ecbc500923ed650d6f32ad8c69af7efe7dabee68519bf7ac279027b01c64989
3
- size 14631
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44e3479363f6845648909d8b217e2aa3767b3ef51cc2decc24f7fd96a7818af1
3
+ size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:417d56dc8d90d6f81699627cbe27ea28c719a2a1f4ac895f35d5afcaf9387b09
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73db81e910b9ea3c99b0a41dadeb749a85548b72d68b5ef751adf848d3f6ccc4
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.019785308356136,
5
  "eval_steps": 100,
6
- "global_step": 14300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2295,6 +2295,70 @@
2295
  "eval_samples_per_second": 26.237,
2296
  "eval_steps_per_second": 3.28,
2297
  "step": 14300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2298
  }
2299
  ],
2300
  "logging_steps": 100,
@@ -2302,7 +2366,7 @@
2302
  "num_input_tokens_seen": 0,
2303
  "num_train_epochs": 30,
2304
  "save_steps": 100,
2305
- "total_flos": 1.5667869779936743e+20,
2306
  "train_batch_size": 8,
2307
  "trial_name": null,
2308
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.188170911387076,
5
  "eval_steps": 100,
6
+ "global_step": 14700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2295
  "eval_samples_per_second": 26.237,
2296
  "eval_steps_per_second": 3.28,
2297
  "step": 14300
2298
+ },
2299
+ {
2300
+ "epoch": 6.06,
2301
+ "grad_norm": 2.6403534412384033,
2302
+ "learning_rate": 4.018445229681979e-05,
2303
+ "loss": 1.4007,
2304
+ "step": 14400
2305
+ },
2306
+ {
2307
+ "epoch": 6.06,
2308
+ "eval_cer": 0.4687561104376051,
2309
+ "eval_loss": 2.6799304485321045,
2310
+ "eval_runtime": 377.8259,
2311
+ "eval_samples_per_second": 25.086,
2312
+ "eval_steps_per_second": 3.136,
2313
+ "step": 14400
2314
+ },
2315
+ {
2316
+ "epoch": 6.1,
2317
+ "grad_norm": 16.431684494018555,
2318
+ "learning_rate": 4.011378091872792e-05,
2319
+ "loss": 1.3673,
2320
+ "step": 14500
2321
+ },
2322
+ {
2323
+ "epoch": 6.1,
2324
+ "eval_cer": 0.4627141097336827,
2325
+ "eval_loss": 2.739257335662842,
2326
+ "eval_runtime": 365.917,
2327
+ "eval_samples_per_second": 25.902,
2328
+ "eval_steps_per_second": 3.238,
2329
+ "step": 14500
2330
+ },
2331
+ {
2332
+ "epoch": 6.15,
2333
+ "grad_norm": 1.2545260190963745,
2334
+ "learning_rate": 4.0043109540636045e-05,
2335
+ "loss": 1.398,
2336
+ "step": 14600
2337
+ },
2338
+ {
2339
+ "epoch": 6.15,
2340
+ "eval_cer": 0.46276054905948144,
2341
+ "eval_loss": 2.623035430908203,
2342
+ "eval_runtime": 380.5058,
2343
+ "eval_samples_per_second": 24.909,
2344
+ "eval_steps_per_second": 3.114,
2345
+ "step": 14600
2346
+ },
2347
+ {
2348
+ "epoch": 6.19,
2349
+ "grad_norm": 4.757970333099365,
2350
+ "learning_rate": 3.9972438162544173e-05,
2351
+ "loss": 1.7177,
2352
+ "step": 14700
2353
+ },
2354
+ {
2355
+ "epoch": 6.19,
2356
+ "eval_cer": 0.4671600641351531,
2357
+ "eval_loss": 3.1201841831207275,
2358
+ "eval_runtime": 364.6269,
2359
+ "eval_samples_per_second": 25.994,
2360
+ "eval_steps_per_second": 3.25,
2361
+ "step": 14700
2362
  }
2363
  ],
2364
  "logging_steps": 100,
 
2366
  "num_input_tokens_seen": 0,
2367
  "num_train_epochs": 30,
2368
  "save_steps": 100,
2369
+ "total_flos": 1.6107102446089286e+20,
2370
  "train_batch_size": 8,
2371
  "trial_name": null,
2372
  "trial_params": null