ashanhr committed on
Commit
b0c18c7
·
verified ·
1 Parent(s): b2620f7

Training in progress, step 9500, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e53d952088e21ee7c18f6fee4b8d7769b753af22ca10b63a69e3f8de2b65b0be
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a32dc99462660290df84c8528dc9eb39190be3d3c57683e7877e90642c878438
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76dd5b4a8826aefc4d28f9aba3108c5d9d8a9bb5ddbd6054bc88418f54521ed3
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49f127aa66da05aa9808cb65c326d54eb1f62011422d2b38dbc32ab973d98742
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5bb833d4bc9327bd0e01ea33487a3bf19e25b2de7bacf92eb09217f6b4297dc2
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0521f879a75eea1399e7ebb7725746dc98364d1c9d3582f6a0a4284d965d9d7c
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7b3c74ecbda77c6abb056be85c9b612f31948352a984f5f4ba2d90f864dc773
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf6988d2899e29ce3c7ea0ad046e917da0407c7d088f3a2102b64354f3b328c9
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:336d1b7b025618b96c60c17b8c9303a9e5e4f82eb8a6c5c17e6b7e590bbf1488
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e0200d46d39b183c05be433e7301e4dbe08836ad4684cdecd5d09269aef39dd
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.914965270469375,
5
  "eval_steps": 100,
6
- "global_step": 9300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1495,6 +1495,38 @@
1495
  "eval_samples_per_second": 26.275,
1496
  "eval_steps_per_second": 3.285,
1497
  "step": 9300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1498
  }
1499
  ],
1500
  "logging_steps": 100,
@@ -1502,7 +1534,7 @@
1502
  "num_input_tokens_seen": 0,
1503
  "num_train_epochs": 30,
1504
  "save_steps": 100,
1505
- "total_flos": 1.0193037614760552e+20,
1506
  "train_batch_size": 8,
1507
  "trial_name": null,
1508
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.999158071984845,
5
  "eval_steps": 100,
6
+ "global_step": 9500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1495
  "eval_samples_per_second": 26.275,
1496
  "eval_steps_per_second": 3.285,
1497
  "step": 9300
1498
+ },
1499
+ {
1500
+ "epoch": 3.96,
1501
+ "grad_norm": 2.788163661956787,
1502
+ "learning_rate": 4.371731448763251e-05,
1503
+ "loss": 2.4278,
1504
+ "step": 9400
1505
+ },
1506
+ {
1507
+ "epoch": 3.96,
1508
+ "eval_cer": 0.5205579562785968,
1509
+ "eval_loss": 2.4102938175201416,
1510
+ "eval_runtime": 376.1786,
1511
+ "eval_samples_per_second": 25.195,
1512
+ "eval_steps_per_second": 3.15,
1513
+ "step": 9400
1514
+ },
1515
+ {
1516
+ "epoch": 4.0,
1517
+ "grad_norm": 2.6063647270202637,
1518
+ "learning_rate": 4.364664310954063e-05,
1519
+ "loss": 1.5116,
1520
+ "step": 9500
1521
+ },
1522
+ {
1523
+ "epoch": 4.0,
1524
+ "eval_cer": 0.4881310859958547,
1525
+ "eval_loss": 1.728480339050293,
1526
+ "eval_runtime": 352.8942,
1527
+ "eval_samples_per_second": 26.858,
1528
+ "eval_steps_per_second": 3.358,
1529
+ "step": 9500
1530
  }
1531
  ],
1532
  "logging_steps": 100,
 
1534
  "num_input_tokens_seen": 0,
1535
  "num_train_epochs": 30,
1536
  "save_steps": 100,
1537
+ "total_flos": 1.0405499901125573e+20,
1538
  "train_batch_size": 8,
1539
  "trial_name": null,
1540
  "trial_params": null