ashanhr commited on
Commit
26237c7
1 Parent(s): 0e36664

Training in progress, step 200, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:564ff67db075f6552f8b2c9a427a44f06a34dffd885fdde2d4f59225061e7873
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edeb2c8f7eecbdcec128794e2a75252eb73ef00ff592535cdc581201d826afcb
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:205a9f7c9d4b15d9f42153fb58960bfb1ca1084d97709fe48e90d70a2ae017ab
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4ac4ea9be9dc97b85a293b5fde9854b242b8f071f30fe139f2877ce93fa0174
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0ad49f52d9f578223ab0238e4f6113866b317952c8819c9dcf87275a7a25c8a3
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d949e4fc1be92074318da36259c5ce283bfc3b631cdac55ce6accd8d87e37fcc
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:242af9146abe205408dcebe7e536c015b00e59f86773a85ad3eb899c31bcfafe
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cdc9cb49aead1621d065ebb43afb5447a4c9fa73cdd62cb920c7fc987a11d0c
3
+ size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:701509704c605855197919f690db06b37c5a699db2539613f95d2b79e97bf5bf
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa72b3b62c432a08ebf14ac0f3e08674ac6c59a03ef18d8c1b246cdb82948a7a
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,28 +1,44 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.042096400757735214,
5
  "eval_steps": 100,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.04,
13
- "grad_norm": 3.7763900756835938,
14
  "learning_rate": 9.5e-06,
15
- "loss": 6.5991,
16
  "step": 100
17
  },
18
  {
19
  "epoch": 0.04,
20
- "eval_cer": 0.9900155449532674,
21
- "eval_loss": 4.97747278213501,
22
- "eval_runtime": 412.4714,
23
- "eval_samples_per_second": 22.979,
24
- "eval_steps_per_second": 2.873,
25
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  }
27
  ],
28
  "logging_steps": 100,
@@ -30,7 +46,7 @@
30
  "num_input_tokens_seen": 0,
31
  "num_train_epochs": 30,
32
  "save_steps": 100,
33
- "total_flos": 1.1688413067862653e+18,
34
  "train_batch_size": 8,
35
  "trial_name": null,
36
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.08419280151547043,
5
  "eval_steps": 100,
6
+ "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.04,
13
+ "grad_norm": 2.6430506706237793,
14
  "learning_rate": 9.5e-06,
15
+ "loss": 5.6141,
16
  "step": 100
17
  },
18
  {
19
  "epoch": 0.04,
20
+ "eval_cer": 0.9770320871299519,
21
+ "eval_loss": 2.355087995529175,
22
+ "eval_runtime": 329.8418,
23
+ "eval_samples_per_second": 28.735,
24
+ "eval_steps_per_second": 3.593,
25
  "step": 100
26
+ },
27
+ {
28
+ "epoch": 0.08,
29
+ "grad_norm": 2.838773012161255,
30
+ "learning_rate": 1.9500000000000003e-05,
31
+ "loss": 2.1498,
32
+ "step": 200
33
+ },
34
+ {
35
+ "epoch": 0.08,
36
+ "eval_cer": 0.6385162879824802,
37
+ "eval_loss": 1.961960792541504,
38
+ "eval_runtime": 334.0577,
39
+ "eval_samples_per_second": 28.372,
40
+ "eval_steps_per_second": 3.547,
41
+ "step": 200
42
  }
43
  ],
44
  "logging_steps": 100,
 
46
  "num_input_tokens_seen": 0,
47
  "num_train_epochs": 30,
48
  "save_steps": 100,
49
+ "total_flos": 2.248613964510395e+18,
50
  "train_batch_size": 8,
51
  "trial_name": null,
52
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7efe3f4de73273496cd967ad97de26d01bf9654ed8c159b2dd7fd1dfc00e66f6
3
  size 4527
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f043c45f5e1539aec945837bdbb0f4a58df0721de023fb4d0f872211073770a
3
  size 4527