ashanhr commited on
Commit
b210090
·
verified ·
1 Parent(s): b44ee1e

Training in progress, step 49700, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f4e034f2e43c0fca95585a1c74b8ba288002aeef4e94f7f869a1f6d24f8b7d8
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1cbd3e55b0179026181d720a477011010ff856f7a126f268ef27f3561d2d134
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a4f8cab60168f6324315199235ff245062dddd3799c0d85af85527ecee3fe8a
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e3666578dccf6f10399ae741f901f6bff10f04f1d3e32956185152c183581c5
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94ac800ef4ed1b5dbe2e129011c90053617c45f04b68159ae45c8bd542d485ca
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22dc109d90b5af6d6ac2a29d4413cf79d645981920f0b93701c9be3341fda743
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a109c2513b60d30766caef58a6a250c617ed41bd13414a63f31b8ac63646d1d0
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02990edbfb0b0fa25d6956ac586de6c5fa7a3f14caf505d91e0c9bba2fc45bab
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f46b18b91d6715a6101674870a673d34999d2c470954bab9b6ef70b9b1b09992
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4bde301d33ddb7b9f708b0f2b4df433fe10f623f71b5b1ed245e09a9c0210de
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 20.837718375078932,
5
  "eval_steps": 100,
6
- "global_step": 49500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7927,6 +7927,38 @@
7927
  "eval_samples_per_second": 24.429,
7928
  "eval_steps_per_second": 3.054,
7929
  "step": 49500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7930
  }
7931
  ],
7932
  "logging_steps": 100,
@@ -7934,7 +7966,7 @@
7934
  "num_input_tokens_seen": 0,
7935
  "num_train_epochs": 30,
7936
  "save_steps": 100,
7937
- "total_flos": 5.423486958333301e+20,
7938
  "train_batch_size": 8,
7939
  "trial_name": null,
7940
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 20.921911176594403,
5
  "eval_steps": 100,
6
+ "global_step": 49700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7927
  "eval_samples_per_second": 24.429,
7928
  "eval_steps_per_second": 3.054,
7929
  "step": 49500
7930
+ },
7931
+ {
7932
+ "epoch": 20.88,
7933
+ "grad_norm": 2.1260199546813965,
7934
+ "learning_rate": 1.5316607773851592e-05,
7935
+ "loss": 0.4222,
7936
+ "step": 49600
7937
+ },
7938
+ {
7939
+ "epoch": 20.88,
7940
+ "eval_cer": 0.3436339016855031,
7941
+ "eval_loss": 2.4059560298919678,
7942
+ "eval_runtime": 412.8035,
7943
+ "eval_samples_per_second": 22.96,
7944
+ "eval_steps_per_second": 2.871,
7945
+ "step": 49600
7946
+ },
7947
+ {
7948
+ "epoch": 20.92,
7949
+ "grad_norm": 1.9174062013626099,
7950
+ "learning_rate": 1.5245936395759718e-05,
7951
+ "loss": 0.4093,
7952
+ "step": 49700
7953
+ },
7954
+ {
7955
+ "epoch": 20.92,
7956
+ "eval_cer": 0.3437194478119745,
7957
+ "eval_loss": 2.5948760509490967,
7958
+ "eval_runtime": 390.0559,
7959
+ "eval_samples_per_second": 24.299,
7960
+ "eval_steps_per_second": 3.038,
7961
+ "step": 49700
7962
  }
7963
  ],
7964
  "logging_steps": 100,
 
7966
  "num_input_tokens_seen": 0,
7967
  "num_train_epochs": 30,
7968
  "save_steps": 100,
7969
+ "total_flos": 5.4456441634239906e+20,
7970
  "train_batch_size": 8,
7971
  "trial_name": null,
7972
  "trial_params": null