ashanhr commited on
Commit
b85cb8e
1 Parent(s): f9c1a87

Training in progress, step 50700, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:16c8e88b32eee615dcdd83273bd0c194d60a246661903076fda75220e2d10c57
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b47f775b647eb4ca79619e9a3f63d2ec8dc0fd2fb449358850abac10534266b
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:edd6f8a0eb8dcf9ba10c41e341f88389e7c3db368a255563153dee54ccc680c3
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:699fbd5992054d81409f0835427766bd93ed83628a4cf3ec9a80f7838edeab17
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d8741d79b5be03765e57061f191fb50d4ed43fb5803cf2b764313f8dcdd7e48
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60be0080dbfcb7f1f5db48bba6f03245ce57196433362cbc3bc3c410c888a3c1
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:382ea96905c8f14235ed215a03e93292273bd28d66f86e796f318632bcd7335e
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef8a6e3b5130077a0b6901be264f4bb7d44393ff9a0c743c3f92a90852087d6a
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f01039206f45f4a9f82763ac9b1c4be8f2cba26114400c57368ed9f798a154b
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbb95b52b6618ac8420e23f26e7f97bbeee8449df4c10989dedf902ebfd9c011
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 21.258682382656282,
5
  "eval_steps": 100,
6
- "global_step": 50500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -8087,6 +8087,38 @@
8087
  "eval_samples_per_second": 24.401,
8088
  "eval_steps_per_second": 3.051,
8089
  "step": 50500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8090
  }
8091
  ],
8092
  "logging_steps": 100,
@@ -8094,7 +8126,7 @@
8094
  "num_input_tokens_seen": 0,
8095
  "num_train_epochs": 30,
8096
  "save_steps": 100,
8097
- "total_flos": 5.5342994893395676e+20,
8098
  "train_batch_size": 8,
8099
  "trial_name": null,
8100
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 21.342875184171753,
5
  "eval_steps": 100,
6
+ "global_step": 50700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
8087
  "eval_samples_per_second": 24.401,
8088
  "eval_steps_per_second": 3.051,
8089
  "step": 50500
8090
+ },
8091
+ {
8092
+ "epoch": 21.3,
8093
+ "grad_norm": 4.430927276611328,
8094
+ "learning_rate": 1.4609893992932863e-05,
8095
+ "loss": 0.3461,
8096
+ "step": 50600
8097
+ },
8098
+ {
8099
+ "epoch": 21.3,
8100
+ "eval_cer": 0.34185698642994017,
8101
+ "eval_loss": 2.8909881114959717,
8102
+ "eval_runtime": 408.6465,
8103
+ "eval_samples_per_second": 23.194,
8104
+ "eval_steps_per_second": 2.9,
8105
+ "step": 50600
8106
+ },
8107
+ {
8108
+ "epoch": 21.34,
8109
+ "grad_norm": 1.9088200330734253,
8110
+ "learning_rate": 1.4539222614840989e-05,
8111
+ "loss": 0.3561,
8112
+ "step": 50700
8113
+ },
8114
+ {
8115
+ "epoch": 21.34,
8116
+ "eval_cer": 0.3429764185991944,
8117
+ "eval_loss": 2.6278483867645264,
8118
+ "eval_runtime": 388.0807,
8119
+ "eval_samples_per_second": 24.423,
8120
+ "eval_steps_per_second": 3.053,
8121
+ "step": 50700
8122
  }
8123
  ],
8124
  "logging_steps": 100,
 
8126
  "num_input_tokens_seen": 0,
8127
  "num_train_epochs": 30,
8128
  "save_steps": 100,
8129
+ "total_flos": 5.5559876766241174e+20,
8130
  "train_batch_size": 8,
8131
  "trial_name": null,
8132
  "trial_params": null