ashanhr commited on
Commit
e7dbcc5
1 Parent(s): a04036f

Training in progress, step 19200, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b96997d86855d69f2ad5126d93fcaa54d3bb37195a13dc04ebef709b51c988a
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f044f8baed15a299bbbeca19e6da1b66abd02e56213b73dd0cd66de71200f6d7
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2dcc8a135a59102ea5c9d0c77b48315f089af1d4a5eded0462368e4cf5d7d152
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b0931f5bdd0a0ad3c0b44f18bac8516747577559b5999e424f7825b4b498e1d
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3771dca6c0b151af7b4eb4d18b275f53f8d0c4b7179fb5a0fd76724d6b7cf4c1
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7eea6c010049ede732e35337dc92f9c4a9f201fd86647564100ea8b70da22657
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0328bf636a13aba797b6ceefa1ee4d2a305b1300590ae78b037aaff65884a506
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2a35b249b98686868e501117a28d1ffb6fed2a0d74e305b00d878f548181e83
3
+ size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c89953ed9353ba4a200c5e4beeaf9f9e987021f05c25ac9cfcda9cf4c598560
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49b241842c803d56347030233a5319efc06cd25ba775995df97e083f47b84391
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.99831614396969,
5
  "eval_steps": 100,
6
- "global_step": 19000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3047,6 +3047,38 @@
3047
  "eval_samples_per_second": 26.063,
3048
  "eval_steps_per_second": 3.259,
3049
  "step": 19000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3050
  }
3051
  ],
3052
  "logging_steps": 100,
@@ -3054,7 +3086,7 @@
3054
  "num_input_tokens_seen": 0,
3055
  "num_train_epochs": 30,
3056
  "save_steps": 100,
3057
- "total_flos": 2.0812889749495287e+20,
3058
  "train_batch_size": 8,
3059
  "trial_name": null,
3060
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.082508945485161,
5
  "eval_steps": 100,
6
+ "global_step": 19200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3047
  "eval_samples_per_second": 26.063,
3048
  "eval_steps_per_second": 3.259,
3049
  "step": 19000
3050
+ },
3051
+ {
3052
+ "epoch": 8.04,
3053
+ "grad_norm": 3.4124321937561035,
3054
+ "learning_rate": 3.686360424028269e-05,
3055
+ "loss": 1.2656,
3056
+ "step": 19100
3057
+ },
3058
+ {
3059
+ "epoch": 8.04,
3060
+ "eval_cer": 0.4476115521489187,
3061
+ "eval_loss": 2.0440304279327393,
3062
+ "eval_runtime": 381.9207,
3063
+ "eval_samples_per_second": 24.817,
3064
+ "eval_steps_per_second": 3.103,
3065
+ "step": 19100
3066
+ },
3067
+ {
3068
+ "epoch": 8.08,
3069
+ "grad_norm": 4.451693058013916,
3070
+ "learning_rate": 3.679293286219082e-05,
3071
+ "loss": 1.2961,
3072
+ "step": 19200
3073
+ },
3074
+ {
3075
+ "epoch": 8.08,
3076
+ "eval_cer": 0.44471520472410153,
3077
+ "eval_loss": 1.5803910493850708,
3078
+ "eval_runtime": 361.691,
3079
+ "eval_samples_per_second": 26.205,
3080
+ "eval_steps_per_second": 3.276,
3081
+ "step": 19200
3082
  }
3083
  ],
3084
  "logging_steps": 100,
 
3086
  "num_input_tokens_seen": 0,
3087
  "num_train_epochs": 30,
3088
  "save_steps": 100,
3089
+ "total_flos": 2.1042862694965802e+20,
3090
  "train_batch_size": 8,
3091
  "trial_name": null,
3092
  "trial_params": null