ashanhr committed on
Commit
0e0f861
1 Parent(s): 7f68b15

Training in progress, step 19400, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f044f8baed15a299bbbeca19e6da1b66abd02e56213b73dd0cd66de71200f6d7
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39536b07857e96674098336d821ff35749a8ecafb4e064cb918e9fbf3d7ba9c4
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b0931f5bdd0a0ad3c0b44f18bac8516747577559b5999e424f7825b4b498e1d
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d85220a081ebf7ba5f24dcf36d455607c800e07075731fc0438ed7d721838b8
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7eea6c010049ede732e35337dc92f9c4a9f201fd86647564100ea8b70da22657
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2feee5f579bc8ff01696c6cde6f4789c3b844b14e975ef180da8d549e0fe33b
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2a35b249b98686868e501117a28d1ffb6fed2a0d74e305b00d878f548181e83
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b560c73f8c11664695f9a90d9d927dbe3811353bb8caf7abe8872e3b37b7585
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49b241842c803d56347030233a5319efc06cd25ba775995df97e083f47b84391
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ee31d8b929c9d8d36d66b3500c5c4c93de524b8757148ff9ad30057ceaeb53d
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.082508945485161,
5
  "eval_steps": 100,
6
- "global_step": 19200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3079,6 +3079,38 @@
3079
  "eval_samples_per_second": 26.205,
3080
  "eval_steps_per_second": 3.276,
3081
  "step": 19200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3082
  }
3083
  ],
3084
  "logging_steps": 100,
@@ -3086,7 +3118,7 @@
3086
  "num_input_tokens_seen": 0,
3087
  "num_train_epochs": 30,
3088
  "save_steps": 100,
3089
- "total_flos": 2.1042862694965802e+20,
3090
  "train_batch_size": 8,
3091
  "trial_name": null,
3092
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.166701747000632,
5
  "eval_steps": 100,
6
+ "global_step": 19400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3079
  "eval_samples_per_second": 26.205,
3080
  "eval_steps_per_second": 3.276,
3081
  "step": 19200
3082
+ },
3083
+ {
3084
+ "epoch": 8.12,
3085
+ "grad_norm": 2.6323907375335693,
3086
+ "learning_rate": 3.672226148409894e-05,
3087
+ "loss": 1.2874,
3088
+ "step": 19300
3089
+ },
3090
+ {
3091
+ "epoch": 8.12,
3092
+ "eval_cer": 0.4615849008642603,
3093
+ "eval_loss": 1.754168152809143,
3094
+ "eval_runtime": 385.1298,
3095
+ "eval_samples_per_second": 24.61,
3096
+ "eval_steps_per_second": 3.077,
3097
+ "step": 19300
3098
+ },
3099
+ {
3100
+ "epoch": 8.17,
3101
+ "grad_norm": 2.5475683212280273,
3102
+ "learning_rate": 3.665159010600707e-05,
3103
+ "loss": 1.5227,
3104
+ "step": 19400
3105
+ },
3106
+ {
3107
+ "epoch": 8.17,
3108
+ "eval_cer": 0.45406906260998786,
3109
+ "eval_loss": 1.954167127609253,
3110
+ "eval_runtime": 364.6983,
3111
+ "eval_samples_per_second": 25.989,
3112
+ "eval_steps_per_second": 3.249,
3113
+ "step": 19400
3114
  }
3115
  ],
3116
  "logging_steps": 100,
 
3118
  "num_input_tokens_seen": 0,
3119
  "num_train_epochs": 30,
3120
  "save_steps": 100,
3121
+ "total_flos": 2.1255543863355028e+20,
3122
  "train_batch_size": 8,
3123
  "trial_name": null,
3124
  "trial_params": null