ashanhr committed on
Commit
79c64e2
·
verified ·
1 Parent(s): 0eef585

Training in progress, step 44600, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d06ff0f6944c977b24ffca97320d1e1b8f33f7982c7c70a085be08e7a0735b3
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ae988bcf1e80d64fc3c50daaef60bc1bad12fb87f548e09dc40aad3485bc941
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:550419b8cb66535f835e3a3f14342bc79d424335d3d639b4412a2a1dd5996b7f
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d962bf48fdb22f97a263b871d805534e31f88c562fea1fd3091e25ec9fe6fca5
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01ce359e18f7a90dc84095a6557aa54494e079fb7b5316a22a25e176669fe27f
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4f7df543c28f7b1d4e237537e55af3a1c5c2f2ab50783f01bbf2aff73580c05
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6d93266a87661049b9532cd402055d41d22bd07173c685af733fe4ed43cd48d2
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:341280b36e33bee4b43d726478793b13fc021522a0fb4ba6c0b4344e30e91f0e
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8a643c16cd3c86f0ac540d3ef1ce9abf102e6e8b34c059bbc0149468ac8d90c
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e165bf74ae366b7834cfc96ebc1c64c1b63d8ca28272d2793ae0769f087d7fdf
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 18.690801936434436,
5
  "eval_steps": 100,
6
- "global_step": 44400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7111,6 +7111,38 @@
7111
  "eval_samples_per_second": 24.603,
7112
  "eval_steps_per_second": 3.076,
7113
  "step": 44400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7114
  }
7115
  ],
7116
  "logging_steps": 100,
@@ -7118,7 +7150,7 @@
7118
  "num_input_tokens_seen": 0,
7119
  "num_train_epochs": 30,
7120
  "save_steps": 100,
7121
- "total_flos": 4.86506326346161e+20,
7122
  "train_batch_size": 8,
7123
  "trial_name": null,
7124
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 18.774994737949907,
5
  "eval_steps": 100,
6
+ "global_step": 44600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7111
  "eval_samples_per_second": 24.603,
7112
  "eval_steps_per_second": 3.076,
7113
  "step": 44400
7114
+ },
7115
+ {
7116
+ "epoch": 18.73,
7117
+ "grad_norm": 2.587009906768799,
7118
+ "learning_rate": 1.8919434628975266e-05,
7119
+ "loss": 0.5442,
7120
+ "step": 44500
7121
+ },
7122
+ {
7123
+ "epoch": 18.73,
7124
+ "eval_cer": 0.35546126471393374,
7125
+ "eval_loss": 2.185746192932129,
7126
+ "eval_runtime": 399.8878,
7127
+ "eval_samples_per_second": 23.702,
7128
+ "eval_steps_per_second": 2.963,
7129
+ "step": 44500
7130
+ },
7131
+ {
7132
+ "epoch": 18.77,
7133
+ "grad_norm": 2.142084836959839,
7134
+ "learning_rate": 1.8849469964664314e-05,
7135
+ "loss": 0.9587,
7136
+ "step": 44600
7137
+ },
7138
+ {
7139
+ "epoch": 18.77,
7140
+ "eval_cer": 0.353083082398029,
7141
+ "eval_loss": 2.363218307495117,
7142
+ "eval_runtime": 384.8343,
7143
+ "eval_samples_per_second": 24.629,
7144
+ "eval_steps_per_second": 3.079,
7145
+ "step": 44600
7146
  }
7147
  ],
7148
  "logging_steps": 100,
 
7150
  "num_input_tokens_seen": 0,
7151
  "num_train_epochs": 30,
7152
  "save_steps": 100,
7153
+ "total_flos": 4.8869923018175506e+20,
7154
  "train_batch_size": 8,
7155
  "trial_name": null,
7156
  "trial_params": null