ashanhr commited on
Commit
4ebbb64
·
verified ·
1 Parent(s): 77eae58

Training in progress, step 38100, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:020f93a158168205cca3915538c4ed9109b5ffc2b96fdeb0f1c4d79a07042c55
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad0e4d057edfb8f7f24491b899369f7b93228a9e17e1354d5f644333b4e1576d
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3befc5d87f20164d7ddccdca6c81786e192784cb6ebe0faa3bf6a1782ef9e1bc
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfe756275a5388124bc2228feeaaf7ef82b935e701eb728a6a11413b046929db
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e3dc33eb709a480424a714d2444abb7f1cdabf9f95720d6a97f58d1827cb80e
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d38f9132e2085616601917cca2b9dcaa50f30da1f10838a4c93930581540ab0
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc3db78812f5b1281be37cba1a96865a0e99afcb4f7c40d8dd207953ac5fefc5
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f87a00bf8137824c3bb0753019893bcf791dd44d13ce34dc225bf7f0963a1806
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bbc911d1c134d3a4e026e82aa7a427672312a8867cae577cfbbeccb41bd9874b
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbe3a461b22634f6c029dd4cb55ac1a5510596e6da69170fbde554202a3b4b05
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 15.870343085666175,
5
  "eval_steps": 100,
6
- "global_step": 37700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -6039,6 +6039,70 @@
6039
  "eval_samples_per_second": 25.393,
6040
  "eval_steps_per_second": 3.175,
6041
  "step": 37700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6042
  }
6043
  ],
6044
  "logging_steps": 100,
@@ -6046,7 +6110,7 @@
6046
  "num_input_tokens_seen": 0,
6047
  "num_train_epochs": 30,
6048
  "save_steps": 100,
6049
- "total_flos": 4.1315834714403255e+20,
6050
  "train_batch_size": 8,
6051
  "trial_name": null,
6052
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 16.038728688697116,
5
  "eval_steps": 100,
6
+ "global_step": 38100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
6039
  "eval_samples_per_second": 25.393,
6040
  "eval_steps_per_second": 3.175,
6041
  "step": 37700
6042
+ },
6043
+ {
6044
+ "epoch": 15.91,
6045
+ "grad_norm": 17.767349243164062,
6046
+ "learning_rate": 2.3652296819787986e-05,
6047
+ "loss": 0.8418,
6048
+ "step": 37800
6049
+ },
6050
+ {
6051
+ "epoch": 15.91,
6052
+ "eval_cer": 0.37160748504164876,
6053
+ "eval_loss": 1.7542308568954468,
6054
+ "eval_runtime": 389.6677,
6055
+ "eval_samples_per_second": 24.323,
6056
+ "eval_steps_per_second": 3.041,
6057
+ "step": 37800
6058
+ },
6059
+ {
6060
+ "epoch": 15.95,
6061
+ "grad_norm": 6.326321601867676,
6062
+ "learning_rate": 2.3581625441696114e-05,
6063
+ "loss": 0.8193,
6064
+ "step": 37900
6065
+ },
6066
+ {
6067
+ "epoch": 15.95,
6068
+ "eval_cer": 0.3725704900082124,
6069
+ "eval_loss": 2.059025764465332,
6070
+ "eval_runtime": 377.283,
6071
+ "eval_samples_per_second": 25.122,
6072
+ "eval_steps_per_second": 3.141,
6073
+ "step": 37900
6074
+ },
6075
+ {
6076
+ "epoch": 16.0,
6077
+ "grad_norm": 2.2139954566955566,
6078
+ "learning_rate": 2.351095406360424e-05,
6079
+ "loss": 0.8378,
6080
+ "step": 38000
6081
+ },
6082
+ {
6083
+ "epoch": 16.0,
6084
+ "eval_cer": 0.37425452661217784,
6085
+ "eval_loss": 1.9064280986785889,
6086
+ "eval_runtime": 411.9001,
6087
+ "eval_samples_per_second": 23.01,
6088
+ "eval_steps_per_second": 2.877,
6089
+ "step": 38000
6090
+ },
6091
+ {
6092
+ "epoch": 16.04,
6093
+ "grad_norm": 2.1273019313812256,
6094
+ "learning_rate": 2.3440282685512367e-05,
6095
+ "loss": 0.7478,
6096
+ "step": 38100
6097
+ },
6098
+ {
6099
+ "epoch": 16.04,
6100
+ "eval_cer": 0.3710159946814751,
6101
+ "eval_loss": 1.7977242469787598,
6102
+ "eval_runtime": 380.1262,
6103
+ "eval_samples_per_second": 24.934,
6104
+ "eval_steps_per_second": 3.117,
6105
+ "step": 38100
6106
  }
6107
  ],
6108
  "logging_steps": 100,
 
6110
  "num_input_tokens_seen": 0,
6111
  "num_train_epochs": 30,
6112
  "save_steps": 100,
6113
+ "total_flos": 4.175077130259511e+20,
6114
  "train_batch_size": 8,
6115
  "trial_name": null,
6116
  "trial_params": null