ashanhr commited on
Commit
2e9a543
·
verified ·
1 Parent(s): cb80c71

Training in progress, step 30900, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59250fd9c1a5ce2ea95e8374f4e0a25cf570f1b430322763a33c452a78721126
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a48a3ca955f4c3490521ffa16267bda052fbae1914e6e361907a2828816f0c77
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ee966e20348f3305f52c11ce787ab1d112ba834727e878f4dd30e8df8e37f46
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:982196fc29592e305eafca95afb014310ab8940220bbbb1a5a33250f58ddf315
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79caf774c5464f9f5b97517a7e1be66aa2f2a138193b94d0825747e7e97452ce
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3a59837b584d61da7f6862e8ac72f4bc9bb2b3b48e0a427fa8da7ba5143bf40
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37a3ee2bb07ef33e190c44e2b218c5390d462a8018bd3100170be9a809a772b2
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ea975816aac3d40a8b3d99eca8a891c93c967ac80c950988bae3dd44c17f8cc
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b7c7f8d94d094e5495ec4b02a0a6e224c7e7a8db7f263d195945f5e974673dc
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35cbb8ba5802e44e1ce90c535d286211f64f07ac10efce360c20ea86aa5c0fe7
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 12.92359503262471,
5
  "eval_steps": 100,
6
- "global_step": 30700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4919,6 +4919,38 @@
4919
  "eval_samples_per_second": 25.309,
4920
  "eval_steps_per_second": 3.164,
4921
  "step": 30700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4922
  }
4923
  ],
4924
  "logging_steps": 100,
@@ -4926,7 +4958,7 @@
4926
  "num_input_tokens_seen": 0,
4927
  "num_train_epochs": 30,
4928
  "save_steps": 100,
4929
- "total_flos": 3.363836920139934e+20,
4930
  "train_batch_size": 8,
4931
  "trial_name": null,
4932
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 13.007787834140181,
5
  "eval_steps": 100,
6
+ "global_step": 30900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4919
  "eval_samples_per_second": 25.309,
4920
  "eval_steps_per_second": 3.164,
4921
  "step": 30700
4922
+ },
4923
+ {
4924
+ "epoch": 12.97,
4925
+ "grad_norm": 2.6603596210479736,
4926
+ "learning_rate": 2.8598586572438163e-05,
4927
+ "loss": 1.0564,
4928
+ "step": 30800
4929
+ },
4930
+ {
4931
+ "epoch": 12.97,
4932
+ "eval_cer": 0.40476760783700283,
4933
+ "eval_loss": 2.143598794937134,
4934
+ "eval_runtime": 399.5314,
4935
+ "eval_samples_per_second": 23.723,
4936
+ "eval_steps_per_second": 2.966,
4937
+ "step": 30800
4938
+ },
4939
+ {
4940
+ "epoch": 13.01,
4941
+ "grad_norm": 2.8563497066497803,
4942
+ "learning_rate": 2.8527915194346288e-05,
4943
+ "loss": 1.0411,
4944
+ "step": 30900
4945
+ },
4946
+ {
4947
+ "epoch": 13.01,
4948
+ "eval_cer": 0.40590170505650935,
4949
+ "eval_loss": 2.6135616302490234,
4950
+ "eval_runtime": 370.2003,
4951
+ "eval_samples_per_second": 25.602,
4952
+ "eval_steps_per_second": 3.201,
4953
+ "step": 30900
4954
  }
4955
  ],
4956
  "logging_steps": 100,
 
4958
  "num_input_tokens_seen": 0,
4959
  "num_train_epochs": 30,
4960
  "save_steps": 100,
4961
+ "total_flos": 3.386567819260411e+20,
4962
  "train_batch_size": 8,
4963
  "trial_name": null,
4964
  "trial_params": null