ashanhr committed on
Commit
e08ab68
1 Parent(s): 70c4cb7

Training in progress, step 6800, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b74a50c7b619bf06f6a6e3a27295d976c0da1723ca909e9d96edf34f99bd5752
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57001483176bf82e1c09ea40d1f0147256cbd62333a182c6b5800e360309b8f2
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b18854dd89f7c1b84225587e7c70199be6b69655adc6061c07aecd3fb7da56e6
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a82744c0bc1c99fdf3736480a88beb791948657a4a6a512ff291360e62cfe4a
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61de57f4d7edcb73072837711b87eca53e2d63334776546c815e373def03ba09
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2700da8dca61e768af55482f0e459f8e8580186f45bdbf2c72ed3d4a47b60cc2
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d9f89b3a69be109ef15d21941c1f3bbf54a5f79308369a791688ab91e4003c4c
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7efa4777d4c3271e154dec08b5438b3ff12cbb40bd34cd166043ded385b18863
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e81bd9b3dea7c3d81bdbaecf9908b3ceba5c03eb60cdd3bff38e8bd5cc6b02c0
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c23abbcc2d4c9c22b104e91a5f11344337be1955d4805a3cb1067ad9e60d5245
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.778362450010524,
5
  "eval_steps": 100,
6
- "global_step": 6600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1063,6 +1063,38 @@
1063
  "eval_samples_per_second": 26.399,
1064
  "eval_steps_per_second": 3.301,
1065
  "step": 6600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1066
  }
1067
  ],
1068
  "logging_steps": 100,
@@ -1070,7 +1102,7 @@
1070
  "num_input_tokens_seen": 0,
1071
  "num_train_epochs": 30,
1072
  "save_steps": 100,
1073
- "total_flos": 7.230691505769557e+19,
1074
  "train_batch_size": 8,
1075
  "trial_name": null,
1076
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.8625552515259947,
5
  "eval_steps": 100,
6
+ "global_step": 6800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1063
  "eval_samples_per_second": 26.399,
1064
  "eval_steps_per_second": 3.301,
1065
  "step": 6600
1066
+ },
1067
+ {
1068
+ "epoch": 2.82,
1069
+ "grad_norm": 23.777395248413086,
1070
+ "learning_rate": 4.562473498233216e-05,
1071
+ "loss": 1.5294,
1072
+ "step": 6700
1073
+ },
1074
+ {
1075
+ "epoch": 2.82,
1076
+ "eval_cer": 0.5097864768683275,
1077
+ "eval_loss": 2.758403778076172,
1078
+ "eval_runtime": 375.8984,
1079
+ "eval_samples_per_second": 25.214,
1080
+ "eval_steps_per_second": 3.152,
1081
+ "step": 6700
1082
+ },
1083
+ {
1084
+ "epoch": 2.86,
1085
+ "grad_norm": 5.520616054534912,
1086
+ "learning_rate": 4.555406360424028e-05,
1087
+ "loss": 1.6177,
1088
+ "step": 6800
1089
+ },
1090
+ {
1091
+ "epoch": 2.86,
1092
+ "eval_cer": 0.5417905048687967,
1093
+ "eval_loss": 2.8678946495056152,
1094
+ "eval_runtime": 360.269,
1095
+ "eval_samples_per_second": 26.308,
1096
+ "eval_steps_per_second": 3.289,
1097
+ "step": 6800
1098
  }
1099
  ],
1100
  "logging_steps": 100,
 
1102
  "num_input_tokens_seen": 0,
1103
  "num_train_epochs": 30,
1104
  "save_steps": 100,
1105
+ "total_flos": 7.446996872508875e+19,
1106
  "train_batch_size": 8,
1107
  "trial_name": null,
1108
  "trial_params": null