ashanhr commited on
Commit
02db9db
·
verified ·
1 Parent(s): caf039f

Training in progress, step 7100, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57001483176bf82e1c09ea40d1f0147256cbd62333a182c6b5800e360309b8f2
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ffe4a4c3e4e20b6f8d7533a207499d54e18533a2ff9988b5cf140b2c17082c0
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a82744c0bc1c99fdf3736480a88beb791948657a4a6a512ff291360e62cfe4a
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50723aacfe1d09285d3d20c78e7a1e48064440bdd2388888b659a4f9def33f3c
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2700da8dca61e768af55482f0e459f8e8580186f45bdbf2c72ed3d4a47b60cc2
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0b9c0f6c20269a5797deab52d27abffe73bdbbaf470e35e43b903f7ebcadebf
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7efa4777d4c3271e154dec08b5438b3ff12cbb40bd34cd166043ded385b18863
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8a74dd20115cf8c84a288f3d48680cab469a7e69dc1d1632fbee207bbb0aa74
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c23abbcc2d4c9c22b104e91a5f11344337be1955d4805a3cb1067ad9e60d5245
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:233cc1a95fc61bdbfaf2f246e87bbffc2fb4b93155b895b23bcd4b806754f9d4
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.8625552515259947,
5
  "eval_steps": 100,
6
- "global_step": 6800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1095,6 +1095,54 @@
1095
  "eval_samples_per_second": 26.308,
1096
  "eval_steps_per_second": 3.289,
1097
  "step": 6800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1098
  }
1099
  ],
1100
  "logging_steps": 100,
@@ -1102,7 +1150,7 @@
1102
  "num_input_tokens_seen": 0,
1103
  "num_train_epochs": 30,
1104
  "save_steps": 100,
1105
- "total_flos": 7.446996872508875e+19,
1106
  "train_batch_size": 8,
1107
  "trial_name": null,
1108
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.9888444537992003,
5
  "eval_steps": 100,
6
+ "global_step": 7100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1095
  "eval_samples_per_second": 26.308,
1096
  "eval_steps_per_second": 3.289,
1097
  "step": 6800
1098
+ },
1099
+ {
1100
+ "epoch": 2.9,
1101
+ "grad_norm": 12.704540252685547,
1102
+ "learning_rate": 4.5483392226148416e-05,
1103
+ "loss": 2.6892,
1104
+ "step": 6900
1105
+ },
1106
+ {
1107
+ "epoch": 2.9,
1108
+ "eval_cer": 0.5306426225020531,
1109
+ "eval_loss": 2.7854502201080322,
1110
+ "eval_runtime": 372.5558,
1111
+ "eval_samples_per_second": 25.44,
1112
+ "eval_steps_per_second": 3.181,
1113
+ "step": 6900
1114
+ },
1115
+ {
1116
+ "epoch": 2.95,
1117
+ "grad_norm": 3.1302738189697266,
1118
+ "learning_rate": 4.541272084805654e-05,
1119
+ "loss": 3.1467,
1120
+ "step": 7000
1121
+ },
1122
+ {
1123
+ "epoch": 2.95,
1124
+ "eval_cer": 0.5148948027061906,
1125
+ "eval_loss": 2.9890220165252686,
1126
+ "eval_runtime": 359.2287,
1127
+ "eval_samples_per_second": 26.384,
1128
+ "eval_steps_per_second": 3.299,
1129
+ "step": 7000
1130
+ },
1131
+ {
1132
+ "epoch": 2.99,
1133
+ "grad_norm": 1.750848412513733,
1134
+ "learning_rate": 4.5342049469964665e-05,
1135
+ "loss": 2.1825,
1136
+ "step": 7100
1137
+ },
1138
+ {
1139
+ "epoch": 2.99,
1140
+ "eval_cer": 0.5565606546478432,
1141
+ "eval_loss": 3.195390224456787,
1142
+ "eval_runtime": 366.6828,
1143
+ "eval_samples_per_second": 25.848,
1144
+ "eval_steps_per_second": 3.232,
1145
+ "step": 7100
1146
  }
1147
  ],
1148
  "logging_steps": 100,
 
1150
  "num_input_tokens_seen": 0,
1151
  "num_train_epochs": 30,
1152
  "save_steps": 100,
1153
+ "total_flos": 7.774285790233235e+19,
1154
  "train_batch_size": 8,
1155
  "trial_name": null,
1156
  "trial_params": null