bobox committed on
Commit
35da924
·
verified ·
1 Parent(s): 2056c9c

Training in progress, epoch 3, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -555,6 +555,10 @@ You can finetune this model on your own dataset.
555
  | 1.4957 | 696 | 2.7459 | 1.2780 | 2.7294 |
556
  | 1.7457 | 812 | 2.8721 | 0.9296 | 2.2870 |
557
  | 1.9957 | 928 | 2.5066 | 0.6388 | 2.0548 |
 
 
 
 
558
 
559
 
560
  ### Framework Versions
 
555
  | 1.4957 | 696 | 2.7459 | 1.2780 | 2.7294 |
556
  | 1.7457 | 812 | 2.8721 | 0.9296 | 2.2870 |
557
  | 1.9957 | 928 | 2.5066 | 0.6388 | 2.0548 |
558
+ | 2.2414 | 1044 | 2.3223 | 0.5312 | 1.8876 |
559
+ | 2.4914 | 1160 | 2.1771 | 0.4300 | 1.7922 |
560
+ | 2.7414 | 1276 | 2.2549 | 0.3610 | 1.6473 |
561
+ | 2.9914 | 1392 | 2.2168 | 0.2929 | 1.5590 |
562
 
563
 
564
  ### Framework Versions
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ce17548dc0abcf43ff57926fe223e86ff953c3d04d1b482e8a25b8f6b14c059
3
  size 1130520122
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecb04608c64b22d6262681835d6c550177e812632d19e8bed563d46fd84e0e69
3
  size 1130520122
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f543b7b25e08dea4691b71c82b05f600955db6b8e954eade9d601f8b183156f1
3
  size 565251810
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3ad09b012913d64a2d27765f2b0342e4a0f1edd69a6c8ffd7a83dcee57b952e
3
  size 565251810
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d13ecf4cf92281b9f41455432b2500dd25c260a5d0ae61639997c1a731a76a0
3
  size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89d0bf755340af85865c74eb993cae626d4eb319120da8fd389f3930be54bbea
3
  size 14180
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d7a1b39f4627d13e5c5634857f196a756ce6ec36192b7bfb79cff9c42c9aa243
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e270a9661cb837eaec0b7e50a864e82bdf74fee38281b90e4447c6c11c7af62d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.0043103448275863,
5
  "eval_steps": 116,
6
- "global_step": 932,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -191,6 +191,98 @@
191
  "eval_qnli-contrastive_samples_per_second": 1370.032,
192
  "eval_qnli-contrastive_steps_per_second": 85.627,
193
  "step": 928
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  }
195
  ],
196
  "logging_steps": 116,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.0043103448275863,
5
  "eval_steps": 116,
6
+ "global_step": 1398,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
191
  "eval_qnli-contrastive_samples_per_second": 1370.032,
192
  "eval_qnli-contrastive_steps_per_second": 85.627,
193
  "step": 928
194
+ },
195
+ {
196
+ "epoch": 2.2413793103448274,
197
+ "grad_norm": 12.477553367614746,
198
+ "learning_rate": 2.7788810181030676e-06,
199
+ "loss": 2.3223,
200
+ "step": 1044
201
+ },
202
+ {
203
+ "epoch": 2.2413793103448274,
204
+ "eval_nli-pairs_loss": 1.8876054286956787,
205
+ "eval_nli-pairs_runtime": 1.4105,
206
+ "eval_nli-pairs_samples_per_second": 1417.897,
207
+ "eval_nli-pairs_steps_per_second": 88.619,
208
+ "step": 1044
209
+ },
210
+ {
211
+ "epoch": 2.2413793103448274,
212
+ "eval_qnli-contrastive_loss": 0.5312397480010986,
213
+ "eval_qnli-contrastive_runtime": 1.4798,
214
+ "eval_qnli-contrastive_samples_per_second": 1351.505,
215
+ "eval_qnli-contrastive_steps_per_second": 84.469,
216
+ "step": 1044
217
+ },
218
+ {
219
+ "epoch": 2.4913793103448274,
220
+ "grad_norm": 7.06378173828125,
221
+ "learning_rate": 2.5617317540023054e-06,
222
+ "loss": 2.1771,
223
+ "step": 1160
224
+ },
225
+ {
226
+ "epoch": 2.4913793103448274,
227
+ "eval_nli-pairs_loss": 1.7922124862670898,
228
+ "eval_nli-pairs_runtime": 1.392,
229
+ "eval_nli-pairs_samples_per_second": 1436.768,
230
+ "eval_nli-pairs_steps_per_second": 89.798,
231
+ "step": 1160
232
+ },
233
+ {
234
+ "epoch": 2.4913793103448274,
235
+ "eval_qnli-contrastive_loss": 0.4299691915512085,
236
+ "eval_qnli-contrastive_runtime": 1.4683,
237
+ "eval_qnli-contrastive_samples_per_second": 1362.111,
238
+ "eval_qnli-contrastive_steps_per_second": 85.132,
239
+ "step": 1160
240
+ },
241
+ {
242
+ "epoch": 2.7413793103448274,
243
+ "grad_norm": 11.377643585205078,
244
+ "learning_rate": 2.286460925335848e-06,
245
+ "loss": 2.2549,
246
+ "step": 1276
247
+ },
248
+ {
249
+ "epoch": 2.7413793103448274,
250
+ "eval_nli-pairs_loss": 1.647322177886963,
251
+ "eval_nli-pairs_runtime": 1.3347,
252
+ "eval_nli-pairs_samples_per_second": 1498.487,
253
+ "eval_nli-pairs_steps_per_second": 93.655,
254
+ "step": 1276
255
+ },
256
+ {
257
+ "epoch": 2.7413793103448274,
258
+ "eval_qnli-contrastive_loss": 0.36095327138900757,
259
+ "eval_qnli-contrastive_runtime": 1.5309,
260
+ "eval_qnli-contrastive_samples_per_second": 1306.387,
261
+ "eval_qnli-contrastive_steps_per_second": 81.649,
262
+ "step": 1276
263
+ },
264
+ {
265
+ "epoch": 2.9913793103448274,
266
+ "grad_norm": 8.12272834777832,
267
+ "learning_rate": 1.968137471297685e-06,
268
+ "loss": 2.2168,
269
+ "step": 1392
270
+ },
271
+ {
272
+ "epoch": 2.9913793103448274,
273
+ "eval_nli-pairs_loss": 1.5589631795883179,
274
+ "eval_nli-pairs_runtime": 1.2874,
275
+ "eval_nli-pairs_samples_per_second": 1553.463,
276
+ "eval_nli-pairs_steps_per_second": 97.091,
277
+ "step": 1392
278
+ },
279
+ {
280
+ "epoch": 2.9913793103448274,
281
+ "eval_qnli-contrastive_loss": 0.2929060459136963,
282
+ "eval_qnli-contrastive_runtime": 1.4489,
283
+ "eval_qnli-contrastive_samples_per_second": 1380.312,
284
+ "eval_qnli-contrastive_steps_per_second": 86.269,
285
+ "step": 1392
286
  }
287
  ],
288
  "logging_steps": 116,