bobox committed on
Commit
d6796ed
·
verified ·
1 Parent(s): 895c6b1

Training in progress, epoch 2, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -538,18 +538,28 @@ You can finetune this model on your own dataset.
538
  </details>
539
 
540
  ### Training Logs
541
- | Epoch | Step | Training Loss | qnli-contrastive loss | nli-pairs loss |
542
- |:------:|:----:|:-------------:|:---------------------:|:--------------:|
543
- | None | 0 | - | 20.1737 | 4.0959 |
544
- | 0.1001 | 734 | 4.796 | - | - |
545
- | 0.2001 | 1468 | 1.3015 | 0.0358 | 0.9115 |
546
- | 0.3002 | 2202 | 0.89 | - | - |
547
- | 0.4002 | 2936 | 0.716 | 0.0168 | 0.5944 |
548
- | 0.5003 | 3670 | 0.6365 | - | - |
549
- | 0.6003 | 4404 | 0.5883 | 0.0164 | 0.4975 |
550
- | 0.7004 | 5138 | 0.5192 | - | - |
551
- | 0.8004 | 5872 | 0.4961 | 0.0288 | 0.4450 |
552
- | 0.9005 | 6606 | 0.6035 | - | - |
 
 
 
 
 
 
 
 
 
 
553
 
554
 
555
  ### Framework Versions
 
538
  </details>
539
 
540
  ### Training Logs
541
+ | Epoch | Step | Training Loss | qnli-contrastive loss | nli-pairs loss |
542
+ |:------:|:-----:|:-------------:|:---------------------:|:--------------:|
543
+ | None | 0 | - | 20.1737 | 4.0959 |
544
+ | 0.1001 | 734 | 4.796 | - | - |
545
+ | 0.2001 | 1468 | 1.3015 | 0.0358 | 0.9115 |
546
+ | 0.3002 | 2202 | 0.89 | - | - |
547
+ | 0.4002 | 2936 | 0.716 | 0.0168 | 0.5944 |
548
+ | 0.5003 | 3670 | 0.6365 | - | - |
549
+ | 0.6003 | 4404 | 0.5883 | 0.0164 | 0.4975 |
550
+ | 0.7004 | 5138 | 0.5192 | - | - |
551
+ | 0.8004 | 5872 | 0.4961 | 0.0288 | 0.4450 |
552
+ | 0.9005 | 6606 | 0.6035 | - | - |
553
+ | 1.0005 | 7340 | 0.4733 | 0.0110 | 0.4215 |
554
+ | 1.1006 | 8074 | 0.4002 | - | - |
555
+ | 1.2007 | 8808 | 0.3929 | 0.0454 | 0.3796 |
556
+ | 1.3007 | 9542 | 0.3826 | - | - |
557
+ | 1.4008 | 10276 | 0.3522 | 0.0178 | 0.3714 |
558
+ | 1.5008 | 11010 | 0.3627 | - | - |
559
+ | 1.6009 | 11744 | 0.3553 | 0.0257 | 0.3629 |
560
+ | 1.7009 | 12478 | 0.3406 | - | - |
561
+ | 1.8010 | 13212 | 0.3288 | 0.0289 | 0.3575 |
562
+ | 1.9010 | 13946 | 0.4563 | - | - |
563
 
564
 
565
  ### Framework Versions
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a749be1ff609ad2bded40d5b2fb1132d3d648b50ef1b7246d14619faa8c58f8
3
  size 1130520122
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:197eac8569a220596590c77575fe0e465e399356472710a19ab8e45585b2ae6e
3
  size 1130520122
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b7a78caf0b7de88dddf8c331c22dc8a0c8a8173693518132a3c2bc00703c2dc
3
  size 565251810
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6b8f13689928c18ece3856866a5eabeea8661d8106e9cb8141da0943dbf28da
3
  size 565251810
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cdc73eeb20f0bc899b26bfb3842397bedfaaf0599682feea9dcc50aa3a5f6766
3
  size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd3ebef39758a414d139f600ddff7baf9e47748f2ea2a2338f7aed637daeb655
3
  size 14180
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c524663492ab13642dabc57fe5921f5cabb07eb2dedd76a5d83a640195afeb24
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4125f2e2f1951242b702b885a9e93bcaa6d5f26d6f59d55fecae3d75e9474f60
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
  "eval_steps": 1468,
6
- "global_step": 7336,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -134,6 +134,156 @@
134
  "learning_rate": 1.8462745233342613e-05,
135
  "loss": 0.6035,
136
  "step": 6606
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  }
138
  ],
139
  "logging_steps": 734,
@@ -148,7 +298,7 @@
148
  "should_evaluate": false,
149
  "should_log": false,
150
  "should_save": true,
151
- "should_training_stop": false
152
  },
153
  "attributes": {}
154
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
  "eval_steps": 1468,
6
+ "global_step": 14672,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
134
  "learning_rate": 1.8462745233342613e-05,
135
  "loss": 0.6035,
136
  "step": 6606
137
+ },
138
+ {
139
+ "epoch": 1.000545256270447,
140
+ "grad_norm": 3.743481397628784,
141
+ "learning_rate": 1.699267443860664e-05,
142
+ "loss": 0.4733,
143
+ "step": 7340
144
+ },
145
+ {
146
+ "epoch": 1.000545256270447,
147
+ "eval_nli-pairs_loss": 0.4215342402458191,
148
+ "eval_nli-pairs_runtime": 3.6783,
149
+ "eval_nli-pairs_samples_per_second": 1850.875,
150
+ "eval_nli-pairs_steps_per_second": 57.908,
151
+ "step": 7340
152
+ },
153
+ {
154
+ "epoch": 1.000545256270447,
155
+ "eval_qnli-contrastive_loss": 0.01100869383662939,
156
+ "eval_qnli-contrastive_runtime": 3.639,
157
+ "eval_qnli-contrastive_samples_per_second": 1501.242,
158
+ "eval_qnli-contrastive_steps_per_second": 46.991,
159
+ "step": 7340
160
+ },
161
+ {
162
+ "epoch": 1.1005997818974917,
163
+ "grad_norm": 0.39953914284706116,
164
+ "learning_rate": 1.513957108680355e-05,
165
+ "loss": 0.4002,
166
+ "step": 8074
167
+ },
168
+ {
169
+ "epoch": 1.2006543075245366,
170
+ "grad_norm": 2.542104482650757,
171
+ "learning_rate": 1.3004941249978107e-05,
172
+ "loss": 0.3929,
173
+ "step": 8808
174
+ },
175
+ {
176
+ "epoch": 1.2006543075245366,
177
+ "eval_nli-pairs_loss": 0.37960606813430786,
178
+ "eval_nli-pairs_runtime": 3.5792,
179
+ "eval_nli-pairs_samples_per_second": 1902.102,
180
+ "eval_nli-pairs_steps_per_second": 59.511,
181
+ "step": 8808
182
+ },
183
+ {
184
+ "epoch": 1.2006543075245366,
185
+ "eval_qnli-contrastive_loss": 0.04537490755319595,
186
+ "eval_qnli-contrastive_runtime": 3.371,
187
+ "eval_qnli-contrastive_samples_per_second": 1620.568,
188
+ "eval_qnli-contrastive_steps_per_second": 50.726,
189
+ "step": 8808
190
+ },
191
+ {
192
+ "epoch": 1.3007088331515813,
193
+ "grad_norm": 2.3156607151031494,
194
+ "learning_rate": 1.0705711968273469e-05,
195
+ "loss": 0.3826,
196
+ "step": 9542
197
+ },
198
+ {
199
+ "epoch": 1.400763358778626,
200
+ "grad_norm": 3.3540971279144287,
201
+ "learning_rate": 8.370979573663896e-06,
202
+ "loss": 0.3522,
203
+ "step": 10276
204
+ },
205
+ {
206
+ "epoch": 1.400763358778626,
207
+ "eval_nli-pairs_loss": 0.3714284896850586,
208
+ "eval_nli-pairs_runtime": 3.5826,
209
+ "eval_nli-pairs_samples_per_second": 1900.32,
210
+ "eval_nli-pairs_steps_per_second": 59.455,
211
+ "step": 10276
212
+ },
213
+ {
214
+ "epoch": 1.400763358778626,
215
+ "eval_qnli-contrastive_loss": 0.017819516360759735,
216
+ "eval_qnli-contrastive_runtime": 3.4236,
217
+ "eval_qnli-contrastive_samples_per_second": 1595.701,
218
+ "eval_qnli-contrastive_steps_per_second": 49.948,
219
+ "step": 10276
220
+ },
221
+ {
222
+ "epoch": 1.5008178844056705,
223
+ "grad_norm": 1.3052864074707031,
224
+ "learning_rate": 6.125236966193413e-06,
225
+ "loss": 0.3627,
226
+ "step": 11010
227
+ },
228
+ {
229
+ "epoch": 1.6008724100327154,
230
+ "grad_norm": 1.00529944896698,
231
+ "learning_rate": 4.088586072137575e-06,
232
+ "loss": 0.3553,
233
+ "step": 11744
234
+ },
235
+ {
236
+ "epoch": 1.6008724100327154,
237
+ "eval_nli-pairs_loss": 0.3628700077533722,
238
+ "eval_nli-pairs_runtime": 3.7016,
239
+ "eval_nli-pairs_samples_per_second": 1839.184,
240
+ "eval_nli-pairs_steps_per_second": 57.542,
241
+ "step": 11744
242
+ },
243
+ {
244
+ "epoch": 1.6008724100327154,
245
+ "eval_qnli-contrastive_loss": 0.025695964694023132,
246
+ "eval_qnli-contrastive_runtime": 3.365,
247
+ "eval_qnli-contrastive_samples_per_second": 1623.495,
248
+ "eval_qnli-contrastive_steps_per_second": 50.818,
249
+ "step": 11744
250
+ },
251
+ {
252
+ "epoch": 1.70092693565976,
253
+ "grad_norm": 6.129855632781982,
254
+ "learning_rate": 2.375740327678049e-06,
255
+ "loss": 0.3406,
256
+ "step": 12478
257
+ },
258
+ {
259
+ "epoch": 1.800981461286805,
260
+ "grad_norm": 0.2667868733406067,
261
+ "learning_rate": 1.0819684733779468e-06,
262
+ "loss": 0.3288,
263
+ "step": 13212
264
+ },
265
+ {
266
+ "epoch": 1.800981461286805,
267
+ "eval_nli-pairs_loss": 0.3574618101119995,
268
+ "eval_nli-pairs_runtime": 3.5437,
269
+ "eval_nli-pairs_samples_per_second": 1921.132,
270
+ "eval_nli-pairs_steps_per_second": 60.106,
271
+ "step": 13212
272
+ },
273
+ {
274
+ "epoch": 1.800981461286805,
275
+ "eval_qnli-contrastive_loss": 0.028892073780298233,
276
+ "eval_qnli-contrastive_runtime": 3.3304,
277
+ "eval_qnli-contrastive_samples_per_second": 1640.352,
278
+ "eval_qnli-contrastive_steps_per_second": 51.345,
279
+ "step": 13212
280
+ },
281
+ {
282
+ "epoch": 1.9010359869138496,
283
+ "grad_norm": 5.174046039581299,
284
+ "learning_rate": 2.746246447818135e-07,
285
+ "loss": 0.4563,
286
+ "step": 13946
287
  }
288
  ],
289
  "logging_steps": 734,
 
298
  "should_evaluate": false,
299
  "should_log": false,
300
  "should_save": true,
301
+ "should_training_stop": true
302
  },
303
  "attributes": {}
304
  }