bobox committed on
Commit
5d1a2c2
·
verified ·
1 Parent(s): 9e3185b

Training in progress, epoch 2, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -544,13 +544,17 @@ You can finetune this model on your own dataset.
544
  </details>
545
 
546
  ### Training Logs
547
- | Epoch | Step | Training Loss | qnli-contrastive loss | nli-pairs loss |
548
- |:-----:|:----:|:-------------:|:---------------------:|:--------------:|
549
- | None | 0 | - | 6.0041 | 4.0946 |
550
- | 0.25 | 116 | 4.9013 | 5.9679 | 4.0430 |
551
- | 0.5 | 232 | 4.6399 | 5.5328 | 3.8479 |
552
- | 0.75 | 348 | 4.4683 | 4.2996 | 3.6937 |
553
- | 1.0 | 464 | 3.8129 | 2.8062 | 3.4773 |
 
 
 
 
554
 
555
 
556
  ### Framework Versions
 
544
  </details>
545
 
546
  ### Training Logs
547
+ | Epoch | Step | Training Loss | qnli-contrastive loss | nli-pairs loss |
548
+ |:------:|:----:|:-------------:|:---------------------:|:--------------:|
549
+ | None | 0 | - | 6.0041 | 4.0946 |
550
+ | 0.25 | 116 | 4.9013 | 5.9679 | 4.0430 |
551
+ | 0.5 | 232 | 4.6399 | 5.5328 | 3.8479 |
552
+ | 0.75 | 348 | 4.4683 | 4.2996 | 3.6937 |
553
+ | 1.0 | 464 | 3.8129 | 2.8062 | 3.4773 |
554
+ | 1.2457 | 580 | 3.3971 | 1.8330 | 3.1263 |
555
+ | 1.4957 | 696 | 2.7459 | 1.2780 | 2.7294 |
556
+ | 1.7457 | 812 | 2.8721 | 0.9296 | 2.2870 |
557
+ | 1.9957 | 928 | 2.5066 | 0.6388 | 2.0548 |
558
 
559
 
560
  ### Framework Versions
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1b597b0fc92d0b9e12711e77db2df5632db8e4f99e0202c6efc3f014202be84
3
  size 1130520122
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ce17548dc0abcf43ff57926fe223e86ff953c3d04d1b482e8a25b8f6b14c059
3
  size 1130520122
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff91915fd0ee36e4ba9d9d6ade05a5ea022b50bbe2d87122a050772b8597fe4a
3
  size 565251810
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f543b7b25e08dea4691b71c82b05f600955db6b8e954eade9d601f8b183156f1
3
  size 565251810
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a13f8f4ac4f71c8ac2411ec94584dffb535fff26a37d3787d5685cd9e588b3b4
3
  size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d13ecf4cf92281b9f41455432b2500dd25c260a5d0ae61639997c1a731a76a0
3
  size 14180
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43709499a8cea7d53df5052d159d8c3a24f835a2f5af6141080ad335219b234b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7a1b39f4627d13e5c5634857f196a756ce6ec36192b7bfb79cff9c42c9aa243
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0043103448275863,
5
  "eval_steps": 116,
6
- "global_step": 466,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -99,6 +99,98 @@
99
  "eval_qnli-contrastive_samples_per_second": 1397.948,
100
  "eval_qnli-contrastive_steps_per_second": 87.372,
101
  "step": 464
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  }
103
  ],
104
  "logging_steps": 116,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.0043103448275863,
5
  "eval_steps": 116,
6
+ "global_step": 932,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
99
  "eval_qnli-contrastive_samples_per_second": 1397.948,
100
  "eval_qnli-contrastive_steps_per_second": 87.372,
101
  "step": 464
102
+ },
103
+ {
104
+ "epoch": 1.2456896551724137,
105
+ "grad_norm": 98.47541046142578,
106
+ "learning_rate": 2.251958224543081e-06,
107
+ "loss": 3.3971,
108
+ "step": 580
109
+ },
110
+ {
111
+ "epoch": 1.2456896551724137,
112
+ "eval_nli-pairs_loss": 3.126293182373047,
113
+ "eval_nli-pairs_runtime": 1.3419,
114
+ "eval_nli-pairs_samples_per_second": 1490.389,
115
+ "eval_nli-pairs_steps_per_second": 93.149,
116
+ "step": 580
117
+ },
118
+ {
119
+ "epoch": 1.2456896551724137,
120
+ "eval_qnli-contrastive_loss": 1.8329846858978271,
121
+ "eval_qnli-contrastive_runtime": 1.5173,
122
+ "eval_qnli-contrastive_samples_per_second": 1318.109,
123
+ "eval_qnli-contrastive_steps_per_second": 82.382,
124
+ "step": 580
125
+ },
126
+ {
127
+ "epoch": 1.4956896551724137,
128
+ "grad_norm": 16.574974060058594,
129
+ "learning_rate": 2.706266318537859e-06,
130
+ "loss": 2.7459,
131
+ "step": 696
132
+ },
133
+ {
134
+ "epoch": 1.4956896551724137,
135
+ "eval_nli-pairs_loss": 2.72936749458313,
136
+ "eval_nli-pairs_runtime": 1.3359,
137
+ "eval_nli-pairs_samples_per_second": 1497.081,
138
+ "eval_nli-pairs_steps_per_second": 93.568,
139
+ "step": 696
140
+ },
141
+ {
142
+ "epoch": 1.4956896551724137,
143
+ "eval_qnli-contrastive_loss": 1.2779531478881836,
144
+ "eval_qnli-contrastive_runtime": 1.4644,
145
+ "eval_qnli-contrastive_samples_per_second": 1365.702,
146
+ "eval_qnli-contrastive_steps_per_second": 85.356,
147
+ "step": 696
148
+ },
149
+ {
150
+ "epoch": 1.7456896551724137,
151
+ "grad_norm": 201.21456909179688,
152
+ "learning_rate": 2.9950983500630964e-06,
153
+ "loss": 2.8721,
154
+ "step": 812
155
+ },
156
+ {
157
+ "epoch": 1.7456896551724137,
158
+ "eval_nli-pairs_loss": 2.2870194911956787,
159
+ "eval_nli-pairs_runtime": 1.3503,
160
+ "eval_nli-pairs_samples_per_second": 1481.188,
161
+ "eval_nli-pairs_steps_per_second": 92.574,
162
+ "step": 812
163
+ },
164
+ {
165
+ "epoch": 1.7456896551724137,
166
+ "eval_qnli-contrastive_loss": 0.9296175837516785,
167
+ "eval_qnli-contrastive_runtime": 1.4485,
168
+ "eval_qnli-contrastive_samples_per_second": 1380.738,
169
+ "eval_qnli-contrastive_steps_per_second": 86.296,
170
+ "step": 812
171
+ },
172
+ {
173
+ "epoch": 1.9956896551724137,
174
+ "grad_norm": 12.68950366973877,
175
+ "learning_rate": 2.9260214825373185e-06,
176
+ "loss": 2.5066,
177
+ "step": 928
178
+ },
179
+ {
180
+ "epoch": 1.9956896551724137,
181
+ "eval_nli-pairs_loss": 2.0547828674316406,
182
+ "eval_nli-pairs_runtime": 1.2929,
183
+ "eval_nli-pairs_samples_per_second": 1546.937,
184
+ "eval_nli-pairs_steps_per_second": 96.684,
185
+ "step": 928
186
+ },
187
+ {
188
+ "epoch": 1.9956896551724137,
189
+ "eval_qnli-contrastive_loss": 0.6387521028518677,
190
+ "eval_qnli-contrastive_runtime": 1.4598,
191
+ "eval_qnli-contrastive_samples_per_second": 1370.032,
192
+ "eval_qnli-contrastive_steps_per_second": 85.627,
193
+ "step": 928
194
  }
195
  ],
196
  "logging_steps": 116,