Training in progress, epoch 2, checkpoint
Browse files
last-checkpoint/README.md
CHANGED
@@ -544,13 +544,17 @@ You can finetune this model on your own dataset.
|
|
544 |
</details>
|
545 |
|
546 |
### Training Logs
|
547 |
-
| Epoch
|
548 |
-
|
549 |
-
| None
|
550 |
-
| 0.25
|
551 |
-
| 0.5
|
552 |
-
| 0.75
|
553 |
-
| 1.0
|
|
|
|
|
|
|
|
|
554 |
|
555 |
|
556 |
### Framework Versions
|
|
|
544 |
</details>
|
545 |
|
546 |
### Training Logs
|
547 |
+
| Epoch | Step | Training Loss | qnli-contrastive loss | nli-pairs loss |
|
548 |
+
|:------:|:----:|:-------------:|:---------------------:|:--------------:|
|
549 |
+
| None | 0 | - | 6.0041 | 4.0946 |
|
550 |
+
| 0.25 | 116 | 4.9013 | 5.9679 | 4.0430 |
|
551 |
+
| 0.5 | 232 | 4.6399 | 5.5328 | 3.8479 |
|
552 |
+
| 0.75 | 348 | 4.4683 | 4.2996 | 3.6937 |
|
553 |
+
| 1.0 | 464 | 3.8129 | 2.8062 | 3.4773 |
|
554 |
+
| 1.2457 | 580 | 3.3971 | 1.8330 | 3.1263 |
|
555 |
+
| 1.4957 | 696 | 2.7459 | 1.2780 | 2.7294 |
|
556 |
+
| 1.7457 | 812 | 2.8721 | 0.9296 | 2.2870 |
|
557 |
+
| 1.9957 | 928 | 2.5066 | 0.6388 | 2.0548 |
|
558 |
|
559 |
|
560 |
### Framework Versions
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1130520122
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ce17548dc0abcf43ff57926fe223e86ff953c3d04d1b482e8a25b8f6b14c059
|
3 |
size 1130520122
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 565251810
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f543b7b25e08dea4691b71c82b05f600955db6b8e954eade9d601f8b183156f1
|
3 |
size 565251810
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14180
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1d13ecf4cf92281b9f41455432b2500dd25c260a5d0ae61639997c1a731a76a0
|
3 |
size 14180
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d7a1b39f4627d13e5c5634857f196a756ce6ec36192b7bfb79cff9c42c9aa243
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 116,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -99,6 +99,98 @@
|
|
99 |
"eval_qnli-contrastive_samples_per_second": 1397.948,
|
100 |
"eval_qnli-contrastive_steps_per_second": 87.372,
|
101 |
"step": 464
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
102 |
}
|
103 |
],
|
104 |
"logging_steps": 116,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0043103448275863,
|
5 |
"eval_steps": 116,
|
6 |
+
"global_step": 932,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
99 |
"eval_qnli-contrastive_samples_per_second": 1397.948,
|
100 |
"eval_qnli-contrastive_steps_per_second": 87.372,
|
101 |
"step": 464
|
102 |
+
},
|
103 |
+
{
|
104 |
+
"epoch": 1.2456896551724137,
|
105 |
+
"grad_norm": 98.47541046142578,
|
106 |
+
"learning_rate": 2.251958224543081e-06,
|
107 |
+
"loss": 3.3971,
|
108 |
+
"step": 580
|
109 |
+
},
|
110 |
+
{
|
111 |
+
"epoch": 1.2456896551724137,
|
112 |
+
"eval_nli-pairs_loss": 3.126293182373047,
|
113 |
+
"eval_nli-pairs_runtime": 1.3419,
|
114 |
+
"eval_nli-pairs_samples_per_second": 1490.389,
|
115 |
+
"eval_nli-pairs_steps_per_second": 93.149,
|
116 |
+
"step": 580
|
117 |
+
},
|
118 |
+
{
|
119 |
+
"epoch": 1.2456896551724137,
|
120 |
+
"eval_qnli-contrastive_loss": 1.8329846858978271,
|
121 |
+
"eval_qnli-contrastive_runtime": 1.5173,
|
122 |
+
"eval_qnli-contrastive_samples_per_second": 1318.109,
|
123 |
+
"eval_qnli-contrastive_steps_per_second": 82.382,
|
124 |
+
"step": 580
|
125 |
+
},
|
126 |
+
{
|
127 |
+
"epoch": 1.4956896551724137,
|
128 |
+
"grad_norm": 16.574974060058594,
|
129 |
+
"learning_rate": 2.706266318537859e-06,
|
130 |
+
"loss": 2.7459,
|
131 |
+
"step": 696
|
132 |
+
},
|
133 |
+
{
|
134 |
+
"epoch": 1.4956896551724137,
|
135 |
+
"eval_nli-pairs_loss": 2.72936749458313,
|
136 |
+
"eval_nli-pairs_runtime": 1.3359,
|
137 |
+
"eval_nli-pairs_samples_per_second": 1497.081,
|
138 |
+
"eval_nli-pairs_steps_per_second": 93.568,
|
139 |
+
"step": 696
|
140 |
+
},
|
141 |
+
{
|
142 |
+
"epoch": 1.4956896551724137,
|
143 |
+
"eval_qnli-contrastive_loss": 1.2779531478881836,
|
144 |
+
"eval_qnli-contrastive_runtime": 1.4644,
|
145 |
+
"eval_qnli-contrastive_samples_per_second": 1365.702,
|
146 |
+
"eval_qnli-contrastive_steps_per_second": 85.356,
|
147 |
+
"step": 696
|
148 |
+
},
|
149 |
+
{
|
150 |
+
"epoch": 1.7456896551724137,
|
151 |
+
"grad_norm": 201.21456909179688,
|
152 |
+
"learning_rate": 2.9950983500630964e-06,
|
153 |
+
"loss": 2.8721,
|
154 |
+
"step": 812
|
155 |
+
},
|
156 |
+
{
|
157 |
+
"epoch": 1.7456896551724137,
|
158 |
+
"eval_nli-pairs_loss": 2.2870194911956787,
|
159 |
+
"eval_nli-pairs_runtime": 1.3503,
|
160 |
+
"eval_nli-pairs_samples_per_second": 1481.188,
|
161 |
+
"eval_nli-pairs_steps_per_second": 92.574,
|
162 |
+
"step": 812
|
163 |
+
},
|
164 |
+
{
|
165 |
+
"epoch": 1.7456896551724137,
|
166 |
+
"eval_qnli-contrastive_loss": 0.9296175837516785,
|
167 |
+
"eval_qnli-contrastive_runtime": 1.4485,
|
168 |
+
"eval_qnli-contrastive_samples_per_second": 1380.738,
|
169 |
+
"eval_qnli-contrastive_steps_per_second": 86.296,
|
170 |
+
"step": 812
|
171 |
+
},
|
172 |
+
{
|
173 |
+
"epoch": 1.9956896551724137,
|
174 |
+
"grad_norm": 12.68950366973877,
|
175 |
+
"learning_rate": 2.9260214825373185e-06,
|
176 |
+
"loss": 2.5066,
|
177 |
+
"step": 928
|
178 |
+
},
|
179 |
+
{
|
180 |
+
"epoch": 1.9956896551724137,
|
181 |
+
"eval_nli-pairs_loss": 2.0547828674316406,
|
182 |
+
"eval_nli-pairs_runtime": 1.2929,
|
183 |
+
"eval_nli-pairs_samples_per_second": 1546.937,
|
184 |
+
"eval_nli-pairs_steps_per_second": 96.684,
|
185 |
+
"step": 928
|
186 |
+
},
|
187 |
+
{
|
188 |
+
"epoch": 1.9956896551724137,
|
189 |
+
"eval_qnli-contrastive_loss": 0.6387521028518677,
|
190 |
+
"eval_qnli-contrastive_runtime": 1.4598,
|
191 |
+
"eval_qnli-contrastive_samples_per_second": 1370.032,
|
192 |
+
"eval_qnli-contrastive_steps_per_second": 85.627,
|
193 |
+
"step": 928
|
194 |
}
|
195 |
],
|
196 |
"logging_steps": 116,
|