KennethEnevoldsen committed on
Commit
c7392f2
1 Parent(s): 0ae4019

Initial commit

Browse files
README.md ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Trained using SimCSE with:
2
+
3
+ ```
4
+ CUDA_VISIBLE_DEVICES=0 python train.py \
5
+ --train_file data/dfm_paragraphs.txt \
6
+ --model_name_or_path chcaa/dfm-encoder-large-v1 \
7
+ --output_dir result/dfm-sentence-encoder-medium-v4 \
8
+ --num_train_epochs 1 \
9
+ --per_device_train_batch_size 128 \
10
+ --learning_rate 1e-5 \
11
+ --max_seq_length 32 \
12
+ --evaluation_strategy steps \
13
+ --metric_for_best_model stsb_spearman \
14
+ --load_best_model_at_end \
15
+ --pooler_type cls \
16
+ --mlp_only_train \
17
+ --do_mlm \
18
+ --overwrite_output_dir \
19
+ --temp 0.05 \
20
+ --do_train \
21
+ --fp16
22
+ ```
config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "result/dfm-sentence-encoder-medium-v4",
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 1024,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 4096,
14
+ "layer_norm_eps": 1e-12,
15
+ "max_position_embeddings": 512,
16
+ "model_type": "bert",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 24,
19
+ "pad_token_id": 0,
20
+ "position_embedding_type": "absolute",
21
+ "torch_dtype": "float32",
22
+ "transformers_version": "4.2.1",
23
+ "type_vocab_size": 2,
24
+ "use_cache": true,
25
+ "vocab_size": 50000
26
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:537da485bf4ce22b7ece0467808210271465696a4cc35c4b554f420d7ad88221
3
+ size 3259345285
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efd6af53a499aea8fd88afa68771609bf18d54610ef7208bc859f16ca9735917
3
+ size 1629688773
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37986d1575010a759d822e70ad0dc80073f4848bf67e05bd2ea41b3da2b1f0eb
3
+ size 627
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"do_lower_case": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": false, "do_basic_tokenize": true, "name_or_path": "chcaa/dfm-encoder-large-v1", "never_split": null, "special_tokens_map_file": null, "tokenizer_class": "BertTokenizer", "model_max_length": 512}
train_results.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ epoch = 1.0
2
+ train_runtime = 47384.4879
3
+ train_samples_per_second = 1.041
trainer_state.json ADDED
@@ -0,0 +1,1296 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.81132042094234,
3
+ "best_model_checkpoint": "result/dfm-sentence-encoder-medium-v4",
4
+ "epoch": 1.0,
5
+ "global_step": 49345,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.01,
12
+ "learning_rate": 9.89867261120681e-06,
13
+ "loss": 0.551,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.01,
18
+ "eval_avg_sts": 0.7288716035836427,
19
+ "eval_sickr_spearman": 0.6917684187734319,
20
+ "eval_stsb_spearman": 0.7659747883938535,
21
+ "step": 500
22
+ },
23
+ {
24
+ "epoch": 0.02,
25
+ "learning_rate": 9.797345222413618e-06,
26
+ "loss": 0.4721,
27
+ "step": 1000
28
+ },
29
+ {
30
+ "epoch": 0.02,
31
+ "eval_avg_sts": 0.7351199653659765,
32
+ "eval_sickr_spearman": 0.6964072145192234,
33
+ "eval_stsb_spearman": 0.7738327162127295,
34
+ "step": 1000
35
+ },
36
+ {
37
+ "epoch": 0.03,
38
+ "learning_rate": 9.696017833620428e-06,
39
+ "loss": 0.4607,
40
+ "step": 1500
41
+ },
42
+ {
43
+ "epoch": 0.03,
44
+ "eval_avg_sts": 0.7423278666926523,
45
+ "eval_sickr_spearman": 0.6978035266691308,
46
+ "eval_stsb_spearman": 0.7868522067161736,
47
+ "step": 1500
48
+ },
49
+ {
50
+ "epoch": 0.04,
51
+ "learning_rate": 9.594690444827237e-06,
52
+ "loss": 0.4586,
53
+ "step": 2000
54
+ },
55
+ {
56
+ "epoch": 0.04,
57
+ "eval_avg_sts": 0.7441749546870178,
58
+ "eval_sickr_spearman": 0.6987989232153342,
59
+ "eval_stsb_spearman": 0.7895509861587012,
60
+ "step": 2000
61
+ },
62
+ {
63
+ "epoch": 0.05,
64
+ "learning_rate": 9.493363056034047e-06,
65
+ "loss": 0.4627,
66
+ "step": 2500
67
+ },
68
+ {
69
+ "epoch": 0.05,
70
+ "eval_avg_sts": 0.7445984460802536,
71
+ "eval_sickr_spearman": 0.69917351777545,
72
+ "eval_stsb_spearman": 0.7900233743850572,
73
+ "step": 2500
74
+ },
75
+ {
76
+ "epoch": 0.06,
77
+ "learning_rate": 9.392035667240856e-06,
78
+ "loss": 0.4686,
79
+ "step": 3000
80
+ },
81
+ {
82
+ "epoch": 0.06,
83
+ "eval_avg_sts": 0.7510002108817091,
84
+ "eval_sickr_spearman": 0.7075781920294968,
85
+ "eval_stsb_spearman": 0.7944222297339215,
86
+ "step": 3000
87
+ },
88
+ {
89
+ "epoch": 0.07,
90
+ "learning_rate": 9.290708278447666e-06,
91
+ "loss": 0.4686,
92
+ "step": 3500
93
+ },
94
+ {
95
+ "epoch": 0.07,
96
+ "eval_avg_sts": 0.7477270819237568,
97
+ "eval_sickr_spearman": 0.7019111464641181,
98
+ "eval_stsb_spearman": 0.7935430173833955,
99
+ "step": 3500
100
+ },
101
+ {
102
+ "epoch": 0.08,
103
+ "learning_rate": 9.189380889654475e-06,
104
+ "loss": 0.454,
105
+ "step": 4000
106
+ },
107
+ {
108
+ "epoch": 0.08,
109
+ "eval_avg_sts": 0.7419612954851657,
110
+ "eval_sickr_spearman": 0.6941440850816625,
111
+ "eval_stsb_spearman": 0.7897785058886688,
112
+ "step": 4000
113
+ },
114
+ {
115
+ "epoch": 0.09,
116
+ "learning_rate": 9.088053500861283e-06,
117
+ "loss": 0.4649,
118
+ "step": 4500
119
+ },
120
+ {
121
+ "epoch": 0.09,
122
+ "eval_avg_sts": 0.7418233576604656,
123
+ "eval_sickr_spearman": 0.6867260657133575,
124
+ "eval_stsb_spearman": 0.7969206496075738,
125
+ "step": 4500
126
+ },
127
+ {
128
+ "epoch": 0.1,
129
+ "learning_rate": 8.986726112068093e-06,
130
+ "loss": 0.467,
131
+ "step": 5000
132
+ },
133
+ {
134
+ "epoch": 0.1,
135
+ "eval_avg_sts": 0.7446135457895653,
136
+ "eval_sickr_spearman": 0.6906753269669034,
137
+ "eval_stsb_spearman": 0.7985517646122272,
138
+ "step": 5000
139
+ },
140
+ {
141
+ "epoch": 0.11,
142
+ "learning_rate": 8.885398723274902e-06,
143
+ "loss": 0.468,
144
+ "step": 5500
145
+ },
146
+ {
147
+ "epoch": 0.11,
148
+ "eval_avg_sts": 0.7410856251082112,
149
+ "eval_sickr_spearman": 0.6825309332515498,
150
+ "eval_stsb_spearman": 0.7996403169648725,
151
+ "step": 5500
152
+ },
153
+ {
154
+ "epoch": 0.12,
155
+ "learning_rate": 8.78407133448171e-06,
156
+ "loss": 0.4614,
157
+ "step": 6000
158
+ },
159
+ {
160
+ "epoch": 0.12,
161
+ "eval_avg_sts": 0.7509672954480082,
162
+ "eval_sickr_spearman": 0.6987787501527303,
163
+ "eval_stsb_spearman": 0.8031558407432862,
164
+ "step": 6000
165
+ },
166
+ {
167
+ "epoch": 0.13,
168
+ "learning_rate": 8.68274394568852e-06,
169
+ "loss": 0.4596,
170
+ "step": 6500
171
+ },
172
+ {
173
+ "epoch": 0.13,
174
+ "eval_avg_sts": 0.7500520514398668,
175
+ "eval_sickr_spearman": 0.6982613591280389,
176
+ "eval_stsb_spearman": 0.8018427437516946,
177
+ "step": 6500
178
+ },
179
+ {
180
+ "epoch": 0.14,
181
+ "learning_rate": 8.58141655689533e-06,
182
+ "loss": 0.4494,
183
+ "step": 7000
184
+ },
185
+ {
186
+ "epoch": 0.14,
187
+ "eval_avg_sts": 0.741497727059482,
188
+ "eval_sickr_spearman": 0.6884413524079153,
189
+ "eval_stsb_spearman": 0.7945541017110487,
190
+ "step": 7000
191
+ },
192
+ {
193
+ "epoch": 0.15,
194
+ "learning_rate": 8.480089168102139e-06,
195
+ "loss": 0.4535,
196
+ "step": 7500
197
+ },
198
+ {
199
+ "epoch": 0.15,
200
+ "eval_avg_sts": 0.7464036949270956,
201
+ "eval_sickr_spearman": 0.696571288761736,
202
+ "eval_stsb_spearman": 0.796236101092455,
203
+ "step": 7500
204
+ },
205
+ {
206
+ "epoch": 0.16,
207
+ "learning_rate": 8.378761779308948e-06,
208
+ "loss": 0.452,
209
+ "step": 8000
210
+ },
211
+ {
212
+ "epoch": 0.16,
213
+ "eval_avg_sts": 0.7481634305513074,
214
+ "eval_sickr_spearman": 0.6957231555725417,
215
+ "eval_stsb_spearman": 0.8006037055300731,
216
+ "step": 8000
217
+ },
218
+ {
219
+ "epoch": 0.17,
220
+ "learning_rate": 8.277434390515758e-06,
221
+ "loss": 0.4627,
222
+ "step": 8500
223
+ },
224
+ {
225
+ "epoch": 0.17,
226
+ "eval_avg_sts": 0.7469738693319797,
227
+ "eval_sickr_spearman": 0.6922042529878814,
228
+ "eval_stsb_spearman": 0.8017434856760781,
229
+ "step": 8500
230
+ },
231
+ {
232
+ "epoch": 0.18,
233
+ "learning_rate": 8.176107001722566e-06,
234
+ "loss": 0.4579,
235
+ "step": 9000
236
+ },
237
+ {
238
+ "epoch": 0.18,
239
+ "eval_avg_sts": 0.7445587366747802,
240
+ "eval_sickr_spearman": 0.689870373737902,
241
+ "eval_stsb_spearman": 0.7992470996116584,
242
+ "step": 9000
243
+ },
244
+ {
245
+ "epoch": 0.19,
246
+ "learning_rate": 8.074779612929375e-06,
247
+ "loss": 0.4631,
248
+ "step": 9500
249
+ },
250
+ {
251
+ "epoch": 0.19,
252
+ "eval_avg_sts": 0.7415088027050005,
253
+ "eval_sickr_spearman": 0.6845037627120188,
254
+ "eval_stsb_spearman": 0.7985138426979821,
255
+ "step": 9500
256
+ },
257
+ {
258
+ "epoch": 0.2,
259
+ "learning_rate": 7.973452224136185e-06,
260
+ "loss": 0.4584,
261
+ "step": 10000
262
+ },
263
+ {
264
+ "epoch": 0.2,
265
+ "eval_avg_sts": 0.7471105530073405,
266
+ "eval_sickr_spearman": 0.6933327917472712,
267
+ "eval_stsb_spearman": 0.8008883142674098,
268
+ "step": 10000
269
+ },
270
+ {
271
+ "epoch": 0.21,
272
+ "learning_rate": 7.872124835342994e-06,
273
+ "loss": 0.4589,
274
+ "step": 10500
275
+ },
276
+ {
277
+ "epoch": 0.21,
278
+ "eval_avg_sts": 0.745595984233087,
279
+ "eval_sickr_spearman": 0.6923495470697317,
280
+ "eval_stsb_spearman": 0.7988424213964425,
281
+ "step": 10500
282
+ },
283
+ {
284
+ "epoch": 0.22,
285
+ "learning_rate": 7.770797446549802e-06,
286
+ "loss": 0.4587,
287
+ "step": 11000
288
+ },
289
+ {
290
+ "epoch": 0.22,
291
+ "eval_avg_sts": 0.7466024940300805,
292
+ "eval_sickr_spearman": 0.692575533401998,
293
+ "eval_stsb_spearman": 0.800629454658163,
294
+ "step": 11000
295
+ },
296
+ {
297
+ "epoch": 0.23,
298
+ "learning_rate": 7.669470057756611e-06,
299
+ "loss": 0.4661,
300
+ "step": 11500
301
+ },
302
+ {
303
+ "epoch": 0.23,
304
+ "eval_avg_sts": 0.7517425175405497,
305
+ "eval_sickr_spearman": 0.6975225927568193,
306
+ "eval_stsb_spearman": 0.8059624423242799,
307
+ "step": 11500
308
+ },
309
+ {
310
+ "epoch": 0.24,
311
+ "learning_rate": 7.568142668963422e-06,
312
+ "loss": 0.4621,
313
+ "step": 12000
314
+ },
315
+ {
316
+ "epoch": 0.24,
317
+ "eval_avg_sts": 0.7477990028501873,
318
+ "eval_sickr_spearman": 0.6881468256938967,
319
+ "eval_stsb_spearman": 0.8074511800064779,
320
+ "step": 12000
321
+ },
322
+ {
323
+ "epoch": 0.25,
324
+ "learning_rate": 7.466815280170231e-06,
325
+ "loss": 0.4572,
326
+ "step": 12500
327
+ },
328
+ {
329
+ "epoch": 0.25,
330
+ "eval_avg_sts": 0.7490645066698164,
331
+ "eval_sickr_spearman": 0.690311203186901,
332
+ "eval_stsb_spearman": 0.8078178101527318,
333
+ "step": 12500
334
+ },
335
+ {
336
+ "epoch": 0.26,
337
+ "learning_rate": 7.36548789137704e-06,
338
+ "loss": 0.4505,
339
+ "step": 13000
340
+ },
341
+ {
342
+ "epoch": 0.26,
343
+ "eval_avg_sts": 0.7488632831459506,
344
+ "eval_sickr_spearman": 0.6918468535620804,
345
+ "eval_stsb_spearman": 0.8058797127298208,
346
+ "step": 13000
347
+ },
348
+ {
349
+ "epoch": 0.27,
350
+ "learning_rate": 7.264160502583849e-06,
351
+ "loss": 0.4587,
352
+ "step": 13500
353
+ },
354
+ {
355
+ "epoch": 0.27,
356
+ "eval_avg_sts": 0.7492004417544827,
357
+ "eval_sickr_spearman": 0.6910344555123561,
358
+ "eval_stsb_spearman": 0.8073664279966094,
359
+ "step": 13500
360
+ },
361
+ {
362
+ "epoch": 0.28,
363
+ "learning_rate": 7.162833113790658e-06,
364
+ "loss": 0.4567,
365
+ "step": 14000
366
+ },
367
+ {
368
+ "epoch": 0.28,
369
+ "eval_avg_sts": 0.7515448270625236,
370
+ "eval_sickr_spearman": 0.6936582024595145,
371
+ "eval_stsb_spearman": 0.8094314516655328,
372
+ "step": 14000
373
+ },
374
+ {
375
+ "epoch": 0.29,
376
+ "learning_rate": 7.061505724997467e-06,
377
+ "loss": 0.4543,
378
+ "step": 14500
379
+ },
380
+ {
381
+ "epoch": 0.29,
382
+ "eval_avg_sts": 0.7513387605617963,
383
+ "eval_sickr_spearman": 0.6940377922541799,
384
+ "eval_stsb_spearman": 0.8086397288694127,
385
+ "step": 14500
386
+ },
387
+ {
388
+ "epoch": 0.3,
389
+ "learning_rate": 6.9601783362042765e-06,
390
+ "loss": 0.4523,
391
+ "step": 15000
392
+ },
393
+ {
394
+ "epoch": 0.3,
395
+ "eval_avg_sts": 0.7500897857354842,
396
+ "eval_sickr_spearman": 0.6939183389049033,
397
+ "eval_stsb_spearman": 0.8062612325660653,
398
+ "step": 15000
399
+ },
400
+ {
401
+ "epoch": 0.31,
402
+ "learning_rate": 6.858850947411085e-06,
403
+ "loss": 0.4564,
404
+ "step": 15500
405
+ },
406
+ {
407
+ "epoch": 0.31,
408
+ "eval_avg_sts": 0.7472002180219787,
409
+ "eval_sickr_spearman": 0.6905755663692164,
410
+ "eval_stsb_spearman": 0.803824869674741,
411
+ "step": 15500
412
+ },
413
+ {
414
+ "epoch": 0.32,
415
+ "learning_rate": 6.757523558617895e-06,
416
+ "loss": 0.4646,
417
+ "step": 16000
418
+ },
419
+ {
420
+ "epoch": 0.32,
421
+ "eval_avg_sts": 0.7507948330729827,
422
+ "eval_sickr_spearman": 0.6941705502185548,
423
+ "eval_stsb_spearman": 0.8074191159274106,
424
+ "step": 16000
425
+ },
426
+ {
427
+ "epoch": 0.33,
428
+ "learning_rate": 6.656196169824705e-06,
429
+ "loss": 0.4492,
430
+ "step": 16500
431
+ },
432
+ {
433
+ "epoch": 0.33,
434
+ "eval_avg_sts": 0.7529879565790001,
435
+ "eval_sickr_spearman": 0.6971369030122712,
436
+ "eval_stsb_spearman": 0.8088390101457289,
437
+ "step": 16500
438
+ },
439
+ {
440
+ "epoch": 0.34,
441
+ "learning_rate": 6.554868781031514e-06,
442
+ "loss": 0.4575,
443
+ "step": 17000
444
+ },
445
+ {
446
+ "epoch": 0.34,
447
+ "eval_avg_sts": 0.7518569042679815,
448
+ "eval_sickr_spearman": 0.6972905545057717,
449
+ "eval_stsb_spearman": 0.8064232540301913,
450
+ "step": 17000
451
+ },
452
+ {
453
+ "epoch": 0.35,
454
+ "learning_rate": 6.453541392238322e-06,
455
+ "loss": 0.4637,
456
+ "step": 17500
457
+ },
458
+ {
459
+ "epoch": 0.35,
460
+ "eval_avg_sts": 0.7498519912345287,
461
+ "eval_sickr_spearman": 0.6938069547806683,
462
+ "eval_stsb_spearman": 0.8058970276883892,
463
+ "step": 17500
464
+ },
465
+ {
466
+ "epoch": 0.36,
467
+ "learning_rate": 6.352214003445132e-06,
468
+ "loss": 0.4583,
469
+ "step": 18000
470
+ },
471
+ {
472
+ "epoch": 0.36,
473
+ "eval_avg_sts": 0.7516061405002821,
474
+ "eval_sickr_spearman": 0.6938626708583365,
475
+ "eval_stsb_spearman": 0.8093496101422275,
476
+ "step": 18000
477
+ },
478
+ {
479
+ "epoch": 0.37,
480
+ "learning_rate": 6.250886614651941e-06,
481
+ "loss": 0.4645,
482
+ "step": 18500
483
+ },
484
+ {
485
+ "epoch": 0.37,
486
+ "eval_avg_sts": 0.7534092039150044,
487
+ "eval_sickr_spearman": 0.6986008429530034,
488
+ "eval_stsb_spearman": 0.8082175648770055,
489
+ "step": 18500
490
+ },
491
+ {
492
+ "epoch": 0.39,
493
+ "learning_rate": 6.14955922585875e-06,
494
+ "loss": 0.4525,
495
+ "step": 19000
496
+ },
497
+ {
498
+ "epoch": 0.39,
499
+ "eval_avg_sts": 0.7495273023794997,
500
+ "eval_sickr_spearman": 0.6942551329881874,
501
+ "eval_stsb_spearman": 0.8047994717708119,
502
+ "step": 19000
503
+ },
504
+ {
505
+ "epoch": 0.4,
506
+ "learning_rate": 6.048231837065559e-06,
507
+ "loss": 0.4616,
508
+ "step": 19500
509
+ },
510
+ {
511
+ "epoch": 0.4,
512
+ "eval_avg_sts": 0.7498676062871128,
513
+ "eval_sickr_spearman": 0.6946699295802069,
514
+ "eval_stsb_spearman": 0.8050652829940186,
515
+ "step": 19500
516
+ },
517
+ {
518
+ "epoch": 0.41,
519
+ "learning_rate": 5.946904448272368e-06,
520
+ "loss": 0.4607,
521
+ "step": 20000
522
+ },
523
+ {
524
+ "epoch": 0.41,
525
+ "eval_avg_sts": 0.7500055698075132,
526
+ "eval_sickr_spearman": 0.6939670904728629,
527
+ "eval_stsb_spearman": 0.8060440491421634,
528
+ "step": 20000
529
+ },
530
+ {
531
+ "epoch": 0.42,
532
+ "learning_rate": 5.845577059479177e-06,
533
+ "loss": 0.4548,
534
+ "step": 20500
535
+ },
536
+ {
537
+ "epoch": 0.42,
538
+ "eval_avg_sts": 0.7494506896871379,
539
+ "eval_sickr_spearman": 0.6917624148857522,
540
+ "eval_stsb_spearman": 0.8071389644885236,
541
+ "step": 20500
542
+ },
543
+ {
544
+ "epoch": 0.43,
545
+ "learning_rate": 5.744249670685987e-06,
546
+ "loss": 0.4596,
547
+ "step": 21000
548
+ },
549
+ {
550
+ "epoch": 0.43,
551
+ "eval_avg_sts": 0.7491829900995786,
552
+ "eval_sickr_spearman": 0.6914101067567036,
553
+ "eval_stsb_spearman": 0.8069558734424537,
554
+ "step": 21000
555
+ },
556
+ {
557
+ "epoch": 0.44,
558
+ "learning_rate": 5.642922281892796e-06,
559
+ "loss": 0.4484,
560
+ "step": 21500
561
+ },
562
+ {
563
+ "epoch": 0.44,
564
+ "eval_avg_sts": 0.7496264233935017,
565
+ "eval_sickr_spearman": 0.692096231040747,
566
+ "eval_stsb_spearman": 0.8071566157462565,
567
+ "step": 21500
568
+ },
569
+ {
570
+ "epoch": 0.45,
571
+ "learning_rate": 5.541594893099606e-06,
572
+ "loss": 0.4604,
573
+ "step": 22000
574
+ },
575
+ {
576
+ "epoch": 0.45,
577
+ "eval_avg_sts": 0.7475317779125041,
578
+ "eval_sickr_spearman": 0.6897084608949542,
579
+ "eval_stsb_spearman": 0.8053550949300541,
580
+ "step": 22000
581
+ },
582
+ {
583
+ "epoch": 0.46,
584
+ "learning_rate": 5.440267504306414e-06,
585
+ "loss": 0.4661,
586
+ "step": 22500
587
+ },
588
+ {
589
+ "epoch": 0.46,
590
+ "eval_avg_sts": 0.749782311685921,
591
+ "eval_sickr_spearman": 0.6907351256881937,
592
+ "eval_stsb_spearman": 0.8088294976836483,
593
+ "step": 22500
594
+ },
595
+ {
596
+ "epoch": 0.47,
597
+ "learning_rate": 5.338940115513224e-06,
598
+ "loss": 0.4629,
599
+ "step": 23000
600
+ },
601
+ {
602
+ "epoch": 0.47,
603
+ "eval_avg_sts": 0.7493434059552446,
604
+ "eval_sickr_spearman": 0.6895577873297428,
605
+ "eval_stsb_spearman": 0.8091290245807464,
606
+ "step": 23000
607
+ },
608
+ {
609
+ "epoch": 0.48,
610
+ "learning_rate": 5.2376127267200325e-06,
611
+ "loss": 0.46,
612
+ "step": 23500
613
+ },
614
+ {
615
+ "epoch": 0.48,
616
+ "eval_avg_sts": 0.7494797845962304,
617
+ "eval_sickr_spearman": 0.6888344389420848,
618
+ "eval_stsb_spearman": 0.810125130250376,
619
+ "step": 23500
620
+ },
621
+ {
622
+ "epoch": 0.49,
623
+ "learning_rate": 5.136285337926842e-06,
624
+ "loss": 0.457,
625
+ "step": 24000
626
+ },
627
+ {
628
+ "epoch": 0.49,
629
+ "eval_avg_sts": 0.7481539489426248,
630
+ "eval_sickr_spearman": 0.6876096938865144,
631
+ "eval_stsb_spearman": 0.808698203998735,
632
+ "step": 24000
633
+ },
634
+ {
635
+ "epoch": 0.5,
636
+ "learning_rate": 5.034957949133651e-06,
637
+ "loss": 0.4591,
638
+ "step": 24500
639
+ },
640
+ {
641
+ "epoch": 0.5,
642
+ "eval_avg_sts": 0.7487851537577938,
643
+ "eval_sickr_spearman": 0.6880744428240295,
644
+ "eval_stsb_spearman": 0.809495864691558,
645
+ "step": 24500
646
+ },
647
+ {
648
+ "epoch": 0.51,
649
+ "learning_rate": 4.93363056034046e-06,
650
+ "loss": 0.4595,
651
+ "step": 25000
652
+ },
653
+ {
654
+ "epoch": 0.51,
655
+ "eval_avg_sts": 0.749808924875282,
656
+ "eval_sickr_spearman": 0.6898596147711799,
657
+ "eval_stsb_spearman": 0.8097582349793842,
658
+ "step": 25000
659
+ },
660
+ {
661
+ "epoch": 0.52,
662
+ "learning_rate": 4.83230317154727e-06,
663
+ "loss": 0.4623,
664
+ "step": 25500
665
+ },
666
+ {
667
+ "epoch": 0.52,
668
+ "eval_avg_sts": 0.7498742734395706,
669
+ "eval_sickr_spearman": 0.6910954550111825,
670
+ "eval_stsb_spearman": 0.8086530918679586,
671
+ "step": 25500
672
+ },
673
+ {
674
+ "epoch": 0.53,
675
+ "learning_rate": 4.730975782754078e-06,
676
+ "loss": 0.461,
677
+ "step": 26000
678
+ },
679
+ {
680
+ "epoch": 0.53,
681
+ "eval_avg_sts": 0.74840720780684,
682
+ "eval_sickr_spearman": 0.6880080158107407,
683
+ "eval_stsb_spearman": 0.8088063998029394,
684
+ "step": 26000
685
+ },
686
+ {
687
+ "epoch": 0.54,
688
+ "learning_rate": 4.629648393960888e-06,
689
+ "loss": 0.4602,
690
+ "step": 26500
691
+ },
692
+ {
693
+ "epoch": 0.54,
694
+ "eval_avg_sts": 0.7495470437599494,
695
+ "eval_sickr_spearman": 0.6893540874285438,
696
+ "eval_stsb_spearman": 0.8097400000913552,
697
+ "step": 26500
698
+ },
699
+ {
700
+ "epoch": 0.55,
701
+ "learning_rate": 4.5283210051676975e-06,
702
+ "loss": 0.4494,
703
+ "step": 27000
704
+ },
705
+ {
706
+ "epoch": 0.55,
707
+ "eval_avg_sts": 0.7498239634592918,
708
+ "eval_sickr_spearman": 0.6894677290145463,
709
+ "eval_stsb_spearman": 0.8101801979040372,
710
+ "step": 27000
711
+ },
712
+ {
713
+ "epoch": 0.56,
714
+ "learning_rate": 4.426993616374506e-06,
715
+ "loss": 0.4621,
716
+ "step": 27500
717
+ },
718
+ {
719
+ "epoch": 0.56,
720
+ "eval_avg_sts": 0.751903069869123,
721
+ "eval_sickr_spearman": 0.692736149405207,
722
+ "eval_stsb_spearman": 0.8110699903330391,
723
+ "step": 27500
724
+ },
725
+ {
726
+ "epoch": 0.57,
727
+ "learning_rate": 4.325666227581316e-06,
728
+ "loss": 0.4589,
729
+ "step": 28000
730
+ },
731
+ {
732
+ "epoch": 0.57,
733
+ "eval_avg_sts": 0.7525324676455694,
734
+ "eval_sickr_spearman": 0.6937445143487988,
735
+ "eval_stsb_spearman": 0.81132042094234,
736
+ "step": 28000
737
+ },
738
+ {
739
+ "epoch": 0.58,
740
+ "learning_rate": 4.224338838788124e-06,
741
+ "loss": 0.4627,
742
+ "step": 28500
743
+ },
744
+ {
745
+ "epoch": 0.58,
746
+ "eval_avg_sts": 0.7538511183199736,
747
+ "eval_sickr_spearman": 0.696754047102708,
748
+ "eval_stsb_spearman": 0.8109481895372391,
749
+ "step": 28500
750
+ },
751
+ {
752
+ "epoch": 0.59,
753
+ "learning_rate": 4.123011449994934e-06,
754
+ "loss": 0.4564,
755
+ "step": 29000
756
+ },
757
+ {
758
+ "epoch": 0.59,
759
+ "eval_avg_sts": 0.7515831116726858,
760
+ "eval_sickr_spearman": 0.6939315954889002,
761
+ "eval_stsb_spearman": 0.8092346278564714,
762
+ "step": 29000
763
+ },
764
+ {
765
+ "epoch": 0.6,
766
+ "learning_rate": 4.021684061201743e-06,
767
+ "loss": 0.455,
768
+ "step": 29500
769
+ },
770
+ {
771
+ "epoch": 0.6,
772
+ "eval_avg_sts": 0.7533852057891757,
773
+ "eval_sickr_spearman": 0.6955959692159336,
774
+ "eval_stsb_spearman": 0.8111744423624179,
775
+ "step": 29500
776
+ },
777
+ {
778
+ "epoch": 0.61,
779
+ "learning_rate": 3.920356672408552e-06,
780
+ "loss": 0.4632,
781
+ "step": 30000
782
+ },
783
+ {
784
+ "epoch": 0.61,
785
+ "eval_avg_sts": 0.7518159970200462,
786
+ "eval_sickr_spearman": 0.6946737720683219,
787
+ "eval_stsb_spearman": 0.8089582219717706,
788
+ "step": 30000
789
+ },
790
+ {
791
+ "epoch": 0.62,
792
+ "learning_rate": 3.819029283615362e-06,
793
+ "loss": 0.4611,
794
+ "step": 30500
795
+ },
796
+ {
797
+ "epoch": 0.62,
798
+ "eval_avg_sts": 0.7533598766129962,
799
+ "eval_sickr_spearman": 0.6974833513469443,
800
+ "eval_stsb_spearman": 0.8092364018790481,
801
+ "step": 30500
802
+ },
803
+ {
804
+ "epoch": 0.63,
805
+ "learning_rate": 3.7177018948221703e-06,
806
+ "loss": 0.4534,
807
+ "step": 31000
808
+ },
809
+ {
810
+ "epoch": 0.63,
811
+ "eval_avg_sts": 0.7526633199575018,
812
+ "eval_sickr_spearman": 0.6965692714554756,
813
+ "eval_stsb_spearman": 0.808757368459528,
814
+ "step": 31000
815
+ },
816
+ {
817
+ "epoch": 0.64,
818
+ "learning_rate": 3.6163745060289802e-06,
819
+ "loss": 0.449,
820
+ "step": 31500
821
+ },
822
+ {
823
+ "epoch": 0.64,
824
+ "eval_avg_sts": 0.7523877050131327,
825
+ "eval_sickr_spearman": 0.6958016383922917,
826
+ "eval_stsb_spearman": 0.8089737716339735,
827
+ "step": 31500
828
+ },
829
+ {
830
+ "epoch": 0.65,
831
+ "learning_rate": 3.5150471172357893e-06,
832
+ "loss": 0.4634,
833
+ "step": 32000
834
+ },
835
+ {
836
+ "epoch": 0.65,
837
+ "eval_avg_sts": 0.7514477132225694,
838
+ "eval_sickr_spearman": 0.6954868425534663,
839
+ "eval_stsb_spearman": 0.8074085838916726,
840
+ "step": 32000
841
+ },
842
+ {
843
+ "epoch": 0.66,
844
+ "learning_rate": 3.4137197284425984e-06,
845
+ "loss": 0.4598,
846
+ "step": 32500
847
+ },
848
+ {
849
+ "epoch": 0.66,
850
+ "eval_avg_sts": 0.7515196489317053,
851
+ "eval_sickr_spearman": 0.697347375298773,
852
+ "eval_stsb_spearman": 0.8056919225646377,
853
+ "step": 32500
854
+ },
855
+ {
856
+ "epoch": 0.67,
857
+ "learning_rate": 3.312392339649407e-06,
858
+ "loss": 0.4499,
859
+ "step": 33000
860
+ },
861
+ {
862
+ "epoch": 0.67,
863
+ "eval_avg_sts": 0.7531240484791469,
864
+ "eval_sickr_spearman": 0.7002055620520509,
865
+ "eval_stsb_spearman": 0.806042534906243,
866
+ "step": 33000
867
+ },
868
+ {
869
+ "epoch": 0.68,
870
+ "learning_rate": 3.211064950856217e-06,
871
+ "loss": 0.4618,
872
+ "step": 33500
873
+ },
874
+ {
875
+ "epoch": 0.68,
876
+ "eval_avg_sts": 0.7543101399622849,
877
+ "eval_sickr_spearman": 0.701310901789446,
878
+ "eval_stsb_spearman": 0.8073093781351238,
879
+ "step": 33500
880
+ },
881
+ {
882
+ "epoch": 0.69,
883
+ "learning_rate": 3.109737562063026e-06,
884
+ "loss": 0.4622,
885
+ "step": 34000
886
+ },
887
+ {
888
+ "epoch": 0.69,
889
+ "eval_avg_sts": 0.7535012273240942,
890
+ "eval_sickr_spearman": 0.6987900374615681,
891
+ "eval_stsb_spearman": 0.8082124171866202,
892
+ "step": 34000
893
+ },
894
+ {
895
+ "epoch": 0.7,
896
+ "learning_rate": 3.0084101732698353e-06,
897
+ "loss": 0.4654,
898
+ "step": 34500
899
+ },
900
+ {
901
+ "epoch": 0.7,
902
+ "eval_avg_sts": 0.7528923913364809,
903
+ "eval_sickr_spearman": 0.698313232717592,
904
+ "eval_stsb_spearman": 0.8074715499553696,
905
+ "step": 34500
906
+ },
907
+ {
908
+ "epoch": 0.71,
909
+ "learning_rate": 2.907082784476644e-06,
910
+ "loss": 0.4577,
911
+ "step": 35000
912
+ },
913
+ {
914
+ "epoch": 0.71,
915
+ "eval_avg_sts": 0.7508251788488477,
916
+ "eval_sickr_spearman": 0.6952676286065028,
917
+ "eval_stsb_spearman": 0.8063827290911926,
918
+ "step": 35000
919
+ },
920
+ {
921
+ "epoch": 0.72,
922
+ "learning_rate": 2.805755395683453e-06,
923
+ "loss": 0.4506,
924
+ "step": 35500
925
+ },
926
+ {
927
+ "epoch": 0.72,
928
+ "eval_avg_sts": 0.7509892106103371,
929
+ "eval_sickr_spearman": 0.6953001456621763,
930
+ "eval_stsb_spearman": 0.8066782755584977,
931
+ "step": 35500
932
+ },
933
+ {
934
+ "epoch": 0.73,
935
+ "learning_rate": 2.704428006890263e-06,
936
+ "loss": 0.4613,
937
+ "step": 36000
938
+ },
939
+ {
940
+ "epoch": 0.73,
941
+ "eval_avg_sts": 0.7503441697836055,
942
+ "eval_sickr_spearman": 0.6956849228157971,
943
+ "eval_stsb_spearman": 0.8050034167514141,
944
+ "step": 36000
945
+ },
946
+ {
947
+ "epoch": 0.74,
948
+ "learning_rate": 2.603100618097072e-06,
949
+ "loss": 0.4639,
950
+ "step": 36500
951
+ },
952
+ {
953
+ "epoch": 0.74,
954
+ "eval_avg_sts": 0.7496733652176951,
955
+ "eval_sickr_spearman": 0.6941632014600347,
956
+ "eval_stsb_spearman": 0.8051835289753555,
957
+ "step": 36500
958
+ },
959
+ {
960
+ "epoch": 0.75,
961
+ "learning_rate": 2.501773229303881e-06,
962
+ "loss": 0.4637,
963
+ "step": 37000
964
+ },
965
+ {
966
+ "epoch": 0.75,
967
+ "eval_avg_sts": 0.7492569767417572,
968
+ "eval_sickr_spearman": 0.6940407221513675,
969
+ "eval_stsb_spearman": 0.8044732313321468,
970
+ "step": 37000
971
+ },
972
+ {
973
+ "epoch": 0.76,
974
+ "learning_rate": 2.4004458405106903e-06,
975
+ "loss": 0.4523,
976
+ "step": 37500
977
+ },
978
+ {
979
+ "epoch": 0.76,
980
+ "eval_avg_sts": 0.7480946141530285,
981
+ "eval_sickr_spearman": 0.6926867254018273,
982
+ "eval_stsb_spearman": 0.8035025029042295,
983
+ "step": 37500
984
+ },
985
+ {
986
+ "epoch": 0.77,
987
+ "learning_rate": 2.2991184517174994e-06,
988
+ "loss": 0.4547,
989
+ "step": 38000
990
+ },
991
+ {
992
+ "epoch": 0.77,
993
+ "eval_avg_sts": 0.7484090650166574,
994
+ "eval_sickr_spearman": 0.6932337035850045,
995
+ "eval_stsb_spearman": 0.8035844264483103,
996
+ "step": 38000
997
+ },
998
+ {
999
+ "epoch": 0.78,
1000
+ "learning_rate": 2.1977910629243085e-06,
1001
+ "loss": 0.4598,
1002
+ "step": 38500
1003
+ },
1004
+ {
1005
+ "epoch": 0.78,
1006
+ "eval_avg_sts": 0.7474650473627502,
1007
+ "eval_sickr_spearman": 0.6918610707681061,
1008
+ "eval_stsb_spearman": 0.8030690239573942,
1009
+ "step": 38500
1010
+ },
1011
+ {
1012
+ "epoch": 0.79,
1013
+ "learning_rate": 2.0964636741311176e-06,
1014
+ "loss": 0.4601,
1015
+ "step": 39000
1016
+ },
1017
+ {
1018
+ "epoch": 0.79,
1019
+ "eval_avg_sts": 0.7474876355051218,
1020
+ "eval_sickr_spearman": 0.6911482411916631,
1021
+ "eval_stsb_spearman": 0.8038270298185805,
1022
+ "step": 39000
1023
+ },
1024
+ {
1025
+ "epoch": 0.8,
1026
+ "learning_rate": 1.9951362853379267e-06,
1027
+ "loss": 0.4582,
1028
+ "step": 39500
1029
+ },
1030
+ {
1031
+ "epoch": 0.8,
1032
+ "eval_avg_sts": 0.749122521812451,
1033
+ "eval_sickr_spearman": 0.6930461587670631,
1034
+ "eval_stsb_spearman": 0.8051988848578389,
1035
+ "step": 39500
1036
+ },
1037
+ {
1038
+ "epoch": 0.81,
1039
+ "learning_rate": 1.8938088965447363e-06,
1040
+ "loss": 0.4589,
1041
+ "step": 40000
1042
+ },
1043
+ {
1044
+ "epoch": 0.81,
1045
+ "eval_avg_sts": 0.747485970375817,
1046
+ "eval_sickr_spearman": 0.6916881107718275,
1047
+ "eval_stsb_spearman": 0.8032838299798064,
1048
+ "step": 40000
1049
+ },
1050
+ {
1051
+ "epoch": 0.82,
1052
+ "learning_rate": 1.7924815077515454e-06,
1053
+ "loss": 0.4494,
1054
+ "step": 40500
1055
+ },
1056
+ {
1057
+ "epoch": 0.82,
1058
+ "eval_avg_sts": 0.7484522252119146,
1059
+ "eval_sickr_spearman": 0.6921244733283927,
1060
+ "eval_stsb_spearman": 0.8047799770954366,
1061
+ "step": 40500
1062
+ },
1063
+ {
1064
+ "epoch": 0.83,
1065
+ "learning_rate": 1.6911541189583547e-06,
1066
+ "loss": 0.4692,
1067
+ "step": 41000
1068
+ },
1069
+ {
1070
+ "epoch": 0.83,
1071
+ "eval_avg_sts": 0.7489686533203876,
1072
+ "eval_sickr_spearman": 0.6921774036021774,
1073
+ "eval_stsb_spearman": 0.8057599030385979,
1074
+ "step": 41000
1075
+ },
1076
+ {
1077
+ "epoch": 0.84,
1078
+ "learning_rate": 1.5898267301651638e-06,
1079
+ "loss": 0.4512,
1080
+ "step": 41500
1081
+ },
1082
+ {
1083
+ "epoch": 0.84,
1084
+ "eval_avg_sts": 0.7486704913862867,
1085
+ "eval_sickr_spearman": 0.6929249596649603,
1086
+ "eval_stsb_spearman": 0.8044160231076133,
1087
+ "step": 41500
1088
+ },
1089
+ {
1090
+ "epoch": 0.85,
1091
+ "learning_rate": 1.4884993413719729e-06,
1092
+ "loss": 0.4569,
1093
+ "step": 42000
1094
+ },
1095
+ {
1096
+ "epoch": 0.85,
1097
+ "eval_avg_sts": 0.7481669304082241,
1098
+ "eval_sickr_spearman": 0.691963905356285,
1099
+ "eval_stsb_spearman": 0.8043699554601632,
1100
+ "step": 42000
1101
+ },
1102
+ {
1103
+ "epoch": 0.86,
1104
+ "learning_rate": 1.3871719525787822e-06,
1105
+ "loss": 0.4543,
1106
+ "step": 42500
1107
+ },
1108
+ {
1109
+ "epoch": 0.86,
1110
+ "eval_avg_sts": 0.7481576956303677,
1111
+ "eval_sickr_spearman": 0.6925726515359116,
1112
+ "eval_stsb_spearman": 0.8037427397248238,
1113
+ "step": 42500
1114
+ },
1115
+ {
1116
+ "epoch": 0.87,
1117
+ "learning_rate": 1.2858445637855913e-06,
1118
+ "loss": 0.4556,
1119
+ "step": 43000
1120
+ },
1121
+ {
1122
+ "epoch": 0.87,
1123
+ "eval_avg_sts": 0.7478866338195715,
1124
+ "eval_sickr_spearman": 0.6912525647439866,
1125
+ "eval_stsb_spearman": 0.8045207028951564,
1126
+ "step": 43000
1127
+ },
1128
+ {
1129
+ "epoch": 0.88,
1130
+ "learning_rate": 1.1845171749924006e-06,
1131
+ "loss": 0.4593,
1132
+ "step": 43500
1133
+ },
1134
+ {
1135
+ "epoch": 0.88,
1136
+ "eval_avg_sts": 0.7482308793293835,
1137
+ "eval_sickr_spearman": 0.6917689951466491,
1138
+ "eval_stsb_spearman": 0.8046927635121179,
1139
+ "step": 43500
1140
+ },
1141
+ {
1142
+ "epoch": 0.89,
1143
+ "learning_rate": 1.0831897861992097e-06,
1144
+ "loss": 0.4578,
1145
+ "step": 44000
1146
+ },
1147
+ {
1148
+ "epoch": 0.89,
1149
+ "eval_avg_sts": 0.7487411634362362,
1150
+ "eval_sickr_spearman": 0.6923790381660146,
1151
+ "eval_stsb_spearman": 0.8051032887064578,
1152
+ "step": 44000
1153
+ },
1154
+ {
1155
+ "epoch": 0.9,
1156
+ "learning_rate": 9.818623974060188e-07,
1157
+ "loss": 0.4617,
1158
+ "step": 44500
1159
+ },
1160
+ {
1161
+ "epoch": 0.9,
1162
+ "eval_avg_sts": 0.7495089794416554,
1163
+ "eval_sickr_spearman": 0.6935791912976488,
1164
+ "eval_stsb_spearman": 0.8054387675856621,
1165
+ "step": 44500
1166
+ },
1167
+ {
1168
+ "epoch": 0.91,
1169
+ "learning_rate": 8.80535008612828e-07,
1170
+ "loss": 0.4534,
1171
+ "step": 45000
1172
+ },
1173
+ {
1174
+ "epoch": 0.91,
1175
+ "eval_avg_sts": 0.7495111256181921,
1176
+ "eval_sickr_spearman": 0.6928939315734312,
1177
+ "eval_stsb_spearman": 0.806128319662953,
1178
+ "step": 45000
1179
+ },
1180
+ {
1181
+ "epoch": 0.92,
1182
+ "learning_rate": 7.792076198196374e-07,
1183
+ "loss": 0.4546,
1184
+ "step": 45500
1185
+ },
1186
+ {
1187
+ "epoch": 0.92,
1188
+ "eval_avg_sts": 0.748989011722548,
1189
+ "eval_sickr_spearman": 0.6924577611412717,
1190
+ "eval_stsb_spearman": 0.8055202623038241,
1191
+ "step": 45500
1192
+ },
1193
+ {
1194
+ "epoch": 0.93,
1195
+ "learning_rate": 6.778802310264466e-07,
1196
+ "loss": 0.4613,
1197
+ "step": 46000
1198
+ },
1199
+ {
1200
+ "epoch": 0.93,
1201
+ "eval_avg_sts": 0.7489659458032893,
1202
+ "eval_sickr_spearman": 0.6927594444894045,
1203
+ "eval_stsb_spearman": 0.8051724471171742,
1204
+ "step": 46000
1205
+ },
1206
+ {
1207
+ "epoch": 0.94,
1208
+ "learning_rate": 5.765528422332558e-07,
1209
+ "loss": 0.4594,
1210
+ "step": 46500
1211
+ },
1212
+ {
1213
+ "epoch": 0.94,
1214
+ "eval_avg_sts": 0.7486440766299414,
1215
+ "eval_sickr_spearman": 0.6924414305667828,
1216
+ "eval_stsb_spearman": 0.8048467226930999,
1217
+ "step": 46500
1218
+ },
1219
+ {
1220
+ "epoch": 0.95,
1221
+ "learning_rate": 4.752254534400649e-07,
1222
+ "loss": 0.4584,
1223
+ "step": 47000
1224
+ },
1225
+ {
1226
+ "epoch": 0.95,
1227
+ "eval_avg_sts": 0.7490758878100736,
1228
+ "eval_sickr_spearman": 0.6929523854238814,
1229
+ "eval_stsb_spearman": 0.805199390196266,
1230
+ "step": 47000
1231
+ },
1232
+ {
1233
+ "epoch": 0.96,
1234
+ "learning_rate": 3.738980646468741e-07,
1235
+ "loss": 0.4496,
1236
+ "step": 47500
1237
+ },
1238
+ {
1239
+ "epoch": 0.96,
1240
+ "eval_avg_sts": 0.7489021602575776,
1241
+ "eval_sickr_spearman": 0.6924429195309273,
1242
+ "eval_stsb_spearman": 0.8053614009842279,
1243
+ "step": 47500
1244
+ },
1245
+ {
1246
+ "epoch": 0.97,
1247
+ "learning_rate": 2.725706758536833e-07,
1248
+ "loss": 0.4521,
1249
+ "step": 48000
1250
+ },
1251
+ {
1252
+ "epoch": 0.97,
1253
+ "eval_avg_sts": 0.7486292878627302,
1254
+ "eval_sickr_spearman": 0.6919387850902329,
1255
+ "eval_stsb_spearman": 0.8053197906352275,
1256
+ "step": 48000
1257
+ },
1258
+ {
1259
+ "epoch": 0.98,
1260
+ "learning_rate": 1.7124328706049245e-07,
1261
+ "loss": 0.4618,
1262
+ "step": 48500
1263
+ },
1264
+ {
1265
+ "epoch": 0.98,
1266
+ "eval_avg_sts": 0.7487302574375756,
1267
+ "eval_sickr_spearman": 0.6921317740558113,
1268
+ "eval_stsb_spearman": 0.80532874081934,
1269
+ "step": 48500
1270
+ },
1271
+ {
1272
+ "epoch": 0.99,
1273
+ "learning_rate": 6.991589826730166e-08,
1274
+ "loss": 0.4501,
1275
+ "step": 49000
1276
+ },
1277
+ {
1278
+ "epoch": 0.99,
1279
+ "eval_avg_sts": 0.748655762950658,
1280
+ "eval_sickr_spearman": 0.6920557408222348,
1281
+ "eval_stsb_spearman": 0.8052557850790812,
1282
+ "step": 49000
1283
+ },
1284
+ {
1285
+ "epoch": 1.0,
1286
+ "step": 49345,
1287
+ "train_runtime": 47384.4879,
1288
+ "train_samples_per_second": 1.041
1289
+ }
1290
+ ],
1291
+ "max_steps": 49345,
1292
+ "num_train_epochs": 1,
1293
+ "total_flos": 1976154204533723136,
1294
+ "trial_name": null,
1295
+ "trial_params": null
1296
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2cad68d9a0d868c7ea6a4863816d042123ae496c3230c9185dbc9e80561e3a6
3
+ size 2107
vocab.txt ADDED
The diff for this file is too large to render. See raw diff