minimario commited on
Commit
c3f6421
·
1 Parent(s): 1ef15eb

add new ranker

Browse files
ranker_9/checkpoint-800/config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/codebert-base",
3
+ "architectures": [
4
+ "RobertaForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 3072,
15
+ "layer_norm_eps": 1e-05,
16
+ "max_position_embeddings": 514,
17
+ "model_type": "roberta",
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "output_past": true,
21
+ "pad_token_id": 1,
22
+ "position_embedding_type": "absolute",
23
+ "problem_type": "single_label_classification",
24
+ "torch_dtype": "float32",
25
+ "transformers_version": "4.26.1",
26
+ "type_vocab_size": 1,
27
+ "use_cache": true,
28
+ "vocab_size": 50265
29
+ }
ranker_9/checkpoint-800/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
ranker_9/checkpoint-800/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d20e036566e07151f5ed16f3285b17c47dafa80858829435ebb02d8b2975a214
3
+ size 997295237
ranker_9/checkpoint-800/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2cebe0eeefe4cd17f1335d6e91f0d05947c5157fe5f7dc54144f60d9233b535
3
+ size 498662069
ranker_9/checkpoint-800/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c27c5a13788d5ab0de360c018031ecf890c18fd13458ae74fad3f4d6b504d94
3
+ size 15597
ranker_9/checkpoint-800/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6a5c3e1c781b3d6c9f9adade16b230e620ed9571282c67bc5ddf8474b3ff217
3
+ size 627
ranker_9/checkpoint-800/special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "cls_token": "<s>",
4
+ "eos_token": "</s>",
5
+ "mask_token": "<mask>",
6
+ "pad_token": "<pad>",
7
+ "sep_token": "</s>",
8
+ "unk_token": "<unk>"
9
+ }
ranker_9/checkpoint-800/tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "bos_token": {
4
+ "__type": "AddedToken",
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false
10
+ },
11
+ "cls_token": {
12
+ "__type": "AddedToken",
13
+ "content": "<s>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false
18
+ },
19
+ "eos_token": {
20
+ "__type": "AddedToken",
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false
26
+ },
27
+ "errors": "replace",
28
+ "mask_token": {
29
+ "__type": "AddedToken",
30
+ "content": "<mask>",
31
+ "lstrip": true,
32
+ "normalized": true,
33
+ "rstrip": false,
34
+ "single_word": false
35
+ },
36
+ "model_max_length": 512,
37
+ "name_or_path": "microsoft/codebert-base",
38
+ "pad_token": {
39
+ "__type": "AddedToken",
40
+ "content": "<pad>",
41
+ "lstrip": false,
42
+ "normalized": true,
43
+ "rstrip": false,
44
+ "single_word": false
45
+ },
46
+ "sep_token": {
47
+ "__type": "AddedToken",
48
+ "content": "</s>",
49
+ "lstrip": false,
50
+ "normalized": true,
51
+ "rstrip": false,
52
+ "single_word": false
53
+ },
54
+ "special_tokens_map_file": "/om2/user/gua/.cache/huggingface/hub/models--microsoft--codebert-base/snapshots/3b0952feddeffad0063f274080e3c23d75e7eb39/special_tokens_map.json",
55
+ "tokenizer_class": "RobertaTokenizer",
56
+ "unk_token": {
57
+ "__type": "AddedToken",
58
+ "content": "<unk>",
59
+ "lstrip": false,
60
+ "normalized": true,
61
+ "rstrip": false,
62
+ "single_word": false
63
+ },
64
+ "use_fast": true
65
+ }
ranker_9/checkpoint-800/trainer_state.json ADDED
@@ -0,0 +1,1028 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.3323615160349855,
5
+ "global_step": 800,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.01,
12
+ "learning_rate": 1.9970845481049562e-05,
13
+ "loss": 0.7026,
14
+ "step": 5
15
+ },
16
+ {
17
+ "epoch": 0.03,
18
+ "learning_rate": 1.9941690962099126e-05,
19
+ "loss": 0.681,
20
+ "step": 10
21
+ },
22
+ {
23
+ "epoch": 0.04,
24
+ "learning_rate": 1.991253644314869e-05,
25
+ "loss": 0.6654,
26
+ "step": 15
27
+ },
28
+ {
29
+ "epoch": 0.06,
30
+ "learning_rate": 1.9883381924198253e-05,
31
+ "loss": 0.6638,
32
+ "step": 20
33
+ },
34
+ {
35
+ "epoch": 0.07,
36
+ "learning_rate": 1.9854227405247814e-05,
37
+ "loss": 0.6479,
38
+ "step": 25
39
+ },
40
+ {
41
+ "epoch": 0.09,
42
+ "learning_rate": 1.9825072886297377e-05,
43
+ "loss": 0.6243,
44
+ "step": 30
45
+ },
46
+ {
47
+ "epoch": 0.1,
48
+ "learning_rate": 1.979591836734694e-05,
49
+ "loss": 0.5941,
50
+ "step": 35
51
+ },
52
+ {
53
+ "epoch": 0.12,
54
+ "learning_rate": 1.9766763848396505e-05,
55
+ "loss": 0.5806,
56
+ "step": 40
57
+ },
58
+ {
59
+ "epoch": 0.13,
60
+ "learning_rate": 1.9737609329446065e-05,
61
+ "loss": 0.5747,
62
+ "step": 45
63
+ },
64
+ {
65
+ "epoch": 0.15,
66
+ "learning_rate": 1.970845481049563e-05,
67
+ "loss": 0.5485,
68
+ "step": 50
69
+ },
70
+ {
71
+ "epoch": 0.16,
72
+ "learning_rate": 1.9679300291545193e-05,
73
+ "loss": 0.5271,
74
+ "step": 55
75
+ },
76
+ {
77
+ "epoch": 0.17,
78
+ "learning_rate": 1.9650145772594753e-05,
79
+ "loss": 0.5541,
80
+ "step": 60
81
+ },
82
+ {
83
+ "epoch": 0.19,
84
+ "learning_rate": 1.9620991253644317e-05,
85
+ "loss": 0.5336,
86
+ "step": 65
87
+ },
88
+ {
89
+ "epoch": 0.2,
90
+ "learning_rate": 1.9591836734693877e-05,
91
+ "loss": 0.5274,
92
+ "step": 70
93
+ },
94
+ {
95
+ "epoch": 0.22,
96
+ "learning_rate": 1.956268221574344e-05,
97
+ "loss": 0.5194,
98
+ "step": 75
99
+ },
100
+ {
101
+ "epoch": 0.23,
102
+ "learning_rate": 1.9533527696793005e-05,
103
+ "loss": 0.4857,
104
+ "step": 80
105
+ },
106
+ {
107
+ "epoch": 0.25,
108
+ "learning_rate": 1.950437317784257e-05,
109
+ "loss": 0.498,
110
+ "step": 85
111
+ },
112
+ {
113
+ "epoch": 0.26,
114
+ "learning_rate": 1.947521865889213e-05,
115
+ "loss": 0.4707,
116
+ "step": 90
117
+ },
118
+ {
119
+ "epoch": 0.28,
120
+ "learning_rate": 1.9446064139941693e-05,
121
+ "loss": 0.4736,
122
+ "step": 95
123
+ },
124
+ {
125
+ "epoch": 0.29,
126
+ "learning_rate": 1.9416909620991257e-05,
127
+ "loss": 0.4997,
128
+ "step": 100
129
+ },
130
+ {
131
+ "epoch": 0.31,
132
+ "learning_rate": 1.9387755102040817e-05,
133
+ "loss": 0.4745,
134
+ "step": 105
135
+ },
136
+ {
137
+ "epoch": 0.32,
138
+ "learning_rate": 1.935860058309038e-05,
139
+ "loss": 0.4709,
140
+ "step": 110
141
+ },
142
+ {
143
+ "epoch": 0.34,
144
+ "learning_rate": 1.9329446064139944e-05,
145
+ "loss": 0.4663,
146
+ "step": 115
147
+ },
148
+ {
149
+ "epoch": 0.35,
150
+ "learning_rate": 1.9300291545189508e-05,
151
+ "loss": 0.4647,
152
+ "step": 120
153
+ },
154
+ {
155
+ "epoch": 0.36,
156
+ "learning_rate": 1.927113702623907e-05,
157
+ "loss": 0.4485,
158
+ "step": 125
159
+ },
160
+ {
161
+ "epoch": 0.38,
162
+ "learning_rate": 1.9241982507288632e-05,
163
+ "loss": 0.4528,
164
+ "step": 130
165
+ },
166
+ {
167
+ "epoch": 0.39,
168
+ "learning_rate": 1.9212827988338193e-05,
169
+ "loss": 0.4505,
170
+ "step": 135
171
+ },
172
+ {
173
+ "epoch": 0.41,
174
+ "learning_rate": 1.9183673469387756e-05,
175
+ "loss": 0.4499,
176
+ "step": 140
177
+ },
178
+ {
179
+ "epoch": 0.42,
180
+ "learning_rate": 1.915451895043732e-05,
181
+ "loss": 0.4496,
182
+ "step": 145
183
+ },
184
+ {
185
+ "epoch": 0.44,
186
+ "learning_rate": 1.912536443148688e-05,
187
+ "loss": 0.4808,
188
+ "step": 150
189
+ },
190
+ {
191
+ "epoch": 0.45,
192
+ "learning_rate": 1.9096209912536444e-05,
193
+ "loss": 0.4437,
194
+ "step": 155
195
+ },
196
+ {
197
+ "epoch": 0.47,
198
+ "learning_rate": 1.9067055393586008e-05,
199
+ "loss": 0.4519,
200
+ "step": 160
201
+ },
202
+ {
203
+ "epoch": 0.48,
204
+ "learning_rate": 1.9037900874635572e-05,
205
+ "loss": 0.4467,
206
+ "step": 165
207
+ },
208
+ {
209
+ "epoch": 0.5,
210
+ "learning_rate": 1.9008746355685132e-05,
211
+ "loss": 0.4314,
212
+ "step": 170
213
+ },
214
+ {
215
+ "epoch": 0.51,
216
+ "learning_rate": 1.8979591836734696e-05,
217
+ "loss": 0.4398,
218
+ "step": 175
219
+ },
220
+ {
221
+ "epoch": 0.52,
222
+ "learning_rate": 1.895043731778426e-05,
223
+ "loss": 0.4302,
224
+ "step": 180
225
+ },
226
+ {
227
+ "epoch": 0.54,
228
+ "learning_rate": 1.892128279883382e-05,
229
+ "loss": 0.4102,
230
+ "step": 185
231
+ },
232
+ {
233
+ "epoch": 0.55,
234
+ "learning_rate": 1.8892128279883384e-05,
235
+ "loss": 0.4152,
236
+ "step": 190
237
+ },
238
+ {
239
+ "epoch": 0.57,
240
+ "learning_rate": 1.8862973760932944e-05,
241
+ "loss": 0.4071,
242
+ "step": 195
243
+ },
244
+ {
245
+ "epoch": 0.58,
246
+ "learning_rate": 1.8833819241982508e-05,
247
+ "loss": 0.4138,
248
+ "step": 200
249
+ },
250
+ {
251
+ "epoch": 0.58,
252
+ "eval_accuracy": 0.7340279681930354,
253
+ "eval_accuracy_sklearn": 0.7340279681930354,
254
+ "eval_f1": 0.623921682659688,
255
+ "eval_loss": 0.5162232518196106,
256
+ "eval_precision": 0.6743138487324534,
257
+ "eval_recall": 0.5805375180375181,
258
+ "eval_runtime": 166.8611,
259
+ "eval_samples_per_second": 174.852,
260
+ "eval_steps_per_second": 10.931,
261
+ "step": 200
262
+ },
263
+ {
264
+ "epoch": 0.6,
265
+ "learning_rate": 1.8804664723032072e-05,
266
+ "loss": 0.3971,
267
+ "step": 205
268
+ },
269
+ {
270
+ "epoch": 0.61,
271
+ "learning_rate": 1.8775510204081636e-05,
272
+ "loss": 0.3886,
273
+ "step": 210
274
+ },
275
+ {
276
+ "epoch": 0.63,
277
+ "learning_rate": 1.8746355685131196e-05,
278
+ "loss": 0.4183,
279
+ "step": 215
280
+ },
281
+ {
282
+ "epoch": 0.64,
283
+ "learning_rate": 1.871720116618076e-05,
284
+ "loss": 0.4144,
285
+ "step": 220
286
+ },
287
+ {
288
+ "epoch": 0.66,
289
+ "learning_rate": 1.8688046647230323e-05,
290
+ "loss": 0.3996,
291
+ "step": 225
292
+ },
293
+ {
294
+ "epoch": 0.67,
295
+ "learning_rate": 1.8658892128279884e-05,
296
+ "loss": 0.3802,
297
+ "step": 230
298
+ },
299
+ {
300
+ "epoch": 0.69,
301
+ "learning_rate": 1.8629737609329448e-05,
302
+ "loss": 0.4201,
303
+ "step": 235
304
+ },
305
+ {
306
+ "epoch": 0.7,
307
+ "learning_rate": 1.8600583090379008e-05,
308
+ "loss": 0.3977,
309
+ "step": 240
310
+ },
311
+ {
312
+ "epoch": 0.71,
313
+ "learning_rate": 1.8571428571428575e-05,
314
+ "loss": 0.3927,
315
+ "step": 245
316
+ },
317
+ {
318
+ "epoch": 0.73,
319
+ "learning_rate": 1.8542274052478135e-05,
320
+ "loss": 0.387,
321
+ "step": 250
322
+ },
323
+ {
324
+ "epoch": 0.74,
325
+ "learning_rate": 1.85131195335277e-05,
326
+ "loss": 0.3892,
327
+ "step": 255
328
+ },
329
+ {
330
+ "epoch": 0.76,
331
+ "learning_rate": 1.848396501457726e-05,
332
+ "loss": 0.3728,
333
+ "step": 260
334
+ },
335
+ {
336
+ "epoch": 0.77,
337
+ "learning_rate": 1.8454810495626823e-05,
338
+ "loss": 0.3874,
339
+ "step": 265
340
+ },
341
+ {
342
+ "epoch": 0.79,
343
+ "learning_rate": 1.8425655976676387e-05,
344
+ "loss": 0.3806,
345
+ "step": 270
346
+ },
347
+ {
348
+ "epoch": 0.8,
349
+ "learning_rate": 1.8396501457725948e-05,
350
+ "loss": 0.3848,
351
+ "step": 275
352
+ },
353
+ {
354
+ "epoch": 0.82,
355
+ "learning_rate": 1.836734693877551e-05,
356
+ "loss": 0.3874,
357
+ "step": 280
358
+ },
359
+ {
360
+ "epoch": 0.83,
361
+ "learning_rate": 1.8338192419825075e-05,
362
+ "loss": 0.367,
363
+ "step": 285
364
+ },
365
+ {
366
+ "epoch": 0.85,
367
+ "learning_rate": 1.830903790087464e-05,
368
+ "loss": 0.3732,
369
+ "step": 290
370
+ },
371
+ {
372
+ "epoch": 0.86,
373
+ "learning_rate": 1.82798833819242e-05,
374
+ "loss": 0.3671,
375
+ "step": 295
376
+ },
377
+ {
378
+ "epoch": 0.87,
379
+ "learning_rate": 1.8250728862973763e-05,
380
+ "loss": 0.3776,
381
+ "step": 300
382
+ },
383
+ {
384
+ "epoch": 0.89,
385
+ "learning_rate": 1.8221574344023327e-05,
386
+ "loss": 0.3736,
387
+ "step": 305
388
+ },
389
+ {
390
+ "epoch": 0.9,
391
+ "learning_rate": 1.8192419825072887e-05,
392
+ "loss": 0.3657,
393
+ "step": 310
394
+ },
395
+ {
396
+ "epoch": 0.92,
397
+ "learning_rate": 1.816326530612245e-05,
398
+ "loss": 0.3591,
399
+ "step": 315
400
+ },
401
+ {
402
+ "epoch": 0.93,
403
+ "learning_rate": 1.813411078717201e-05,
404
+ "loss": 0.374,
405
+ "step": 320
406
+ },
407
+ {
408
+ "epoch": 0.95,
409
+ "learning_rate": 1.8104956268221575e-05,
410
+ "loss": 0.3747,
411
+ "step": 325
412
+ },
413
+ {
414
+ "epoch": 0.96,
415
+ "learning_rate": 1.807580174927114e-05,
416
+ "loss": 0.3609,
417
+ "step": 330
418
+ },
419
+ {
420
+ "epoch": 0.98,
421
+ "learning_rate": 1.8046647230320703e-05,
422
+ "loss": 0.3342,
423
+ "step": 335
424
+ },
425
+ {
426
+ "epoch": 0.99,
427
+ "learning_rate": 1.8017492711370263e-05,
428
+ "loss": 0.363,
429
+ "step": 340
430
+ },
431
+ {
432
+ "epoch": 1.01,
433
+ "learning_rate": 1.7988338192419827e-05,
434
+ "loss": 0.3569,
435
+ "step": 345
436
+ },
437
+ {
438
+ "epoch": 1.02,
439
+ "learning_rate": 1.795918367346939e-05,
440
+ "loss": 0.3517,
441
+ "step": 350
442
+ },
443
+ {
444
+ "epoch": 1.03,
445
+ "learning_rate": 1.793002915451895e-05,
446
+ "loss": 0.356,
447
+ "step": 355
448
+ },
449
+ {
450
+ "epoch": 1.05,
451
+ "learning_rate": 1.7900874635568515e-05,
452
+ "loss": 0.3314,
453
+ "step": 360
454
+ },
455
+ {
456
+ "epoch": 1.06,
457
+ "learning_rate": 1.7871720116618075e-05,
458
+ "loss": 0.3404,
459
+ "step": 365
460
+ },
461
+ {
462
+ "epoch": 1.08,
463
+ "learning_rate": 1.7842565597667642e-05,
464
+ "loss": 0.331,
465
+ "step": 370
466
+ },
467
+ {
468
+ "epoch": 1.09,
469
+ "learning_rate": 1.7813411078717202e-05,
470
+ "loss": 0.343,
471
+ "step": 375
472
+ },
473
+ {
474
+ "epoch": 1.11,
475
+ "learning_rate": 1.7784256559766766e-05,
476
+ "loss": 0.3249,
477
+ "step": 380
478
+ },
479
+ {
480
+ "epoch": 1.12,
481
+ "learning_rate": 1.7755102040816327e-05,
482
+ "loss": 0.3284,
483
+ "step": 385
484
+ },
485
+ {
486
+ "epoch": 1.14,
487
+ "learning_rate": 1.772594752186589e-05,
488
+ "loss": 0.3297,
489
+ "step": 390
490
+ },
491
+ {
492
+ "epoch": 1.15,
493
+ "learning_rate": 1.7696793002915454e-05,
494
+ "loss": 0.3462,
495
+ "step": 395
496
+ },
497
+ {
498
+ "epoch": 1.17,
499
+ "learning_rate": 1.7667638483965014e-05,
500
+ "loss": 0.3318,
501
+ "step": 400
502
+ },
503
+ {
504
+ "epoch": 1.17,
505
+ "eval_accuracy": 0.7431793254729915,
506
+ "eval_accuracy_sklearn": 0.7431793254729915,
507
+ "eval_f1": 0.67462764340614,
508
+ "eval_loss": 0.5504330396652222,
509
+ "eval_precision": 0.650531781257851,
510
+ "eval_recall": 0.7005772005772006,
511
+ "eval_runtime": 168.6146,
512
+ "eval_samples_per_second": 173.034,
513
+ "eval_steps_per_second": 10.818,
514
+ "step": 400
515
+ },
516
+ {
517
+ "epoch": 1.18,
518
+ "learning_rate": 1.7638483965014578e-05,
519
+ "loss": 0.3257,
520
+ "step": 405
521
+ },
522
+ {
523
+ "epoch": 1.2,
524
+ "learning_rate": 1.7609329446064142e-05,
525
+ "loss": 0.3325,
526
+ "step": 410
527
+ },
528
+ {
529
+ "epoch": 1.21,
530
+ "learning_rate": 1.7580174927113706e-05,
531
+ "loss": 0.3346,
532
+ "step": 415
533
+ },
534
+ {
535
+ "epoch": 1.22,
536
+ "learning_rate": 1.7551020408163266e-05,
537
+ "loss": 0.3549,
538
+ "step": 420
539
+ },
540
+ {
541
+ "epoch": 1.24,
542
+ "learning_rate": 1.752186588921283e-05,
543
+ "loss": 0.344,
544
+ "step": 425
545
+ },
546
+ {
547
+ "epoch": 1.25,
548
+ "learning_rate": 1.749271137026239e-05,
549
+ "loss": 0.3504,
550
+ "step": 430
551
+ },
552
+ {
553
+ "epoch": 1.27,
554
+ "learning_rate": 1.7463556851311957e-05,
555
+ "loss": 0.3281,
556
+ "step": 435
557
+ },
558
+ {
559
+ "epoch": 1.28,
560
+ "learning_rate": 1.7434402332361518e-05,
561
+ "loss": 0.3515,
562
+ "step": 440
563
+ },
564
+ {
565
+ "epoch": 1.3,
566
+ "learning_rate": 1.7405247813411078e-05,
567
+ "loss": 0.3384,
568
+ "step": 445
569
+ },
570
+ {
571
+ "epoch": 1.31,
572
+ "learning_rate": 1.7376093294460642e-05,
573
+ "loss": 0.339,
574
+ "step": 450
575
+ },
576
+ {
577
+ "epoch": 1.33,
578
+ "learning_rate": 1.7346938775510206e-05,
579
+ "loss": 0.3186,
580
+ "step": 455
581
+ },
582
+ {
583
+ "epoch": 1.34,
584
+ "learning_rate": 1.731778425655977e-05,
585
+ "loss": 0.3471,
586
+ "step": 460
587
+ },
588
+ {
589
+ "epoch": 1.36,
590
+ "learning_rate": 1.728862973760933e-05,
591
+ "loss": 0.333,
592
+ "step": 465
593
+ },
594
+ {
595
+ "epoch": 1.37,
596
+ "learning_rate": 1.7259475218658894e-05,
597
+ "loss": 0.2864,
598
+ "step": 470
599
+ },
600
+ {
601
+ "epoch": 1.38,
602
+ "learning_rate": 1.7230320699708457e-05,
603
+ "loss": 0.3198,
604
+ "step": 475
605
+ },
606
+ {
607
+ "epoch": 1.4,
608
+ "learning_rate": 1.720116618075802e-05,
609
+ "loss": 0.3165,
610
+ "step": 480
611
+ },
612
+ {
613
+ "epoch": 1.41,
614
+ "learning_rate": 1.717201166180758e-05,
615
+ "loss": 0.3007,
616
+ "step": 485
617
+ },
618
+ {
619
+ "epoch": 1.43,
620
+ "learning_rate": 1.7142857142857142e-05,
621
+ "loss": 0.2981,
622
+ "step": 490
623
+ },
624
+ {
625
+ "epoch": 1.44,
626
+ "learning_rate": 1.7113702623906706e-05,
627
+ "loss": 0.3166,
628
+ "step": 495
629
+ },
630
+ {
631
+ "epoch": 1.46,
632
+ "learning_rate": 1.708454810495627e-05,
633
+ "loss": 0.3288,
634
+ "step": 500
635
+ },
636
+ {
637
+ "epoch": 1.47,
638
+ "learning_rate": 1.7055393586005833e-05,
639
+ "loss": 0.3078,
640
+ "step": 505
641
+ },
642
+ {
643
+ "epoch": 1.49,
644
+ "learning_rate": 1.7026239067055393e-05,
645
+ "loss": 0.3049,
646
+ "step": 510
647
+ },
648
+ {
649
+ "epoch": 1.5,
650
+ "learning_rate": 1.6997084548104957e-05,
651
+ "loss": 0.3075,
652
+ "step": 515
653
+ },
654
+ {
655
+ "epoch": 1.52,
656
+ "learning_rate": 1.696793002915452e-05,
657
+ "loss": 0.3119,
658
+ "step": 520
659
+ },
660
+ {
661
+ "epoch": 1.53,
662
+ "learning_rate": 1.6938775510204085e-05,
663
+ "loss": 0.3032,
664
+ "step": 525
665
+ },
666
+ {
667
+ "epoch": 1.55,
668
+ "learning_rate": 1.6909620991253645e-05,
669
+ "loss": 0.2987,
670
+ "step": 530
671
+ },
672
+ {
673
+ "epoch": 1.56,
674
+ "learning_rate": 1.688046647230321e-05,
675
+ "loss": 0.324,
676
+ "step": 535
677
+ },
678
+ {
679
+ "epoch": 1.57,
680
+ "learning_rate": 1.6851311953352773e-05,
681
+ "loss": 0.3209,
682
+ "step": 540
683
+ },
684
+ {
685
+ "epoch": 1.59,
686
+ "learning_rate": 1.6822157434402333e-05,
687
+ "loss": 0.2964,
688
+ "step": 545
689
+ },
690
+ {
691
+ "epoch": 1.6,
692
+ "learning_rate": 1.6793002915451897e-05,
693
+ "loss": 0.3002,
694
+ "step": 550
695
+ },
696
+ {
697
+ "epoch": 1.62,
698
+ "learning_rate": 1.6763848396501457e-05,
699
+ "loss": 0.2872,
700
+ "step": 555
701
+ },
702
+ {
703
+ "epoch": 1.63,
704
+ "learning_rate": 1.673469387755102e-05,
705
+ "loss": 0.2999,
706
+ "step": 560
707
+ },
708
+ {
709
+ "epoch": 1.65,
710
+ "learning_rate": 1.6705539358600585e-05,
711
+ "loss": 0.3005,
712
+ "step": 565
713
+ },
714
+ {
715
+ "epoch": 1.66,
716
+ "learning_rate": 1.667638483965015e-05,
717
+ "loss": 0.2931,
718
+ "step": 570
719
+ },
720
+ {
721
+ "epoch": 1.68,
722
+ "learning_rate": 1.664723032069971e-05,
723
+ "loss": 0.2708,
724
+ "step": 575
725
+ },
726
+ {
727
+ "epoch": 1.69,
728
+ "learning_rate": 1.6618075801749273e-05,
729
+ "loss": 0.2525,
730
+ "step": 580
731
+ },
732
+ {
733
+ "epoch": 1.71,
734
+ "learning_rate": 1.6588921282798836e-05,
735
+ "loss": 0.2908,
736
+ "step": 585
737
+ },
738
+ {
739
+ "epoch": 1.72,
740
+ "learning_rate": 1.6559766763848397e-05,
741
+ "loss": 0.2732,
742
+ "step": 590
743
+ },
744
+ {
745
+ "epoch": 1.73,
746
+ "learning_rate": 1.653061224489796e-05,
747
+ "loss": 0.2932,
748
+ "step": 595
749
+ },
750
+ {
751
+ "epoch": 1.75,
752
+ "learning_rate": 1.6501457725947524e-05,
753
+ "loss": 0.2876,
754
+ "step": 600
755
+ },
756
+ {
757
+ "epoch": 1.75,
758
+ "eval_accuracy": 0.7540444200712915,
759
+ "eval_accuracy_sklearn": 0.7540444200712915,
760
+ "eval_f1": 0.6790984706198014,
761
+ "eval_loss": 0.5282062292098999,
762
+ "eval_precision": 0.6734965407131452,
763
+ "eval_recall": 0.6847943722943723,
764
+ "eval_runtime": 192.3562,
765
+ "eval_samples_per_second": 151.677,
766
+ "eval_steps_per_second": 9.482,
767
+ "step": 600
768
+ },
769
+ {
770
+ "epoch": 1.76,
771
+ "learning_rate": 1.6472303206997088e-05,
772
+ "loss": 0.302,
773
+ "step": 605
774
+ },
775
+ {
776
+ "epoch": 1.78,
777
+ "learning_rate": 1.644314868804665e-05,
778
+ "loss": 0.2874,
779
+ "step": 610
780
+ },
781
+ {
782
+ "epoch": 1.79,
783
+ "learning_rate": 1.6413994169096212e-05,
784
+ "loss": 0.2704,
785
+ "step": 615
786
+ },
787
+ {
788
+ "epoch": 1.81,
789
+ "learning_rate": 1.6384839650145773e-05,
790
+ "loss": 0.2649,
791
+ "step": 620
792
+ },
793
+ {
794
+ "epoch": 1.82,
795
+ "learning_rate": 1.6355685131195336e-05,
796
+ "loss": 0.2798,
797
+ "step": 625
798
+ },
799
+ {
800
+ "epoch": 1.84,
801
+ "learning_rate": 1.63265306122449e-05,
802
+ "loss": 0.2794,
803
+ "step": 630
804
+ },
805
+ {
806
+ "epoch": 1.85,
807
+ "learning_rate": 1.629737609329446e-05,
808
+ "loss": 0.2658,
809
+ "step": 635
810
+ },
811
+ {
812
+ "epoch": 1.87,
813
+ "learning_rate": 1.6268221574344024e-05,
814
+ "loss": 0.2816,
815
+ "step": 640
816
+ },
817
+ {
818
+ "epoch": 1.88,
819
+ "learning_rate": 1.6239067055393588e-05,
820
+ "loss": 0.2817,
821
+ "step": 645
822
+ },
823
+ {
824
+ "epoch": 1.9,
825
+ "learning_rate": 1.6209912536443152e-05,
826
+ "loss": 0.2698,
827
+ "step": 650
828
+ },
829
+ {
830
+ "epoch": 1.91,
831
+ "learning_rate": 1.6180758017492712e-05,
832
+ "loss": 0.2797,
833
+ "step": 655
834
+ },
835
+ {
836
+ "epoch": 1.92,
837
+ "learning_rate": 1.6151603498542276e-05,
838
+ "loss": 0.2941,
839
+ "step": 660
840
+ },
841
+ {
842
+ "epoch": 1.94,
843
+ "learning_rate": 1.612244897959184e-05,
844
+ "loss": 0.2784,
845
+ "step": 665
846
+ },
847
+ {
848
+ "epoch": 1.95,
849
+ "learning_rate": 1.60932944606414e-05,
850
+ "loss": 0.2913,
851
+ "step": 670
852
+ },
853
+ {
854
+ "epoch": 1.97,
855
+ "learning_rate": 1.6064139941690964e-05,
856
+ "loss": 0.2609,
857
+ "step": 675
858
+ },
859
+ {
860
+ "epoch": 1.98,
861
+ "learning_rate": 1.6034985422740524e-05,
862
+ "loss": 0.2767,
863
+ "step": 680
864
+ },
865
+ {
866
+ "epoch": 2.0,
867
+ "learning_rate": 1.6005830903790088e-05,
868
+ "loss": 0.2816,
869
+ "step": 685
870
+ },
871
+ {
872
+ "epoch": 2.01,
873
+ "learning_rate": 1.597667638483965e-05,
874
+ "loss": 0.2703,
875
+ "step": 690
876
+ },
877
+ {
878
+ "epoch": 2.03,
879
+ "learning_rate": 1.5947521865889215e-05,
880
+ "loss": 0.2857,
881
+ "step": 695
882
+ },
883
+ {
884
+ "epoch": 2.04,
885
+ "learning_rate": 1.5918367346938776e-05,
886
+ "loss": 0.2429,
887
+ "step": 700
888
+ },
889
+ {
890
+ "epoch": 2.06,
891
+ "learning_rate": 1.588921282798834e-05,
892
+ "loss": 0.2597,
893
+ "step": 705
894
+ },
895
+ {
896
+ "epoch": 2.07,
897
+ "learning_rate": 1.5860058309037903e-05,
898
+ "loss": 0.2666,
899
+ "step": 710
900
+ },
901
+ {
902
+ "epoch": 2.08,
903
+ "learning_rate": 1.5830903790087464e-05,
904
+ "loss": 0.2438,
905
+ "step": 715
906
+ },
907
+ {
908
+ "epoch": 2.1,
909
+ "learning_rate": 1.5801749271137027e-05,
910
+ "loss": 0.2628,
911
+ "step": 720
912
+ },
913
+ {
914
+ "epoch": 2.11,
915
+ "learning_rate": 1.577259475218659e-05,
916
+ "loss": 0.2574,
917
+ "step": 725
918
+ },
919
+ {
920
+ "epoch": 2.13,
921
+ "learning_rate": 1.5743440233236155e-05,
922
+ "loss": 0.2444,
923
+ "step": 730
924
+ },
925
+ {
926
+ "epoch": 2.14,
927
+ "learning_rate": 1.5714285714285715e-05,
928
+ "loss": 0.2554,
929
+ "step": 735
930
+ },
931
+ {
932
+ "epoch": 2.16,
933
+ "learning_rate": 1.568513119533528e-05,
934
+ "loss": 0.2508,
935
+ "step": 740
936
+ },
937
+ {
938
+ "epoch": 2.17,
939
+ "learning_rate": 1.565597667638484e-05,
940
+ "loss": 0.2386,
941
+ "step": 745
942
+ },
943
+ {
944
+ "epoch": 2.19,
945
+ "learning_rate": 1.5626822157434403e-05,
946
+ "loss": 0.2509,
947
+ "step": 750
948
+ },
949
+ {
950
+ "epoch": 2.2,
951
+ "learning_rate": 1.5597667638483967e-05,
952
+ "loss": 0.2644,
953
+ "step": 755
954
+ },
955
+ {
956
+ "epoch": 2.22,
957
+ "learning_rate": 1.5568513119533527e-05,
958
+ "loss": 0.2683,
959
+ "step": 760
960
+ },
961
+ {
962
+ "epoch": 2.23,
963
+ "learning_rate": 1.553935860058309e-05,
964
+ "loss": 0.2637,
965
+ "step": 765
966
+ },
967
+ {
968
+ "epoch": 2.24,
969
+ "learning_rate": 1.5510204081632655e-05,
970
+ "loss": 0.2588,
971
+ "step": 770
972
+ },
973
+ {
974
+ "epoch": 2.26,
975
+ "learning_rate": 1.548104956268222e-05,
976
+ "loss": 0.2567,
977
+ "step": 775
978
+ },
979
+ {
980
+ "epoch": 2.27,
981
+ "learning_rate": 1.545189504373178e-05,
982
+ "loss": 0.2339,
983
+ "step": 780
984
+ },
985
+ {
986
+ "epoch": 2.29,
987
+ "learning_rate": 1.5422740524781343e-05,
988
+ "loss": 0.246,
989
+ "step": 785
990
+ },
991
+ {
992
+ "epoch": 2.3,
993
+ "learning_rate": 1.5393586005830907e-05,
994
+ "loss": 0.2458,
995
+ "step": 790
996
+ },
997
+ {
998
+ "epoch": 2.32,
999
+ "learning_rate": 1.5364431486880467e-05,
1000
+ "loss": 0.2464,
1001
+ "step": 795
1002
+ },
1003
+ {
1004
+ "epoch": 2.33,
1005
+ "learning_rate": 1.533527696793003e-05,
1006
+ "loss": 0.2495,
1007
+ "step": 800
1008
+ },
1009
+ {
1010
+ "epoch": 2.33,
1011
+ "eval_accuracy": 0.7544899917740608,
1012
+ "eval_accuracy_sklearn": 0.7544899917740608,
1013
+ "eval_f1": 0.6955412929825308,
1014
+ "eval_loss": 0.5660556554794312,
1015
+ "eval_precision": 0.657769917195916,
1016
+ "eval_recall": 0.737914862914863,
1017
+ "eval_runtime": 168.3317,
1018
+ "eval_samples_per_second": 173.324,
1019
+ "eval_steps_per_second": 10.836,
1020
+ "step": 800
1021
+ }
1022
+ ],
1023
+ "max_steps": 3430,
1024
+ "num_train_epochs": 10,
1025
+ "total_flos": 1.0774766072469504e+17,
1026
+ "trial_name": null,
1027
+ "trial_params": null
1028
+ }
ranker_9/checkpoint-800/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d89bd60f804d35602b8d9d64ae5a06e7c1e709434ab3891a2163119e8fb21fa
3
+ size 3451
ranker_9/checkpoint-800/vocab.json ADDED
The diff for this file is too large to render. See raw diff