seanfarrell commited on
Commit
1ce5e6a
·
verified ·
1 Parent(s): 3ed977b

Initial Push

Browse files
config.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "save_none_newset/bert-base-uncased-finetuned-Vet/checkpoint-664281",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "Diseases of the Ear or Mastoid Process",
14
+ "1": "Mental, behavioural or neurodevelopmental disorders",
15
+ "2": "Diseases of the blood or blood-forming organs",
16
+ "3": "Diseases of the circulatory system",
17
+ "4": "Dental",
18
+ "5": "Developmental anomalies",
19
+ "6": "Diseases of the digestive system",
20
+ "7": "Endocrine, nutritional or metabolic diseases",
21
+ "8": "Diseases of the immune system",
22
+ "9": "Certain infectious or parasitic diseases",
23
+ "10": "Diseases of the skin",
24
+ "11": "Diseases of the musculoskeletal system or connective tissue",
25
+ "12": "Neoplasms",
26
+ "13": "Diseases of the nervous system",
27
+ "14": "Diseases of the visual system",
28
+ "15": "Certain conditions originating in the perinatal period",
29
+ "16": "Pregnancy, childbirth or the puerperium",
30
+ "17": "Diseases of the respiratory system",
31
+ "18": "Injury, poisoning or certain other consequences of external causes",
32
+ "19": "Diseases of the genitourinary system"
33
+ },
34
+ "initializer_range": 0.02,
35
+ "intermediate_size": 3072,
36
+ "label2id": {
37
+ "Auditory_Labels": 0,
38
+ "Behaviour_Labels": 1,
39
+ "Bloods_Labels": 2,
40
+ "Circulatory_Labels": 3,
41
+ "Dental_Labels": 4,
42
+ "Developmental_Labels": 5,
43
+ "Digestive_Labels": 6,
44
+ "Endocrine_Labels": 7,
45
+ "Immuno_Labels": 8,
46
+ "Infectious_Parasites_Labels": 9,
47
+ "Integument_Labels": 10,
48
+ "Muskuloskeletal_Labels": 11,
49
+ "Neoplasia_Labels": 12,
50
+ "Neurological_Labels": 13,
51
+ "Ocular_Labels": 14,
52
+ "Perinetal_Period_Labels": 15,
53
+ "Pregnancy_Labels": 16,
54
+ "Respiratory_Labels": 17,
55
+ "Trauma_Labels": 18,
56
+ "Urinary_Labels": 19
57
+ },
58
+ "layer_norm_eps": 1e-12,
59
+ "max_position_embeddings": 512,
60
+ "model_type": "bert",
61
+ "num_attention_heads": 12,
62
+ "num_hidden_layers": 12,
63
+ "pad_token_id": 0,
64
+ "position_embedding_type": "absolute",
65
+ "problem_type": "multi_label_classification",
66
+ "torch_dtype": "float32",
67
+ "transformers_version": "4.25.1",
68
+ "type_vocab_size": 2,
69
+ "use_cache": true,
70
+ "vocab_size": 30522
71
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ee54e004fd69462e7b31f7e58e37cc6218f375f9a87139e5b5ce976eed93513
3
+ size 876099269
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:796d735d71d023a448c35de6588575227709e79379e58ba85611ab2f96c459f4
3
+ size 438063285
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f3b9440ddebd5723aad02a9f7bda79a7a720dacd84102cfcb95d15cf251078b
3
+ size 14575
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a275e608dcb0c497024b05608e3705d633b9b9c98a81b1ddcc59e8c31427f50f
3
+ size 627
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "do_lower_case": true,
4
+ "mask_token": "[MASK]",
5
+ "model_max_length": 512,
6
+ "name_or_path": "bert-base-uncased",
7
+ "pad_token": "[PAD]",
8
+ "problem_type": "multi_label_classification",
9
+ "sep_token": "[SEP]",
10
+ "special_tokens_map_file": null,
11
+ "strip_accents": null,
12
+ "tokenize_chinese_chars": true,
13
+ "tokenizer_class": "BertTokenizer",
14
+ "unk_token": "[UNK]"
15
+ }
trainer_state.json ADDED
@@ -0,0 +1,535 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.9293855295231974,
3
+ "best_model_checkpoint": "Datasets/ICD/ICD_11_multilabel_MultiLabelSoftMarginLoss_FINAL_final/checkpoint-70317",
4
+ "epoch": 9.0,
5
+ "global_step": 70317,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.13,
12
+ "learning_rate": 4.9677959551719697e-05,
13
+ "loss": 0.6152,
14
+ "step": 1000
15
+ },
16
+ {
17
+ "epoch": 0.26,
18
+ "learning_rate": 4.9033878655159085e-05,
19
+ "loss": 0.186,
20
+ "step": 2000
21
+ },
22
+ {
23
+ "epoch": 0.38,
24
+ "learning_rate": 4.838979775859848e-05,
25
+ "loss": 0.1556,
26
+ "step": 3000
27
+ },
28
+ {
29
+ "epoch": 0.51,
30
+ "learning_rate": 4.7745716862037875e-05,
31
+ "loss": 0.1494,
32
+ "step": 4000
33
+ },
34
+ {
35
+ "epoch": 0.64,
36
+ "learning_rate": 4.710163596547726e-05,
37
+ "loss": 0.1352,
38
+ "step": 5000
39
+ },
40
+ {
41
+ "epoch": 0.77,
42
+ "learning_rate": 4.645755506891666e-05,
43
+ "loss": 0.132,
44
+ "step": 6000
45
+ },
46
+ {
47
+ "epoch": 0.9,
48
+ "learning_rate": 4.581347417235605e-05,
49
+ "loss": 0.1282,
50
+ "step": 7000
51
+ },
52
+ {
53
+ "epoch": 1.0,
54
+ "eval_accuracy": 0.726872,
55
+ "eval_f1": 0.8632296736577655,
56
+ "eval_loss": 0.12385939061641693,
57
+ "eval_roc_auc": 0.8871861128983161,
58
+ "eval_runtime": 1129.5296,
59
+ "eval_samples_per_second": 221.331,
60
+ "eval_steps_per_second": 6.917,
61
+ "step": 7813
62
+ },
63
+ {
64
+ "epoch": 1.02,
65
+ "learning_rate": 4.516939327579544e-05,
66
+ "loss": 0.1195,
67
+ "step": 8000
68
+ },
69
+ {
70
+ "epoch": 1.15,
71
+ "learning_rate": 4.4525312379234836e-05,
72
+ "loss": 0.0897,
73
+ "step": 9000
74
+ },
75
+ {
76
+ "epoch": 1.28,
77
+ "learning_rate": 4.3881231482674224e-05,
78
+ "loss": 0.0911,
79
+ "step": 10000
80
+ },
81
+ {
82
+ "epoch": 1.41,
83
+ "learning_rate": 4.323715058611362e-05,
84
+ "loss": 0.0911,
85
+ "step": 11000
86
+ },
87
+ {
88
+ "epoch": 1.54,
89
+ "learning_rate": 4.2593069689553014e-05,
90
+ "loss": 0.093,
91
+ "step": 12000
92
+ },
93
+ {
94
+ "epoch": 1.66,
95
+ "learning_rate": 4.19489887929924e-05,
96
+ "loss": 0.093,
97
+ "step": 13000
98
+ },
99
+ {
100
+ "epoch": 1.79,
101
+ "learning_rate": 4.130490789643179e-05,
102
+ "loss": 0.0913,
103
+ "step": 14000
104
+ },
105
+ {
106
+ "epoch": 1.92,
107
+ "learning_rate": 4.0660826999871186e-05,
108
+ "loss": 0.0896,
109
+ "step": 15000
110
+ },
111
+ {
112
+ "epoch": 2.0,
113
+ "eval_accuracy": 0.780268,
114
+ "eval_f1": 0.9023734184296859,
115
+ "eval_loss": 0.10922548174858093,
116
+ "eval_roc_auc": 0.9264076688968317,
117
+ "eval_runtime": 1128.1964,
118
+ "eval_samples_per_second": 221.593,
119
+ "eval_steps_per_second": 6.925,
120
+ "step": 15626
121
+ },
122
+ {
123
+ "epoch": 2.05,
124
+ "learning_rate": 4.0016746103310574e-05,
125
+ "loss": 0.077,
126
+ "step": 16000
127
+ },
128
+ {
129
+ "epoch": 2.18,
130
+ "learning_rate": 3.937266520674997e-05,
131
+ "loss": 0.057,
132
+ "step": 17000
133
+ },
134
+ {
135
+ "epoch": 2.3,
136
+ "learning_rate": 3.8728584310189364e-05,
137
+ "loss": 0.0571,
138
+ "step": 18000
139
+ },
140
+ {
141
+ "epoch": 2.43,
142
+ "learning_rate": 3.808450341362875e-05,
143
+ "loss": 0.0613,
144
+ "step": 19000
145
+ },
146
+ {
147
+ "epoch": 2.56,
148
+ "learning_rate": 3.744042251706815e-05,
149
+ "loss": 0.062,
150
+ "step": 20000
151
+ },
152
+ {
153
+ "epoch": 2.69,
154
+ "learning_rate": 3.6796341620507536e-05,
155
+ "loss": 0.0603,
156
+ "step": 21000
157
+ },
158
+ {
159
+ "epoch": 2.82,
160
+ "learning_rate": 3.615226072394693e-05,
161
+ "loss": 0.0596,
162
+ "step": 22000
163
+ },
164
+ {
165
+ "epoch": 2.94,
166
+ "learning_rate": 3.5508179827386326e-05,
167
+ "loss": 0.0596,
168
+ "step": 23000
169
+ },
170
+ {
171
+ "epoch": 3.0,
172
+ "eval_accuracy": 0.796324,
173
+ "eval_f1": 0.9105269872918949,
174
+ "eval_loss": 0.11485119163990021,
175
+ "eval_roc_auc": 0.9327701339514269,
176
+ "eval_runtime": 1128.1956,
177
+ "eval_samples_per_second": 221.593,
178
+ "eval_steps_per_second": 6.925,
179
+ "step": 23439
180
+ },
181
+ {
182
+ "epoch": 3.07,
183
+ "learning_rate": 3.4864098930825714e-05,
184
+ "loss": 0.0464,
185
+ "step": 24000
186
+ },
187
+ {
188
+ "epoch": 3.2,
189
+ "learning_rate": 3.422001803426511e-05,
190
+ "loss": 0.0364,
191
+ "step": 25000
192
+ },
193
+ {
194
+ "epoch": 3.33,
195
+ "learning_rate": 3.35759371377045e-05,
196
+ "loss": 0.0388,
197
+ "step": 26000
198
+ },
199
+ {
200
+ "epoch": 3.46,
201
+ "learning_rate": 3.2931856241143885e-05,
202
+ "loss": 0.0379,
203
+ "step": 27000
204
+ },
205
+ {
206
+ "epoch": 3.58,
207
+ "learning_rate": 3.228777534458328e-05,
208
+ "loss": 0.0412,
209
+ "step": 28000
210
+ },
211
+ {
212
+ "epoch": 3.71,
213
+ "learning_rate": 3.164369444802267e-05,
214
+ "loss": 0.0415,
215
+ "step": 29000
216
+ },
217
+ {
218
+ "epoch": 3.84,
219
+ "learning_rate": 3.0999613551462063e-05,
220
+ "loss": 0.0398,
221
+ "step": 30000
222
+ },
223
+ {
224
+ "epoch": 3.97,
225
+ "learning_rate": 3.035553265490146e-05,
226
+ "loss": 0.0391,
227
+ "step": 31000
228
+ },
229
+ {
230
+ "epoch": 4.0,
231
+ "eval_accuracy": 0.805976,
232
+ "eval_f1": 0.9167903002642254,
233
+ "eval_loss": 0.13589395582675934,
234
+ "eval_roc_auc": 0.9395522583821788,
235
+ "eval_runtime": 1127.0647,
236
+ "eval_samples_per_second": 221.815,
237
+ "eval_steps_per_second": 6.932,
238
+ "step": 31252
239
+ },
240
+ {
241
+ "epoch": 4.1,
242
+ "learning_rate": 2.9711451758340847e-05,
243
+ "loss": 0.0267,
244
+ "step": 32000
245
+ },
246
+ {
247
+ "epoch": 4.22,
248
+ "learning_rate": 2.906737086178024e-05,
249
+ "loss": 0.0251,
250
+ "step": 33000
251
+ },
252
+ {
253
+ "epoch": 4.35,
254
+ "learning_rate": 2.8423289965219633e-05,
255
+ "loss": 0.0251,
256
+ "step": 34000
257
+ },
258
+ {
259
+ "epoch": 4.48,
260
+ "learning_rate": 2.7779209068659025e-05,
261
+ "loss": 0.0243,
262
+ "step": 35000
263
+ },
264
+ {
265
+ "epoch": 4.61,
266
+ "learning_rate": 2.7135128172098416e-05,
267
+ "loss": 0.0248,
268
+ "step": 36000
269
+ },
270
+ {
271
+ "epoch": 4.74,
272
+ "learning_rate": 2.649104727553781e-05,
273
+ "loss": 0.0264,
274
+ "step": 37000
275
+ },
276
+ {
277
+ "epoch": 4.86,
278
+ "learning_rate": 2.58469663789772e-05,
279
+ "loss": 0.0257,
280
+ "step": 38000
281
+ },
282
+ {
283
+ "epoch": 4.99,
284
+ "learning_rate": 2.5202885482416595e-05,
285
+ "loss": 0.0275,
286
+ "step": 39000
287
+ },
288
+ {
289
+ "epoch": 5.0,
290
+ "eval_accuracy": 0.811504,
291
+ "eval_f1": 0.9201141643125628,
292
+ "eval_loss": 0.1432325541973114,
293
+ "eval_roc_auc": 0.944372612395835,
294
+ "eval_runtime": 1127.5732,
295
+ "eval_samples_per_second": 221.715,
296
+ "eval_steps_per_second": 6.929,
297
+ "step": 39065
298
+ },
299
+ {
300
+ "epoch": 5.12,
301
+ "learning_rate": 2.4558804585855986e-05,
302
+ "loss": 0.0156,
303
+ "step": 40000
304
+ },
305
+ {
306
+ "epoch": 5.25,
307
+ "learning_rate": 2.3914723689295378e-05,
308
+ "loss": 0.0152,
309
+ "step": 41000
310
+ },
311
+ {
312
+ "epoch": 5.38,
313
+ "learning_rate": 2.3270642792734766e-05,
314
+ "loss": 0.0153,
315
+ "step": 42000
316
+ },
317
+ {
318
+ "epoch": 5.5,
319
+ "learning_rate": 2.262656189617416e-05,
320
+ "loss": 0.0153,
321
+ "step": 43000
322
+ },
323
+ {
324
+ "epoch": 5.63,
325
+ "learning_rate": 2.1982480999613553e-05,
326
+ "loss": 0.0157,
327
+ "step": 44000
328
+ },
329
+ {
330
+ "epoch": 5.76,
331
+ "learning_rate": 2.1338400103052944e-05,
332
+ "loss": 0.0183,
333
+ "step": 45000
334
+ },
335
+ {
336
+ "epoch": 5.89,
337
+ "learning_rate": 2.0694319206492336e-05,
338
+ "loss": 0.0165,
339
+ "step": 46000
340
+ },
341
+ {
342
+ "epoch": 6.0,
343
+ "eval_accuracy": 0.820196,
344
+ "eval_f1": 0.926118284939716,
345
+ "eval_loss": 0.1556038111448288,
346
+ "eval_roc_auc": 0.9567798902277629,
347
+ "eval_runtime": 1129.3479,
348
+ "eval_samples_per_second": 221.367,
349
+ "eval_steps_per_second": 6.918,
350
+ "step": 46878
351
+ },
352
+ {
353
+ "epoch": 6.02,
354
+ "learning_rate": 2.0050238309931728e-05,
355
+ "loss": 0.014,
356
+ "step": 47000
357
+ },
358
+ {
359
+ "epoch": 6.14,
360
+ "learning_rate": 1.940615741337112e-05,
361
+ "loss": 0.0095,
362
+ "step": 48000
363
+ },
364
+ {
365
+ "epoch": 6.27,
366
+ "learning_rate": 1.876207651681051e-05,
367
+ "loss": 0.0101,
368
+ "step": 49000
369
+ },
370
+ {
371
+ "epoch": 6.4,
372
+ "learning_rate": 1.8117995620249902e-05,
373
+ "loss": 0.0108,
374
+ "step": 50000
375
+ },
376
+ {
377
+ "epoch": 6.53,
378
+ "learning_rate": 1.7473914723689297e-05,
379
+ "loss": 0.0098,
380
+ "step": 51000
381
+ },
382
+ {
383
+ "epoch": 6.66,
384
+ "learning_rate": 1.682983382712869e-05,
385
+ "loss": 0.0124,
386
+ "step": 52000
387
+ },
388
+ {
389
+ "epoch": 6.78,
390
+ "learning_rate": 1.618575293056808e-05,
391
+ "loss": 0.0104,
392
+ "step": 53000
393
+ },
394
+ {
395
+ "epoch": 6.91,
396
+ "learning_rate": 1.5541672034007472e-05,
397
+ "loss": 0.0102,
398
+ "step": 54000
399
+ },
400
+ {
401
+ "epoch": 7.0,
402
+ "eval_accuracy": 0.822104,
403
+ "eval_f1": 0.9267807957481025,
404
+ "eval_loss": 0.17248359322547913,
405
+ "eval_roc_auc": 0.9574790529696302,
406
+ "eval_runtime": 1129.5316,
407
+ "eval_samples_per_second": 221.331,
408
+ "eval_steps_per_second": 6.917,
409
+ "step": 54691
410
+ },
411
+ {
412
+ "epoch": 7.04,
413
+ "learning_rate": 1.4897591137446864e-05,
414
+ "loss": 0.0086,
415
+ "step": 55000
416
+ },
417
+ {
418
+ "epoch": 7.17,
419
+ "learning_rate": 1.4253510240886256e-05,
420
+ "loss": 0.0061,
421
+ "step": 56000
422
+ },
423
+ {
424
+ "epoch": 7.3,
425
+ "learning_rate": 1.3609429344325647e-05,
426
+ "loss": 0.0056,
427
+ "step": 57000
428
+ },
429
+ {
430
+ "epoch": 7.42,
431
+ "learning_rate": 1.296534844776504e-05,
432
+ "loss": 0.0058,
433
+ "step": 58000
434
+ },
435
+ {
436
+ "epoch": 7.55,
437
+ "learning_rate": 1.2321267551204432e-05,
438
+ "loss": 0.0056,
439
+ "step": 59000
440
+ },
441
+ {
442
+ "epoch": 7.68,
443
+ "learning_rate": 1.1677186654643824e-05,
444
+ "loss": 0.0057,
445
+ "step": 60000
446
+ },
447
+ {
448
+ "epoch": 7.81,
449
+ "learning_rate": 1.1033105758083217e-05,
450
+ "loss": 0.0058,
451
+ "step": 61000
452
+ },
453
+ {
454
+ "epoch": 7.94,
455
+ "learning_rate": 1.0389024861522607e-05,
456
+ "loss": 0.006,
457
+ "step": 62000
458
+ },
459
+ {
460
+ "epoch": 8.0,
461
+ "eval_accuracy": 0.82428,
462
+ "eval_f1": 0.9274938850870897,
463
+ "eval_loss": 0.17893491685390472,
464
+ "eval_roc_auc": 0.954270360446044,
465
+ "eval_runtime": 1128.7984,
466
+ "eval_samples_per_second": 221.474,
467
+ "eval_steps_per_second": 6.922,
468
+ "step": 62504
469
+ },
470
+ {
471
+ "epoch": 8.06,
472
+ "learning_rate": 9.744943964962e-06,
473
+ "loss": 0.0043,
474
+ "step": 63000
475
+ },
476
+ {
477
+ "epoch": 8.19,
478
+ "learning_rate": 9.100863068401392e-06,
479
+ "loss": 0.0033,
480
+ "step": 64000
481
+ },
482
+ {
483
+ "epoch": 8.32,
484
+ "learning_rate": 8.456782171840783e-06,
485
+ "loss": 0.0036,
486
+ "step": 65000
487
+ },
488
+ {
489
+ "epoch": 8.45,
490
+ "learning_rate": 7.812701275280175e-06,
491
+ "loss": 0.003,
492
+ "step": 66000
493
+ },
494
+ {
495
+ "epoch": 8.58,
496
+ "learning_rate": 7.1686203787195675e-06,
497
+ "loss": 0.0033,
498
+ "step": 67000
499
+ },
500
+ {
501
+ "epoch": 8.7,
502
+ "learning_rate": 6.524539482158959e-06,
503
+ "loss": 0.0034,
504
+ "step": 68000
505
+ },
506
+ {
507
+ "epoch": 8.83,
508
+ "learning_rate": 5.8804585855983516e-06,
509
+ "loss": 0.003,
510
+ "step": 69000
511
+ },
512
+ {
513
+ "epoch": 8.96,
514
+ "learning_rate": 5.236377689037743e-06,
515
+ "loss": 0.0026,
516
+ "step": 70000
517
+ },
518
+ {
519
+ "epoch": 9.0,
520
+ "eval_accuracy": 0.826624,
521
+ "eval_f1": 0.9293855295231974,
522
+ "eval_loss": 0.19182080030441284,
523
+ "eval_roc_auc": 0.9598984635520128,
524
+ "eval_runtime": 1129.0264,
525
+ "eval_samples_per_second": 221.43,
526
+ "eval_steps_per_second": 6.92,
527
+ "step": 70317
528
+ }
529
+ ],
530
+ "max_steps": 78130,
531
+ "num_train_epochs": 10,
532
+ "total_flos": 3.639239973215278e+17,
533
+ "trial_name": null,
534
+ "trial_params": null
535
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8927ceaf37476b477000a14227235382b9936580ab93eea17858ec17276ba07e
3
+ size 3515
vocab.txt ADDED
The diff for this file is too large to render. See raw diff