jon-t committed on
Commit
9b3d276
verified
1 Parent(s): 2d84ffe

Model save

Browse files
README.md CHANGED
@@ -1,11 +1,7 @@
1
  ---
2
  library_name: transformers
3
- license: mit
4
- base_model: emilyalsentzer/Bio_ClinicalBERT
5
  tags:
6
  - generated_from_trainer
7
- datasets:
8
- - squad_v2
9
  model-index:
10
  - name: Bio_ClinicalBERT_QA
11
  results: []
@@ -16,7 +12,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # Bio_ClinicalBERT_QA
18
 
19
- This model is a fine-tuned version of [emilyalsentzer/Bio_ClinicalBERT](https://huggingface.co/emilyalsentzer/Bio_ClinicalBERT) on the squad_v2 dataset.
20
 
21
  ## Model description
22
 
 
1
  ---
2
  library_name: transformers
 
 
3
  tags:
4
  - generated_from_trainer
 
 
5
  model-index:
6
  - name: Bio_ClinicalBERT_QA
7
  results: []
 
12
 
13
  # Bio_ClinicalBERT_QA
14
 
15
+ This model was trained from scratch on an unknown dataset.
16
 
17
  ## Model description
18
 
all_results.json CHANGED
@@ -1,26 +1,26 @@
1
  {
2
  "epoch": 2.0,
3
- "eval_HasAns_exact": 69.87179487179488,
4
- "eval_HasAns_f1": 76.91826514384819,
5
- "eval_HasAns_total": 5928,
6
- "eval_NoAns_exact": 76.16484440706476,
7
- "eval_NoAns_f1": 76.16484440706476,
8
- "eval_NoAns_total": 5945,
9
- "eval_best_exact": 73.02282489682473,
10
  "eval_best_exact_thresh": 0.0,
11
- "eval_best_f1": 76.54101539398066,
12
  "eval_best_f1_thresh": 0.0,
13
- "eval_exact": 73.02282489682473,
14
- "eval_f1": 76.54101539398063,
15
- "eval_runtime": 310.3268,
16
- "eval_samples": 11985,
17
- "eval_samples_per_second": 38.621,
18
- "eval_steps_per_second": 4.83,
19
- "eval_total": 11873,
20
- "total_flos": 6.833694078916608e+16,
21
- "train_loss": 1.1028300029987763,
22
- "train_runtime": 22205.1611,
23
- "train_samples": 130765,
24
- "train_samples_per_second": 11.778,
25
- "train_steps_per_second": 0.982
26
  }
 
1
  {
2
  "epoch": 2.0,
3
+ "eval_HasAns_exact": 4.444444444444445,
4
+ "eval_HasAns_f1": 7.0136424940346505,
5
+ "eval_HasAns_total": 45,
6
+ "eval_NoAns_exact": 0.0,
7
+ "eval_NoAns_f1": 0.0,
8
+ "eval_NoAns_total": 55,
9
+ "eval_best_exact": 55.0,
10
  "eval_best_exact_thresh": 0.0,
11
+ "eval_best_f1": 55.0,
12
  "eval_best_f1_thresh": 0.0,
13
+ "eval_exact": 2.0,
14
+ "eval_f1": 3.1561391223155923,
15
+ "eval_runtime": 2.3661,
16
+ "eval_samples": 100,
17
+ "eval_samples_per_second": 42.264,
18
+ "eval_steps_per_second": 5.494,
19
+ "eval_total": 100,
20
+ "total_flos": 52259351347200.0,
21
+ "train_loss": 5.54022216796875,
22
+ "train_runtime": 14.9371,
23
+ "train_samples": 100,
24
+ "train_samples_per_second": 13.389,
25
+ "train_steps_per_second": 1.205
26
  }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "emilyalsentzer/Bio_ClinicalBERT",
3
  "architectures": [
4
  "BertForQuestionAnswering"
5
  ],
 
1
  {
2
+ "_name_or_path": "./results",
3
  "architectures": [
4
  "BertForQuestionAnswering"
5
  ],
eval_nbest_predictions.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:175f0381806c469c067f4760a1252b69290fa4176e963b016f300f63496549cb
3
- size 55260844
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:096303ad112a5e33698c26eac76a3f700af54afec0329a8230831077827d52bb
3
+ size 487233
eval_null_odds.json CHANGED
The diff for this file is too large to render. See raw diff
 
eval_predictions.json CHANGED
The diff for this file is too large to render. See raw diff
 
eval_results.json CHANGED
@@ -1,20 +1,20 @@
1
  {
2
  "epoch": 2.0,
3
- "eval_HasAns_exact": 69.87179487179488,
4
- "eval_HasAns_f1": 76.91826514384819,
5
- "eval_HasAns_total": 5928,
6
- "eval_NoAns_exact": 76.16484440706476,
7
- "eval_NoAns_f1": 76.16484440706476,
8
- "eval_NoAns_total": 5945,
9
- "eval_best_exact": 73.02282489682473,
10
  "eval_best_exact_thresh": 0.0,
11
- "eval_best_f1": 76.54101539398066,
12
  "eval_best_f1_thresh": 0.0,
13
- "eval_exact": 73.02282489682473,
14
- "eval_f1": 76.54101539398063,
15
- "eval_runtime": 310.3268,
16
- "eval_samples": 11985,
17
- "eval_samples_per_second": 38.621,
18
- "eval_steps_per_second": 4.83,
19
- "eval_total": 11873
20
  }
 
1
  {
2
  "epoch": 2.0,
3
+ "eval_HasAns_exact": 4.444444444444445,
4
+ "eval_HasAns_f1": 7.0136424940346505,
5
+ "eval_HasAns_total": 45,
6
+ "eval_NoAns_exact": 0.0,
7
+ "eval_NoAns_f1": 0.0,
8
+ "eval_NoAns_total": 55,
9
+ "eval_best_exact": 55.0,
10
  "eval_best_exact_thresh": 0.0,
11
+ "eval_best_f1": 55.0,
12
  "eval_best_f1_thresh": 0.0,
13
+ "eval_exact": 2.0,
14
+ "eval_f1": 3.1561391223155923,
15
+ "eval_runtime": 2.3661,
16
+ "eval_samples": 100,
17
+ "eval_samples_per_second": 42.264,
18
+ "eval_steps_per_second": 5.494,
19
+ "eval_total": 100
20
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1a630f553dfa3d6b3a7bee4a9354a1a2d5d3f2527fa35cb495f613c0d36dabd
3
  size 430908208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7fcb040f3e5d5d236313d5f12cfafaa3da0c8c37791a8a54e354a913eb12ffe
3
  size 430908208
special_tokens_map.json CHANGED
@@ -1,7 +1,37 @@
1
  {
2
- "cls_token": "[CLS]",
3
- "mask_token": "[MASK]",
4
- "pad_token": "[PAD]",
5
- "sep_token": "[SEP]",
6
- "unk_token": "[UNK]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  }
 
1
  {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
  }
tokenizer.json CHANGED
@@ -1,7 +1,21 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
4
- "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 512,
6
+ "strategy": "OnlySecond",
7
+ "stride": 128
8
+ },
9
+ "padding": {
10
+ "strategy": {
11
+ "Fixed": 512
12
+ },
13
+ "direction": "Right",
14
+ "pad_to_multiple_of": null,
15
+ "pad_id": 0,
16
+ "pad_type_id": 0,
17
+ "pad_token": "[PAD]"
18
+ },
19
  "added_tokens": [
20
  {
21
  "id": 0,
tokenizer_config.json CHANGED
@@ -47,12 +47,19 @@
47
  "do_lower_case": true,
48
  "extra_special_tokens": {},
49
  "mask_token": "[MASK]",
 
50
  "model_max_length": 1000000000000000019884624838656,
51
  "never_split": null,
 
52
  "pad_token": "[PAD]",
 
 
53
  "sep_token": "[SEP]",
 
54
  "strip_accents": null,
55
  "tokenize_chinese_chars": true,
56
  "tokenizer_class": "BertTokenizer",
 
 
57
  "unk_token": "[UNK]"
58
  }
 
47
  "do_lower_case": true,
48
  "extra_special_tokens": {},
49
  "mask_token": "[MASK]",
50
+ "max_length": 512,
51
  "model_max_length": 1000000000000000019884624838656,
52
  "never_split": null,
53
+ "pad_to_multiple_of": null,
54
  "pad_token": "[PAD]",
55
+ "pad_token_type_id": 0,
56
+ "padding_side": "right",
57
  "sep_token": "[SEP]",
58
+ "stride": 128,
59
  "strip_accents": null,
60
  "tokenize_chinese_chars": true,
61
  "tokenizer_class": "BertTokenizer",
62
+ "truncation_side": "right",
63
+ "truncation_strategy": "only_second",
64
  "unk_token": "[UNK]"
65
  }
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 2.0,
3
- "total_flos": 6.833694078916608e+16,
4
- "train_loss": 1.1028300029987763,
5
- "train_runtime": 22205.1611,
6
- "train_samples": 130765,
7
- "train_samples_per_second": 11.778,
8
- "train_steps_per_second": 0.982
9
  }
 
1
  {
2
  "epoch": 2.0,
3
+ "total_flos": 52259351347200.0,
4
+ "train_loss": 5.54022216796875,
5
+ "train_runtime": 14.9371,
6
+ "train_samples": 100,
7
+ "train_samples_per_second": 13.389,
8
+ "train_steps_per_second": 1.205
9
  }
trainer_state.json CHANGED
@@ -3,340 +3,39 @@
3
  "best_model_checkpoint": null,
4
  "epoch": 2.0,
5
  "eval_steps": 500,
6
- "global_step": 21796,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
- {
12
- "epoch": 0.04587997797761057,
13
- "grad_norm": 14.238367080688477,
14
- "learning_rate": 2.9311800330335842e-05,
15
- "loss": 2.6043,
16
- "step": 500
17
- },
18
- {
19
- "epoch": 0.09175995595522114,
20
- "grad_norm": 14.928800582885742,
21
- "learning_rate": 2.8623600660671684e-05,
22
- "loss": 1.9062,
23
- "step": 1000
24
- },
25
- {
26
- "epoch": 0.13763993393283172,
27
- "grad_norm": 20.992238998413086,
28
- "learning_rate": 2.7935400991007525e-05,
29
- "loss": 1.7256,
30
- "step": 1500
31
- },
32
- {
33
- "epoch": 0.18351991191044228,
34
- "grad_norm": 17.107088088989258,
35
- "learning_rate": 2.7247201321343367e-05,
36
- "loss": 1.5579,
37
- "step": 2000
38
- },
39
- {
40
- "epoch": 0.22939988988805285,
41
- "grad_norm": 17.52288246154785,
42
- "learning_rate": 2.6559001651679208e-05,
43
- "loss": 1.5227,
44
- "step": 2500
45
- },
46
- {
47
- "epoch": 0.27527986786566344,
48
- "grad_norm": 15.443458557128906,
49
- "learning_rate": 2.587080198201505e-05,
50
- "loss": 1.4445,
51
- "step": 3000
52
- },
53
- {
54
- "epoch": 0.321159845843274,
55
- "grad_norm": 15.13930892944336,
56
- "learning_rate": 2.518260231235089e-05,
57
- "loss": 1.4353,
58
- "step": 3500
59
- },
60
- {
61
- "epoch": 0.36703982382088457,
62
- "grad_norm": 21.271406173706055,
63
- "learning_rate": 2.4494402642686732e-05,
64
- "loss": 1.3741,
65
- "step": 4000
66
- },
67
- {
68
- "epoch": 0.41291980179849513,
69
- "grad_norm": 14.242935180664062,
70
- "learning_rate": 2.3806202973022574e-05,
71
- "loss": 1.3533,
72
- "step": 4500
73
- },
74
- {
75
- "epoch": 0.4587997797761057,
76
- "grad_norm": 19.635059356689453,
77
- "learning_rate": 2.3118003303358415e-05,
78
- "loss": 1.3076,
79
- "step": 5000
80
- },
81
- {
82
- "epoch": 0.5046797577537163,
83
- "grad_norm": 22.73372459411621,
84
- "learning_rate": 2.2429803633694257e-05,
85
- "loss": 1.2723,
86
- "step": 5500
87
- },
88
- {
89
- "epoch": 0.5505597357313269,
90
- "grad_norm": 20.632278442382812,
91
- "learning_rate": 2.17416039640301e-05,
92
- "loss": 1.2534,
93
- "step": 6000
94
- },
95
- {
96
- "epoch": 0.5964397137089374,
97
- "grad_norm": 24.003353118896484,
98
- "learning_rate": 2.105340429436594e-05,
99
- "loss": 1.2556,
100
- "step": 6500
101
- },
102
- {
103
- "epoch": 0.642319691686548,
104
- "grad_norm": 20.14078712463379,
105
- "learning_rate": 2.036520462470178e-05,
106
- "loss": 1.2472,
107
- "step": 7000
108
- },
109
- {
110
- "epoch": 0.6881996696641586,
111
- "grad_norm": 11.762835502624512,
112
- "learning_rate": 1.9677004955037623e-05,
113
- "loss": 1.1952,
114
- "step": 7500
115
- },
116
- {
117
- "epoch": 0.7340796476417691,
118
- "grad_norm": 19.036245346069336,
119
- "learning_rate": 1.8988805285373464e-05,
120
- "loss": 1.1887,
121
- "step": 8000
122
- },
123
- {
124
- "epoch": 0.7799596256193797,
125
- "grad_norm": 23.435789108276367,
126
- "learning_rate": 1.8300605615709306e-05,
127
- "loss": 1.2012,
128
- "step": 8500
129
- },
130
- {
131
- "epoch": 0.8258396035969903,
132
- "grad_norm": 8.716887474060059,
133
- "learning_rate": 1.7612405946045144e-05,
134
- "loss": 1.1651,
135
- "step": 9000
136
- },
137
- {
138
- "epoch": 0.8717195815746008,
139
- "grad_norm": 18.400320053100586,
140
- "learning_rate": 1.692420627638099e-05,
141
- "loss": 1.1494,
142
- "step": 9500
143
- },
144
- {
145
- "epoch": 0.9175995595522114,
146
- "grad_norm": 13.751968383789062,
147
- "learning_rate": 1.6236006606716827e-05,
148
- "loss": 1.1467,
149
- "step": 10000
150
- },
151
- {
152
- "epoch": 0.963479537529822,
153
- "grad_norm": 17.547643661499023,
154
- "learning_rate": 1.554780693705267e-05,
155
- "loss": 1.1369,
156
- "step": 10500
157
- },
158
- {
159
- "epoch": 1.0093595155074326,
160
- "grad_norm": 10.978132247924805,
161
- "learning_rate": 1.4859607267388513e-05,
162
- "loss": 1.0624,
163
- "step": 11000
164
- },
165
- {
166
- "epoch": 1.055239493485043,
167
- "grad_norm": 13.976412773132324,
168
- "learning_rate": 1.4171407597724355e-05,
169
- "loss": 0.8489,
170
- "step": 11500
171
- },
172
- {
173
- "epoch": 1.1011194714626538,
174
- "grad_norm": 20.82090950012207,
175
- "learning_rate": 1.3483207928060196e-05,
176
- "loss": 0.8338,
177
- "step": 12000
178
- },
179
- {
180
- "epoch": 1.1469994494402642,
181
- "grad_norm": 15.114026069641113,
182
- "learning_rate": 1.2795008258396037e-05,
183
- "loss": 0.8326,
184
- "step": 12500
185
- },
186
- {
187
- "epoch": 1.192879427417875,
188
- "grad_norm": 12.581435203552246,
189
- "learning_rate": 1.2106808588731877e-05,
190
- "loss": 0.8298,
191
- "step": 13000
192
- },
193
- {
194
- "epoch": 1.2387594053954853,
195
- "grad_norm": 7.8120856285095215,
196
- "learning_rate": 1.1418608919067719e-05,
197
- "loss": 0.8512,
198
- "step": 13500
199
- },
200
- {
201
- "epoch": 1.284639383373096,
202
- "grad_norm": 8.480123519897461,
203
- "learning_rate": 1.073040924940356e-05,
204
- "loss": 0.8186,
205
- "step": 14000
206
- },
207
- {
208
- "epoch": 1.3305193613507065,
209
- "grad_norm": 23.09768295288086,
210
- "learning_rate": 1.0042209579739402e-05,
211
- "loss": 0.8084,
212
- "step": 14500
213
- },
214
- {
215
- "epoch": 1.3763993393283172,
216
- "grad_norm": 13.01577091217041,
217
- "learning_rate": 9.354009910075243e-06,
218
- "loss": 0.8521,
219
- "step": 15000
220
- },
221
- {
222
- "epoch": 1.4222793173059278,
223
- "grad_norm": 17.708642959594727,
224
- "learning_rate": 8.665810240411085e-06,
225
- "loss": 0.8355,
226
- "step": 15500
227
- },
228
- {
229
- "epoch": 1.4681592952835383,
230
- "grad_norm": 18.191207885742188,
231
- "learning_rate": 7.977610570746926e-06,
232
- "loss": 0.8216,
233
- "step": 16000
234
- },
235
- {
236
- "epoch": 1.5140392732611487,
237
- "grad_norm": 22.47287940979004,
238
- "learning_rate": 7.2894109010827675e-06,
239
- "loss": 0.8332,
240
- "step": 16500
241
- },
242
- {
243
- "epoch": 1.5599192512387594,
244
- "grad_norm": 17.28014373779297,
245
- "learning_rate": 6.601211231418609e-06,
246
- "loss": 0.7964,
247
- "step": 17000
248
- },
249
- {
250
- "epoch": 1.60579922921637,
251
- "grad_norm": 15.099440574645996,
252
- "learning_rate": 5.9130115617544504e-06,
253
- "loss": 0.8361,
254
- "step": 17500
255
- },
256
- {
257
- "epoch": 1.6516792071939805,
258
- "grad_norm": 15.106084823608398,
259
- "learning_rate": 5.224811892090292e-06,
260
- "loss": 0.8145,
261
- "step": 18000
262
- },
263
- {
264
- "epoch": 1.697559185171591,
265
- "grad_norm": 12.694562911987305,
266
- "learning_rate": 4.536612222426133e-06,
267
- "loss": 0.784,
268
- "step": 18500
269
- },
270
- {
271
- "epoch": 1.7434391631492017,
272
- "grad_norm": 17.011945724487305,
273
- "learning_rate": 3.848412552761975e-06,
274
- "loss": 0.7694,
275
- "step": 19000
276
- },
277
- {
278
- "epoch": 1.7893191411268123,
279
- "grad_norm": 8.237266540527344,
280
- "learning_rate": 3.1602128830978163e-06,
281
- "loss": 0.7693,
282
- "step": 19500
283
- },
284
- {
285
- "epoch": 1.8351991191044228,
286
- "grad_norm": 12.428207397460938,
287
- "learning_rate": 2.4720132134336578e-06,
288
- "loss": 0.824,
289
- "step": 20000
290
- },
291
- {
292
- "epoch": 1.8810790970820332,
293
- "grad_norm": 21.18424415588379,
294
- "learning_rate": 1.783813543769499e-06,
295
- "loss": 0.7838,
296
- "step": 20500
297
- },
298
- {
299
- "epoch": 1.926959075059644,
300
- "grad_norm": 13.418033599853516,
301
- "learning_rate": 1.0956138741053403e-06,
302
- "loss": 0.7877,
303
- "step": 21000
304
- },
305
- {
306
- "epoch": 1.9728390530372546,
307
- "grad_norm": 11.871217727661133,
308
- "learning_rate": 4.074142044411819e-07,
309
- "loss": 0.7867,
310
- "step": 21500
311
- },
312
  {
313
  "epoch": 2.0,
314
- "step": 21796,
315
- "total_flos": 6.833694078916608e+16,
316
- "train_loss": 1.1028300029987763,
317
- "train_runtime": 22205.1611,
318
- "train_samples_per_second": 11.778,
319
- "train_steps_per_second": 0.982
320
  }
321
  ],
322
  "logging_steps": 500,
323
- "max_steps": 21796,
324
  "num_input_tokens_seen": 0,
325
  "num_train_epochs": 2,
326
- "save_steps": 2000,
327
  "stateful_callbacks": {
328
  "TrainerControl": {
329
  "args": {
330
  "should_epoch_stop": false,
331
  "should_evaluate": false,
332
  "should_log": false,
333
- "should_save": true,
334
- "should_training_stop": true
335
  },
336
  "attributes": {}
337
  }
338
  },
339
- "total_flos": 6.833694078916608e+16,
340
  "train_batch_size": 12,
341
  "trial_name": null,
342
  "trial_params": null
 
3
  "best_model_checkpoint": null,
4
  "epoch": 2.0,
5
  "eval_steps": 500,
6
+ "global_step": 18,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  {
12
  "epoch": 2.0,
13
+ "step": 18,
14
+ "total_flos": 52259351347200.0,
15
+ "train_loss": 5.54022216796875,
16
+ "train_runtime": 14.9371,
17
+ "train_samples_per_second": 13.389,
18
+ "train_steps_per_second": 1.205
19
  }
20
  ],
21
  "logging_steps": 500,
22
+ "max_steps": 18,
23
  "num_input_tokens_seen": 0,
24
  "num_train_epochs": 2,
25
+ "save_steps": 500,
26
  "stateful_callbacks": {
27
  "TrainerControl": {
28
  "args": {
29
  "should_epoch_stop": false,
30
  "should_evaluate": false,
31
  "should_log": false,
32
+ "should_save": false,
33
+ "should_training_stop": false
34
  },
35
  "attributes": {}
36
  }
37
  },
38
+ "total_flos": 52259351347200.0,
39
  "train_batch_size": 12,
40
  "trial_name": null,
41
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c44e27d9ab74701a273360a21c4f3a21a049f71f2c14c65bbd296835fa6abb3
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebb3dcd35ffae5482ce42e2efaf472d62497d18365210fc8786b9d454e2d6d97
3
  size 5368