Shresthadev403 committed on
Commit 6789a54 · verified · 1 Parent(s): 9b6c80d

End of training

README.md CHANGED
@@ -1,4 +1,6 @@
 ---
+license: apache-2.0
+base_model: bert-base-uncased
 tags:
 - generated_from_trainer
 model-index:
@@ -11,14 +13,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # ner-bert-ingredients
 
-This model was trained from scratch on an unknown dataset.
-It achieves the following results on the evaluation set:
-- eval_loss: 1.3551
-- eval_runtime: 83.7743
-- eval_samples_per_second: 59.684
-- eval_steps_per_second: 0.943
-- epoch: 24.26
-- step: 18000
+This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on an unknown dataset.
 
 ## Model description
 
@@ -45,9 +40,13 @@ The following hyperparameters were used during training:
 - total_train_batch_size: 128
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs: 100
+- num_epochs: 50
 - mixed_precision_training: Native AMP
 
+### Training results
+
+
+
 ### Framework versions
 
 - Transformers 4.36.0
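For context on what the updated card describes, here is a minimal, hedged sketch of loading the checkpoint for token classification with transformers. The repo id is an assumption pieced together from the commit author and the card title; it is not stated anywhere in this diff.

```python
# Minimal sketch, not the author's documented usage.
# "Shresthadev403/ner-bert-ingredients" is an assumed Hub id; replace it with
# the real id or a local path if it differs.
from transformers import AutoModelForTokenClassification, AutoTokenizer, pipeline

model_id = "Shresthadev403/ner-bert-ingredients"  # assumption

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForTokenClassification.from_pretrained(model_id)

# Merge sub-word pieces into whole entity spans before printing.
ner = pipeline("token-classification", model=model, tokenizer=tokenizer,
               aggregation_strategy="simple")
print(ner("2 cups chopped fresh basil and a pinch of sea salt"))
```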
config.json CHANGED
The diff for this file is too large to render. See raw diff
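config.json cannot be rendered inline, most likely because the label map for the ingredient tags is long; that is an inference, not something the page states. A hedged sketch for inspecting it, reusing the assumed repo id from above:

```python
# Hedged sketch: inspect the (unrendered) config, e.g. the label map.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("Shresthadev403/ner-bert-ingredients")  # assumed id
print(config.num_labels)
print(list(config.id2label.items())[:10])  # first few tags, if a label map is present
```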
 
logs/events.out.tfevents.1705980708.70e47a1f5afe.42.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8b895c6bfc5a23bd39e157af13b3392bec37dfddec8b1ed972119675eb7bb83
+size 8429
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:99fd67a82b1777a8bbb4dcfd6c2bda43327b2e79d9caf192697494fc9ebced34
-size 535667604
+oid sha256:5dbfecedd805a055d558e713fdab59dc3f5ce6a3cc40f43d3abc6ed1ac915a0b
+size 435863700
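The safetensors blob shrinks from 535,667,604 to 435,863,700 bytes, which would be consistent with the saved tensors changing alongside config.json in this same commit, though the diff itself does not say why. A hedged sketch for listing tensor names and shapes from a local copy of the file without loading the weights:

```python
# Hedged sketch: enumerate tensors in a local safetensors file (path illustrative).
from safetensors import safe_open

with safe_open("model.safetensors", framework="pt") as f:
    for name in f.keys():
        print(name, f.get_slice(name).get_shape())
```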
special_tokens_map.json CHANGED
@@ -1,37 +1,7 @@
 {
-  "cls_token": {
-    "content": "[CLS]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "mask_token": {
-    "content": "[MASK]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": {
-    "content": "[PAD]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "sep_token": {
-    "content": "[SEP]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "unk_token": {
-    "content": "[UNK]",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
 }
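The verbose per-token dictionaries are collapsed into plain strings; both forms resolve to the same special tokens once a tokenizer is loaded. A small sketch, using bert-base-uncased as a stand-in for this repo's tokenizer:

```python
# Sketch: the compact and the verbose special_tokens_map.json forms load to the
# same special tokens.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("bert-base-uncased")
print(tok.cls_token, tok.sep_token, tok.pad_token, tok.unk_token, tok.mask_token)
# -> [CLS] [SEP] [PAD] [UNK] [MASK]
```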
tokenizer_config.json CHANGED
@@ -45,18 +45,11 @@
   "cls_token": "[CLS]",
   "do_lower_case": true,
   "mask_token": "[MASK]",
-  "max_length": 512,
   "model_max_length": 512,
-  "pad_to_multiple_of": null,
   "pad_token": "[PAD]",
-  "pad_token_type_id": 0,
-  "padding_side": "right",
   "sep_token": "[SEP]",
-  "stride": 0,
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "BertTokenizer",
-  "truncation_side": "right",
-  "truncation_strategy": "longest_first",
   "unk_token": "[UNK]"
 }
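The dropped keys (max_length, padding_side, stride, truncation_strategy, and so on) are call-time arguments in recent transformers releases rather than persistent tokenizer config, so equivalent behavior can be requested per call. A hedged sketch, again with bert-base-uncased standing in for the repo's tokenizer:

```python
# Sketch: request the removed settings at encode time instead of via the config.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("bert-base-uncased")  # stand-in for the repo tokenizer
enc = tok(
    "1 tbsp extra-virgin olive oil",
    max_length=512,
    padding="max_length",        # pad out to max_length (right side by default)
    truncation="longest_first",  # the strategy that used to sit in the config
    return_tensors="pt",
)
print(enc["input_ids"].shape)  # torch.Size([1, 512])
```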
trainer_state.json CHANGED
@@ -1,356 +1,29 @@
 {
-  "best_metric": 1.3392431735992432,
-  "best_model_checkpoint": "ner-bert-ingredients/checkpoint-17250",
-  "epoch": 24.256652071404513,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 50.0,
   "eval_steps": 750,
-  "global_step": 18000,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 1.01,
-      "learning_rate": 4.949460916442048e-05,
-      "loss": 5.4,
-      "step": 750
-    },
-    {
-      "epoch": 1.01,
-      "eval_loss": 3.985283613204956,
-      "eval_runtime": 84.6074,
-      "eval_samples_per_second": 59.096,
-      "eval_steps_per_second": 0.934,
-      "step": 750
-    },
-    {
-      "epoch": 2.02,
-      "learning_rate": 4.8989218328840976e-05,
-      "loss": 3.3737,
-      "step": 1500
-    },
-    {
-      "epoch": 2.02,
-      "eval_loss": 2.947718858718872,
-      "eval_runtime": 84.1622,
-      "eval_samples_per_second": 59.409,
-      "eval_steps_per_second": 0.939,
-      "step": 1500
-    },
-    {
-      "epoch": 3.03,
-      "learning_rate": 4.84845013477089e-05,
-      "loss": 2.5551,
-      "step": 2250
-    },
-    {
-      "epoch": 3.03,
-      "eval_loss": 2.259490728378296,
-      "eval_runtime": 84.2333,
-      "eval_samples_per_second": 59.359,
-      "eval_steps_per_second": 0.938,
-      "step": 2250
-    },
-    {
-      "epoch": 4.04,
-      "learning_rate": 4.7979110512129385e-05,
-      "loss": 2.0321,
-      "step": 3000
-    },
-    {
-      "epoch": 4.04,
-      "eval_loss": 1.9211012125015259,
-      "eval_runtime": 84.2271,
-      "eval_samples_per_second": 59.363,
-      "eval_steps_per_second": 0.938,
-      "step": 3000
-    },
-    {
-      "epoch": 5.05,
-      "learning_rate": 4.7473719676549865e-05,
-      "loss": 1.6766,
-      "step": 3750
-    },
-    {
-      "epoch": 5.05,
-      "eval_loss": 1.6346299648284912,
-      "eval_runtime": 84.1885,
-      "eval_samples_per_second": 59.391,
-      "eval_steps_per_second": 0.938,
-      "step": 3750
-    },
-    {
-      "epoch": 6.06,
-      "learning_rate": 4.696900269541779e-05,
-      "loss": 1.437,
-      "step": 4500
-    },
-    {
-      "epoch": 6.06,
-      "eval_loss": 1.5281634330749512,
-      "eval_runtime": 84.2371,
-      "eval_samples_per_second": 59.356,
-      "eval_steps_per_second": 0.938,
-      "step": 4500
-    },
-    {
-      "epoch": 7.08,
-      "learning_rate": 4.949460916442048e-05,
-      "loss": 3.7548,
-      "step": 5250
-    },
-    {
-      "epoch": 7.08,
-      "eval_loss": 2.378469228744507,
-      "eval_runtime": 84.0031,
-      "eval_samples_per_second": 59.522,
-      "eval_steps_per_second": 0.94,
-      "step": 5250
-    },
-    {
-      "epoch": 8.09,
-      "learning_rate": 4.8989218328840976e-05,
-      "loss": 1.9326,
-      "step": 6000
-    },
-    {
-      "epoch": 8.09,
-      "eval_loss": 1.8503246307373047,
-      "eval_runtime": 83.659,
-      "eval_samples_per_second": 59.766,
-      "eval_steps_per_second": 0.944,
-      "step": 6000
-    },
-    {
-      "epoch": 9.1,
-      "learning_rate": 4.84845013477089e-05,
-      "loss": 1.489,
-      "step": 6750
-    },
-    {
-      "epoch": 9.1,
-      "eval_loss": 1.630631685256958,
-      "eval_runtime": 83.6797,
-      "eval_samples_per_second": 59.752,
-      "eval_steps_per_second": 0.944,
-      "step": 6750
-    },
-    {
-      "epoch": 10.11,
-      "learning_rate": 4.7979784366576826e-05,
-      "loss": 1.2457,
-      "step": 7500
-    },
-    {
-      "epoch": 10.11,
-      "eval_loss": 1.4776005744934082,
-      "eval_runtime": 83.6823,
-      "eval_samples_per_second": 59.75,
-      "eval_steps_per_second": 0.944,
-      "step": 7500
-    },
-    {
-      "epoch": 11.12,
-      "learning_rate": 4.7474393530997306e-05,
-      "loss": 1.0713,
-      "step": 8250
-    },
-    {
-      "epoch": 11.12,
-      "eval_loss": 1.4048632383346558,
-      "eval_runtime": 83.6632,
-      "eval_samples_per_second": 59.763,
-      "eval_steps_per_second": 0.944,
-      "step": 8250
-    },
-    {
-      "epoch": 12.13,
-      "learning_rate": 4.696900269541779e-05,
-      "loss": 0.9469,
-      "step": 9000
-    },
-    {
-      "epoch": 12.13,
-      "eval_loss": 1.349704384803772,
-      "eval_runtime": 83.6712,
-      "eval_samples_per_second": 59.758,
-      "eval_steps_per_second": 0.944,
-      "step": 9000
-    },
-    {
-      "epoch": 13.14,
-      "learning_rate": 4.949460916442048e-05,
-      "loss": 3.1119,
-      "step": 9750
-    },
-    {
-      "epoch": 13.14,
-      "eval_loss": 2.09057354927063,
-      "eval_runtime": 84.2266,
-      "eval_samples_per_second": 59.364,
-      "eval_steps_per_second": 0.938,
-      "step": 9750
-    },
-    {
-      "epoch": 14.15,
-      "learning_rate": 4.8989218328840976e-05,
-      "loss": 1.5021,
-      "step": 10500
-    },
-    {
-      "epoch": 14.15,
-      "eval_loss": 1.6629363298416138,
-      "eval_runtime": 83.9283,
-      "eval_samples_per_second": 59.575,
-      "eval_steps_per_second": 0.941,
-      "step": 10500
-    },
-    {
-      "epoch": 15.16,
-      "learning_rate": 4.84845013477089e-05,
-      "loss": 1.1682,
-      "step": 11250
-    },
-    {
-      "epoch": 15.16,
-      "eval_loss": 1.5476787090301514,
-      "eval_runtime": 83.9134,
-      "eval_samples_per_second": 59.585,
-      "eval_steps_per_second": 0.941,
-      "step": 11250
-    },
-    {
-      "epoch": 16.17,
-      "learning_rate": 4.7979110512129385e-05,
-      "loss": 0.9836,
-      "step": 12000
-    },
-    {
-      "epoch": 16.17,
-      "eval_loss": 1.5123697519302368,
-      "eval_runtime": 83.9374,
-      "eval_samples_per_second": 59.568,
-      "eval_steps_per_second": 0.941,
-      "step": 12000
-    },
-    {
-      "epoch": 17.18,
-      "learning_rate": 4.7474393530997306e-05,
-      "loss": 0.8542,
-      "step": 12750
-    },
-    {
-      "epoch": 17.18,
-      "eval_loss": 1.3803095817565918,
-      "eval_runtime": 83.8966,
-      "eval_samples_per_second": 59.597,
-      "eval_steps_per_second": 0.942,
-      "step": 12750
-    },
-    {
-      "epoch": 18.19,
-      "learning_rate": 4.696900269541779e-05,
-      "loss": 0.7554,
-      "step": 13500
-    },
-    {
-      "epoch": 18.19,
-      "eval_loss": 1.3329607248306274,
-      "eval_runtime": 83.8879,
-      "eval_samples_per_second": 59.603,
-      "eval_steps_per_second": 0.942,
-      "step": 13500
-    },
-    {
-      "epoch": 19.2,
-      "learning_rate": 4.949460916442048e-05,
-      "loss": 2.6984,
-      "step": 14250
-    },
-    {
-      "epoch": 19.2,
-      "eval_loss": 1.934728741645813,
-      "eval_runtime": 84.0312,
-      "eval_samples_per_second": 59.502,
-      "eval_steps_per_second": 0.94,
-      "step": 14250
-    },
-    {
-      "epoch": 20.21,
-      "learning_rate": 4.8989218328840976e-05,
-      "loss": 1.2723,
-      "step": 15000
-    },
-    {
-      "epoch": 20.21,
-      "eval_loss": 1.616538405418396,
-      "eval_runtime": 83.7157,
-      "eval_samples_per_second": 59.726,
-      "eval_steps_per_second": 0.944,
-      "step": 15000
-    },
-    {
-      "epoch": 21.23,
-      "learning_rate": 4.84845013477089e-05,
-      "loss": 1.002,
-      "step": 15750
-    },
-    {
-      "epoch": 21.23,
-      "eval_loss": 1.481406331062317,
-      "eval_runtime": 83.6915,
-      "eval_samples_per_second": 59.743,
-      "eval_steps_per_second": 0.944,
-      "step": 15750
-    },
-    {
-      "epoch": 22.24,
-      "learning_rate": 4.7979110512129385e-05,
-      "loss": 0.8449,
-      "step": 16500
-    },
-    {
-      "epoch": 22.24,
-      "eval_loss": 1.4224437475204468,
-      "eval_runtime": 83.7229,
-      "eval_samples_per_second": 59.721,
-      "eval_steps_per_second": 0.944,
-      "step": 16500
-    },
-    {
-      "epoch": 23.25,
-      "learning_rate": 4.7473719676549865e-05,
-      "loss": 0.7344,
-      "step": 17250
-    },
-    {
-      "epoch": 23.25,
-      "eval_loss": 1.3392431735992432,
-      "eval_runtime": 83.6972,
-      "eval_samples_per_second": 59.739,
-      "eval_steps_per_second": 0.944,
-      "step": 17250
-    },
-    {
-      "epoch": 24.26,
-      "learning_rate": 4.6969676549865234e-05,
-      "loss": 0.6533,
-      "step": 18000
-    },
-    {
-      "epoch": 24.26,
-      "eval_loss": 1.3550658226013184,
-      "eval_runtime": 83.7743,
-      "eval_samples_per_second": 59.684,
-      "eval_steps_per_second": 0.943,
-      "step": 18000
+      "epoch": 50.0,
+      "step": 50,
+      "total_flos": 32636710679400.0,
+      "train_loss": 0.2473917007446289,
+      "train_runtime": 8.5287,
+      "train_samples_per_second": 52.763,
+      "train_steps_per_second": 5.863
     }
   ],
   "logging_steps": 750,
-  "max_steps": 74200,
+  "max_steps": 50,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 100,
+  "num_train_epochs": 50,
   "save_steps": 750,
-  "total_flos": 7.79036533801943e+17,
+  "total_flos": 32636710679400.0,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:37a91c4ff3b80be24391e06650540dddea861285c3a8030b1771c2469cae3f40
+oid sha256:539f836c129844c7433ce90ce7a776a98d7e07cfa66853d433c5135b23816fc8
 size 4283
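training_args.bin is a binary pickle and the new trainer_state.json keeps only a single summary record, so the exact original arguments are not recoverable from this page. Below is a hedged TrainingArguments sketch consistent with the values visible in the README and trainer_state.json diffs; the gradient_accumulation_steps and learning_rate values are assumptions, not facts from the diff.

```python
# Hedged sketch of TrainingArguments matching the card and trainer_state.json.
# gradient_accumulation_steps and learning_rate are assumptions
# (16 per device * 8 = 128 total; lr only implied by the logged schedule).
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir="ner-bert-ingredients",
    num_train_epochs=50,
    per_device_train_batch_size=16,
    gradient_accumulation_steps=8,   # assumption
    learning_rate=5e-5,              # assumption (Trainer default)
    lr_scheduler_type="linear",
    adam_beta1=0.9,
    adam_beta2=0.999,
    adam_epsilon=1e-8,
    evaluation_strategy="steps",
    eval_steps=750,
    logging_steps=750,
    save_steps=750,
    fp16=True,                       # "Native AMP" in the card
)
```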