quim-motger committed
Commit 111d1ac
1 Parent(s): c484190

Upload 10 files

config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "xlnet-large-cased",
+  "_name_or_path": "data/further_pretraining/xlnet-large-cased/checkpoint-4706",
   "architectures": [
     "XLNetForTokenClassification"
   ],
@@ -45,7 +45,7 @@
     }
   },
   "torch_dtype": "float32",
-  "transformers_version": "4.30.2",
+  "transformers_version": "4.39.1",
   "untie_r": true,
   "use_mems_eval": true,
   "use_mems_train": false,
model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b26a97cc1731582f81c2cdf3218be279d30d8060ee9746e58eb3f9c7ca99f43
+size 1441133004
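The weights file itself lives in Git LFS; what is committed is only a pointer carrying the object's SHA-256 and byte size. A small standard-library sketch for checking that a downloaded model.safetensors matches the pointer above:

import hashlib
import os

path = "model.safetensors"  # local copy of the LFS object
expected_oid = "5b26a97cc1731582f81c2cdf3218be279d30d8060ee9746e58eb3f9c7ca99f43"
expected_size = 1441133004

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert h.hexdigest() == expected_oid, "sha256 mismatch"
print("model.safetensors matches the LFS pointer")
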
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5e2faa41336c927c289261e2147f320bdf4b19a5c75013f9fa48e5b7504eb7e0
-size 2882410053
+oid sha256:9f7a1271d60ff87a2733a1cd8aec6abe341c8ff4feddaa9b2eaa9ead4481b195
+size 2882521638
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e0edd2ceb30d6babc1f3e5958a8417a2569537b5352b92d741a4661c167a82c5
-size 14575
+oid sha256:0c196bf769a36bd6546a97ba0f5572414b970ed2454097564e55b302a5d4f62a
+size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a81b9e64a6d36ab9d27ca33efaa732434676daf622cfdf89ee1179762e733965
-size 627
+oid sha256:5be2f897f916fdd0db20a90cc5571404430bc18f53f310ccdce81881dd982463
+size 1064
tokenizer.json CHANGED
@@ -123,7 +123,8 @@
       {
         "type": "Metaspace",
         "replacement": "▁",
-        "add_prefix_space": true
+        "add_prefix_space": true,
+        "prepend_scheme": "always"
       }
     ]
   },
@@ -205,7 +206,8 @@
   "decoder": {
     "type": "Metaspace",
     "replacement": "▁",
-    "add_prefix_space": true
+    "add_prefix_space": true,
+    "prepend_scheme": "always"
   },
   "model": {
     "type": "Unigram",
@@ -128211,6 +128213,7 @@
       "•",
       -14.79216480255127
     ]
-  ]
+  ],
+  "byte_fallback": false
 }
 }
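The Metaspace pre-tokenizer and decoder entries gain a `prepend_scheme: "always"` field and the Unigram model gains `byte_fallback: false`; this is how the newer `tokenizers` library bundled with transformers 4.39 serializes the same behaviour. A quick way to confirm the touched fields without loading the full tokenizer is plain JSON inspection; the recursive search below avoids assuming where the Metaspace components sit in the file:

import json

def find_metaspace(node, found=None):
    """Recursively collect every Metaspace component in the tokenizer config."""
    if found is None:
        found = []
    if isinstance(node, dict):
        if node.get("type") == "Metaspace":
            found.append(node)
        for value in node.values():
            find_metaspace(value, found)
    elif isinstance(node, list):
        for value in node:
            find_metaspace(value, found)
    return found

with open("tokenizer.json", encoding="utf-8") as f:
    tok = json.load(f)

for ms in find_metaspace(tok):
    print(ms)  # expect add_prefix_space: True and prepend_scheme: "always"
print("byte_fallback:", tok["model"].get("byte_fallback"))  # expect False
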
tokenizer_config.json CHANGED
@@ -1,4 +1,78 @@
 {
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<cls>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "<sep>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "6": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "7": {
+      "content": "<eod>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "8": {
+      "content": "<eop>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
   "additional_special_tokens": [
     "<eop>",
     "<eod>"
@@ -9,14 +83,7 @@
   "do_lower_case": false,
   "eos_token": "</s>",
   "keep_accents": false,
-  "mask_token": {
-    "__type": "AddedToken",
-    "content": "<mask>",
-    "lstrip": true,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
+  "mask_token": "<mask>",
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<pad>",
   "remove_space": true,
trainer_state.json CHANGED
@@ -1,64 +1,87 @@
 {
-  "best_metric": 0.004898133222013712,
-  "best_model_checkpoint": "data/train-test///model/checkpoint-3351",
-  "epoch": 1.0,
-  "global_step": 3351,
+  "best_metric": 0.9972401103955841,
+  "best_model_checkpoint": "data/train-test/xlnet-large-cased//model/checkpoint-3352",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 3352,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.15,
-      "learning_rate": 1.850790808713817e-05,
-      "loss": 0.1149,
+      "epoch": 0.3,
+      "grad_norm": 0.6528932452201843,
+      "learning_rate": 2.552505966587112e-05,
+      "loss": 0.0674,
       "step": 500
     },
     {
-      "epoch": 0.3,
-      "learning_rate": 1.7015816174276335e-05,
-      "loss": 0.0198,
+      "epoch": 0.6,
+      "grad_norm": 0.035294823348522186,
+      "learning_rate": 2.1050119331742245e-05,
+      "loss": 0.0058,
       "step": 1000
     },
     {
-      "epoch": 0.45,
-      "learning_rate": 1.5523724261414504e-05,
-      "loss": 0.0101,
+      "epoch": 0.89,
+      "grad_norm": 0.07718423008918762,
+      "learning_rate": 1.6575178997613365e-05,
+      "loss": 0.0041,
       "step": 1500
     },
     {
-      "epoch": 0.6,
-      "learning_rate": 1.4031632348552673e-05,
-      "loss": 0.0063,
+      "epoch": 1.0,
+      "eval_accuracy": null,
+      "eval_f1": 0.9853211009174311,
+      "eval_loss": 0.00522511824965477,
+      "eval_precision": 0.9826166514181153,
+      "eval_recall": 0.9880404783808647,
+      "eval_runtime": 8.7222,
+      "eval_samples_per_second": 111.211,
+      "eval_steps_per_second": 6.994,
+      "step": 1676
+    },
+    {
+      "epoch": 1.19,
+      "grad_norm": 0.03843804448843002,
+      "learning_rate": 1.2100238663484488e-05,
+      "loss": 0.003,
       "step": 2000
     },
     {
-      "epoch": 0.75,
-      "learning_rate": 1.253954043569084e-05,
-      "loss": 0.0067,
+      "epoch": 1.49,
+      "grad_norm": 0.008623383939266205,
+      "learning_rate": 7.625298329355609e-06,
+      "loss": 0.0015,
       "step": 2500
     },
     {
-      "epoch": 0.9,
-      "learning_rate": 1.1047448522829008e-05,
-      "loss": 0.0068,
+      "epoch": 1.79,
+      "grad_norm": 0.005918263457715511,
+      "learning_rate": 3.1503579952267305e-06,
+      "loss": 0.0012,
       "step": 3000
     },
     {
-      "epoch": 1.0,
+      "epoch": 2.0,
       "eval_accuracy": null,
-      "eval_f1": 0.9943977591036415,
-      "eval_loss": 0.004898133222013712,
-      "eval_precision": 0.9943977591036415,
-      "eval_recall": 0.9943977591036415,
-      "eval_runtime": 1.5284,
-      "eval_samples_per_second": 634.661,
-      "eval_steps_per_second": 79.823,
-      "step": 3351
+      "eval_f1": 0.9972401103955841,
+      "eval_loss": 0.002058240817859769,
+      "eval_precision": 0.9972401103955841,
+      "eval_recall": 0.9972401103955841,
+      "eval_runtime": 8.7323,
+      "eval_samples_per_second": 111.082,
+      "eval_steps_per_second": 6.986,
+      "step": 3352
     }
   ],
-  "max_steps": 6702,
+  "logging_steps": 500,
+  "max_steps": 3352,
+  "num_input_tokens_seen": 0,
   "num_train_epochs": 2,
-  "total_flos": 2597776456686000.0,
+  "save_steps": 500,
+  "total_flos": 6566441558618250.0,
+  "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
 }
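trainer_state.json now records the full two-epoch fine-tuning run (best F1 ≈ 0.997 at step 3352), with the extra `grad_norm`, `logging_steps`, `save_steps`, and `train_batch_size` fields that newer Trainer versions write. A small sketch for pulling the loss curve and the per-epoch eval metrics back out of the log:

import json

with open("trainer_state.json", encoding="utf-8") as f:
    state = json.load(f)

print("best:", state["best_metric"], "at", state["best_model_checkpoint"])

for entry in state["log_history"]:
    if "loss" in entry:        # training log entries
        print(f"step {entry['step']:>5}  loss {entry['loss']:.4f}  lr {entry['learning_rate']:.2e}")
    elif "eval_f1" in entry:   # evaluation entries (written at the end of each epoch here)
        print(f"epoch {entry['epoch']}: f1={entry['eval_f1']:.4f}  eval_loss={entry['eval_loss']:.6f}")
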
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4d97d2a8f287fdc8b9e66604d38c2f4463bc68b55cef78f20bc93ec03f39e104
-size 3963
+oid sha256:93902a43abf31e011f1d767fe0af9922c3522a026e3f77bdedf32ca0b56b3d09
+size 4984
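training_args.bin is a pickled TrainingArguments object, which is why its size shifts between transformers versions. If you need the exact hyperparameters of the run, a hedged inspection sketch: it assumes transformers is installed so the pickle can resolve, and passes weights_only=False because recent torch defaults reject non-tensor pickles.

import torch

# Unpickling a TrainingArguments object; requires the transformers package to be importable.
args = torch.load("training_args.bin", weights_only=False)
print(type(args).__name__)
print("learning_rate:", args.learning_rate)
print("per_device_train_batch_size:", args.per_device_train_batch_size)
print("num_train_epochs:", args.num_train_epochs)
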