wesamalnabki committed
Commit 53ccb86 · 1 Parent(s): b0aa609

Upload tokenizer
Files changed (2)
  1. tokenizer_config.json +21 -1
  2. vocab.json +0 -0
tokenizer_config.json CHANGED
@@ -17,6 +17,12 @@
     "rstrip": false,
     "single_word": false
   },
+  "cls_token_box": [
+    0,
+    0,
+    0,
+    0
+  ],
   "eos_token": {
     "__type": "AddedToken",
     "content": "</s>",
@@ -36,6 +42,7 @@
   },
   "max_len": 512,
   "model_max_length": 512,
+  "only_label_first_subword": true,
   "pad_token": {
     "__type": "AddedToken",
     "content": "<pad>",
@@ -44,6 +51,13 @@
     "rstrip": false,
     "single_word": false
   },
+  "pad_token_box": [
+    0,
+    0,
+    0,
+    0
+  ],
+  "pad_token_label": -100,
   "sep_token": {
     "__type": "AddedToken",
     "content": "</s>",
@@ -52,7 +66,13 @@
     "rstrip": false,
     "single_word": false
   },
-  "tokenizer_class": "RobertaTokenizer",
+  "sep_token_box": [
+    0,
+    0,
+    0,
+    0
+  ],
+  "tokenizer_class": "LayoutLMv3Tokenizer",
   "trim_offsets": true,
   "unk_token": {
     "__type": "AddedToken",
vocab.json CHANGED
The diff for this file is too large to render.
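For context, the keys added to tokenizer_config.json are the layout-specific defaults that LayoutLMv3Tokenizer expects on top of the RoBERTa-style vocabulary: bounding boxes for special tokens, the padding label, and the subword-labeling flag. Below is a minimal sketch of how these values surface after loading with the 🤗 Transformers library; the repository id is a placeholder, not the actual repo of this commit.

```python
from transformers import AutoTokenizer

# AutoTokenizer reads "tokenizer_class" from tokenizer_config.json, so after
# this commit it resolves to LayoutLMv3Tokenizer (or its fast variant) instead
# of RobertaTokenizer. "wesamalnabki/REPO_NAME" is a placeholder id.
tokenizer = AutoTokenizer.from_pretrained("wesamalnabki/REPO_NAME")

print(type(tokenizer).__name__)            # LayoutLMv3Tokenizer(Fast), per the new config
print(tokenizer.cls_token_box)             # [0, 0, 0, 0] box assigned to <s>
print(tokenizer.sep_token_box)             # [0, 0, 0, 0] box assigned to </s>
print(tokenizer.pad_token_box)             # [0, 0, 0, 0] box assigned to <pad>
print(tokenizer.pad_token_label)           # -100, the index ignored by the token-classification loss
print(tokenizer.only_label_first_subword)  # True: only a word's first subword keeps its label
```

Because AutoTokenizer picks up the tokenizer class from tokenizer_config.json, swapping the value from RobertaTokenizer to LayoutLMv3Tokenizer is enough for downstream code to get the layout-aware tokenizer without any code change.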