luonghuuthanhnam5
committed on
Commit
•
4b65ffe
1
Parent(s):
3f8ac0b
Training done
Browse files
- added_tokens.json +5 -1
- tokenizer_config.json +32 -0
added_tokens.json
CHANGED
@@ -1,4 +1,8 @@
|
|
1 |
{
|
|
|
|
|
2 |
"<mask>": 64000,
|
3 |
-
"<s_pair_ocr>": 64001
|
|
|
|
|
4 |
}
|
|
|
1 |
{
|
2 |
+
"</s_document_type>": 64004,
|
3 |
+
"</s_extraction>": 64002,
|
4 |
"<mask>": 64000,
|
5 |
+
"<s_document_type>": 64003,
|
6 |
+
"<s_extraction>": 64001,
|
7 |
+
"<s_pair_ocr>": 64005
|
8 |
}
|
tokenizer_config.json
CHANGED
@@ -41,6 +41,38 @@
|
|
41 |
"special": true
|
42 |
},
|
43 |
"64001": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
"content": "<s_pair_ocr>",
|
45 |
"lstrip": false,
|
46 |
"normalized": true,
|
|
|
41 |
"special": true
|
42 |
},
|
43 |
"64001": {
|
44 |
+
"content": "<s_extraction>",
|
45 |
+
"lstrip": false,
|
46 |
+
"normalized": true,
|
47 |
+
"rstrip": false,
|
48 |
+
"single_word": false,
|
49 |
+
"special": false
|
50 |
+
},
|
51 |
+
"64002": {
|
52 |
+
"content": "</s_extraction>",
|
53 |
+
"lstrip": false,
|
54 |
+
"normalized": true,
|
55 |
+
"rstrip": false,
|
56 |
+
"single_word": false,
|
57 |
+
"special": false
|
58 |
+
},
|
59 |
+
"64003": {
|
60 |
+
"content": "<s_document_type>",
|
61 |
+
"lstrip": false,
|
62 |
+
"normalized": true,
|
63 |
+
"rstrip": false,
|
64 |
+
"single_word": false,
|
65 |
+
"special": false
|
66 |
+
},
|
67 |
+
"64004": {
|
68 |
+
"content": "</s_document_type>",
|
69 |
+
"lstrip": false,
|
70 |
+
"normalized": true,
|
71 |
+
"rstrip": false,
|
72 |
+
"single_word": false,
|
73 |
+
"special": false
|
74 |
+
},
|
75 |
+
"64005": {
|
76 |
"content": "<s_pair_ocr>",
|
77 |
"lstrip": false,
|
78 |
"normalized": true,
|