luonghuuthanhnam5 committed on
Commit
4b65ffe
1 Parent(s): 3f8ac0b

Training done

Browse files
Files changed (2) hide show
  1. added_tokens.json +5 -1
  2. tokenizer_config.json +32 -0
added_tokens.json CHANGED
@@ -1,4 +1,8 @@
1
  {
 
 
2
  "<mask>": 64000,
3
- "<s_pair_ocr>": 64001
 
 
4
  }
 
1
  {
2
+ "</s_document_type>": 64004,
3
+ "</s_extraction>": 64002,
4
  "<mask>": 64000,
5
+ "<s_document_type>": 64003,
6
+ "<s_extraction>": 64001,
7
+ "<s_pair_ocr>": 64005
8
  }
tokenizer_config.json CHANGED
@@ -41,6 +41,38 @@
41
  "special": true
42
  },
43
  "64001": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  "content": "<s_pair_ocr>",
45
  "lstrip": false,
46
  "normalized": true,
 
41
  "special": true
42
  },
43
  "64001": {
44
+ "content": "<s_extraction>",
45
+ "lstrip": false,
46
+ "normalized": true,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": false
50
+ },
51
+ "64002": {
52
+ "content": "</s_extraction>",
53
+ "lstrip": false,
54
+ "normalized": true,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": false
58
+ },
59
+ "64003": {
60
+ "content": "<s_document_type>",
61
+ "lstrip": false,
62
+ "normalized": true,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": false
66
+ },
67
+ "64004": {
68
+ "content": "</s_document_type>",
69
+ "lstrip": false,
70
+ "normalized": true,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": false
74
+ },
75
+ "64005": {
76
  "content": "<s_pair_ocr>",
77
  "lstrip": false,
78
  "normalized": true,