wangyk22 committed
Commit 7a212f5
1 Parent(s): b070256

Upload 2 files

Files changed (2)
  1. seq.ckpt +3 -0
  2. tokenizer.json +197 -0
seq.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:eb236879e883477a446011c720c48e5235b05e447d7854292ba9e94076d6c633
+ size 136497774
tokenizer.json ADDED
@@ -0,0 +1,197 @@
+ {
+   "version": "1.0",
+   "truncation": null,
+   "padding": {
+     "strategy": "BatchLongest",
+     "direction": "Right",
+     "pad_to_multiple_of": null,
+     "pad_id": 0,
+     "pad_type_id": 0,
+     "pad_token": "<pad>"
+   },
+   "added_tokens": [
+     {
+       "id": 0,
+       "content": "<pad>",
+       "single_word": false,
+       "lstrip": false,
+       "rstrip": false,
+       "normalized": false,
+       "special": true
+     },
+     {
+       "id": 1,
+       "content": "<unk>",
+       "single_word": false,
+       "lstrip": false,
+       "rstrip": false,
+       "normalized": false,
+       "special": true
+     },
+     {
+       "id": 2,
+       "content": "<s>",
+       "single_word": false,
+       "lstrip": false,
+       "rstrip": false,
+       "normalized": false,
+       "special": true
+     },
+     {
+       "id": 3,
+       "content": "</s>",
+       "single_word": false,
+       "lstrip": false,
+       "rstrip": false,
+       "normalized": false,
+       "special": true
+     }
+   ],
+   "normalizer": {
+     "type": "NFD"
+   },
+   "pre_tokenizer": {
+     "type": "Sequence",
+     "pretokenizers": [
+       {
+         "type": "Whitespace"
+       },
+       {
+         "type": "Split",
+         "pattern": {
+           "Regex": "[\\[\\]\\(\\)\\.]"
+         },
+         "behavior": "MergedWithPrevious",
+         "invert": false
+       },
+       {
+         "type": "Split",
+         "pattern": {
+           "Regex": "Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|H"
+         },
+         "behavior": "Isolated",
+         "invert": false
+       },
+       {
+         "type": "Split",
+         "pattern": {
+           "Regex": "=|#|-|\\+|\\\\|\\/|:|~|@|\\?|>|\\*|\\$|\\%[0-9]{2}|[0-9]"
+         },
+         "behavior": "MergedWithPrevious",
+         "invert": false
+       }
+     ]
+   },
+   "post_processor": {
+     "type": "TemplateProcessing",
+     "single": [
+       {
+         "Sequence": {
+           "id": "A",
+           "type_id": 0
+         }
+       },
+       {
+         "SpecialToken": {
+           "id": "<s>",
+           "type_id": 0
+         }
+       }
+     ],
+     "pair": [
+       {
+         "Sequence": {
+           "id": "A",
+           "type_id": 0
+         }
+       },
+       {
+         "SpecialToken": {
+           "id": "<s>",
+           "type_id": 0
+         }
+       },
+       {
+         "Sequence": {
+           "id": "B",
+           "type_id": 1
+         }
+       },
+       {
+         "SpecialToken": {
+           "id": "</s>",
+           "type_id": 1
+         }
+       }
+     ],
+     "special_tokens": {
+       "</s>": {
+         "id": "</s>",
+         "ids": [
+           3
+         ],
+         "tokens": [
+           "</s>"
+         ]
+       },
+       "<s>": {
+         "id": "<s>",
+         "ids": [
+           2
+         ],
+         "tokens": [
+           "<s>"
+         ]
+       }
+     }
+   },
+   "decoder": null,
+   "model": {
+     "type": "WordLevel",
+     "vocab": {
+       "<pad>": 0,
+       "<unk>": 1,
+       "<s>": 2,
+       "</s>": 3,
+       "C": 4,
+       "(": 5,
+       ")": 6,
+       "H": 7,
+       "O": 8,
+       "1": 9,
+       "c": 10,
+       "N": 11,
+       "@": 12,
+       "=": 13,
+       "[": 14,
+       "]": 15,
+       "n": 16,
+       "2": 17,
+       "3": 18,
+       "S": 19,
+       "F": 20,
+       "o": 21,
+       "#": 22,
+       "s": 23,
+       "*": 24,
+       "Cl": 25,
+       "-": 26,
+       "/": 27,
+       "4": 28,
+       "5": 29,
+       "Br": 30,
+       "\\": 31,
+       "6": 32,
+       "+": 33,
+       "8": 34,
+       "I": 35,
+       "9": 36,
+       "0": 37,
+       "7": 38,
+       "B": 39,
+       "i": 40,
+       "P": 41
+     },
+     "unk_token": "<unk>"
+   }
+ }
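
The tokenizer.json added above defines a word-level SMILES tokenizer: the pre-tokenizer is set up to split a SMILES string into single atoms (isolating multi-character symbols such as Cl and Br), bond symbols, and ring-closure digits that match the WordLevel vocab, and the post-processor appends a trailing <s>. A minimal usage sketch, not part of this commit: it assumes the file has been downloaded locally as tokenizer.json and that the standard Hugging Face tokenizers package is installed; the benzene SMILES input is purely illustrative.

from tokenizers import Tokenizer

# Hypothetical local path; the file from this commit would need to be downloaded first.
tok = Tokenizer.from_file("tokenizer.json")

# Illustrative SMILES input (benzene, Kekulé form); any SMILES string is handled the same way.
enc = tok.encode("C1=CC=CC=C1")
print(enc.tokens)  # per-symbol SMILES tokens plus the trailing "<s>" added by the post-processor
print(enc.ids)     # ids from the WordLevel vocab above; symbols outside the vocab map to "<unk>"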