TinyPixel committed on
Commit
685a78d
·
1 Parent(s): a761ef8

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. merges.txt +0 -0
  2. special_tokens_map.json +21 -0
  3. tokenizer.json +0 -0
  4. tokenizer_config.json +93 -119
  5. vocab.json +0 -0
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json CHANGED
@@ -1,4 +1,25 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "bos_token": "<|endoftext|>",
3
  "eos_token": "<|endoftext|>",
4
  "unk_token": "<|endoftext|>"
 
1
  {
2
+ "additional_special_tokens": [
3
+ "<|endoftext|>",
4
+ "<fim_prefix>",
5
+ "<fim_middle>",
6
+ "<fim_suffix>",
7
+ "<fim_pad>",
8
+ "<filename>",
9
+ "<gh_stars>",
10
+ "<issue_start>",
11
+ "<issue_comment>",
12
+ "<issue_closed>",
13
+ "<jupyter_start>",
14
+ "<jupyter_text>",
15
+ "<jupyter_code>",
16
+ "<jupyter_output>",
17
+ "<empty_output>",
18
+ "<commit_before>",
19
+ "<commit_msg>",
20
+ "<commit_after>",
21
+ "<reponame>"
22
+ ],
23
  "bos_token": "<|endoftext|>",
24
  "eos_token": "<|endoftext|>",
25
  "unk_token": "<|endoftext|>"
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -10,202 +10,176 @@
10
  "special": true
11
  },
12
  "1": {
13
- "content": "<|padding|>",
14
  "lstrip": false,
15
  "normalized": false,
16
  "rstrip": false,
17
  "single_word": false,
18
  "special": true
19
  },
20
- "50254": {
21
- "content": " ",
22
  "lstrip": false,
23
- "normalized": true,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": false
27
- },
28
- "50255": {
29
- "content": " ",
30
- "lstrip": false,
31
- "normalized": true,
32
- "rstrip": false,
33
- "single_word": false,
34
- "special": false
35
- },
36
- "50256": {
37
- "content": " ",
38
- "lstrip": false,
39
- "normalized": true,
40
- "rstrip": false,
41
- "single_word": false,
42
- "special": false
43
- },
44
- "50257": {
45
- "content": " ",
46
- "lstrip": false,
47
- "normalized": true,
48
- "rstrip": false,
49
- "single_word": false,
50
- "special": false
51
- },
52
- "50258": {
53
- "content": " ",
54
- "lstrip": false,
55
- "normalized": true,
56
- "rstrip": false,
57
- "single_word": false,
58
- "special": false
59
- },
60
- "50259": {
61
- "content": " ",
62
- "lstrip": false,
63
- "normalized": true,
64
- "rstrip": false,
65
- "single_word": false,
66
- "special": false
67
- },
68
- "50260": {
69
- "content": " ",
70
- "lstrip": false,
71
- "normalized": true,
72
  "rstrip": false,
73
  "single_word": false,
74
- "special": false
75
  },
76
- "50261": {
77
- "content": " ",
78
  "lstrip": false,
79
- "normalized": true,
80
  "rstrip": false,
81
  "single_word": false,
82
- "special": false
83
  },
84
- "50262": {
85
- "content": " ",
86
  "lstrip": false,
87
- "normalized": true,
88
  "rstrip": false,
89
  "single_word": false,
90
- "special": false
91
  },
92
- "50263": {
93
- "content": " ",
94
  "lstrip": false,
95
- "normalized": true,
96
  "rstrip": false,
97
  "single_word": false,
98
- "special": false
99
  },
100
- "50264": {
101
- "content": " ",
102
  "lstrip": false,
103
- "normalized": true,
104
  "rstrip": false,
105
  "single_word": false,
106
- "special": false
107
  },
108
- "50265": {
109
- "content": " ",
110
  "lstrip": false,
111
- "normalized": true,
112
  "rstrip": false,
113
  "single_word": false,
114
- "special": false
115
  },
116
- "50266": {
117
- "content": " ",
118
  "lstrip": false,
119
- "normalized": true,
120
  "rstrip": false,
121
  "single_word": false,
122
- "special": false
123
  },
124
- "50267": {
125
- "content": " ",
126
  "lstrip": false,
127
- "normalized": true,
128
  "rstrip": false,
129
  "single_word": false,
130
- "special": false
131
  },
132
- "50268": {
133
- "content": " ",
134
  "lstrip": false,
135
- "normalized": true,
136
  "rstrip": false,
137
  "single_word": false,
138
- "special": false
139
  },
140
- "50269": {
141
- "content": " ",
142
  "lstrip": false,
143
- "normalized": true,
144
  "rstrip": false,
145
  "single_word": false,
146
- "special": false
147
  },
148
- "50270": {
149
- "content": " ",
150
  "lstrip": false,
151
- "normalized": true,
152
  "rstrip": false,
153
  "single_word": false,
154
- "special": false
155
  },
156
- "50271": {
157
- "content": " ",
158
  "lstrip": false,
159
- "normalized": true,
160
  "rstrip": false,
161
  "single_word": false,
162
- "special": false
163
  },
164
- "50272": {
165
- "content": " ",
166
  "lstrip": false,
167
- "normalized": true,
168
  "rstrip": false,
169
  "single_word": false,
170
- "special": false
171
  },
172
- "50273": {
173
- "content": " ",
174
  "lstrip": false,
175
- "normalized": true,
176
  "rstrip": false,
177
  "single_word": false,
178
- "special": false
179
  },
180
- "50274": {
181
- "content": " ",
182
  "lstrip": false,
183
- "normalized": true,
184
  "rstrip": false,
185
  "single_word": false,
186
- "special": false
187
  },
188
- "50275": {
189
- "content": " ",
190
  "lstrip": false,
191
- "normalized": true,
192
  "rstrip": false,
193
  "single_word": false,
194
- "special": false
195
  },
196
- "50276": {
197
- "content": " ",
198
  "lstrip": false,
199
- "normalized": true,
200
  "rstrip": false,
201
  "single_word": false,
202
- "special": false
203
  }
204
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
  "bos_token": "<|endoftext|>",
206
  "clean_up_tokenization_spaces": true,
207
  "eos_token": "<|endoftext|>",
208
  "model_max_length": 1000000000000000019884624838656,
209
- "tokenizer_class": "GPTNeoXTokenizer",
210
- "unk_token": "<|endoftext|>"
 
211
  }
 
10
  "special": true
11
  },
12
  "1": {
13
+ "content": "<fim_prefix>",
14
  "lstrip": false,
15
  "normalized": false,
16
  "rstrip": false,
17
  "single_word": false,
18
  "special": true
19
  },
20
+ "2": {
21
+ "content": "<fim_middle>",
22
  "lstrip": false,
23
+ "normalized": false,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  "rstrip": false,
25
  "single_word": false,
26
+ "special": true
27
  },
28
+ "3": {
29
+ "content": "<fim_suffix>",
30
  "lstrip": false,
31
+ "normalized": false,
32
  "rstrip": false,
33
  "single_word": false,
34
+ "special": true
35
  },
36
+ "4": {
37
+ "content": "<fim_pad>",
38
  "lstrip": false,
39
+ "normalized": false,
40
  "rstrip": false,
41
  "single_word": false,
42
+ "special": true
43
  },
44
+ "5": {
45
+ "content": "<filename>",
46
  "lstrip": false,
47
+ "normalized": false,
48
  "rstrip": false,
49
  "single_word": false,
50
+ "special": true
51
  },
52
+ "6": {
53
+ "content": "<gh_stars>",
54
  "lstrip": false,
55
+ "normalized": false,
56
  "rstrip": false,
57
  "single_word": false,
58
+ "special": true
59
  },
60
+ "7": {
61
+ "content": "<issue_start>",
62
  "lstrip": false,
63
+ "normalized": false,
64
  "rstrip": false,
65
  "single_word": false,
66
+ "special": true
67
  },
68
+ "8": {
69
+ "content": "<issue_comment>",
70
  "lstrip": false,
71
+ "normalized": false,
72
  "rstrip": false,
73
  "single_word": false,
74
+ "special": true
75
  },
76
+ "9": {
77
+ "content": "<issue_closed>",
78
  "lstrip": false,
79
+ "normalized": false,
80
  "rstrip": false,
81
  "single_word": false,
82
+ "special": true
83
  },
84
+ "10": {
85
+ "content": "<jupyter_start>",
86
  "lstrip": false,
87
+ "normalized": false,
88
  "rstrip": false,
89
  "single_word": false,
90
+ "special": true
91
  },
92
+ "11": {
93
+ "content": "<jupyter_text>",
94
  "lstrip": false,
95
+ "normalized": false,
96
  "rstrip": false,
97
  "single_word": false,
98
+ "special": true
99
  },
100
+ "12": {
101
+ "content": "<jupyter_code>",
102
  "lstrip": false,
103
+ "normalized": false,
104
  "rstrip": false,
105
  "single_word": false,
106
+ "special": true
107
  },
108
+ "13": {
109
+ "content": "<jupyter_output>",
110
  "lstrip": false,
111
+ "normalized": false,
112
  "rstrip": false,
113
  "single_word": false,
114
+ "special": true
115
  },
116
+ "14": {
117
+ "content": "<empty_output>",
118
  "lstrip": false,
119
+ "normalized": false,
120
  "rstrip": false,
121
  "single_word": false,
122
+ "special": true
123
  },
124
+ "15": {
125
+ "content": "<commit_before>",
126
  "lstrip": false,
127
+ "normalized": false,
128
  "rstrip": false,
129
  "single_word": false,
130
+ "special": true
131
  },
132
+ "16": {
133
+ "content": "<commit_msg>",
134
  "lstrip": false,
135
+ "normalized": false,
136
  "rstrip": false,
137
  "single_word": false,
138
+ "special": true
139
  },
140
+ "17": {
141
+ "content": "<commit_after>",
142
  "lstrip": false,
143
+ "normalized": false,
144
  "rstrip": false,
145
  "single_word": false,
146
+ "special": true
147
  },
148
+ "18": {
149
+ "content": "<reponame>",
150
  "lstrip": false,
151
+ "normalized": false,
152
  "rstrip": false,
153
  "single_word": false,
154
+ "special": true
155
  }
156
  },
157
+ "additional_special_tokens": [
158
+ "<|endoftext|>",
159
+ "<fim_prefix>",
160
+ "<fim_middle>",
161
+ "<fim_suffix>",
162
+ "<fim_pad>",
163
+ "<filename>",
164
+ "<gh_stars>",
165
+ "<issue_start>",
166
+ "<issue_comment>",
167
+ "<issue_closed>",
168
+ "<jupyter_start>",
169
+ "<jupyter_text>",
170
+ "<jupyter_code>",
171
+ "<jupyter_output>",
172
+ "<empty_output>",
173
+ "<commit_before>",
174
+ "<commit_msg>",
175
+ "<commit_after>",
176
+ "<reponame>"
177
+ ],
178
  "bos_token": "<|endoftext|>",
179
  "clean_up_tokenization_spaces": true,
180
  "eos_token": "<|endoftext|>",
181
  "model_max_length": 1000000000000000019884624838656,
182
+ "tokenizer_class": "GPT2Tokenizer",
183
+ "unk_token": "<|endoftext|>",
184
+ "vocab_size": 49152
185
  }
vocab.json ADDED
The diff for this file is too large to render. See raw diff