Isaak Carter Augustus committed on
Commit
89a24fd
1 Parent(s): 3940275

Upload tokenizer

Browse files
added_tokens.json CHANGED
@@ -1,27 +1,28 @@
1
  {
2
- "<|assistant|>": 151651,
3
- "<|context|>": 151659,
4
- "<|current_states|>": 151658,
5
  "<|endoftext|>": 151643,
6
- "<|function_call|>": 151652,
7
- "<|function_response|>": 151653,
8
  "<|functions|>": 151647,
9
- "<|gökdeniz|>": 151648,
10
- "<|home_state|>": 151657,
11
  "<|im_end|>": 151645,
12
  "<|im_start|>": 151644,
13
- "<|image|>": 151654,
14
- "<|josie|>": 151650,
15
- "<|long_term_memory|>": 151655,
16
- "<|short_term_memory|>": 151656,
17
  "<|startoftext|>": 151646,
18
- "<|user|>": 151649,
19
- "Gökdeniz": 151661,
20
- "Gökdeniz Gülmez": 151660,
21
- "Gülmez": 151662,
22
- "J.O.S.I.E.": 151664,
23
- "JOSIE": 151663,
24
- "Josie": 151665,
25
- "Just an Outstandingly Smart and Intelligent Entity": 151667,
26
- "josie": 151666
 
27
  }
 
1
  {
2
+ "<|assistant|>": 151652,
3
+ "<|context|>": 151660,
4
+ "<|current_states|>": 151659,
5
  "<|endoftext|>": 151643,
6
+ "<|function_call|>": 151653,
7
+ "<|function_response|>": 151654,
8
  "<|functions|>": 151647,
9
+ "<|gökdeniz|>": 151649,
10
+ "<|home_state|>": 151658,
11
  "<|im_end|>": 151645,
12
  "<|im_start|>": 151644,
13
+ "<|image|>": 151655,
14
+ "<|josie|>": 151651,
15
+ "<|long_term_memory|>": 151656,
16
+ "<|short_term_memory|>": 151657,
17
  "<|startoftext|>": 151646,
18
+ "<|system|>": 151648,
19
+ "<|user|>": 151650,
20
+ "Gökdeniz": 151662,
21
+ "Gökdeniz Gülmez": 151661,
22
+ "Gülmez": 151663,
23
+ "J.O.S.I.E.": 151665,
24
+ "JOSIE": 151664,
25
+ "Josie": 151666,
26
+ "Just an Outstandingly Smart and Intelligent Entity": 151668,
27
+ "josie": 151667
28
  }
special_tokens_map.json CHANGED
@@ -7,6 +7,13 @@
7
  "rstrip": false,
8
  "single_word": false
9
  },
 
 
 
 
 
 
 
10
  {
11
  "content": "<|gökdeniz|>",
12
  "lstrip": false,
 
7
  "rstrip": false,
8
  "single_word": false
9
  },
10
+ {
11
+ "content": "<|system|>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ },
17
  {
18
  "content": "<|gökdeniz|>",
19
  "lstrip": false,
tokenizer.json CHANGED
@@ -50,7 +50,7 @@
50
  },
51
  {
52
  "id": 151648,
53
- "content": "<|gökdeniz|>",
54
  "single_word": false,
55
  "lstrip": false,
56
  "rstrip": false,
@@ -59,7 +59,7 @@
59
  },
60
  {
61
  "id": 151649,
62
- "content": "<|user|>",
63
  "single_word": false,
64
  "lstrip": false,
65
  "rstrip": false,
@@ -68,7 +68,7 @@
68
  },
69
  {
70
  "id": 151650,
71
- "content": "<|josie|>",
72
  "single_word": false,
73
  "lstrip": false,
74
  "rstrip": false,
@@ -77,7 +77,7 @@
77
  },
78
  {
79
  "id": 151651,
80
- "content": "<|assistant|>",
81
  "single_word": false,
82
  "lstrip": false,
83
  "rstrip": false,
@@ -86,7 +86,7 @@
86
  },
87
  {
88
  "id": 151652,
89
- "content": "<|function_call|>",
90
  "single_word": false,
91
  "lstrip": false,
92
  "rstrip": false,
@@ -95,7 +95,7 @@
95
  },
96
  {
97
  "id": 151653,
98
- "content": "<|function_response|>",
99
  "single_word": false,
100
  "lstrip": false,
101
  "rstrip": false,
@@ -104,7 +104,7 @@
104
  },
105
  {
106
  "id": 151654,
107
- "content": "<|image|>",
108
  "single_word": false,
109
  "lstrip": false,
110
  "rstrip": false,
@@ -113,7 +113,7 @@
113
  },
114
  {
115
  "id": 151655,
116
- "content": "<|long_term_memory|>",
117
  "single_word": false,
118
  "lstrip": false,
119
  "rstrip": false,
@@ -122,7 +122,7 @@
122
  },
123
  {
124
  "id": 151656,
125
- "content": "<|short_term_memory|>",
126
  "single_word": false,
127
  "lstrip": false,
128
  "rstrip": false,
@@ -131,7 +131,7 @@
131
  },
132
  {
133
  "id": 151657,
134
- "content": "<|home_state|>",
135
  "single_word": false,
136
  "lstrip": false,
137
  "rstrip": false,
@@ -140,7 +140,7 @@
140
  },
141
  {
142
  "id": 151658,
143
- "content": "<|current_states|>",
144
  "single_word": false,
145
  "lstrip": false,
146
  "rstrip": false,
@@ -149,7 +149,7 @@
149
  },
150
  {
151
  "id": 151659,
152
- "content": "<|context|>",
153
  "single_word": false,
154
  "lstrip": false,
155
  "rstrip": false,
@@ -158,6 +158,15 @@
158
  },
159
  {
160
  "id": 151660,
 
 
 
 
 
 
 
 
 
161
  "content": "Gökdeniz Gülmez",
162
  "single_word": false,
163
  "lstrip": false,
@@ -166,7 +175,7 @@
166
  "special": false
167
  },
168
  {
169
- "id": 151661,
170
  "content": "Gökdeniz",
171
  "single_word": false,
172
  "lstrip": false,
@@ -175,7 +184,7 @@
175
  "special": false
176
  },
177
  {
178
- "id": 151662,
179
  "content": "Gülmez",
180
  "single_word": false,
181
  "lstrip": false,
@@ -184,7 +193,7 @@
184
  "special": false
185
  },
186
  {
187
- "id": 151663,
188
  "content": "JOSIE",
189
  "single_word": false,
190
  "lstrip": false,
@@ -193,7 +202,7 @@
193
  "special": false
194
  },
195
  {
196
- "id": 151664,
197
  "content": "J.O.S.I.E.",
198
  "single_word": false,
199
  "lstrip": false,
@@ -202,7 +211,7 @@
202
  "special": false
203
  },
204
  {
205
- "id": 151665,
206
  "content": "Josie",
207
  "single_word": false,
208
  "lstrip": false,
@@ -211,7 +220,7 @@
211
  "special": false
212
  },
213
  {
214
- "id": 151666,
215
  "content": "josie",
216
  "single_word": false,
217
  "lstrip": false,
@@ -220,7 +229,7 @@
220
  "special": false
221
  },
222
  {
223
- "id": 151667,
224
  "content": "Just an Outstandingly Smart and Intelligent Entity",
225
  "single_word": false,
226
  "lstrip": false,
 
50
  },
51
  {
52
  "id": 151648,
53
+ "content": "<|system|>",
54
  "single_word": false,
55
  "lstrip": false,
56
  "rstrip": false,
 
59
  },
60
  {
61
  "id": 151649,
62
+ "content": "<|gökdeniz|>",
63
  "single_word": false,
64
  "lstrip": false,
65
  "rstrip": false,
 
68
  },
69
  {
70
  "id": 151650,
71
+ "content": "<|user|>",
72
  "single_word": false,
73
  "lstrip": false,
74
  "rstrip": false,
 
77
  },
78
  {
79
  "id": 151651,
80
+ "content": "<|josie|>",
81
  "single_word": false,
82
  "lstrip": false,
83
  "rstrip": false,
 
86
  },
87
  {
88
  "id": 151652,
89
+ "content": "<|assistant|>",
90
  "single_word": false,
91
  "lstrip": false,
92
  "rstrip": false,
 
95
  },
96
  {
97
  "id": 151653,
98
+ "content": "<|function_call|>",
99
  "single_word": false,
100
  "lstrip": false,
101
  "rstrip": false,
 
104
  },
105
  {
106
  "id": 151654,
107
+ "content": "<|function_response|>",
108
  "single_word": false,
109
  "lstrip": false,
110
  "rstrip": false,
 
113
  },
114
  {
115
  "id": 151655,
116
+ "content": "<|image|>",
117
  "single_word": false,
118
  "lstrip": false,
119
  "rstrip": false,
 
122
  },
123
  {
124
  "id": 151656,
125
+ "content": "<|long_term_memory|>",
126
  "single_word": false,
127
  "lstrip": false,
128
  "rstrip": false,
 
131
  },
132
  {
133
  "id": 151657,
134
+ "content": "<|short_term_memory|>",
135
  "single_word": false,
136
  "lstrip": false,
137
  "rstrip": false,
 
140
  },
141
  {
142
  "id": 151658,
143
+ "content": "<|home_state|>",
144
  "single_word": false,
145
  "lstrip": false,
146
  "rstrip": false,
 
149
  },
150
  {
151
  "id": 151659,
152
+ "content": "<|current_states|>",
153
  "single_word": false,
154
  "lstrip": false,
155
  "rstrip": false,
 
158
  },
159
  {
160
  "id": 151660,
161
+ "content": "<|context|>",
162
+ "single_word": false,
163
+ "lstrip": false,
164
+ "rstrip": false,
165
+ "normalized": false,
166
+ "special": true
167
+ },
168
+ {
169
+ "id": 151661,
170
  "content": "Gökdeniz Gülmez",
171
  "single_word": false,
172
  "lstrip": false,
 
175
  "special": false
176
  },
177
  {
178
+ "id": 151662,
179
  "content": "Gökdeniz",
180
  "single_word": false,
181
  "lstrip": false,
 
184
  "special": false
185
  },
186
  {
187
+ "id": 151663,
188
  "content": "Gülmez",
189
  "single_word": false,
190
  "lstrip": false,
 
193
  "special": false
194
  },
195
  {
196
+ "id": 151664,
197
  "content": "JOSIE",
198
  "single_word": false,
199
  "lstrip": false,
 
202
  "special": false
203
  },
204
  {
205
+ "id": 151665,
206
  "content": "J.O.S.I.E.",
207
  "single_word": false,
208
  "lstrip": false,
 
211
  "special": false
212
  },
213
  {
214
+ "id": 151666,
215
  "content": "Josie",
216
  "single_word": false,
217
  "lstrip": false,
 
220
  "special": false
221
  },
222
  {
223
+ "id": 151667,
224
  "content": "josie",
225
  "single_word": false,
226
  "lstrip": false,
 
229
  "special": false
230
  },
231
  {
232
+ "id": 151668,
233
  "content": "Just an Outstandingly Smart and Intelligent Entity",
234
  "single_word": false,
235
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -42,7 +42,7 @@
42
  "special": true
43
  },
44
  "151648": {
45
- "content": "<|gökdeniz|>",
46
  "lstrip": false,
47
  "normalized": false,
48
  "rstrip": false,
@@ -50,7 +50,7 @@
50
  "special": true
51
  },
52
  "151649": {
53
- "content": "<|user|>",
54
  "lstrip": false,
55
  "normalized": false,
56
  "rstrip": false,
@@ -58,7 +58,7 @@
58
  "special": true
59
  },
60
  "151650": {
61
- "content": "<|josie|>",
62
  "lstrip": false,
63
  "normalized": false,
64
  "rstrip": false,
@@ -66,7 +66,7 @@
66
  "special": true
67
  },
68
  "151651": {
69
- "content": "<|assistant|>",
70
  "lstrip": false,
71
  "normalized": false,
72
  "rstrip": false,
@@ -74,7 +74,7 @@
74
  "special": true
75
  },
76
  "151652": {
77
- "content": "<|function_call|>",
78
  "lstrip": false,
79
  "normalized": false,
80
  "rstrip": false,
@@ -82,7 +82,7 @@
82
  "special": true
83
  },
84
  "151653": {
85
- "content": "<|function_response|>",
86
  "lstrip": false,
87
  "normalized": false,
88
  "rstrip": false,
@@ -90,7 +90,7 @@
90
  "special": true
91
  },
92
  "151654": {
93
- "content": "<|image|>",
94
  "lstrip": false,
95
  "normalized": false,
96
  "rstrip": false,
@@ -98,7 +98,7 @@
98
  "special": true
99
  },
100
  "151655": {
101
- "content": "<|long_term_memory|>",
102
  "lstrip": false,
103
  "normalized": false,
104
  "rstrip": false,
@@ -106,7 +106,7 @@
106
  "special": true
107
  },
108
  "151656": {
109
- "content": "<|short_term_memory|>",
110
  "lstrip": false,
111
  "normalized": false,
112
  "rstrip": false,
@@ -114,7 +114,7 @@
114
  "special": true
115
  },
116
  "151657": {
117
- "content": "<|home_state|>",
118
  "lstrip": false,
119
  "normalized": false,
120
  "rstrip": false,
@@ -122,7 +122,7 @@
122
  "special": true
123
  },
124
  "151658": {
125
- "content": "<|current_states|>",
126
  "lstrip": false,
127
  "normalized": false,
128
  "rstrip": false,
@@ -130,7 +130,7 @@
130
  "special": true
131
  },
132
  "151659": {
133
- "content": "<|context|>",
134
  "lstrip": false,
135
  "normalized": false,
136
  "rstrip": false,
@@ -138,6 +138,14 @@
138
  "special": true
139
  },
140
  "151660": {
 
 
 
 
 
 
 
 
141
  "content": "Gökdeniz Gülmez",
142
  "lstrip": false,
143
  "normalized": true,
@@ -145,7 +153,7 @@
145
  "single_word": false,
146
  "special": false
147
  },
148
- "151661": {
149
  "content": "Gökdeniz",
150
  "lstrip": false,
151
  "normalized": true,
@@ -153,7 +161,7 @@
153
  "single_word": false,
154
  "special": false
155
  },
156
- "151662": {
157
  "content": "Gülmez",
158
  "lstrip": false,
159
  "normalized": true,
@@ -161,7 +169,7 @@
161
  "single_word": false,
162
  "special": false
163
  },
164
- "151663": {
165
  "content": "JOSIE",
166
  "lstrip": false,
167
  "normalized": true,
@@ -169,7 +177,7 @@
169
  "single_word": false,
170
  "special": false
171
  },
172
- "151664": {
173
  "content": "J.O.S.I.E.",
174
  "lstrip": false,
175
  "normalized": true,
@@ -177,7 +185,7 @@
177
  "single_word": false,
178
  "special": false
179
  },
180
- "151665": {
181
  "content": "Josie",
182
  "lstrip": false,
183
  "normalized": true,
@@ -185,7 +193,7 @@
185
  "single_word": false,
186
  "special": false
187
  },
188
- "151666": {
189
  "content": "josie",
190
  "lstrip": false,
191
  "normalized": true,
@@ -193,7 +201,7 @@
193
  "single_word": false,
194
  "special": false
195
  },
196
- "151667": {
197
  "content": "Just an Outstandingly Smart and Intelligent Entity",
198
  "lstrip": false,
199
  "normalized": true,
@@ -204,6 +212,7 @@
204
  },
205
  "additional_special_tokens": [
206
  "<|functions|>",
 
207
  "<|gökdeniz|>",
208
  "<|user|>",
209
  "<|josie|>",
 
42
  "special": true
43
  },
44
  "151648": {
45
+ "content": "<|system|>",
46
  "lstrip": false,
47
  "normalized": false,
48
  "rstrip": false,
 
50
  "special": true
51
  },
52
  "151649": {
53
+ "content": "<|gökdeniz|>",
54
  "lstrip": false,
55
  "normalized": false,
56
  "rstrip": false,
 
58
  "special": true
59
  },
60
  "151650": {
61
+ "content": "<|user|>",
62
  "lstrip": false,
63
  "normalized": false,
64
  "rstrip": false,
 
66
  "special": true
67
  },
68
  "151651": {
69
+ "content": "<|josie|>",
70
  "lstrip": false,
71
  "normalized": false,
72
  "rstrip": false,
 
74
  "special": true
75
  },
76
  "151652": {
77
+ "content": "<|assistant|>",
78
  "lstrip": false,
79
  "normalized": false,
80
  "rstrip": false,
 
82
  "special": true
83
  },
84
  "151653": {
85
+ "content": "<|function_call|>",
86
  "lstrip": false,
87
  "normalized": false,
88
  "rstrip": false,
 
90
  "special": true
91
  },
92
  "151654": {
93
+ "content": "<|function_response|>",
94
  "lstrip": false,
95
  "normalized": false,
96
  "rstrip": false,
 
98
  "special": true
99
  },
100
  "151655": {
101
+ "content": "<|image|>",
102
  "lstrip": false,
103
  "normalized": false,
104
  "rstrip": false,
 
106
  "special": true
107
  },
108
  "151656": {
109
+ "content": "<|long_term_memory|>",
110
  "lstrip": false,
111
  "normalized": false,
112
  "rstrip": false,
 
114
  "special": true
115
  },
116
  "151657": {
117
+ "content": "<|short_term_memory|>",
118
  "lstrip": false,
119
  "normalized": false,
120
  "rstrip": false,
 
122
  "special": true
123
  },
124
  "151658": {
125
+ "content": "<|home_state|>",
126
  "lstrip": false,
127
  "normalized": false,
128
  "rstrip": false,
 
130
  "special": true
131
  },
132
  "151659": {
133
+ "content": "<|current_states|>",
134
  "lstrip": false,
135
  "normalized": false,
136
  "rstrip": false,
 
138
  "special": true
139
  },
140
  "151660": {
141
+ "content": "<|context|>",
142
+ "lstrip": false,
143
+ "normalized": false,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": true
147
+ },
148
+ "151661": {
149
  "content": "Gökdeniz Gülmez",
150
  "lstrip": false,
151
  "normalized": true,
 
153
  "single_word": false,
154
  "special": false
155
  },
156
+ "151662": {
157
  "content": "Gökdeniz",
158
  "lstrip": false,
159
  "normalized": true,
 
161
  "single_word": false,
162
  "special": false
163
  },
164
+ "151663": {
165
  "content": "Gülmez",
166
  "lstrip": false,
167
  "normalized": true,
 
169
  "single_word": false,
170
  "special": false
171
  },
172
+ "151664": {
173
  "content": "JOSIE",
174
  "lstrip": false,
175
  "normalized": true,
 
177
  "single_word": false,
178
  "special": false
179
  },
180
+ "151665": {
181
  "content": "J.O.S.I.E.",
182
  "lstrip": false,
183
  "normalized": true,
 
185
  "single_word": false,
186
  "special": false
187
  },
188
+ "151666": {
189
  "content": "Josie",
190
  "lstrip": false,
191
  "normalized": true,
 
193
  "single_word": false,
194
  "special": false
195
  },
196
+ "151667": {
197
  "content": "josie",
198
  "lstrip": false,
199
  "normalized": true,
 
201
  "single_word": false,
202
  "special": false
203
  },
204
+ "151668": {
205
  "content": "Just an Outstandingly Smart and Intelligent Entity",
206
  "lstrip": false,
207
  "normalized": true,
 
212
  },
213
  "additional_special_tokens": [
214
  "<|functions|>",
215
+ "<|system|>",
216
  "<|gökdeniz|>",
217
  "<|user|>",
218
  "<|josie|>",