Pinkstack committed on
Commit
f28859a
·
verified ·
1 Parent(s): 3ff48e9

updated tokenizer to work better

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +33 -1
tokenizer_config.json CHANGED
@@ -136,10 +136,42 @@
136
  "rstrip": false,
137
  "single_word": false,
138
  "special": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  }
140
  },
141
  "bos_token": "<|im_start|>",
142
- "chat_template": "{% if message['role'] == 'system' %}<model_identity>Your name is Superthoughts lite by Pinkstack. You are an open weights AI model released in 2025 with built-in information up to 2024.</model_identity>\n{% endif %}{% if 'role' in messages[0] %}{% for message in messages %}{% if message['role'] == 'user' %}{{'<|im_start|>user\n' + message['content'] + '<|im_end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|im_start|>assistant\n<think>\n' + message['thinking_content'] + '</think>\n<output>\n' + message['content'] + '</output>\n<|im_end|>\n' }}{% else %}{{ '<|im_start|>system\n' + message['content'] + '<|im_end|>\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}{% else %}{% for message in messages %}{% if message['from'] == 'human' %}{{'<|im_start|>user\n' + message['value'] + '<|im_end|>\n'}}{% elif message['from'] == 'assistant' %}{{'<|im_start|>assistant\n<think>\n' + message['thinking_value'] + '</think>\n<output>\n' + message['value'] + '</output>\n<|im_end|>\n' }}{% else %}{{ '<|im_start|>system\n' + message['value'] + '<|im_end|>\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}{% endif %}",
143
  "clean_up_tokenization_spaces": false,
144
  "eos_token": "<|im_end|>",
145
  "extra_special_tokens": {},
 
136
  "rstrip": false,
137
  "single_word": false,
138
  "special": true
139
+ },
140
+ "17": {
141
+ "content": "<think>",
142
+ "lstrip": false,
143
+ "normalized": false,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": true
147
+ },
148
+ "18": {
149
+ "content": "</think>",
150
+ "lstrip": false,
151
+ "normalized": false,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": true
155
+ },
156
+ "19": {
157
+ "content": "<output>",
158
+ "lstrip": false,
159
+ "normalized": false,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": true
163
+ },
164
+ "20": {
165
+ "content": "</output>",
166
+ "lstrip": false,
167
+ "normalized": false,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": true
171
  }
172
  },
173
  "bos_token": "<|im_start|>",
174
+ "chat_template": "{% if 'role' in messages[0] %}{% for message in messages %}{% if message['role'] == 'user' %}{{'<|im_start|>user\n' + message['content'] + '<|im_end|>\n'}}{% elif message['role'] == 'assistant' %}{{'<|im_start|>assistant\n<think>\n' + message['content'] + '\n</think>\n<output>\n' + message['content'] + '\n</output><|im_end|>\n' }}{% else %}{{ '<|im_start|>system\n' + message['content'] + '<|im_end|>\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n<think>\n' }}{% endif %}{% else %}{% for message in messages %}{% if message['from'] == 'human' %}{{'<|im_start|>user\n' + message['value'] + '<|im_end|>\n'}}{% elif message['from'] == 'gpt' %}{{'<|im_start|>assistant\n<think>\n' + message['value'] + '\n</think>\n<output>\n' + message['value'] + '\n</output><|im_end|>\n' }}{% else %}{{ '<|im_start|>system\n' + message['value'] + '<|im_end|>\n' }}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n<think>\n' }}{% endif %}{% endif %}",
175
  "clean_up_tokenization_spaces": false,
176
  "eos_token": "<|im_end|>",
177
  "extra_special_tokens": {},