Guilherme34
committed on
Commit
•
46f2550
1
Parent(s):
d181e03
Update tokenizer_config.json
Browse files
- tokenizer_config.json +13 -14
tokenizer_config.json
CHANGED
@@ -2049,17 +2049,16 @@
|
|
2049 |
"special": true
|
2050 |
}
|
2051 |
},
|
2052 |
-
|
2053 |
-
|
2054 |
-
|
2055 |
-
|
2056 |
-
|
2057 |
-
|
2058 |
-
|
2059 |
-
|
2060 |
-
|
2061 |
-
|
2062 |
-
|
2063 |
-
|
2064 |
-
|
2065 |
-
}
|
|
|
2049 |
"special": true
|
2050 |
}
|
2051 |
},
|
2052 |
+
"bos_token": "<|begin_of_text|>",
|
2053 |
+
"chat_template": "{{- bos_token }}\n{%- for message in messages %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n' }}\n {{- message['content'] + eos_token + '\n' }}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\n' }}\n{%- endif %}\n{%- if add_reasoning_prompt %}\n {{- '<|start_header_id|>reasoning<|end_header_id|>\n' }}\n{%- endif %}",
|
2054 |
+
"clean_up_tokenization_spaces": true,
|
2055 |
+
"eos_token": "<|eot_id|>",
|
2056 |
+
"model_input_names": [
|
2057 |
+
"input_ids",
|
2058 |
+
"attention_mask"
|
2059 |
+
],
|
2060 |
+
"model_max_length": 131072,
|
2061 |
+
"pad_token": "<|eot_id|>",
|
2062 |
+
"padding_side": "right",
|
2063 |
+
"tokenizer_class": "PreTrainedTokenizerFast"
|
2064 |
+
}
|
|