manaestras commited on
Commit
5791ecf
·
verified ·
1 Parent(s): cbca738

Upload tokenizer_config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +6 -13
tokenizer_config.json CHANGED
@@ -1,25 +1,18 @@
1
  {
2
- "added_tokens_decoder": {},
3
- "additional_special_tokens": [
4
- "<|startoftext|>",
5
- "<|extra_0|>",
6
- "<|extra_4|>",
7
- "<|extra_5|>",
8
- "<|eos|>"
9
- ],
10
  "architectures": [
11
  "GPT2LMHeadModel"
12
  ],
 
 
13
  "auto_map": {
14
  "AutoTokenizer": [
15
  "tokenization_hy.HYTokenizer",
16
  null
17
- ]
18
  },
19
- "chat_template": "{% set context = {'has_head': true} %}{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = message['content'] %}{% if loop.index0 == 0 %}{% if content == '' %}{% set _ = context.update({'has_head': false}) %}{% elif message['role'] == 'system' %}{% set content = '<|startoftext|>' + content + '<|extra_4|>' %}{% endif %}{% endif %}{% if message['role'] == 'user' %}{% if loop.index0 == 1 and not context.has_head %}{% set content = '<|startoftext|>' + content %}{% endif %}{% if loop.index0 == 1 and context.has_head %}{% set content = content + '<|extra_0|>' %}{% else %}{% set content = '<|startoftext|>' + content + '<|extra_0|>' %}{% endif %}{% elif message['role'] == 'assistant' %}{% set content = content + '<|eos|>' %}{% endif %}{{ content }}{% endfor %}",
20
- "clean_up_tokenization_spaces": true,
21
- "model_max_length": 1048576,
22
  "model_type": "gpt2",
 
23
  "pad_token": "<|pad|>",
24
- "tokenizer_class": "HYTokenizer"
25
  }
 
1
  {
 
 
 
 
 
 
 
 
2
  "architectures": [
3
  "GPT2LMHeadModel"
4
  ],
5
+ "model_max_length": 1048576,
6
+ "tokenizer_class": "HYTokenizer",
7
  "auto_map": {
8
  "AutoTokenizer": [
9
  "tokenization_hy.HYTokenizer",
10
  null
11
+ ]
12
  },
13
+ "eos_token": "<|eos|>",
 
 
14
  "model_type": "gpt2",
15
+ "additional_special_tokens": ["<|startoftext|>", "<|extra_0|>", "<|extra_4|>", "<|extra_5|>", "<|eos|>"],
16
  "pad_token": "<|pad|>",
17
+ "chat_template": "{% set loop_messages = messages %}\n{% if tools %}\n {% set weekday_map = {'Monday': '星期一', 'Tuesday': '星期二', 'Wednesday': '星期三', 'Thursday': '星期四', 'Friday': '星期五', 'Saturday': '星期六', 'Sunday': '星期日'} %}\n {% set weekday_cn = weekday_map[strftime_now('%A')] %}\n {% set datetime_str = strftime_now('%Y-%m-%d %H:%M:%S') %}\n {% set datetime_str = datetime_str + ' ' + weekday_cn %}\n {% for message in loop_messages %}\n {% if 'content' in message %}\n {% set content = message['content'] %}\n {% else %}\n {% set content = '' %}\n {% endif %}\n {% if loop.index0 == 0 %}\n {% set content_tmp = '你是一位函数组合专家。你会得到一个问题和一组可能的函数。根据问题,你需要进行一个或多个函数/工具调用以实现目的。\n如果没有一个函数可以使用,请直接使用自然语言回复用户,以助手:开头。\n如果给定的问题缺少函数所需的参数,请使用自然语言进行提问,向用户询问必要信息,以助手:开头。\n如果调用结果已经足够回答用户问题,请对历史结果进行总结,使用自然语言回复用户,以助手:开头。\n你应该只在工具调用部分返回函数调用。如果你决定调用任何函数,你必须将其格式化为<tool_calls>[{\"name\": \"func_name1\", \"arguments\": {\"argument1\": \"value1\", \"argument2\": \"value2\"}},...]</tool_calls>。你不应该在回复中包含任何其他文本。以下是你可以调用的函数列表,格式为JSON。\n' %}\n {% set content_tmp = content_tmp + '\n' + tools | tojson + '\n' %}\n {% if message['role'] == 'system' %}\n {% set content_tmp = content_tmp + '\n额外要求:\n' + content + '\n\n如果你决定返回函数调用,请将其格式化为<tool_calls>[{\"name\": \"func_name1\", \"arguments\": {\"argument1\": \"value1\", \"argument2\": \"value2\"}},...]</tool_calls>,不得包含其他文本。如果额外要求里有格式要求,请忽略,以此处为准。\n否则,请参考开头说的三种情况,以助手:开头进行回复。\n\n如果额外要求里有时间信息,就以额外要求里的时间为准,否则,参考当前时间:' + datetime_str %}\n {% set content = '<|startoftext|>' + content_tmp + '<|extra_4|>' %}\n {% elif message['role'] == 'user' %}\n {% set content_tmp = content_tmp + '\n如果你决定返回函数调用,请将其格式化为<tool_calls>[{\"name\": \"func_name1\", \"arguments\": {\"argument1\": \"value1\", \"argument2\": \"value2\"}},...]</tool_calls>,不得包含其他文本。\n否则,请参考开头说的三种情况,以助手:开头进行回复。\n\n当前时间:' + datetime_str %}\n {% set content_tmp = '<|startoftext|>' + content_tmp + '<|extra_4|>'%}\n {% set content = content_tmp + '用户:' + content + '<|extra_0|>' %}\n {% endif %}\n {% else %}\n {% if message['role'] == 'user' %}\n {% set content = '用户:' + content + '<|extra_0|>' %}\n {% elif message['role'] == 'assistant' %}\n {% if 'tool_calls' in message %}\n {% set tool_calls = message['tool_calls'] %}\n {% set ns = namespace(tool_calls=\"[\") %}\n {% for tool_call in tool_calls %}\n {% set function = tool_call['function'] %}\n {% set name = function['name'] %}\n {% set ns.tool_calls = ns.tool_calls + '{\"name\": \"' + name + '\", '%}\n {% set arguments = function['arguments'] %}\n {% if arguments is not string %}\n {% set arguments = arguments | tojson %}\n {% endif %}\n {% set ns.tool_calls = ns.tool_calls + '\"arguments\": ' + arguments + '}' %}\n {% if not loop.last %}\n {% set ns.tool_calls = ns.tool_calls + ', '%}\n {% endif %}\n {% endfor %}\n {% set ns.tool_calls = ns.tool_calls + ']' %}\n {% set content = content + '<tool_calls>' + ns.tool_calls + '</tool_calls>' %}\n {% else %}\n {% set content = '助手:' + content %}\n {% endif %}\n {% set content = content + '<|eos|>' %}\n {% elif message['role'] == 'tool' %}\n {% if content is not string %}\n {set content = content | tojson }\n {% endif %}\n {% set content = '<tool_response>' + content + '</tool_response>' %}\n {% set content = content + '<|extra_0|>' %}\n {% endif %}\n {% endif %}\n {{- content -}}\n {% endfor %}\n{% else %}\n {% set context = {'has_head': true} %}\n {% for message in loop_messages %}\n {% if 'content' in message %}\n {% set content = message['content'] %}\n {% else %}\n {% set content = '' %}\n {% endif %}\n {% if loop.index0 == 0 %}\n {% if content == '' %}\n {% set _ = context.update({'has_head': false}) %}\n {% elif message['role'] == 'system' %}\n {% set content = '<|startoftext|>' + content + '<|extra_4|>' %}\n {% endif %}\n {% endif %}\n {% if message['role'] == 'user' %}\n {% if loop.index0 == 1 and not context.has_head %}\n {% set content = '<|startoftext|>' + content %}\n {% endif %}\n {% if loop.index0 == 1 and context.has_head %}\n {% set content = content + '<|extra_0|>' %}\n {% else %}\n {% set content = '<|startoftext|>' + content + '<|extra_0|>' %}\n {% endif %}\n {% elif message['role'] == 'assistant' %}\n {% set content = content + '<|eos|>' %}\n {% elif message['role'] == 'tool' %}\n {% set content = content + '<|extra_0|>' %}\n {% endif %}\n {{- content -}}\n {% endfor %}\n{% endif %}\n{%- if enable_thinking is defined and enable_thinking is false %}\n {{- '<think>\\n\\n</think>\\n' }}\n{%- endif %}"
18
  }