update functioncall

#16

by kamuy-shennai - opened 30 days ago

base: refs/heads/main

←

from: refs/pr/16

Discussion Files changed

+173

-107

This PR is in draft mode

Files changed (3) hide show

docs/function_call_guide.md +86 -53
docs/function_call_guide_cn.md +86 -53
tokenizer_config.json +1 -1

docs/function_call_guide.md CHANGED Viewed

@@ -18,21 +18,19 @@ from transformers import AutoTokenizer
 def get_default_tools():
     return [
         {
-          {
-            "name": "get_current_weather",
-            "description": "Get the latest weather for a location",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "location": {
-                        "type": "string",
-                        "description": "A certain city, such as Beijing, Shanghai"
-                    }
-                },
-            }
-            "required": ["location"],
-            "type": "object"
           }
         }
     ]
@@ -54,6 +52,22 @@ text = tokenizer.apply_chat_template(
     add_generation_prompt=True,
     tools=tools
 )
 ```
 ## 🛠️ Function Call Definition
@@ -102,9 +116,9 @@ Function calls need to be defined in the `tools` field of the request body. Each
 When processed internally by the model, function definitions are converted to a special format and concatenated to the input text:
 ```
-]~!b[]~b]system ai_setting=MiniMax AI
-MiniMax AI is an AI assistant independently developed by MiniMax. [e~[
-]~b]system tool_setting=tools
 You are provided with these tools:
 <tools>
 {"name": "search_web", "description": "Search function.", "parameters": {"properties": {"query_list": {"description": "Keywords for search, with list element count of 1.", "items": {"type": "string"}, "type": "array"}, "query_tag": {"description": "Classification of the query", "items": {"type": "string"}, "type": "array"}}, "required": ["query_list", "query_tag"], "type": "object"}}
@@ -114,10 +128,10 @@ If you need to call tools, please respond with <tool_calls></tool_calls> XML tag
 <tool_calls>
 {"name": <tool-name>, "arguments": <args-json-object>}
 ...
-</tool_calls>[e~[
-]~b]user name=User
-When were the most recent launch events for OpenAI and Gemini?[e~[
-]~b]ai name=MiniMax AI
 ```
 ### Model Output Format
@@ -193,23 +207,33 @@ def execute_function_call(function_name: str, arguments: dict):
         # Build function execution result
         return {
             "role": "tool",
-            "name": function_name,
-            "content": json.dumps({
-                "location": location,
-                "temperature": "25",
-                "unit": "celsius",
-                "weather": "Sunny"
-            }, ensure_ascii=False)
-        }
     elif function_name == "search_web":
         query_list = arguments.get("query_list", [])
         query_tag = arguments.get("query_tag", [])
         # Simulate search results
         return {
             "role": "tool",
-            "name": function_name,
-            "content": f"Search keywords: {query_list}, Categories: {query_tag}\nSearch results: Relevant information found"
-        }
     return None
 ```
@@ -224,47 +248,56 @@ If the model decides to call `search_web`, we suggest you to return the function
 ```json
 {
-  "data": [
-     {
-       "role": "tool",
-       "name": "search_web",
-       "content": "search_result"
-     }
   ]
 }
 ```
 Corresponding model input format:
 ```
-]~b]tool name=search_web
-search_result[e~[
 ```
 #### Multiple Result
-If the model decides to call `search_web` and `get_current_weather` at the same time, we suggest you to return the multiple function results in the following format, with the `name` field set to "tools", and use the `content` field to contain multiple results.
 ```json
 {
-  "data": [
-     {
-       "role": "tool",
-       "name": "tools",
-       "content": "Tool name: search_web\nTool result: test_result1\n\nTool name: get_current_weather\nTool result: test_result2"
-     }
   ]
 }
 ```
 Corresponding model input format:
 ```
-]~b]tool name=tools
-Tool name: search_web
-Tool result: test_result1
-Tool name: get_current_weather
-Tool result: test_result2[e~[
 ```
-While we suggest following the above formats, as long as the model input is easy to understand, the specific values of `name` and `content` is entirely up to the caller.

 def get_default_tools():
     return [
         {
+          "name": "get_current_weather",
+          "description": "Get the latest weather for a location",
+          "parameters": {
+              "type": "object",
+              "properties": {
+                  "location": {
+                      "type": "string",
+                      "description": "A certain city, such as Beijing, Shanghai"
+                  }
+              },
           }
+          "required": ["location"],
+          "type": "object"
         }
     ]
     add_generation_prompt=True,
     tools=tools
 )
+# Post request
+import requests
+payload = {
+    "model": "MiniMaxAI/MiniMax-M1-40k",
+    "prompt": text,
+    "max_tokens": 4000
+}
+response = requests.post(
+                          "http://localhost:8000/v1/completions",
+                          headers={"Content-Type": "application/json"},
+                          json=payload,
+                          stream=False,
+                        )
+print(response.json()["choices"][0]["text"])
 ```
 ## 🛠️ Function Call Definition
 When processed internally by the model, function definitions are converted to a special format and concatenated to the input text:
 ```
+<begin_of_document><beginning_of_sentence>system ai_setting=MiniMax AI
+MiniMax AI is an AI assistant independently developed by MiniMax. <end_of_sentence>
+<beginning_of_sentence>system tool_setting=tools
 You are provided with these tools:
 <tools>
 {"name": "search_web", "description": "Search function.", "parameters": {"properties": {"query_list": {"description": "Keywords for search, with list element count of 1.", "items": {"type": "string"}, "type": "array"}, "query_tag": {"description": "Classification of the query", "items": {"type": "string"}, "type": "array"}}, "required": ["query_list", "query_tag"], "type": "object"}}
 <tool_calls>
 {"name": <tool-name>, "arguments": <args-json-object>}
 ...
+</tool_calls><end_of_sentence>
+<beginning_of_sentence>user name=User
+When were the most recent launch events for OpenAI and Gemini?<end_of_sentence>
+<beginning_of_sentence>ai name=MiniMax AI
 ```
 ### Model Output Format
         # Build function execution result
         return {
             "role": "tool",
+            "content": [
+              {
+                "name": function_name,
+                "type": "text",
+                "text": json.dumps({
+                    "location": location,
+                    "temperature": "25",
+                    "unit": "celsius",
+                    "weather": "Sunny"
+                }, ensure_ascii=False)
+              }
+            ]
+          }
     elif function_name == "search_web":
         query_list = arguments.get("query_list", [])
         query_tag = arguments.get("query_tag", [])
         # Simulate search results
         return {
             "role": "tool",
+            "content": [
+              {
+                "name": function_name,
+                "type": "text",
+                "text": f"Search keywords: {query_list}, Categories: {query_tag}\nSearch results: Relevant information found"
+              }
+            ]
+          }
     return None
 ```
 ```json
 {
+  "role": "tool",
+  "content": [
+    {
+      "name": "search_web",
+      "type": "text",
+      "text": "test_result"
+    }
   ]
 }
 ```
 Corresponding model input format:
 ```
+<beginning_of_sentence>tool name=tools
+tool name: search_web
+tool result: test_result
+<end_of_sentence>
 ```
 #### Multiple Result
+If the model decides to call `search_web` and `get_current_weather` at the same time, we suggest returning the function results in the following format, using the `content` field to hold one entry per result.
 ```json
 {
+  "role": "tool",
+  "content": [
+    {
+      "name": "search_web",
+      "type": "text",
+      "text": "test_result1"
+    },
+    {
+      "name": "get_current_weather",
+      "type": "text",
+      "text": "test_result2"
+    }
   ]
 }
 ```
 Corresponding model input format:
 ```
+<beginning_of_sentence>tool name=tools
+tool name: search_web
+tool result: test_result1
+tool name: get_current_weather
+tool result: test_result2<end_of_sentence>
 ```
+While we suggest following the above formats, as long as the model input is easy to understand, the specific values of `name` and `text` are entirely up to the caller.

docs/function_call_guide_cn.md CHANGED Viewed

@@ -16,21 +16,19 @@ from transformers import AutoTokenizer
 def get_default_tools():
     return [
         {
-          {
-            "name": "get_current_weather",
-            "description": "Get the latest weather for a location",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "location": {
-                        "type": "string",
-                        "description": "A certain city, such as Beijing, Shanghai"
-                    }
-                },
-            }
-            "required": ["location"],
-            "type": "object"
           }
         }
     ]
@@ -52,6 +50,22 @@ text = tokenizer.apply_chat_template(
     add_generation_prompt=True,
     tools=tools
 )
 ```
 ## 🛠️ 函数调用的定义
@@ -100,9 +114,9 @@ text = tokenizer.apply_chat_template(
 在模型内部处理时，函数定义会被转换为特殊格式并拼接到输入文本中：
 ```
-]~!b[]~b]system ai_setting=MiniMax AI
-MiniMax AI是由上海稀宇科技有限公司（MiniMax）自主研发的AI助理。[e~[
-]~b]system tool_setting=tools
 You are provided with these tools:
 <tools>
 {"name": "search_web", "description": "搜索函数。", "parameters": {"properties": {"query_list": {"description": "进行搜索的关键词，列表元素个数为1。", "items": {"type": "string"}, "type": "array"}, "query_tag": {"description": "query的分类", "items": {"type": "string"}, "type": "array"}}, "required": ["query_list", "query_tag"], "type": "object"}}
@@ -112,10 +126,10 @@ If you need to call tools, please respond with <tool_calls></tool_calls> XML tag
 <tool_calls>
 {"name": <tool-name>, "arguments": <args-json-object>}
 ...
-</tool_calls>[e~[
-]~b]user name=用户
-OpenAI 和 Gemini 的最近一次发布会都是什么时候?[e~[
-]~b]ai name=MiniMax AI
 ```
 ### 模型输出格式
@@ -191,23 +205,33 @@ def execute_function_call(function_name: str, arguments: dict):
         # 构建函数执行结果
         return {
             "role": "tool",
-            "name": function_name,
-            "content": json.dumps({
-                "location": location,
-                "temperature": "25",
-                "unit": "celsius",
-                "weather": "晴朗"
-            }, ensure_ascii=False)
-        }
     elif function_name == "search_web":
         query_list = arguments.get("query_list", [])
         query_tag = arguments.get("query_tag", [])
         # 模拟搜索结果
         return {
             "role": "tool",
-            "name": function_name,
-            "content": f"搜索关键词: {query_list}, 分类: {query_tag}\n搜索结果: 相关信息已找到"
-        }
     return None
 ```
@@ -222,46 +246,55 @@ def execute_function_call(function_name: str, arguments: dict):
 ```json
 {
-  "data": [
-     {
-       "role": "tool",
-       "name": "search_web",
-       "content": "search_result"
-     }
   ]
 }
 ```
 对应如下的模型输入格式：
 ```
-]~b]tool name=search_web
-search_result[e~[
 ```
 #### 多个结果
-假如模型同时调用了 `search_web` 和 `get_current_weather` 函数，您可以参考如下格式添加执行结果，`name` 字段为"tools"，`content`包含多个结果。
 ```json
 {
-  "data": [
-     {
-       "role": "tool",
-       "name": "tools",
-       "content": "Tool name: search_web\nTool result: test_result1\n\nTool name: get_current_weather\nTool result: test_result2"
-     }
   ]
 }
 ```
 对应如下的模型输入格式：
 ```
-]~b]tool name=tools
-Tool name: search_web
-Tool result: test_result1
-Tool name: get_current_weather
-Tool result: test_result2[e~[
 ```
-虽然我们建议您参考以上格式，但只要返回给模型的输入易于理解，`name` 和 `content` 的具体内容完全由您自主决定。

 def get_default_tools():
     return [
         {
+          "name": "get_current_weather",
+          "description": "Get the latest weather for a location",
+          "parameters": {
+              "type": "object",
+              "properties": {
+                  "location": {
+                      "type": "string",
+                      "description": "A certain city, such as Beijing, Shanghai"
+                  }
+              },
           }
+          "required": ["location"],
+          "type": "object"
         }
     ]
     add_generation_prompt=True,
     tools=tools
 )
+# 发送请求
+import requests
+payload = {
+    "model": "MiniMaxAI/MiniMax-M1-40k",
+    "prompt": text,
+    "max_tokens": 4000
+}
+response = requests.post(
+                          "http://localhost:8000/v1/completions",
+                          headers={"Content-Type": "application/json"},
+                          json=payload,
+                          stream=False,
+                        )
+print(response.json()["choices"][0]["text"])
 ```
 ## 🛠️ 函数调用的定义
 在模型内部处理时，函数定义会被转换为特殊格式并拼接到输入文本中：
 ```
+<begin_of_document><beginning_of_sentence>system ai_setting=MiniMax AI
+MiniMax AI是由上海稀宇科技有限公司（MiniMax）自主研发的AI助理。<end_of_sentence>
+<beginning_of_sentence>system tool_setting=tools
 You are provided with these tools:
 <tools>
 {"name": "search_web", "description": "搜索函数。", "parameters": {"properties": {"query_list": {"description": "进行搜索的关键词，列表元素个数为1。", "items": {"type": "string"}, "type": "array"}, "query_tag": {"description": "query的分类", "items": {"type": "string"}, "type": "array"}}, "required": ["query_list", "query_tag"], "type": "object"}}
 <tool_calls>
 {"name": <tool-name>, "arguments": <args-json-object>}
 ...
+</tool_calls><end_of_sentence>
+<beginning_of_sentence>user name=用户
+OpenAI 和 Gemini 的最近一次发布会都是什么时候?<end_of_sentence>
+<beginning_of_sentence>ai name=MiniMax AI
 ```
 ### 模型输出格式
         # 构建函数执行结果
         return {
             "role": "tool",
+            "content": [
+              {
+                "name": function_name,
+                "type": "text",
+                "text": json.dumps({
+                    "location": location,
+                    "temperature": "25",
+                    "unit": "celsius",
+                    "weather": "晴朗"
+                }, ensure_ascii=False)
+              }
+            ]
+          }
     elif function_name == "search_web":
         query_list = arguments.get("query_list", [])
         query_tag = arguments.get("query_tag", [])
         # 模拟搜索结果
         return {
             "role": "tool",
+            "content": [
+              {
+                "name": function_name,
+                "type": "text",
+                "text": f"搜索关键词: {query_list}, 分类: {query_tag}\n搜索结果: 相关信息已找到"
+              }
+            ]
+          }
     return None
 ```
 ```json
 {
+  "role": "tool",
+  "content": [
+    {
+      "name": "search_web",
+      "type": "text",
+      "text": "test_result"
+    }
   ]
 }
 ```
 对应如下的模型输入格式：
 ```
+<beginning_of_sentence>tool name=tools
+tool name: search_web
+tool result: test_result
+<end_of_sentence>
 ```
 #### 多个结果
+假如模型同时调用了 `search_web` 和 `get_current_weather` 函数，您可以参考如下格式添加执行结果，`content`包含多个结果。
 ```json
 {
+  "role": "tool",
+  "content": [
+    {
+      "name": "search_web",
+      "type": "text",
+      "text": "test_result1"
+    },
+    {
+      "name": "get_current_weather",
+      "type": "text",
+      "text": "test_result2"
+    }
   ]
 }
 ```
 对应如下的模型输入格式：
 ```
+<beginning_of_sentence>tool name=tools
+tool name: search_web
+tool result: test_result1
+tool name: get_current_weather
+tool result: test_result2<end_of_sentence>
 ```
+虽然我们建议您参考以上格式，但只要返回给模型的输入易于理解，`name` 和 `text` 的具体内容完全由您自主决定。

tokenizer_config.json CHANGED Viewed

@@ -6,5 +6,5 @@
   "model_max_length": 40960000,
   "tokenizer_class": "GPT2Tokenizer",
   "unk_token": "<end_of_document>",
-  "chat_template": "{{ '<begin_of_document>' -}}{% set ns = namespace(system_prompt='') -%}{% for message in messages -%}{% if message['role'] == 'system' -%}{% set ns.system_prompt = ns.system_prompt + message['content'][0]['text'] -%}{% endif -%}{%- endfor -%}{% if ns.system_prompt != '' -%}{{ '<beginning_of_sentence>system ai_setting=assistant\n' + ns.system_prompt + '<end_of_sentence>\n' -}}{%- endif -%}{% if tools -%}{{ '<beginning_of_sentence>system tool_setting=tools\nYou are provided with these tools:\n<tools>\n' -}}{% for tool in tools -%}{{ tool | tojson ~ '\n' -}}{%- endfor -%}{{ '</tools>\n\nIf you need to call tools, please respond with <tool_calls></tool_calls> XML tags, and provide tool-name and json-object of arguments, following the format below:\n<tool_calls>\n{''name'': <tool-name-1>, ''arguments'': <args-json-object-1>}\n...\n</tool_calls><end_of_sentence>\n' -}}{%- endif -%}{% for message in messages -%}{% if message['role'] == 'user' -%}{{ '<beginning_of_sentence>user name=user\n' + message['content'][0]['text'] + '<end_of_sentence>\n' -}}{% elif message['role'] == 'assistant' -%}{{ '<beginning_of_sentence>ai name=assistant\n' -}}{% for content in message['content'] | selectattr('type', 'equalto', 'text') -%}{{ content['text'] -}}{%- endfor -%}{{ '<end_of_sentence>\n' -}}{% elif message['role'] == 'tool' -%}{{ '<beginning_of_sentence>tool name=tools\n' }} {%- for content in message['content'] -%}{{- 'tool name: ' + content['name'] + '\n' + 'tool result: ' + content['text'] + '\n\n' -}} {%- endfor -%}{{- '<end_of_sentence>\n' -}}{% endif -%}{%- endfor -%}{% if add_generation_prompt -%}{{ '<beginning_of_sentence>ai name=assistant\n' -}}{%- endif -%}"
 }

   "model_max_length": 40960000,
   "tokenizer_class": "GPT2Tokenizer",
   "unk_token": "<end_of_document>",
+  "chat_template": "{{ '<begin_of_document>' -}}{% set ns = namespace(system_prompt='') -%}{% for message in messages -%}{% if message['role'] == 'system' -%}{% set ns.system_prompt = ns.system_prompt + message['content'][0]['text'] -%}{% endif -%}{%- endfor -%}{% if ns.system_prompt != '' -%}{{ '<beginning_of_sentence>system ai_setting=assistant\n' + ns.system_prompt + '<end_of_sentence>\n' -}}{%- endif -%}{% if tools -%}{{ '<beginning_of_sentence>system tool_setting=tools\nYou are provided with these tools:\n<tools>\n' -}}{% for tool in tools -%}{{ tool | tojson ~ '\n' -}}{%- endfor -%}{{ '</tools>\n\nIf you need to call tools, please respond with <tool_calls></tool_calls> XML tags, and provide tool-name and json-object of arguments, following the format below:\n<tool_calls>\n{''\"name\"'': <tool-name-1>, ''\"arguments\"'': <args-json-object-1>}\n...\n</tool_calls><end_of_sentence>\n' -}}{%- endif -%}{% for message in messages -%}{% if message['role'] == 'user' -%}{{ '<beginning_of_sentence>user name=user\n' + message['content'][0]['text'] + '<end_of_sentence>\n' -}}{% elif message['role'] == 'assistant' -%}{{ '<beginning_of_sentence>ai name=assistant\n' -}}{% for content in message['content'] | selectattr('type', 'equalto', 'text') -%}{{ content['text'] -}}{%- endfor -%}{{ '<end_of_sentence>\n' -}}{% elif message['role'] == 'tool' -%}{{ '<beginning_of_sentence>tool name=tools\n' }} {%- for content in message['content'] -%}{{- 'tool name: ' + content['name'] + '\n' + 'tool result: ' + content['text'] + '\n\n' -}} {%- endfor -%}{{- '<end_of_sentence>\n' -}}{% endif -%}{%- endfor -%}{% if add_generation_prompt -%}{{ '<beginning_of_sentence>ai name=assistant\n' -}}{%- endif -%}"
 }