nehcgs committed on
Commit
dedd109
1 Parent(s): ffb5d7b

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -2,16 +2,16 @@
2
  license: other
3
  license_name: katanemo-research
4
  license_link: >-
5
- https://huggingface.co/katanemolabs/Arch-Function-1.5B/blob/main/LICENSE
6
  base_model:
7
- - Qwen/Qwen2.5-1.5B-Instruct
8
  language:
9
  - en
10
  pipeline_tag: text-generation
11
  library_name: transformers
12
  ---
13
 
14
- # katanemo/Arch-Function-1.5B
15
 
16
  ## Overview
17
  The Katanemo Arch-Function collection of large language models (LLMs) is a collection of state-of-the-art (SOTA) LLMs specifically designed for **function calling** tasks. The models are designed to understand complex function signatures, identify required parameters, and produce accurate function call outputs based on natural language prompts. Achieving performance on par with GPT-4, these models set a new benchmark in the domain of function-oriented tasks, making them suitable for scenarios where automated API interaction and function execution are crucial.
@@ -84,6 +84,17 @@ We evaluate Katanemo Arch-Function series on the [Berkeley Function-Calling Lead
84
  <td>63.41%</td>
85
  <td>82.93%</td>
86
  </tr>
 
 
 
 
 
 
 
 
 
 
 
87
  <tr style="text-align: center; vertical-align: middle;">
88
  <td>6</td>
89
  <td>o1-preview-2024-09-12 (Prompt)</td>
@@ -95,17 +106,6 @@ We evaluate Katanemo Arch-Function series on the [Berkeley Function-Calling Lead
95
  <td>73.17%</td>
96
  <td>74.60%</td>
97
  </tr>
98
- <tr style="text-align: center; vertical-align: middle; font-weight: bold;">
99
- <td> </td>
100
- <td>Arch-Function-7B</td>
101
- <td>58.44%</td>
102
- <td>85.58%</td>
103
- <td>88.14%</td>
104
- <td>69.08%</td>
105
- <td>20.50%</td>
106
- <td>92.68%</td>
107
- <td>74.05%</td>
108
- </tr>
109
  <tr style="text-align: center; vertical-align: middle; ">
110
  <td>9</td>
111
  <td>Gemini-1.5-Flash-002 (Prompt)</td>
@@ -117,6 +117,17 @@ We evaluate Katanemo Arch-Function series on the [Berkeley Function-Calling Lead
117
  <td>85.37%</td>
118
  <td>78.54%</td>
119
  </tr>
 
 
 
 
 
 
 
 
 
 
 
120
  <tr style="text-align: center; vertical-align: middle; ">
121
  <td>12</td>
122
  <td>Claude-3.5-Sonnet-20240620 (FC)</td>
@@ -139,30 +150,17 @@ We evaluate Katanemo Arch-Function series on the [Berkeley Function-Calling Lead
139
  <td>75.61%</td>
140
  <td>49.44%</td>
141
  </tr>
142
- <tr style="text-align: center; vertical-align: middle; font-weight: bold;">
143
- <td> </td>
144
- <td>Arch-Function-3B</td>
145
- <td>56.57%</td>
146
- <td>83.62%</td>
147
- <td>85.36%</td>
148
- <td>66.90%</td>
149
- <td>19.50%</td>
150
- <td>97.56%</td>
151
- <td>70.99%</td>
152
- </tr>
153
- </tr>
154
  <tr style="text-align: center; vertical-align: middle; font-weight: bold;">
155
  <td> </td>
156
  <td>Arch-Function-1.5B</td>
157
- <td>54.52%</td>
158
- <td>80.31%</td>
159
- <td>82.04%</td>
160
- <td>66.19%</td>
161
- <td>17.25%</td>
162
- <td>97.56%</td>
163
- <td>69.95%</td>
164
  </tr>
165
-
166
  <tr style="text-align: center; vertical-align: middle; ">
167
  <td>21</td>
168
  <td>Llama-3.1-70B-Instruct (Prompt)</td>
@@ -189,7 +187,7 @@ We evaluate Katanemo Arch-Function series on the [Berkeley Function-Calling Lead
189
 
190
 
191
  # Requirements
192
- The code of Arch-Function-1.5B has been in the Hugging Face `transformers` library and we advise you to install latest version:
193
  ```bash
194
  pip install "transformers>=4.37.0"
195
  ```
@@ -205,7 +203,7 @@ import json
205
  from typing import Any, Dict, List
206
  from transformers import AutoModelForCausalLM, AutoTokenizer
207
 
208
- model_name = "katanemo/Arch-Function-1.5B"
209
  model = AutoModelForCausalLM.from_pretrained(
210
  model_name, device_map="auto", torch_dtype="auto", trust_remote_code=True
211
  )
@@ -344,4 +342,4 @@ The current temperature in Seattle is 62 degrees in Fahrenheit.
344
 
345
 
346
  # License
347
- Katanemo Arch-Function collection is distributed under the [Katanemo license](https://huggingface.co/katanemolabs/Arch-Function-1.5B/blob/main/LICENSE).
 
2
  license: other
3
  license_name: katanemo-research
4
  license_link: >-
5
+ https://huggingface.co/katanemolabs/Arch-Function-3B/blob/main/LICENSE
6
  base_model:
7
+ - Qwen/Qwen2.5-Coder-3B-Instruct
8
  language:
9
  - en
10
  pipeline_tag: text-generation
11
  library_name: transformers
12
  ---
13
 
14
+ # katanemo/Arch-Function-3B
15
 
16
  ## Overview
17
  The Katanemo Arch-Function collection of large language models (LLMs) is a collection of state-of-the-art (SOTA) LLMs specifically designed for **function calling** tasks. The models are designed to understand complex function signatures, identify required parameters, and produce accurate function call outputs based on natural language prompts. Achieving performance on par with GPT-4, these models set a new benchmark in the domain of function-oriented tasks, making them suitable for scenarios where automated API interaction and function execution are crucial.
 
84
  <td>63.41%</td>
85
  <td>82.93%</td>
86
  </tr>
87
+ <tr style="text-align: center; vertical-align: middle; font-weight: bold;">
88
+ <td> </td>
89
+ <td>Arch-Function-7B</td>
90
+ <td>59.62%</td>
91
+ <td>86.83%</td>
92
+ <td>88.07%</td>
93
+ <td>71.57%</td>
94
+ <td>21.00%</td>
95
+ <td>95.12%</td>
96
+ <td>73.63%</td>
97
+ </tr>
98
  <tr style="text-align: center; vertical-align: middle;">
99
  <td>6</td>
100
  <td>o1-preview-2024-09-12 (Prompt)</td>
 
106
  <td>73.17%</td>
107
  <td>74.60%</td>
108
  </tr>
 
 
 
 
 
 
 
 
 
 
 
109
  <tr style="text-align: center; vertical-align: middle; ">
110
  <td>9</td>
111
  <td>Gemini-1.5-Flash-002 (Prompt)</td>
 
117
  <td>85.37%</td>
118
  <td>78.54%</td>
119
  </tr>
120
+ <tr style="text-align: center; vertical-align: middle; font-weight: bold;">
121
+ <td> </td>
122
+ <td>Arch-Function-3B</td>
123
+ <td>57.69%</td>
124
+ <td>85.19%</td>
125
+ <td>86.18%</td>
126
+ <td>71.21%</td>
127
+ <td>17.50%</td>
128
+ <td>90.24%</td>
129
+ <td>72.88%</td>
130
+ </tr>
131
  <tr style="text-align: center; vertical-align: middle; ">
132
  <td>12</td>
133
  <td>Claude-3.5-Sonnet-20240620 (FC)</td>
 
150
  <td>75.61%</td>
151
  <td>49.44%</td>
152
  </tr>
 
 
 
 
 
 
 
 
 
 
 
 
153
  <tr style="text-align: center; vertical-align: middle; font-weight: bold;">
154
  <td> </td>
155
  <td>Arch-Function-1.5B</td>
156
+ <td>56.20%</td>
157
+ <td>84.40%</td>
158
+ <td>83.96%</td>
159
+ <td>69.36%</td>
160
+ <td>15.88%</td>
161
+ <td>87.80%</td>
162
+ <td>74.39%</td>
163
  </tr>
 
164
  <tr style="text-align: center; vertical-align: middle; ">
165
  <td>21</td>
166
  <td>Llama-3.1-70B-Instruct (Prompt)</td>
 
187
 
188
 
189
  # Requirements
190
+ The code of Arch-Function-3B is supported by the Hugging Face `transformers` library, and we advise you to install the latest version:
191
  ```bash
192
  pip install "transformers>=4.37.0"
193
  ```
 
203
  from typing import Any, Dict, List
204
  from transformers import AutoModelForCausalLM, AutoTokenizer
205
 
206
+ model_name = "katanemo/Arch-Function-3B"
207
  model = AutoModelForCausalLM.from_pretrained(
208
  model_name, device_map="auto", torch_dtype="auto", trust_remote_code=True
209
  )
 
342
 
343
 
344
  # License
345
+ The Katanemo Arch-Function collection is distributed under the [Katanemo license](https://huggingface.co/katanemolabs/Arch-Function-3B/blob/main/LICENSE).
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "Qwen/Qwen2.5-3B-Instruct",
3
  "architectures": [
4
  "Qwen2ForCausalLM"
5
  ],
@@ -11,7 +11,7 @@
11
  "initializer_range": 0.02,
12
  "intermediate_size": 11008,
13
  "max_position_embeddings": 32768,
14
- "max_window_layers": 70,
15
  "model_type": "qwen2",
16
  "num_attention_heads": 16,
17
  "num_hidden_layers": 36,
 
1
  {
2
+ "_name_or_path": "Qwen/Qwen2.5-Coder-3B-Instruct",
3
  "architectures": [
4
  "Qwen2ForCausalLM"
5
  ],
 
11
  "initializer_range": 0.02,
12
  "intermediate_size": 11008,
13
  "max_position_embeddings": 32768,
14
+ "max_window_layers": 36,
15
  "model_type": "qwen2",
16
  "num_attention_heads": 16,
17
  "num_hidden_layers": 36,
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d13942083f5a22837c4b9bd5341221f18f5303f12a53afadaceea02ac79b771
3
  size 4957560304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:560e2820aa53df64f7f7ec6691d7de5ed00aacb33e9c0beaed86a92a60e25513
3
  size 4957560304
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1445b032668c261f394138703eef06d2ec97c1eb568fe5c00ed8494158564c93
3
  size 1214366696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d924cf6711bbab2e8458aab67f62c718f1264cfa9b4ed305cbea925762e2adab
3
  size 1214366696
tokenizer_config.json CHANGED
@@ -199,9 +199,9 @@
199
  "clean_up_tokenization_spaces": false,
200
  "eos_token": "<|im_end|>",
201
  "errors": "replace",
202
- "model_max_length": 131072,
203
  "pad_token": "<|endoftext|>",
204
  "split_special_tokens": false,
205
  "tokenizer_class": "Qwen2Tokenizer",
206
  "unk_token": null
207
- }
 
199
  "clean_up_tokenization_spaces": false,
200
  "eos_token": "<|im_end|>",
201
  "errors": "replace",
202
+ "model_max_length": 32768,
203
  "pad_token": "<|endoftext|>",
204
  "split_special_tokens": false,
205
  "tokenizer_class": "Qwen2Tokenizer",
206
  "unk_token": null
207
+ }