bullerwins committed on
Commit
f7c2394
·
verified ·
1 Parent(s): fd19256

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,16 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ DeepSeek-V2.5-Q5_K_M-00001-of-00004.gguf filter=lfs diff=lfs merge=lfs -text
37
+ DeepSeek-V2.5-Q5_K_M-00002-of-00004.gguf filter=lfs diff=lfs merge=lfs -text
38
+ DeepSeek-V2.5-Q5_K_M-00003-of-00004.gguf filter=lfs diff=lfs merge=lfs -text
39
+ DeepSeek-V2.5-Q5_K_M-00004-of-00004.gguf filter=lfs diff=lfs merge=lfs -text
40
+ DeepSeek-V2.5-Q5_K_S-00001-of-00004.gguf filter=lfs diff=lfs merge=lfs -text
41
+ DeepSeek-V2.5-Q5_K_S-00002-of-00004.gguf filter=lfs diff=lfs merge=lfs -text
42
+ DeepSeek-V2.5-Q5_K_S-00003-of-00004.gguf filter=lfs diff=lfs merge=lfs -text
43
+ DeepSeek-V2.5-Q5_K_S-00004-of-00004.gguf filter=lfs diff=lfs merge=lfs -text
44
+ DeepSeek-V2.5-Q6_K-00001-of-00005.gguf filter=lfs diff=lfs merge=lfs -text
45
+ DeepSeek-V2.5-Q6_K-00002-of-00005.gguf filter=lfs diff=lfs merge=lfs -text
46
+ DeepSeek-V2.5-Q6_K-00003-of-00005.gguf filter=lfs diff=lfs merge=lfs -text
47
+ DeepSeek-V2.5-Q6_K-00004-of-00005.gguf filter=lfs diff=lfs merge=lfs -text
48
+ DeepSeek-V2.5-Q6_K-00005-of-00005.gguf filter=lfs diff=lfs merge=lfs -text
DeepSeek-V2.5-Q5_K_M-00001-of-00004.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5ee071ea3063c91d633edd0634153f85bfc121098617c969968569896f6ed63
3
+ size 44565329568
DeepSeek-V2.5-Q5_K_M-00002-of-00004.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63e0e2266a57932f9d0ea83790261bba929a20c7a8c25aa39fba0b8ded0bd5db
3
+ size 44583395680
DeepSeek-V2.5-Q5_K_M-00003-of-00004.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f5e8f435ee43086f6f2bddd125a70869942a7ca3827e19f1e5f62f6e08b9925
3
+ size 44752601440
DeepSeek-V2.5-Q5_K_M-00004-of-00004.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be12ff96f09317c0b7acf1c1312575ac633a7f7768ae4af6bf7fdf3b422ff726
3
+ size 33322462688
DeepSeek-V2.5-Q5_K_S-00001-of-00004.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18798154d359da50c6ea3950c8eebe68574cfe9c8bd907eda5463b7198f4069e
3
+ size 44902632032
DeepSeek-V2.5-Q5_K_S-00002-of-00004.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1455995ed610a396dd6f7e942b6c8236d8bfa341f73e5706b515f4486ae2a5be
3
+ size 44602442144
DeepSeek-V2.5-Q5_K_S-00003-of-00004.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee60022e1b4fdcc4ffb29c413017b8741109ea8a7c9d5e8a695ee9fec73d63c9
3
+ size 44602442144
DeepSeek-V2.5-Q5_K_S-00004-of-00004.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87934d3f12c68fe4dbe2974bf4969a8281e2594f7c1de2c3b4ed766e09a3093a
3
+ size 28200950144
DeepSeek-V2.5-Q6_K-00001-of-00005.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8cbf64bfc5f8e05a556b8b709e9dc52fd20dfa90fabdf346ce8e948c9cd1e97
3
+ size 44731344000
DeepSeek-V2.5-Q6_K-00002-of-00005.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c25ba60e66700249fedd5bc7f83a33bb8ee4dcc86c40ddab05154f622c9e704a
3
+ size 44622112000
DeepSeek-V2.5-Q6_K-00003-of-00005.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2fe525e1c413a1d6253b5c52823297311e5c8b7008604d3c9d00eca043d3844c
3
+ size 44457666496
DeepSeek-V2.5-Q6_K-00004-of-00005.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5850d2867b160eae8427284317cd59d0843d3470185d59fc54972a04b99f03ab
3
+ size 44622112000
DeepSeek-V2.5-Q6_K-00005-of-00005.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5249436db64b2e81c5bf9879094536f5993bb38c4cd0b956b469fe7a7a196456
3
+ size 15108491648
README.md ADDED
@@ -0,0 +1,278 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: other
3
+ license_name: deepseek
4
+ license_link: https://github.com/deepseek-ai/DeepSeek-V2/blob/main/LICENSE-MODEL
5
+ base_model: deepseek-ai/DeepSeek-V2.5
6
+ ---
7
+
8
+ <!-- markdownlint-disable first-line-h1 -->
9
+ <!-- markdownlint-disable html -->
10
+ <!-- markdownlint-disable no-duplicate-header -->
11
+
12
+ <div align="center">
13
+ <img src="https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/logo.svg?raw=true" width="60%" alt="DeepSeek-V2" />
14
+ </div>
15
+ <hr>
16
+ <div align="center" style="line-height: 1;">
17
+ <a href="https://www.deepseek.com/" target="_blank" style="margin: 2px;">
18
+ <img alt="Homepage" src="https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/badge.svg?raw=true" style="display: inline-block; vertical-align: middle;"/>
19
+ </a>
20
+ <a href="https://chat.deepseek.com/" target="_blank" style="margin: 2px;">
21
+ <img alt="Chat" src="https://img.shields.io/badge/🤖%20Chat-DeepSeek%20V2-536af5?color=536af5&logoColor=white" style="display: inline-block; vertical-align: middle;"/>
22
+ </a>
23
+ <a href="https://huggingface.co/deepseek-ai" target="_blank" style="margin: 2px;">
24
+ <img alt="Hugging Face" src="https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-DeepSeek%20AI-ffc107?color=ffc107&logoColor=white" style="display: inline-block; vertical-align: middle;"/>
25
+ </a>
26
+ </div>
27
+
28
+ <div align="center" style="line-height: 1;">
29
+ <a href="https://discord.gg/Tc7c45Zzu5" target="_blank" style="margin: 2px;">
30
+ <img alt="Discord" src="https://img.shields.io/badge/Discord-DeepSeek%20AI-7289da?logo=discord&logoColor=white&color=7289da" style="display: inline-block; vertical-align: middle;"/>
31
+ </a>
32
+ <a href="https://github.com/deepseek-ai/DeepSeek-V2/blob/main/figures/qr.jpeg?raw=true" target="_blank" style="margin: 2px;">
33
+ <img alt="Wechat" src="https://img.shields.io/badge/WeChat-DeepSeek%20AI-brightgreen?logo=wechat&logoColor=white" style="display: inline-block; vertical-align: middle;"/>
34
+ </a>
35
+ <a href="https://twitter.com/deepseek_ai" target="_blank" style="margin: 2px;">
36
+ <img alt="Twitter Follow" src="https://img.shields.io/badge/Twitter-deepseek_ai-white?logo=x&logoColor=white" style="display: inline-block; vertical-align: middle;"/>
37
+ </a>
38
+ </div>
39
+
40
+ <div align="center" style="line-height: 1;">
41
+ <a href="https://github.com/deepseek-ai/DeepSeek-V2/blob/main/LICENSE-CODE" style="margin: 2px;">
42
+ <img alt="Code License" src="https://img.shields.io/badge/Code_License-MIT-f5de53?&color=f5de53" style="display: inline-block; vertical-align: middle;"/>
43
+ </a>
44
+ <a href="https://github.com/deepseek-ai/DeepSeek-V2/blob/main/LICENSE-MODEL" style="margin: 2px;">
45
+ <img alt="Model License" src="https://img.shields.io/badge/Model_License-Model_Agreement-f5de53?&color=f5de53" style="display: inline-block; vertical-align: middle;"/>
46
+ </a>
47
+ </div>
48
+
49
+ <p align="center">
50
+ <a href="https://arxiv.org/abs/2405.04434"><b>Paper Link</b>👁️</a>
51
+ </p>
52
+
53
+ GGUF quantized version with llama.cpp
54
+
55
+ Original model [deepseek-ai/DeepSeek-V2.5](https://huggingface.co/deepseek-ai/DeepSeek-V2.5)
56
+
57
+ # DeepSeek-V2.5
58
+
59
+ ## 1. Introduction
60
+
61
+ DeepSeek-V2.5 is an upgraded version that combines DeepSeek-V2-Chat and DeepSeek-Coder-V2-Instruct. The new model integrates the general and coding abilities of the two previous versions.
62
+ For model details, please visit the [DeepSeek-V2 page](https://github.com/deepseek-ai/DeepSeek-V2).
63
+
64
+ DeepSeek-V2.5 better aligns with human preferences and has been optimized in various aspects, including writing and instruction following:
65
+
66
+ | Metric | DeepSeek-V2-0628 | DeepSeek-Coder-V2-0724 | DeepSeek-V2.5 |
67
+ |:-----------------------|:-----------------|:-----------------------|:--------------|
68
+ | AlpacaEval 2.0 | 46.6 | 44.5 | 50.5 |
69
+ | ArenaHard | 68.3 | 66.3 | 76.2 |
70
+ | AlignBench | 7.88 | 7.91 | 8.04 |
71
+ | MT-Bench | 8.85 | 8.91 | 9.02 |
72
+ | HumanEval python | 84.5 | 87.2 | 89 |
73
+ | HumanEval Multi | 73.8 | 74.8 | 73.8 |
74
+ | LiveCodeBench(01-09) | 36.6 | 39.7 | 41.8 |
75
+ | Aider | 69.9 | 72.9 | 72.2 |
76
+ | SWE-verified | N/A | 19 | 16.8 |
77
+ | DS-FIM-Eval | N/A | 73.2 | 78.3 |
78
+ | DS-Arena-Code | N/A | 49.5 | 63.1 |
79
+
80
+
81
+
82
+ ## 2. How to run locally
83
+
84
+ **To utilize DeepSeek-V2.5 in BF16 format for inference, 80GB*8 GPUs are required.**
85
+ ### Inference with Huggingface's Transformers
86
+ You can directly employ [Huggingface's Transformers](https://github.com/huggingface/transformers) for model inference.
87
+
88
+ ```python
89
+ import torch
90
+ from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
91
+
92
+ model_name = "deepseek-ai/DeepSeek-V2.5"
93
+ tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
94
+ # `max_memory` should be set based on your devices
95
+ max_memory = {i: "75GB" for i in range(8)}
96
+ # `device_map` cannot be set to `auto`
97
+ model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, device_map="sequential", torch_dtype=torch.bfloat16, max_memory=max_memory, attn_implementation="eager")
98
+ model.generation_config = GenerationConfig.from_pretrained(model_name)
99
+ model.generation_config.pad_token_id = model.generation_config.eos_token_id
100
+
101
+ messages = [
102
+ {"role": "user", "content": "Write a piece of quicksort code in C++"}
103
+ ]
104
+ input_tensor = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
105
+ outputs = model.generate(input_tensor.to(model.device), max_new_tokens=100)
106
+
107
+ result = tokenizer.decode(outputs[0][input_tensor.shape[1]:], skip_special_tokens=True)
108
+ print(result)
109
+ ```
110
+
111
+ The complete chat template can be found within `tokenizer_config.json` located in the huggingface model repository.
112
+
113
+ **Note: The chat template has been updated compared to the previous DeepSeek-V2-Chat version.**
114
+
115
+ An example of the chat template is as follows:
116
+
117
+ ```bash
118
+ <|begin▁of▁sentence|><|User|>{user_message_1}<|Assistant|>{assistant_message_1}<|end▁of▁sentence|><|User|>{user_message_2}<|Assistant|>
119
+ ```
120
+
121
+ You can also add an optional system message:
122
+
123
+ ```bash
124
+ <|begin▁of▁sentence|>{system_message}<|User|>{user_message_1}<|Assistant|>{assistant_message_1}<|end▁of▁sentence|><|User|>{user_message_2}<|Assistant|>
125
+ ```
126
+
127
+ ### Inference with vLLM (recommended)
128
+ To utilize [vLLM](https://github.com/vllm-project/vllm) for model inference, please merge this Pull Request into your vLLM codebase: https://github.com/vllm-project/vllm/pull/4650.
129
+
130
+ ```python
131
+ from transformers import AutoTokenizer
132
+ from vllm import LLM, SamplingParams
133
+
134
+ max_model_len, tp_size = 8192, 8
135
+ model_name = "deepseek-ai/DeepSeek-V2.5"
136
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
137
+ llm = LLM(model=model_name, tensor_parallel_size=tp_size, max_model_len=max_model_len, trust_remote_code=True, enforce_eager=True)
138
+ sampling_params = SamplingParams(temperature=0.3, max_tokens=256, stop_token_ids=[tokenizer.eos_token_id])
139
+
140
+ messages_list = [
141
+ [{"role": "user", "content": "Who are you?"}],
142
+ [{"role": "user", "content": "Translate the following content into Chinese directly: DeepSeek-V2 adopts innovative architectures to guarantee economical training and efficient inference."}],
143
+ [{"role": "user", "content": "Write a piece of quicksort code in C++."}],
144
+ ]
145
+
146
+ prompt_token_ids = [tokenizer.apply_chat_template(messages, add_generation_prompt=True) for messages in messages_list]
147
+
148
+ outputs = llm.generate(prompt_token_ids=prompt_token_ids, sampling_params=sampling_params)
149
+
150
+ generated_text = [output.outputs[0].text for output in outputs]
151
+ print(generated_text)
152
+ ```
153
+
154
+ ### Function calling
155
+
156
+ Function calling allows the model to call external tools to enhance its capabilities.
157
+
158
+ Here is an example:
159
+
160
+ ```python
161
+ # Assume that `model` and `tokenizer` are loaded
162
+ model.generation_config = GenerationConfig(do_sample=False, max_new_tokens=128, eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.eos_token_id)
163
+
164
+ tool_system_prompt = """You are a helpful Assistant.
165
+
166
+ ## Tools
167
+
168
+ ### Function
169
+
170
+ You have the following functions available:
171
+
172
+ - `get_current_weather`:
173
+ ```json
174
+ {
175
+ "name": "get_current_weather",
176
+ "description": "Get the current weather in a given location",
177
+ "parameters": {
178
+ "type": "object",
179
+ "properties": {
180
+ "location": {
181
+ "type": "string",
182
+ "description": "The city and state, e.g. San Francisco, CA"
183
+ },
184
+ "unit": {
185
+ "type": "string",
186
+ "enum": [
187
+ "celsius",
188
+ "fahrenheit"
189
+ ]
190
+ }
191
+ },
192
+ "required": [
193
+ "location"
194
+ ]
195
+ }
196
+ }
197
+ ```"""
198
+
199
+ tool_call_messages = [{"role": "system", "content": tool_system_prompt}, {"role": "user", "content": "What's the weather like in Tokyo and Paris?"}]
200
+ tool_call_inputs = tokenizer.apply_chat_template(tool_call_messages, add_generation_prompt=True, return_tensors="pt")
201
+ tool_call_outputs = model.generate(tool_call_inputs.to(model.device))
202
+ # Generated text: '<|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>get_current_weather\n```json\n{"location": "Tokyo"}\n```<|tool▁call▁end|>\n<|tool▁call▁begin|>function<|tool▁sep|>get_current_weather\n```json\n{"location": "Paris"}\n```<|tool▁call▁end|><|tool▁calls▁end|><|end▁of▁sentence|>'
203
+
204
+ # Mock response of calling `get_current_weather`
205
+ tool_messages = [{"role": "tool", "content": '{"location": "Tokyo", "temperature": "10", "unit": null}'}, {"role": "tool", "content": '{"location": "Paris", "temperature": "22", "unit": null}'}]
206
+ tool_inputs = tokenizer.apply_chat_template(tool_messages, add_generation_prompt=False, return_tensors="pt")[:, 1:]
207
+ tool_inputs = torch.cat([tool_call_outputs, tool_inputs.to(model.device)], dim=1)
208
+ tool_outputs = model.generate(tool_inputs)
209
+ # Generated text: The current weather in Tokyo is 10 degrees, and in Paris, it is 22 degrees.<|end▁of▁sentence|>
210
+ ```
211
+
212
+ ### JSON output
213
+
214
+ You can use JSON Output Mode to ensure the model generates a valid JSON object. To activate this mode, a special instruction should be appended to your system prompt.
215
+
216
+ ```python
217
+ # Assume that `model` and `tokenizer` are loaded
218
+ model.generation_config = GenerationConfig(do_sample=False, max_new_tokens=128, eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.eos_token_id)
219
+
220
+ user_system_prompt = 'The user will provide some exam text. Please parse the "question" and "answer" and output them in JSON format.'
221
+ json_system_prompt = f"""{user_system_prompt}
222
+
223
+ ## Response Format
224
+
225
+ Reply with JSON object ONLY."""
226
+
227
+ json_messages = [{"role": "system", "content": json_system_prompt}, {"role": "user", "content": "Which is the highest mountain in the world? Mount Everest."}]
228
+ json_inputs = tokenizer.apply_chat_template(json_messages, add_generation_prompt=True, return_tensors="pt")
229
+ json_outpus = model.generate(json_inputs.to(model.device))
230
+ # Generated text: '```json\n{\n "question": "Which is the highest mountain in the world?",\n "answer": "Mount Everest."\n}\n```<|end▁of▁sentence|>'
231
+ ```
232
+
233
+ ### FIM completion
234
+
235
+ In FIM (Fill In the Middle) completion, you can provide a prefix and an optional suffix, and the model will complete the content in between.
236
+
237
+ ```python
238
+ # Assume that `model` and `tokenizer` are loaded
239
+ model.generation_config = GenerationConfig(do_sample=False, max_new_tokens=128, eos_token_id=tokenizer.eos_token_id, pad_token_id=tokenizer.eos_token_id)
240
+
241
+ prefix = """def quick_sort(arr):
242
+ if len(arr) <= 1:
243
+ return arr
244
+ pivot = arr[0]
245
+ left = []
246
+ right = []
247
+ """
248
+
249
+ suffix = """
250
+ if arr[i] < pivot:
251
+ left.append(arr[i])
252
+ else:
253
+ right.append(arr[i])
254
+ return quick_sort(left) + [pivot] + quick_sort(right)"""
255
+
256
+ fim_prompt = f"<|fim▁begin|>{prefix}<|fim▁hole|>{suffix}<|fim▁end|>"
257
+ fim_inputs = tokenizer(fim_prompt, add_special_tokens=True, return_tensors="pt").input_ids
258
+ fim_outputs = model.generate(fim_inputs.to(model.device))
259
+ # Generated text: " for i in range(1, len(arr)):<|end▁of▁sentence|>"
260
+ ```
261
+
262
+ ## 3. License
263
+ This code repository is licensed under the MIT License. The use of DeepSeek-V2 Base/Chat models is subject to [the Model License](LICENSE). DeepSeek-V2 series (including Base and Chat) supports commercial use.
264
+
265
+ ## 4. Citation
266
+ ```
267
+ @misc{deepseekv2,
268
+ title={DeepSeek-V2: A Strong, Economical, and Efficient Mixture-of-Experts Language Model},
269
+ author={DeepSeek-AI},
270
+ year={2024},
271
+ eprint={2405.04434},
272
+ archivePrefix={arXiv},
273
+ primaryClass={cs.CL}
274
+ }
275
+ ```
276
+
277
+ ## 5. Contact
278
+ If you have any questions, please raise an issue or contact us at [service@deepseek.com](mailto:service@deepseek.com).