Add files using upload-large-folder tool
Browse files- .gitattributes +1 -0
- LICENSE +21 -0
- README.md +119 -3
- added_tokens.json +18 -0
- chat_template.jinja +1 -0
- config.json +83 -0
- generation_config.json +12 -0
- merges.txt +0 -0
- model-00001-of-00029.safetensors +3 -0
- model-00002-of-00029.safetensors +3 -0
- model-00003-of-00029.safetensors +3 -0
- model-00004-of-00029.safetensors +3 -0
- model-00005-of-00029.safetensors +3 -0
- model-00006-of-00029.safetensors +3 -0
- model-00007-of-00029.safetensors +3 -0
- model-00008-of-00029.safetensors +3 -0
- model-00009-of-00029.safetensors +3 -0
- model-00010-of-00029.safetensors +3 -0
- model-00011-of-00029.safetensors +3 -0
- model-00012-of-00029.safetensors +3 -0
- model-00013-of-00029.safetensors +3 -0
- model-00014-of-00029.safetensors +3 -0
- model-00015-of-00029.safetensors +3 -0
- model-00016-of-00029.safetensors +3 -0
- model-00017-of-00029.safetensors +3 -0
- model-00018-of-00029.safetensors +3 -0
- model-00019-of-00029.safetensors +3 -0
- model-00020-of-00029.safetensors +3 -0
- model-00021-of-00029.safetensors +3 -0
- model-00022-of-00029.safetensors +3 -0
- model-00023-of-00029.safetensors +3 -0
- model-00024-of-00029.safetensors +3 -0
- model-00025-of-00029.safetensors +3 -0
- model-00026-of-00029.safetensors +3 -0
- model-00027-of-00029.safetensors +3 -0
- model-00028-of-00029.safetensors +3 -0
- model-00029-of-00029.safetensors +3 -0
- model.safetensors.index.json +0 -0
- special_tokens_map.json +33 -0
- tokenizer.json +3 -0
- tokenizer_config.json +160 -0
- vocab.json +0 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2025 rednote-hilab
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
README.md
CHANGED
@@ -1,3 +1,119 @@
|
|
1 |
-
---
|
2 |
-
license: mit
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: mit
|
3 |
+
license_link: https://huggingface.co/rednote-hilab/dots.llm1.inst-FP8-dynamic/blob/main/LICENSE
|
4 |
+
pipeline_tag: text-generation
|
5 |
+
base_model: rednote-hilab/dots.llm1.inst
|
6 |
+
tags:
|
7 |
+
- chat
|
8 |
+
library_name: transformers
|
9 |
+
language:
|
10 |
+
- en
|
11 |
+
- zh
|
12 |
+
---
|
13 |
+
|
14 |
+
# dots1
|
15 |
+
|
16 |
+
<p align="center">
|
17 |
+
<img src="figures/new_logo2.png" width="300"/>
|
18 |
+
</p>
|
19 |
+
|
20 |
+
<p align="center">
|
21 |
+
  🤗 <a href="https://huggingface.co/rednote-hilab">Hugging Face</a>   |    📑 <a href="https://www.arxiv.org/abs/2506.05767">Paper</a>   
|
22 |
+
<br>
|
23 |
+
🖥️ <a href="https://huggingface.co/spaces/rednote-hilab/dots-demo">Demo</a>   |   💬 <a href="figures/wechat.png">WeChat (微信)</a>   |   📕 <a href="https://www.xiaohongshu.com/user/profile/683ffe42000000001d021a4c">rednote</a>  
|
24 |
+
</p>
|
25 |
+
|
26 |
+
|
27 |
+
Visit our Hugging Face (click links above), search checkpoints with names starting with `dots.llm1` or visit the [dots1 collection](https://huggingface.co/collections/rednote-hilab/dotsllm1-68246aaaaba3363374a8aa7c), and you will find all you need! Enjoy!
|
28 |
+
|
29 |
+
|
30 |
+
## News
|
31 |
+
|
32 |
+
- 2025.06.06: We released the `dots.llm1` series. Check our [report](https://github.com/rednote-hilab/dots.llm1/blob/main/dots1_tech_report.pdf) for more details!
|
33 |
+
|
34 |
+
|
35 |
+
## 1. Introduction
|
36 |
+
|
37 |
+
|
38 |
+
The `dots.llm1` model is a large-scale MoE model that activates 14B parameters out of a total of 142B parameters, delivering performance on par with state-of-the-art models.
|
39 |
+
Leveraging our meticulously crafted and efficient data processing pipeline, `dots.llm1` achieves performance comparable to Qwen2.5-72B after being pretrained on a high-quality corpus without synthetic data. To foster further research, we open-source intermediate training checkpoints spanning the entire training process, providing valuable insights into the learning dynamics of large language models.
|
40 |
+
|
41 |
+
|
42 |
+
<p align="center">
|
43 |
+
<img width="90%" src="./figures/performance.png">
|
44 |
+
</p>
|
45 |
+
|
46 |
+
## 2. Model Summary
|
47 |
+
|
48 |
+
**This repo contains the base and instruction-tuned `dots.llm1` model**, which has the following features:
|
49 |
+
|
50 |
+
- Type: A MoE model with 14B activated and 142B total parameters trained on high-quality corpus.
|
51 |
+
- Training Stages: Pretraining and SFT.
|
52 |
+
- Architecture: Multi-head Attention with QK-Norm in the attention layer, fine-grained MoE utilizing top-6 out of 128 routed experts, plus 2 shared experts.
|
53 |
+
- Number of Layers: 62
|
54 |
+
- Number of Attention Heads: 32
|
55 |
+
- Supported Languages: English, Chinese
|
56 |
+
- Context Length: 32,768 tokens
|
57 |
+
- License: MIT
|
58 |
+
|
59 |
+
The highlights from `dots.llm1` include:
|
60 |
+
|
61 |
+
- **Enhanced Data Processing**: We propose a scalable and fine-grained *three-stage* data processing framework designed to generate large-scale, high-quality and diverse data for pretraining.
|
62 |
+
- **No Synthetic Data during Pretraining**: High-quality non-synthetic tokens were used in base model pretraining.
|
63 |
+
- **Performance and Cost Efficiency**: `dots.llm1` is an open-source model that activates only *14B* parameters at inference, delivering both comprehensive capabilities and high computational efficiency.
|
64 |
+
- **Infrastructure**: We introduce an innovative MoE all-to-all communication and computation overlapping recipe based on interleaved 1F1B pipeline scheduling and an efficient grouped GEMM implementation to boost computational efficiency.
|
65 |
+
- **Open Accessibility to Model Dynamics**: Intermediate model checkpoints are released spanning the entire training process, facilitating future research into the learning dynamics of large language models.
|
66 |
+
|
67 |
+
## 3. dots.llm1.inst.FP8-dynamic
|
68 |
+
|
69 |
+
### Docker (vllm)
|
70 |
+
|
71 |
+
We release the quantized `dots.llm1.inst.FP8-dynamic` model, which retains approximately 98% of the original performance after quantization.
|
72 |
+
|
73 |
+
Run vLLM inference using the Docker image `rednote-hilab/vllm-openai-v0.9.1`. The Docker images are available on [Docker Hub](https://hub.docker.com/repository/docker/rednotehilab/dots1/tags), based on the official images.
|
74 |
+
|
75 |
+
```bash
|
76 |
+
python3 -m vllm.entrypoints.openai.api_server --model rednote-hilab/dots.llm1.inst-FP8-dynamic --tensor-parallel-size 4 --pipeline-parallel-size 1 --trust-remote-code --served-model-name dots1
|
77 |
+
```
|
78 |
+
|
79 |
+
|
80 |
+
### Inference with huggingface
|
81 |
+
|
82 |
+
We are working to merge it into Transformers ([PR #38143](https://github.com/huggingface/transformers/pull/38143)).
|
83 |
+
|
84 |
+
#### Chat Completion
|
85 |
+
|
86 |
+
```python
|
87 |
+
import torch
|
88 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
|
89 |
+
|
90 |
+
model_name = "rednote-hilab/dots.llm1.inst-FP8-dynamic"
|
91 |
+
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
92 |
+
|
93 |
+
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype="auto")
|
94 |
+
|
95 |
+
messages = [
|
96 |
+
{"role": "user", "content": "Write a piece of quicksort code in C++"}
|
97 |
+
]
|
98 |
+
input_tensor = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt")
|
99 |
+
outputs = model.generate(input_tensor.to(model.device), max_new_tokens=200)
|
100 |
+
|
101 |
+
result = tokenizer.decode(outputs[0][input_tensor.shape[1]:], skip_special_tokens=True)
|
102 |
+
print(result)
|
103 |
+
```
|
104 |
+
|
105 |
+
## Citation
|
106 |
+
|
107 |
+
If you find `dots.llm1` useful or want to use it in your projects, please kindly cite our paper:
|
108 |
+
|
109 |
+
```
|
110 |
+
@misc{huo2025dotsllm1technicalreport,
|
111 |
+
title={dots.llm1 Technical Report},
|
112 |
+
author={Bi Huo and Bin Tu and Cheng Qin and Da Zheng and Debing Zhang and Dongjie Zhang and En Li and Fu Guo and Jian Yao and Jie Lou and Junfeng Tian and Li Hu and Ran Zhu and Shengdong Chen and Shuo Liu and Su Guang and Te Wo and Weijun Zhang and Xiaoming Shi and Xinxin Peng and Xing Wu and Yawen Liu and Yuqiu Ji and Ze Wen and Zhenhai Liu and Zichao Li and Zilong Liao},
|
113 |
+
year={2025},
|
114 |
+
eprint={2506.05767},
|
115 |
+
archivePrefix={arXiv},
|
116 |
+
primaryClass={cs.CL},
|
117 |
+
url={https://arxiv.org/abs/2506.05767},
|
118 |
+
}
|
119 |
+
```
|
added_tokens.json
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"<|endofexecution|>": 151655,
|
3 |
+
"<|endofobservation|>": 151653,
|
4 |
+
"<|endofresponse|>": 151649,
|
5 |
+
"<|endofsystem|>": 151651,
|
6 |
+
"<|endoftext|>": 151643,
|
7 |
+
"<|endofuserprompt|>": 151647,
|
8 |
+
"<|execution|>": 151654,
|
9 |
+
"<|im_end|>": 151645,
|
10 |
+
"<|im_start|>": 151644,
|
11 |
+
"<|observation|>": 151652,
|
12 |
+
"<|reject-unknown|>": 151656,
|
13 |
+
"<|response|>": 151648,
|
14 |
+
"<|sec-cot|>": 151657,
|
15 |
+
"<|sec-end-cot|>": 151658,
|
16 |
+
"<|system|>": 151650,
|
17 |
+
"<|userprompt|>": 151646
|
18 |
+
}
|
chat_template.jinja
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{% if messages[0]['role'] == 'system' %}<|system|>{{ messages[0]['content'] }}<|endofsystem|>{% set start_idx = 1 %}{% else %}<|system|>You are a helpful assistant.<|endofsystem|>{% set start_idx = 0 %}{% endif %}{% for idx in range(start_idx, messages|length) %}{% if messages[idx]['role'] == 'user' %}<|userprompt|>{{ messages[idx]['content'] }}<|endofuserprompt|>{% elif messages[idx]['role'] == 'assistant' %}<|response|>{{ messages[idx]['content'] }}<|endofresponse|>{% endif %}{% endfor %}{% if add_generation_prompt and messages[-1]['role'] == 'user' %}<|response|>{% endif %}
|
config.json
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"Dots1ForCausalLM"
|
4 |
+
],
|
5 |
+
"attention_bias": false,
|
6 |
+
"attention_dropout": 0.0,
|
7 |
+
"eos_token_id": 151645,
|
8 |
+
"first_k_dense_replace": 1,
|
9 |
+
"hidden_act": "silu",
|
10 |
+
"hidden_size": 4096,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 10944,
|
13 |
+
"max_position_embeddings": 32768,
|
14 |
+
"max_window_layers": 62,
|
15 |
+
"model_type": "dots1",
|
16 |
+
"moe_intermediate_size": 1408,
|
17 |
+
"moe_layer_freq": 1,
|
18 |
+
"n_group": 1,
|
19 |
+
"n_routed_experts": 128,
|
20 |
+
"n_shared_experts": 2,
|
21 |
+
"norm_topk_prob": true,
|
22 |
+
"num_attention_heads": 32,
|
23 |
+
"num_experts_per_tok": 6,
|
24 |
+
"num_hidden_layers": 62,
|
25 |
+
"num_key_value_heads": 32,
|
26 |
+
"pretraining_tp": 1,
|
27 |
+
"quantization_config": {
|
28 |
+
"config_groups": {
|
29 |
+
"group_0": {
|
30 |
+
"input_activations": {
|
31 |
+
"actorder": null,
|
32 |
+
"block_structure": null,
|
33 |
+
"dynamic": true,
|
34 |
+
"group_size": null,
|
35 |
+
"num_bits": 8,
|
36 |
+
"observer": null,
|
37 |
+
"observer_kwargs": {},
|
38 |
+
"strategy": "token",
|
39 |
+
"symmetric": true,
|
40 |
+
"type": "float"
|
41 |
+
},
|
42 |
+
"output_activations": null,
|
43 |
+
"targets": [
|
44 |
+
"Linear"
|
45 |
+
],
|
46 |
+
"weights": {
|
47 |
+
"actorder": null,
|
48 |
+
"block_structure": null,
|
49 |
+
"dynamic": false,
|
50 |
+
"group_size": null,
|
51 |
+
"num_bits": 8,
|
52 |
+
"observer": "minmax",
|
53 |
+
"observer_kwargs": {},
|
54 |
+
"strategy": "channel",
|
55 |
+
"symmetric": true,
|
56 |
+
"type": "float"
|
57 |
+
}
|
58 |
+
}
|
59 |
+
},
|
60 |
+
"format": "float-quantized",
|
61 |
+
"global_compression_ratio": null,
|
62 |
+
"ignore": [
|
63 |
+
"lm_head"
|
64 |
+
],
|
65 |
+
"kv_cache_scheme": null,
|
66 |
+
"quant_method": "compressed-tensors",
|
67 |
+
"quantization_status": "compressed"
|
68 |
+
},
|
69 |
+
"rms_norm_eps": 1e-05,
|
70 |
+
"rope_scaling": null,
|
71 |
+
"rope_theta": 10000000,
|
72 |
+
"routed_scaling_factor": 2.5,
|
73 |
+
"scoring_func": "sigmoid",
|
74 |
+
"sliding_window": null,
|
75 |
+
"tie_word_embeddings": false,
|
76 |
+
"topk_group": 1,
|
77 |
+
"topk_method": "noaux_tc",
|
78 |
+
"torch_dtype": "bfloat16",
|
79 |
+
"transformers_version": "4.53.0.dev0",
|
80 |
+
"use_cache": true,
|
81 |
+
"use_sliding_window": false,
|
82 |
+
"vocab_size": 152064
|
83 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"bos_token_id": 151643,
|
4 |
+
"do_sample": true,
|
5 |
+
"eos_token_id": [
|
6 |
+
151643,
|
7 |
+
151649
|
8 |
+
],
|
9 |
+
"temperature": 0.7,
|
10 |
+
"top_p": 0.8,
|
11 |
+
"transformers_version": "4.53.0.dev0"
|
12 |
+
}
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
model-00001-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9ab1f1fc578ea20548e29a5f1ff6932256a72659026c6e4919fefa0eaca70621
|
3 |
+
size 4996875472
|
model-00002-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce4b176ee00241f9572c364a56af1e7fa314bd8b45d3e09952dce00036d972ac
|
3 |
+
size 4994592496
|
model-00003-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a05e21bef9f58317374a171679d3043a557adefd62fb091c8e3379b170105829
|
3 |
+
size 4994592544
|
model-00004-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9099b5df18510058851059e3b74d8583ad723c4da51123bbff22245470e728b
|
3 |
+
size 4995652432
|
model-00005-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c741564d9251a64263fe0fb30e7330d9a966699bd50c2bd260757d7d858c08b2
|
3 |
+
size 4998222344
|
model-00006-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b97849b6d3ebc0370f72c8b4d03ff7c6606e8af1db44a222ec923a10fd9c0de
|
3 |
+
size 4994604952
|
model-00007-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef12b060e0e56790fa8b31f5fd01e36097b54cd82fb70d34690a7ff4f2057a1c
|
3 |
+
size 4994594192
|
model-00008-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d12900e2778b1340b109fa44c750cb582af52b6bdc1be31dfd6828adbb330c2
|
3 |
+
size 4994594192
|
model-00009-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ddfa7d05519635cf297171ae1394fccbbd8855f80fae7be07cafb3c4aa0073d
|
3 |
+
size 4994604952
|
model-00010-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:abcd9f8ff2d57475d032afd00bd86248064e660724c40a256b53624231e69421
|
3 |
+
size 4994594304
|
model-00011-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3e0422c6bae38ccf3f0b6d4f3324aca9781db2f04a87ec12897b1c1bb916467c
|
3 |
+
size 4999283072
|
model-00012-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9876344fe8335b47579a3e7ec29cf5fc5cb7b9c4f4792859c9da063a030171b9
|
3 |
+
size 4994594152
|
model-00013-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e2b765c56c5d7b4aba5982dc9dc9e01f9bb5ab6cf5dd5b7811f8cbc5219b5982
|
3 |
+
size 4994594192
|
model-00014-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ad594c5d811737d4c30462eec90ed886cf5620cde5cbbcaa5eec11ea6883b294
|
3 |
+
size 4994604952
|
model-00015-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b7f691deec14d78ea8fdee35f30d10928fb78e34372fb5d20df7a76180394d61
|
3 |
+
size 4994594192
|
model-00016-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aa3f03a37da6525e616df47d49f9b7bc311bc4a00f670f98dd64a1a4a21184ad
|
3 |
+
size 4994594224
|
model-00017-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:832b011f298ba71f113aa515f7ecc7969bc3b1ae0730114f9bdec3b7252c26f6
|
3 |
+
size 4994605080
|
model-00018-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef09dd1f595775dd03ea6760f1e4beba5eed4bcf68bb672c14d5588e7f4d62fc
|
3 |
+
size 4999272232
|
model-00019-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e1cbdad6f4d2719ce6488f472383905fabcde7748166f4f7f2091ab115129c47
|
3 |
+
size 4994604952
|
model-00020-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2946e4f4e962fd18ea2c69ed72bbbbbcd968990b8eccad63c73b368879996151
|
3 |
+
size 4994594192
|
model-00021-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:384e894a3e16bb96ea578df909338538109e2403bdfff2b09058cdba57698a70
|
3 |
+
size 4994594192
|
model-00022-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc87eda93850f44dab1ffbea4167f8d1feac577b9d1b06e3fa216e0ba20f5c4a
|
3 |
+
size 4994604952
|
model-00023-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d596d4066330d4f55260ef5b79b1c5b4bc6cfa30693364ce0ca7cc6d310f633d
|
3 |
+
size 4994594280
|
model-00024-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e5bc96b7a71ac1e92a0e62f14b9d346eedeed346bf0b346f6f8e0afdd442345e
|
3 |
+
size 4999283112
|
model-00025-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:90c2c4f20a32c79542384c4c2a134ad557ffd887169d26e73fe3ec301b071fb4
|
3 |
+
size 4994594136
|
model-00026-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba89ef52e3ec217842ad83bea244455bb0a8769d05ff588b357af697b10a1514
|
3 |
+
size 4994594192
|
model-00027-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bee99c440239515e9f7db4b72de0cc88349ae3c68452b8463f99bb22dcdf3f30
|
3 |
+
size 4994604952
|
model-00028-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:505152e94e861cf3207ddcbff5cf86c85e18e92b74d68c2be3c6256a089a891f
|
3 |
+
size 4994594192
|
model-00029-of-00029.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:933ecc46966f99923332dd4f2b477292300b5e6e49f86d4ec217b315f0a874ff
|
3 |
+
size 4411269184
|
model.safetensors.index.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
special_tokens_map.json
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"additional_special_tokens": [
|
3 |
+
"<|im_start|>",
|
4 |
+
"<|im_end|>",
|
5 |
+
"<|userprompt|>",
|
6 |
+
"<|endofuserprompt|>",
|
7 |
+
"<|response|>",
|
8 |
+
"<|endofresponse|>",
|
9 |
+
"<|system|>",
|
10 |
+
"<|endofsystem|>",
|
11 |
+
"<|observation|>",
|
12 |
+
"<|endofobservation|>",
|
13 |
+
"<|execution|>",
|
14 |
+
"<|endofexecution|>",
|
15 |
+
"<|reject-unknown|>",
|
16 |
+
"<|sec-cot|>",
|
17 |
+
"<|sec-end-cot|>"
|
18 |
+
],
|
19 |
+
"eos_token": {
|
20 |
+
"content": "<|endofresponse|>",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false
|
25 |
+
},
|
26 |
+
"pad_token": {
|
27 |
+
"content": "<|endofresponse|>",
|
28 |
+
"lstrip": false,
|
29 |
+
"normalized": false,
|
30 |
+
"rstrip": false,
|
31 |
+
"single_word": false
|
32 |
+
}
|
33 |
+
}
|
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe750f7b1f42fb8f760ee7ee91fa5fb3974795b3dc14c269819b83a086f5e98d
|
3 |
+
size 11420764
|
tokenizer_config.json
ADDED
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_prefix_space": false,
|
3 |
+
"added_tokens_decoder": {
|
4 |
+
"151643": {
|
5 |
+
"content": "<|endoftext|>",
|
6 |
+
"lstrip": false,
|
7 |
+
"normalized": false,
|
8 |
+
"rstrip": false,
|
9 |
+
"single_word": false,
|
10 |
+
"special": true
|
11 |
+
},
|
12 |
+
"151644": {
|
13 |
+
"content": "<|im_start|>",
|
14 |
+
"lstrip": false,
|
15 |
+
"normalized": false,
|
16 |
+
"rstrip": false,
|
17 |
+
"single_word": false,
|
18 |
+
"special": true
|
19 |
+
},
|
20 |
+
"151645": {
|
21 |
+
"content": "<|im_end|>",
|
22 |
+
"lstrip": false,
|
23 |
+
"normalized": false,
|
24 |
+
"rstrip": false,
|
25 |
+
"single_word": false,
|
26 |
+
"special": true
|
27 |
+
},
|
28 |
+
"151646": {
|
29 |
+
"content": "<|userprompt|>",
|
30 |
+
"lstrip": false,
|
31 |
+
"normalized": false,
|
32 |
+
"rstrip": false,
|
33 |
+
"single_word": false,
|
34 |
+
"special": true
|
35 |
+
},
|
36 |
+
"151647": {
|
37 |
+
"content": "<|endofuserprompt|>",
|
38 |
+
"lstrip": false,
|
39 |
+
"normalized": false,
|
40 |
+
"rstrip": false,
|
41 |
+
"single_word": false,
|
42 |
+
"special": true
|
43 |
+
},
|
44 |
+
"151648": {
|
45 |
+
"content": "<|response|>",
|
46 |
+
"lstrip": false,
|
47 |
+
"normalized": false,
|
48 |
+
"rstrip": false,
|
49 |
+
"single_word": false,
|
50 |
+
"special": true
|
51 |
+
},
|
52 |
+
"151649": {
|
53 |
+
"content": "<|endofresponse|>",
|
54 |
+
"lstrip": false,
|
55 |
+
"normalized": false,
|
56 |
+
"rstrip": false,
|
57 |
+
"single_word": false,
|
58 |
+
"special": true
|
59 |
+
},
|
60 |
+
"151650": {
|
61 |
+
"content": "<|system|>",
|
62 |
+
"lstrip": false,
|
63 |
+
"normalized": false,
|
64 |
+
"rstrip": false,
|
65 |
+
"single_word": false,
|
66 |
+
"special": true
|
67 |
+
},
|
68 |
+
"151651": {
|
69 |
+
"content": "<|endofsystem|>",
|
70 |
+
"lstrip": false,
|
71 |
+
"normalized": false,
|
72 |
+
"rstrip": false,
|
73 |
+
"single_word": false,
|
74 |
+
"special": true
|
75 |
+
},
|
76 |
+
"151652": {
|
77 |
+
"content": "<|observation|>",
|
78 |
+
"lstrip": false,
|
79 |
+
"normalized": false,
|
80 |
+
"rstrip": false,
|
81 |
+
"single_word": false,
|
82 |
+
"special": true
|
83 |
+
},
|
84 |
+
"151653": {
|
85 |
+
"content": "<|endofobservation|>",
|
86 |
+
"lstrip": false,
|
87 |
+
"normalized": false,
|
88 |
+
"rstrip": false,
|
89 |
+
"single_word": false,
|
90 |
+
"special": true
|
91 |
+
},
|
92 |
+
"151654": {
|
93 |
+
"content": "<|execution|>",
|
94 |
+
"lstrip": false,
|
95 |
+
"normalized": false,
|
96 |
+
"rstrip": false,
|
97 |
+
"single_word": false,
|
98 |
+
"special": true
|
99 |
+
},
|
100 |
+
"151655": {
|
101 |
+
"content": "<|endofexecution|>",
|
102 |
+
"lstrip": false,
|
103 |
+
"normalized": false,
|
104 |
+
"rstrip": false,
|
105 |
+
"single_word": false,
|
106 |
+
"special": true
|
107 |
+
},
|
108 |
+
"151656": {
|
109 |
+
"content": "<|reject-unknown|>",
|
110 |
+
"lstrip": false,
|
111 |
+
"normalized": false,
|
112 |
+
"rstrip": false,
|
113 |
+
"single_word": false,
|
114 |
+
"special": true
|
115 |
+
},
|
116 |
+
"151657": {
|
117 |
+
"content": "<|sec-cot|>",
|
118 |
+
"lstrip": false,
|
119 |
+
"normalized": false,
|
120 |
+
"rstrip": false,
|
121 |
+
"single_word": false,
|
122 |
+
"special": true
|
123 |
+
},
|
124 |
+
"151658": {
|
125 |
+
"content": "<|sec-end-cot|>",
|
126 |
+
"lstrip": false,
|
127 |
+
"normalized": false,
|
128 |
+
"rstrip": false,
|
129 |
+
"single_word": false,
|
130 |
+
"special": true
|
131 |
+
}
|
132 |
+
},
|
133 |
+
"additional_special_tokens": [
|
134 |
+
"<|im_start|>",
|
135 |
+
"<|im_end|>",
|
136 |
+
"<|userprompt|>",
|
137 |
+
"<|endofuserprompt|>",
|
138 |
+
"<|response|>",
|
139 |
+
"<|endofresponse|>",
|
140 |
+
"<|system|>",
|
141 |
+
"<|endofsystem|>",
|
142 |
+
"<|observation|>",
|
143 |
+
"<|endofobservation|>",
|
144 |
+
"<|execution|>",
|
145 |
+
"<|endofexecution|>",
|
146 |
+
"<|reject-unknown|>",
|
147 |
+
"<|sec-cot|>",
|
148 |
+
"<|sec-end-cot|>"
|
149 |
+
],
|
150 |
+
"bos_token": null,
|
151 |
+
"clean_up_tokenization_spaces": false,
|
152 |
+
"eos_token": "<|endofresponse|>",
|
153 |
+
"errors": "replace",
|
154 |
+
"extra_special_tokens": {},
|
155 |
+
"model_max_length": 131072,
|
156 |
+
"pad_token": "<|endofresponse|>",
|
157 |
+
"split_special_tokens": false,
|
158 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
159 |
+
"unk_token": null
|
160 |
+
}
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|