hchung1017
committed on
Upload folder using huggingface_hub
Browse files- LICENSE +149 -0
- README.md +207 -0
- config.json +26 -0
- generation_config.json +8 -0
- model-00001-of-00015.safetensors +3 -0
- model-00002-of-00015.safetensors +3 -0
- model-00003-of-00015.safetensors +3 -0
- model-00004-of-00015.safetensors +3 -0
- model-00005-of-00015.safetensors +3 -0
- model-00006-of-00015.safetensors +3 -0
- model-00007-of-00015.safetensors +3 -0
- model-00008-of-00015.safetensors +3 -0
- model-00009-of-00015.safetensors +3 -0
- model-00010-of-00015.safetensors +3 -0
- model-00011-of-00015.safetensors +3 -0
- model-00012-of-00015.safetensors +3 -0
- model-00013-of-00015.safetensors +3 -0
- model-00014-of-00015.safetensors +3 -0
- model-00015-of-00015.safetensors +3 -0
- model.safetensors.index.json +330 -0
- pytorch_model-00001-of-00015.bin +3 -0
- pytorch_model-00002-of-00015.bin +3 -0
- pytorch_model-00003-of-00015.bin +3 -0
- pytorch_model-00004-of-00015.bin +3 -0
- pytorch_model-00005-of-00015.bin +3 -0
- pytorch_model-00006-of-00015.bin +3 -0
- pytorch_model-00007-of-00015.bin +3 -0
- pytorch_model-00008-of-00015.bin +3 -0
- pytorch_model-00009-of-00015.bin +3 -0
- pytorch_model-00010-of-00015.bin +3 -0
- pytorch_model-00011-of-00015.bin +3 -0
- pytorch_model-00012-of-00015.bin +3 -0
- pytorch_model-00013-of-00015.bin +3 -0
- pytorch_model-00014-of-00015.bin +3 -0
- pytorch_model-00015-of-00015.bin +3 -0
- pytorch_model.bin.index.json +330 -0
- special_tokens_map.json +23 -0
- tokenizer.json +0 -0
- tokenizer_config.json +39 -0
LICENSE
ADDED
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Llama-2-Ko 7b MIT License under LLAMA 2 COMMUNITY LICENSE AGREEMENT
|
2 |
+
|
3 |
+
Copyright (c) 2023 L. Junbum (Beomi)
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
22 |
+
|
23 |
+
---
|
24 |
+
|
25 |
+
LLAMA 2 COMMUNITY LICENSE AGREEMENT
|
26 |
+
Llama 2 Version Release Date: July 18, 2023
|
27 |
+
|
28 |
+
"Agreement" means the terms and conditions for use, reproduction, distribution and
|
29 |
+
modification of the Llama Materials set forth herein.
|
30 |
+
|
31 |
+
"Documentation" means the specifications, manuals and documentation
|
32 |
+
accompanying Llama 2 distributed by Meta at ai.meta.com/resources/models-and-
|
33 |
+
libraries/llama-downloads/.
|
34 |
+
|
35 |
+
"Licensee" or "you" means you, or your employer or any other person or entity (if
|
36 |
+
you are entering into this Agreement on such person or entity's behalf), of the age
|
37 |
+
required under applicable laws, rules or regulations to provide legal consent and that
|
38 |
+
has legal authority to bind your employer or such other person or entity if you are
|
39 |
+
entering in this Agreement on their behalf.
|
40 |
+
|
41 |
+
"Llama 2" means the foundational large language models and software and
|
42 |
+
algorithms, including machine-learning model code, trained model weights,
|
43 |
+
inference-enabling code, training-enabling code, fine-tuning enabling code and other
|
44 |
+
elements of the foregoing distributed by Meta at ai.meta.com/resources/models-and-
|
45 |
+
libraries/llama-downloads/.
|
46 |
+
|
47 |
+
"Llama Materials" means, collectively, Meta's proprietary Llama 2 and
|
48 |
+
Documentation (and any portion thereof) made available under this Agreement.
|
49 |
+
|
50 |
+
"Meta" or "we" means Meta Platforms Ireland Limited (if you are located in or, if you
|
51 |
+
are an entity, your principal place of business is in the EEA or Switzerland) and Meta
|
52 |
+
Platforms, Inc. (if you are located outside of the EEA or Switzerland).
|
53 |
+
|
54 |
+
By clicking "I Accept" below or by using or distributing any portion or element of the
|
55 |
+
Llama Materials, you agree to be bound by this Agreement.
|
56 |
+
|
57 |
+
1. License Rights and Redistribution.
|
58 |
+
|
59 |
+
a. Grant of Rights. You are granted a non-exclusive, worldwide, non-
|
60 |
+
transferable and royalty-free limited license under Meta's intellectual property or
|
61 |
+
other rights owned by Meta embodied in the Llama Materials to use, reproduce,
|
62 |
+
distribute, copy, create derivative works of, and make modifications to the Llama
|
63 |
+
Materials.
|
64 |
+
|
65 |
+
b. Redistribution and Use.
|
66 |
+
|
67 |
+
i. If you distribute or make the Llama Materials, or any derivative works
|
68 |
+
thereof, available to a third party, you shall provide a copy of this Agreement to such
|
69 |
+
third party.
|
70 |
+
ii. If you receive Llama Materials, or any derivative works thereof, from
|
71 |
+
a Licensee as part of an integrated end user product, then Section 2 of this
|
72 |
+
Agreement will not apply to you.
|
73 |
+
|
74 |
+
iii. You must retain in all copies of the Llama Materials that you
|
75 |
+
distribute the following attribution notice within a "Notice" text file distributed as a
|
76 |
+
part of such copies: "Llama 2 is licensed under the LLAMA 2 Community License,
|
77 |
+
Copyright (c) Meta Platforms, Inc. All Rights Reserved."
|
78 |
+
|
79 |
+
iv. Your use of the Llama Materials must comply with applicable laws
|
80 |
+
and regulations (including trade compliance laws and regulations) and adhere to the
|
81 |
+
Acceptable Use Policy for the Llama Materials (available at
|
82 |
+
https://ai.meta.com/llama/use-policy), which is hereby incorporated by reference into
|
83 |
+
this Agreement.
|
84 |
+
|
85 |
+
v. You will not use the Llama Materials or any output or results of the
|
86 |
+
Llama Materials to improve any other large language model (excluding Llama 2 or
|
87 |
+
derivative works thereof).
|
88 |
+
|
89 |
+
2. Additional Commercial Terms. If, on the Llama 2 version release date, the
|
90 |
+
monthly active users of the products or services made available by or for Licensee,
|
91 |
+
or Licensee's affiliates, is greater than 700 million monthly active users in the
|
92 |
+
preceding calendar month, you must request a license from Meta, which Meta may
|
93 |
+
grant to you in its sole discretion, and you are not authorized to exercise any of the
|
94 |
+
rights under this Agreement unless or until Meta otherwise expressly grants you
|
95 |
+
such rights.
|
96 |
+
|
97 |
+
3. Disclaimer of Warranty. UNLESS REQUIRED BY APPLICABLE LAW, THE
|
98 |
+
LLAMA MATERIALS AND ANY OUTPUT AND RESULTS THEREFROM ARE
|
99 |
+
PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND,
|
100 |
+
EITHER EXPRESS OR IMPLIED, INCLUDING, WITHOUT LIMITATION, ANY
|
101 |
+
WARRANTIES OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY, OR
|
102 |
+
FITNESS FOR A PARTICULAR PURPOSE. YOU ARE SOLELY RESPONSIBLE
|
103 |
+
FOR DETERMINING THE APPROPRIATENESS OF USING OR REDISTRIBUTING
|
104 |
+
THE LLAMA MATERIALS AND ASSUME ANY RISKS ASSOCIATED WITH YOUR
|
105 |
+
USE OF THE LLAMA MATERIALS AND ANY OUTPUT AND RESULTS.
|
106 |
+
|
107 |
+
4. Limitation of Liability. IN NO EVENT WILL META OR ITS AFFILIATES BE
|
108 |
+
LIABLE UNDER ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, TORT,
|
109 |
+
NEGLIGENCE, PRODUCTS LIABILITY, OR OTHERWISE, ARISING OUT OF THIS
|
110 |
+
AGREEMENT, FOR ANY LOST PROFITS OR ANY INDIRECT, SPECIAL,
|
111 |
+
CONSEQUENTIAL, INCIDENTAL, EXEMPLARY OR PUNITIVE DAMAGES, EVEN
|
112 |
+
IF META OR ITS AFFILIATES HAVE BEEN ADVISED OF THE POSSIBILITY OF
|
113 |
+
ANY OF THE FOREGOING.
|
114 |
+
|
115 |
+
5. Intellectual Property.
|
116 |
+
|
117 |
+
a. No trademark licenses are granted under this Agreement, and in
|
118 |
+
connection with the Llama Materials, neither Meta nor Licensee may use any name
|
119 |
+
or mark owned by or associated with the other or any of its affiliates, except as
|
120 |
+
required for reasonable and customary use in describing and redistributing the
|
121 |
+
Llama Materials.
|
122 |
+
|
123 |
+
b. Subject to Meta's ownership of Llama Materials and derivatives made by or
|
124 |
+
for Meta, with respect to any derivative works and modifications of the Llama
|
125 |
+
Materials that are made by you, as between you and Meta, you are and will be the
|
126 |
+
owner of such derivative works and modifications.
|
127 |
+
|
128 |
+
c. If you institute litigation or other proceedings against Meta or any entity
|
129 |
+
(including a cross-claim or counterclaim in a lawsuit) alleging that the Llama
|
130 |
+
Materials or Llama 2 outputs or results, or any portion of any of the foregoing,
|
131 |
+
constitutes infringement of intellectual property or other rights owned or licensable
|
132 |
+
by you, then any licenses granted to you under this Agreement shall terminate as of
|
133 |
+
the date such litigation or claim is filed or instituted. You will indemnify and hold
|
134 |
+
harmless Meta from and against any claim by any third party arising out of or related
|
135 |
+
to your use or distribution of the Llama Materials.
|
136 |
+
|
137 |
+
6. Term and Termination. The term of this Agreement will commence upon your
|
138 |
+
acceptance of this Agreement or access to the Llama Materials and will continue in
|
139 |
+
full force and effect until terminated in accordance with the terms and conditions
|
140 |
+
herein. Meta may terminate this Agreement if you are in breach of any term or
|
141 |
+
condition of this Agreement. Upon termination of this Agreement, you shall delete
|
142 |
+
and cease use of the Llama Materials. Sections 3, 4 and 7 shall survive the
|
143 |
+
termination of this Agreement.
|
144 |
+
|
145 |
+
7. Governing Law and Jurisdiction. This Agreement will be governed and
|
146 |
+
construed under the laws of the State of California without regard to choice of law
|
147 |
+
principles, and the UN Convention on Contracts for the International Sale of Goods
|
148 |
+
does not apply to this Agreement. The courts of California shall have exclusive
|
149 |
+
jurisdiction of any dispute arising out of this Agreement.
|
README.md
ADDED
@@ -0,0 +1,207 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
language:
|
3 |
+
- en
|
4 |
+
- ko
|
5 |
+
pipeline_tag: text-generation
|
6 |
+
inference: false
|
7 |
+
tags:
|
8 |
+
- facebook
|
9 |
+
- meta
|
10 |
+
- pytorch
|
11 |
+
- llama
|
12 |
+
- llama-2
|
13 |
+
- kollama
|
14 |
+
- llama-2-ko
|
15 |
+
---
|
16 |
+
|
17 |
+
**Update Log**
|
18 |
+
|
19 |
+
- 2023.12.27
|
20 |
+
- New Model is here! Trained with only open-accessible Korean text corpus: https://huggingface.co/beomi/open-llama-2-ko-7b
|
21 |
+
- 2023.10.19
|
22 |
+
- Fix Tokenizer bug (space not applied when decoding) after `transformers>=4.34.0`
|
23 |
+
|
24 |
+
|
25 |
+
# **Llama-2-Ko** 🦙🇰🇷
|
26 |
+
|
27 |
+
Llama-2-Ko serves as an advanced iteration of Llama 2, benefiting from an expanded vocabulary and the inclusion of a Korean corpus in its further pretraining. Just like its predecessor, Llama-2-Ko operates within the broad range of generative text models that stretch from 7 billion to 70 billion parameters. This repository focuses on the 7B pretrained version, which is tailored to fit the Hugging Face Transformers format. For access to the other models, feel free to consult the index provided below.
|
28 |
+
|
29 |
+
## Model Details
|
30 |
+
|
31 |
+
**Model Developers** Junbum Lee (Beomi)
|
32 |
+
|
33 |
+
**Variations** Llama-2-Ko will come in a range of parameter sizes — 7B, 13B, and 70B — as well as pretrained and fine-tuned variations.
|
34 |
+
|
35 |
+
**Input** Models input text only.
|
36 |
+
|
37 |
+
**Output** Models generate text only.
|
38 |
+
|
39 |
+
**Model Architecture**
|
40 |
+
|
41 |
+
Llama-2-Ko is an auto-regressive language model that uses an optimized transformer architecture based on Llama-2.
|
42 |
+
|
43 |
+
||Training Data|Params|Context Length|GQA|Tokens|LR|
|
44 |
+
|---|---|---|---|---|---|---|
|
45 |
+
|Llama 2|*A new mix of Korean online data*|7B|4k|✗|>40B*|1e<sup>-5</sup>|
|
46 |
+
*Plan to train up to 200B tokens
|
47 |
+
|
48 |
+
**Vocab Expansion**
|
49 |
+
|
50 |
+
| Model Name | Vocabulary Size | Description |
|
51 |
+
| --- | --- | --- |
|
52 |
+
| Original Llama-2 | 32000 | Sentencepiece BPE |
|
53 |
+
| **Expanded Llama-2-Ko** | 46336 | Sentencepiece BPE. Added Korean vocab and merges |
|
54 |
+
|
55 |
+
**Tokenizing "안녕하세요, 오늘은 날씨가 좋네요."**
|
56 |
+
|
57 |
+
| Model | Tokens |
|
58 |
+
| --- | --- |
|
59 |
+
| Llama-2 | `['▁', '안', '<0xEB>', '<0x85>', '<0x95>', '하', '세', '요', ',', '▁', '오', '<0xEB>', '<0x8A>', '<0x98>', '은', '▁', '<0xEB>', '<0x82>', '<0xA0>', '씨', '가', '▁', '<0xEC>', '<0xA2>', '<0x8B>', '<0xEB>', '<0x84>', '<0xA4>', '요']` |
|
60 |
+
| Llama-2-Ko | `['▁안녕', '하세요', ',', '▁오늘은', '▁날', '씨가', '▁좋네요']` |
|
61 |
+
|
62 |
+
**Tokenizing "Llama 2: Open Foundation and Fine-Tuned Chat Models"**
|
63 |
+
|
64 |
+
| Model | Tokens |
|
65 |
+
| --- | --- |
|
66 |
+
| Llama-2 | `['▁L', 'l', 'ama', '▁', '2', ':', '▁Open', '▁Foundation', '▁and', '▁Fine', '-', 'T', 'un', 'ed', '▁Ch', 'at', '▁Mod', 'els']` |
|
67 |
+
| Llama-2-Ko | `['▁L', 'l', 'ama', '▁', '2', ':', '▁Open', '▁Foundation', '▁and', '▁Fine', '-', 'T', 'un', 'ed', '▁Ch', 'at', '▁Mod', 'els']` |
|
68 |
+
|
69 |
+
# **Model Benchmark**
|
70 |
+
|
71 |
+
## LM Eval Harness - Korean (polyglot branch)
|
72 |
+
|
73 |
+
- Used EleutherAI's lm-evaluation-harness https://github.com/EleutherAI/lm-evaluation-harness/tree/polyglot
|
74 |
+
|
75 |
+
### NSMC (Acc) - 50000 full test
|
76 |
+
|
77 |
+
TBD
|
78 |
+
|
79 |
+
### COPA (F1)
|
80 |
+
|
81 |
+
<img src=https://user-images.githubusercontent.com/11323660/255575809-c037bc6e-0566-436a-a6c1-2329ac92187a.png style="max-width: 700px; width: 100%" />
|
82 |
+
|
83 |
+
| Model | 0-shot | 5-shot | 10-shot | 50-shot |
|
84 |
+
| --- | --- | --- | --- | --- |
|
85 |
+
| https://huggingface.co/skt/ko-gpt-trinity-1.2B-v0.5 | 0.6696 | 0.6477 | 0.6419 | 0.6514 |
|
86 |
+
| https://huggingface.co/kakaobrain/kogpt | 0.7345 | 0.7287 | 0.7277 | 0.7479 |
|
87 |
+
| https://huggingface.co/facebook/xglm-7.5B | 0.6723 | 0.6731 | 0.6769 | 0.7119 |
|
88 |
+
| https://huggingface.co/EleutherAI/polyglot-ko-1.3b | 0.7196 | 0.7193 | 0.7204 | 0.7206 |
|
89 |
+
| https://huggingface.co/EleutherAI/polyglot-ko-3.8b | 0.7595 | 0.7608 | 0.7638 | 0.7788 |
|
90 |
+
| https://huggingface.co/EleutherAI/polyglot-ko-5.8b | 0.7745 | 0.7676 | 0.7775 | 0.7887 |
|
91 |
+
| https://huggingface.co/EleutherAI/polyglot-ko-12.8b | 0.7937 | 0.8108 | 0.8037 | 0.8369 |
|
92 |
+
| Llama-2 Original 7B* | 0.562033 | 0.575982 | 0.576216 | 0.595532 |
|
93 |
+
| Llama-2-Ko-7b 20B (10k) | 0.738780 | 0.762639 | 0.780761 | 0.797863 |
|
94 |
+
| Llama-2-Ko-7b 40B (20k) | 0.743630 | 0.792716 | 0.803746 | 0.825944 |
|
95 |
+
*Llama-2 Original 7B used https://huggingface.co/meta-llama/Llama-2-7b-hf (w/o tokenizer updated)
|
96 |
+
|
97 |
+
### HellaSwag (F1)
|
98 |
+
|
99 |
+
<img src=https://user-images.githubusercontent.com/11323660/255576090-a2bfc1ae-d117-44b7-9f7b-262e41179ec1.png style="max-width: 700px; width: 100%" />
|
100 |
+
|
101 |
+
| Model | 0-shot | 5-shot | 10-shot | 50-shot |
|
102 |
+
| --- | --- | --- | --- | --- |
|
103 |
+
| https://huggingface.co/skt/ko-gpt-trinity-1.2B-v0.5 | 0.5243 | 0.5272 | 0.5166 | 0.5352 |
|
104 |
+
| https://huggingface.co/kakaobrain/kogpt | 0.5590 | 0.5833 | 0.5828 | 0.5907 |
|
105 |
+
| https://huggingface.co/facebook/xglm-7.5B | 0.5665 | 0.5689 | 0.5565 | 0.5622 |
|
106 |
+
| https://huggingface.co/EleutherAI/polyglot-ko-1.3b | 0.5247 | 0.5260 | 0.5278 | 0.5427 |
|
107 |
+
| https://huggingface.co/EleutherAI/polyglot-ko-3.8b | 0.5707 | 0.5830 | 0.5670 | 0.5787 |
|
108 |
+
| https://huggingface.co/EleutherAI/polyglot-ko-5.8b | 0.5976 | 0.5998 | 0.5979 | 0.6208 |
|
109 |
+
| https://huggingface.co/EleutherAI/polyglot-ko-12.8b | 0.5954 | 0.6306 | 0.6098 | 0.6118 |
|
110 |
+
| Llama-2 Original 7B* | 0.415390 | 0.431382 | 0.421342 | 0.442003 |
|
111 |
+
| Llama-2-Ko-7b 20B (10k) | 0.451757 | 0.466751 | 0.472607 | 0.482776 |
|
112 |
+
| Llama-2-Ko-7b 40B (20k) | 0.456246 | 0.465665 | 0.469810 | 0.477374 |
|
113 |
+
*Llama-2 Original 7B used https://huggingface.co/meta-llama/Llama-2-7b-hf (w/o tokenizer updated)
|
114 |
+
|
115 |
+
### BoolQ (F1)
|
116 |
+
|
117 |
+
<img src=https://user-images.githubusercontent.com/11323660/255576343-5d847a6f-3b6a-41a7-af37-0f11940a5ea4.png style="max-width: 700px; width: 100%" />
|
118 |
+
|
119 |
+
| Model | 0-shot | 5-shot | 10-shot | 50-shot |
|
120 |
+
| --- | --- | --- | --- | --- |
|
121 |
+
| https://huggingface.co/skt/ko-gpt-trinity-1.2B-v0.5 | 0.3356 | 0.4014 | 0.3640 | 0.3560 |
|
122 |
+
| https://huggingface.co/kakaobrain/kogpt | 0.4514 | 0.5981 | 0.5499 | 0.5202 |
|
123 |
+
| https://huggingface.co/facebook/xglm-7.5B | 0.4464 | 0.3324 | 0.3324 | 0.3324 |
|
124 |
+
| https://huggingface.co/EleutherAI/polyglot-ko-1.3b | 0.3552 | 0.4751 | 0.4109 | 0.4038 |
|
125 |
+
| https://huggingface.co/EleutherAI/polyglot-ko-3.8b | 0.4320 | 0.5263 | 0.4930 | 0.4038 |
|
126 |
+
| https://huggingface.co/EleutherAI/polyglot-ko-5.8b | 0.4356 | 0.5698 | 0.5187 | 0.5236 |
|
127 |
+
| https://huggingface.co/EleutherAI/polyglot-ko-12.8b | 0.4818 | 0.6041 | 0.6289 | 0.6448 |
|
128 |
+
| Llama-2 Original 7B* | 0.352050 | 0.563238 | 0.474788 | 0.419222 |
|
129 |
+
| Llama-2-Ko-7b 20B (10k) | 0.360656 | 0.679743 | 0.680109 | 0.662152 |
|
130 |
+
| Llama-2-Ko-7b 40B (20k) | 0.578640 | 0.697747 | 0.708358 | 0.714423 |
|
131 |
+
*Llama-2 Original 7B used https://huggingface.co/meta-llama/Llama-2-7b-hf (w/o tokenizer updated)
|
132 |
+
|
133 |
+
### SentiNeg (F1)
|
134 |
+
|
135 |
+
<img src=https://user-images.githubusercontent.com/11323660/255576572-b005a81d-fa4d-4709-b48a-f0fe4eed17a3.png style="max-width: 700px; width: 100%" />
|
136 |
+
|
137 |
+
| Model | 0-shot | 5-shot | 10-shot | 50-shot |
|
138 |
+
| --- | --- | --- | --- | --- |
|
139 |
+
| https://huggingface.co/skt/ko-gpt-trinity-1.2B-v0.5 | 0.6065 | 0.6878 | 0.7280 | 0.8413 |
|
140 |
+
| https://huggingface.co/kakaobrain/kogpt | 0.3747 | 0.8942 | 0.9294 | 0.9698 |
|
141 |
+
| https://huggingface.co/facebook/xglm-7.5B | 0.3578 | 0.4471 | 0.3964 | 0.5271 |
|
142 |
+
| https://huggingface.co/EleutherAI/polyglot-ko-1.3b | 0.6790 | 0.6257 | 0.5514 | 0.7851 |
|
143 |
+
| https://huggingface.co/EleutherAI/polyglot-ko-3.8b | 0.4858 | 0.7950 | 0.7320 | 0.7851 |
|
144 |
+
| https://huggingface.co/EleutherAI/polyglot-ko-5.8b | 0.3394 | 0.8841 | 0.8808 | 0.9521 |
|
145 |
+
| https://huggingface.co/EleutherAI/polyglot-ko-12.8b | 0.9117 | 0.9015 | 0.9345 | 0.9723 |
|
146 |
+
| Llama-2 Original 7B* | 0.347502 | 0.529124 | 0.480641 | 0.788457 |
|
147 |
+
| Llama-2-Ko-7b 20B (10k) | 0.485546 | 0.829503 | 0.871141 | 0.851253 |
|
148 |
+
| Llama-2-Ko-7b 40B (20k) | 0.459447 | 0.761079 | 0.727611 | 0.936988 |
|
149 |
+
*Llama-2 Original 7B used https://huggingface.co/meta-llama/Llama-2-7b-hf (w/o tokenizer updated)
|
150 |
+
|
151 |
+
|
152 |
+
## Note for oobabooga/text-generation-webui
|
153 |
+
|
154 |
+
Remove `ValueError` from the `except` clause in the `load_tokenizer` function (around line 109) in `modules/models.py`.
|
155 |
+
|
156 |
+
```diff
|
157 |
+
diff --git a/modules/models.py b/modules/models.py
|
158 |
+
index 232d5fa..de5b7a0 100644
|
159 |
+
--- a/modules/models.py
|
160 |
+
+++ b/modules/models.py
|
161 |
+
@@ -106,7 +106,7 @@ def load_tokenizer(model_name, model):
|
162 |
+
trust_remote_code=shared.args.trust_remote_code,
|
163 |
+
use_fast=False
|
164 |
+
)
|
165 |
+
- except ValueError:
|
166 |
+
+ except:
|
167 |
+
tokenizer = AutoTokenizer.from_pretrained(
|
168 |
+
path_to_model,
|
169 |
+
trust_remote_code=shared.args.trust_remote_code,
|
170 |
+
```
|
171 |
+
|
172 |
+
Since Llama-2-Ko uses the fast tokenizer provided by the HF `tokenizers` library, NOT the `sentencepiece` package,
|
173 |
+
it is required to use the `use_fast=True` option when initializing the tokenizer.
|
174 |
+
|
175 |
+
Apple Silicon does not support BF16 computing; use CPU instead. (BF16 is supported when using an NVIDIA GPU.)
|
176 |
+
|
177 |
+
## Citation
|
178 |
+
|
179 |
+
```
|
180 |
+
@misc {l._junbum_2023,
|
181 |
+
author = { {L. Junbum} },
|
182 |
+
title = { llama-2-ko-7b (Revision 4a9993e) },
|
183 |
+
year = 2023,
|
184 |
+
url = { https://huggingface.co/beomi/llama-2-ko-7b },
|
185 |
+
doi = { 10.57967/hf/1098 },
|
186 |
+
publisher = { Hugging Face }
|
187 |
+
}
|
188 |
+
```
|
189 |
+
|
190 |
+
## Acknowledgement
|
191 |
+
|
192 |
+
The training is supported by [TPU Research Cloud](https://sites.research.google/trc/) program.
|
193 |
+
|
194 |
+
|
195 |
+
# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
|
196 |
+
Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_beomi__llama-2-ko-7b)
|
197 |
+
|
198 |
+
| Metric | Value |
|
199 |
+
|-----------------------|---------------------------|
|
200 |
+
| Avg. | 39.43 |
|
201 |
+
| ARC (25-shot) | 48.46 |
|
202 |
+
| HellaSwag (10-shot) | 75.28 |
|
203 |
+
| MMLU (5-shot) | 39.56 |
|
204 |
+
| TruthfulQA (0-shot) | 34.49 |
|
205 |
+
| Winogrande (5-shot) | 72.14 |
|
206 |
+
| GSM8K (5-shot) | 1.97 |
|
207 |
+
| DROP (3-shot) | 4.1 |
|
config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"LlamaForCausalLM"
|
4 |
+
],
|
5 |
+
"bos_token_id": 1,
|
6 |
+
"eos_token_id": 2,
|
7 |
+
"hidden_act": "silu",
|
8 |
+
"hidden_size": 4096,
|
9 |
+
"initializer_range": 0.02,
|
10 |
+
"intermediate_size": 11008,
|
11 |
+
"max_length": 4096,
|
12 |
+
"max_position_embeddings": 2048,
|
13 |
+
"model_type": "llama",
|
14 |
+
"num_attention_heads": 32,
|
15 |
+
"num_hidden_layers": 32,
|
16 |
+
"num_key_value_heads": 32,
|
17 |
+
"pad_token_id": 0,
|
18 |
+
"pretraining_tp": 1,
|
19 |
+
"rms_norm_eps": 1e-05,
|
20 |
+
"rope_scaling": null,
|
21 |
+
"tie_word_embeddings": false,
|
22 |
+
"torch_dtype": "bfloat16",
|
23 |
+
"transformers_version": "4.28.0.dev0",
|
24 |
+
"use_cache": true,
|
25 |
+
"vocab_size": 46336
|
26 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"bos_token_id": 1,
|
4 |
+
"eos_token_id": 2,
|
5 |
+
"max_length": 4096,
|
6 |
+
"pad_token_id": 0,
|
7 |
+
"transformers_version": "4.28.0.dev0"
|
8 |
+
}
|
model-00001-of-00015.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ff9c36f7fa88101794c114772b44c84093eea0679ccd6bf5afc1a0eaee63a721
|
3 |
+
size 918571296
|
model-00002-of-00015.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8f61295a25767eb80c47fe16e36d1b249c4d2b0cc5968c84e42c3b605bc07a9a
|
3 |
+
size 989891520
|
model-00003-of-00015.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e6309c59c025ece6ad4b127ec2f5a96f31df46adc660ff8ff8907ea98a7bff3f
|
3 |
+
size 966839576
|
model-00004-of-00015.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f23231d7c88bc3130694787bc410de0c6ed50bee74e9ef8427ea4a22192b9372
|
3 |
+
size 966823328
|
model-00005-of-00015.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b033c31c166cab4254a0beac00e45b8cc3a81b0464a30fd673f897fddc1c89a
|
3 |
+
size 989908144
|
model-00006-of-00015.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:114f3151eece4c44c5085c8feab6a04fc69b5094dc794540945b8d97ab8afb65
|
3 |
+
size 943754792
|
model-00007-of-00015.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ceaedeb4b7ece45ba4185b30d1ccee709ea8132fe27fd3fb2585a1f5ff9decf7
|
3 |
+
size 989891544
|
model-00008-of-00015.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c6111b434eac97233c81224fe712fbb74ee01b46395e4bf974f636730f1dbc51
|
3 |
+
size 966839600
|
model-00009-of-00015.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f6986fbe739e40bcf83ce679c687b98aa9dce2b0621604e70e80f76ad5e13ada
|
3 |
+
size 966823352
|
model-00010-of-00015.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:824354aba3909363d96c7958c4d1ef2d46cfd0150ad7dbf4a12ae7e6388dc8ab
|
3 |
+
size 989908160
|
model-00011-of-00015.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c1e84c1c922beb3a14a987d67a281490f33aebf7084cd2532425f717486ed1c6
|
3 |
+
size 943754792
|
model-00012-of-00015.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89b127dcb3c85427a3c9224224c60f344d45eacdb729e4352a42224bdbed76ba
|
3 |
+
size 989891544
|
model-00013-of-00015.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5dc7f7c52371bacb5d92a2ca5bc16ea10c8306a8e4d673b0385e8cb879d9e56f
|
3 |
+
size 966839600
|
model-00014-of-00015.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ba3c4093ef0648818030617103646e294eff74d181433225ce51aef304e9877
|
3 |
+
size 742435432
|
model-00015-of-00015.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6bdd5a478fcb8941874cd27a76eb4ddaa260bcfb15060aa1996868a099292100
|
3 |
+
size 379584640
|
model.safetensors.index.json
ADDED
@@ -0,0 +1,330 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"total_size": 13711720448
|
4 |
+
},
|
5 |
+
"weight_map": {
|
6 |
+
"lm_head.weight": "model-00015-of-00015.safetensors",
|
7 |
+
"model.embed_tokens.weight": "model-00001-of-00015.safetensors",
|
8 |
+
"model.layers.0.input_layernorm.weight": "model-00001-of-00015.safetensors",
|
9 |
+
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00015.safetensors",
|
10 |
+
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00015.safetensors",
|
11 |
+
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00015.safetensors",
|
12 |
+
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00015.safetensors",
|
13 |
+
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00015.safetensors",
|
14 |
+
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00015.safetensors",
|
15 |
+
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00015.safetensors",
|
16 |
+
"model.layers.0.self_attn.rotary_emb.inv_freq": "model-00001-of-00015.safetensors",
|
17 |
+
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00015.safetensors",
|
18 |
+
"model.layers.1.input_layernorm.weight": "model-00002-of-00015.safetensors",
|
19 |
+
"model.layers.1.mlp.down_proj.weight": "model-00002-of-00015.safetensors",
|
20 |
+
"model.layers.1.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
|
21 |
+
"model.layers.1.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
|
22 |
+
"model.layers.1.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
|
23 |
+
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00015.safetensors",
|
24 |
+
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00015.safetensors",
|
25 |
+
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00015.safetensors",
|
26 |
+
"model.layers.1.self_attn.rotary_emb.inv_freq": "model-00001-of-00015.safetensors",
|
27 |
+
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00015.safetensors",
|
28 |
+
"model.layers.10.input_layernorm.weight": "model-00005-of-00015.safetensors",
|
29 |
+
"model.layers.10.mlp.down_proj.weight": "model-00005-of-00015.safetensors",
|
30 |
+
"model.layers.10.mlp.gate_proj.weight": "model-00005-of-00015.safetensors",
|
31 |
+
"model.layers.10.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
|
32 |
+
"model.layers.10.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
|
33 |
+
"model.layers.10.self_attn.k_proj.weight": "model-00005-of-00015.safetensors",
|
34 |
+
"model.layers.10.self_attn.o_proj.weight": "model-00005-of-00015.safetensors",
|
35 |
+
"model.layers.10.self_attn.q_proj.weight": "model-00005-of-00015.safetensors",
|
36 |
+
"model.layers.10.self_attn.rotary_emb.inv_freq": "model-00005-of-00015.safetensors",
|
37 |
+
"model.layers.10.self_attn.v_proj.weight": "model-00005-of-00015.safetensors",
|
38 |
+
"model.layers.11.input_layernorm.weight": "model-00006-of-00015.safetensors",
|
39 |
+
"model.layers.11.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
|
40 |
+
"model.layers.11.mlp.gate_proj.weight": "model-00006-of-00015.safetensors",
|
41 |
+
"model.layers.11.mlp.up_proj.weight": "model-00006-of-00015.safetensors",
|
42 |
+
"model.layers.11.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
|
43 |
+
"model.layers.11.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
|
44 |
+
"model.layers.11.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
|
45 |
+
"model.layers.11.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
|
46 |
+
"model.layers.11.self_attn.rotary_emb.inv_freq": "model-00006-of-00015.safetensors",
|
47 |
+
"model.layers.11.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
|
48 |
+
"model.layers.12.input_layernorm.weight": "model-00006-of-00015.safetensors",
|
49 |
+
"model.layers.12.mlp.down_proj.weight": "model-00006-of-00015.safetensors",
|
50 |
+
"model.layers.12.mlp.gate_proj.weight": "model-00006-of-00015.safetensors",
|
51 |
+
"model.layers.12.mlp.up_proj.weight": "model-00006-of-00015.safetensors",
|
52 |
+
"model.layers.12.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
|
53 |
+
"model.layers.12.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
|
54 |
+
"model.layers.12.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
|
55 |
+
"model.layers.12.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
|
56 |
+
"model.layers.12.self_attn.rotary_emb.inv_freq": "model-00006-of-00015.safetensors",
|
57 |
+
"model.layers.12.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
|
58 |
+
"model.layers.13.input_layernorm.weight": "model-00007-of-00015.safetensors",
|
59 |
+
"model.layers.13.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
|
60 |
+
"model.layers.13.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
|
61 |
+
"model.layers.13.mlp.up_proj.weight": "model-00007-of-00015.safetensors",
|
62 |
+
"model.layers.13.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
|
63 |
+
"model.layers.13.self_attn.k_proj.weight": "model-00006-of-00015.safetensors",
|
64 |
+
"model.layers.13.self_attn.o_proj.weight": "model-00006-of-00015.safetensors",
|
65 |
+
"model.layers.13.self_attn.q_proj.weight": "model-00006-of-00015.safetensors",
|
66 |
+
"model.layers.13.self_attn.rotary_emb.inv_freq": "model-00006-of-00015.safetensors",
|
67 |
+
"model.layers.13.self_attn.v_proj.weight": "model-00006-of-00015.safetensors",
|
68 |
+
"model.layers.14.input_layernorm.weight": "model-00007-of-00015.safetensors",
|
69 |
+
"model.layers.14.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
|
70 |
+
"model.layers.14.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
|
71 |
+
"model.layers.14.mlp.up_proj.weight": "model-00007-of-00015.safetensors",
|
72 |
+
"model.layers.14.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
|
73 |
+
"model.layers.14.self_attn.k_proj.weight": "model-00007-of-00015.safetensors",
|
74 |
+
"model.layers.14.self_attn.o_proj.weight": "model-00007-of-00015.safetensors",
|
75 |
+
"model.layers.14.self_attn.q_proj.weight": "model-00007-of-00015.safetensors",
|
76 |
+
"model.layers.14.self_attn.rotary_emb.inv_freq": "model-00007-of-00015.safetensors",
|
77 |
+
"model.layers.14.self_attn.v_proj.weight": "model-00007-of-00015.safetensors",
|
78 |
+
"model.layers.15.input_layernorm.weight": "model-00008-of-00015.safetensors",
|
79 |
+
"model.layers.15.mlp.down_proj.weight": "model-00007-of-00015.safetensors",
|
80 |
+
"model.layers.15.mlp.gate_proj.weight": "model-00007-of-00015.safetensors",
|
81 |
+
"model.layers.15.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
|
82 |
+
"model.layers.15.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
|
83 |
+
"model.layers.15.self_attn.k_proj.weight": "model-00007-of-00015.safetensors",
|
84 |
+
"model.layers.15.self_attn.o_proj.weight": "model-00007-of-00015.safetensors",
|
85 |
+
"model.layers.15.self_attn.q_proj.weight": "model-00007-of-00015.safetensors",
|
86 |
+
"model.layers.15.self_attn.rotary_emb.inv_freq": "model-00007-of-00015.safetensors",
|
87 |
+
"model.layers.15.self_attn.v_proj.weight": "model-00007-of-00015.safetensors",
|
88 |
+
"model.layers.16.input_layernorm.weight": "model-00008-of-00015.safetensors",
|
89 |
+
"model.layers.16.mlp.down_proj.weight": "model-00008-of-00015.safetensors",
|
90 |
+
"model.layers.16.mlp.gate_proj.weight": "model-00008-of-00015.safetensors",
|
91 |
+
"model.layers.16.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
|
92 |
+
"model.layers.16.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
|
93 |
+
"model.layers.16.self_attn.k_proj.weight": "model-00008-of-00015.safetensors",
|
94 |
+
"model.layers.16.self_attn.o_proj.weight": "model-00008-of-00015.safetensors",
|
95 |
+
"model.layers.16.self_attn.q_proj.weight": "model-00008-of-00015.safetensors",
|
96 |
+
"model.layers.16.self_attn.rotary_emb.inv_freq": "model-00008-of-00015.safetensors",
|
97 |
+
"model.layers.16.self_attn.v_proj.weight": "model-00008-of-00015.safetensors",
|
98 |
+
"model.layers.17.input_layernorm.weight": "model-00008-of-00015.safetensors",
|
99 |
+
"model.layers.17.mlp.down_proj.weight": "model-00008-of-00015.safetensors",
|
100 |
+
"model.layers.17.mlp.gate_proj.weight": "model-00008-of-00015.safetensors",
|
101 |
+
"model.layers.17.mlp.up_proj.weight": "model-00008-of-00015.safetensors",
|
102 |
+
"model.layers.17.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
|
103 |
+
"model.layers.17.self_attn.k_proj.weight": "model-00008-of-00015.safetensors",
|
104 |
+
"model.layers.17.self_attn.o_proj.weight": "model-00008-of-00015.safetensors",
|
105 |
+
"model.layers.17.self_attn.q_proj.weight": "model-00008-of-00015.safetensors",
|
106 |
+
"model.layers.17.self_attn.rotary_emb.inv_freq": "model-00008-of-00015.safetensors",
|
107 |
+
"model.layers.17.self_attn.v_proj.weight": "model-00008-of-00015.safetensors",
|
108 |
+
"model.layers.18.input_layernorm.weight": "model-00009-of-00015.safetensors",
|
109 |
+
"model.layers.18.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
|
110 |
+
"model.layers.18.mlp.gate_proj.weight": "model-00009-of-00015.safetensors",
|
111 |
+
"model.layers.18.mlp.up_proj.weight": "model-00009-of-00015.safetensors",
|
112 |
+
"model.layers.18.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
|
113 |
+
"model.layers.18.self_attn.k_proj.weight": "model-00008-of-00015.safetensors",
|
114 |
+
"model.layers.18.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
|
115 |
+
"model.layers.18.self_attn.q_proj.weight": "model-00008-of-00015.safetensors",
|
116 |
+
"model.layers.18.self_attn.rotary_emb.inv_freq": "model-00009-of-00015.safetensors",
|
117 |
+
"model.layers.18.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
|
118 |
+
"model.layers.19.input_layernorm.weight": "model-00009-of-00015.safetensors",
|
119 |
+
"model.layers.19.mlp.down_proj.weight": "model-00009-of-00015.safetensors",
|
120 |
+
"model.layers.19.mlp.gate_proj.weight": "model-00009-of-00015.safetensors",
|
121 |
+
"model.layers.19.mlp.up_proj.weight": "model-00009-of-00015.safetensors",
|
122 |
+
"model.layers.19.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
|
123 |
+
"model.layers.19.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
|
124 |
+
"model.layers.19.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
|
125 |
+
"model.layers.19.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
|
126 |
+
"model.layers.19.self_attn.rotary_emb.inv_freq": "model-00009-of-00015.safetensors",
|
127 |
+
"model.layers.19.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
|
128 |
+
"model.layers.2.input_layernorm.weight": "model-00002-of-00015.safetensors",
|
129 |
+
"model.layers.2.mlp.down_proj.weight": "model-00002-of-00015.safetensors",
|
130 |
+
"model.layers.2.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
|
131 |
+
"model.layers.2.mlp.up_proj.weight": "model-00002-of-00015.safetensors",
|
132 |
+
"model.layers.2.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
|
133 |
+
"model.layers.2.self_attn.k_proj.weight": "model-00002-of-00015.safetensors",
|
134 |
+
"model.layers.2.self_attn.o_proj.weight": "model-00002-of-00015.safetensors",
|
135 |
+
"model.layers.2.self_attn.q_proj.weight": "model-00002-of-00015.safetensors",
|
136 |
+
"model.layers.2.self_attn.rotary_emb.inv_freq": "model-00002-of-00015.safetensors",
|
137 |
+
"model.layers.2.self_attn.v_proj.weight": "model-00002-of-00015.safetensors",
|
138 |
+
"model.layers.20.input_layernorm.weight": "model-00010-of-00015.safetensors",
|
139 |
+
"model.layers.20.mlp.down_proj.weight": "model-00010-of-00015.safetensors",
|
140 |
+
"model.layers.20.mlp.gate_proj.weight": "model-00009-of-00015.safetensors",
|
141 |
+
"model.layers.20.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
|
142 |
+
"model.layers.20.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
|
143 |
+
"model.layers.20.self_attn.k_proj.weight": "model-00009-of-00015.safetensors",
|
144 |
+
"model.layers.20.self_attn.o_proj.weight": "model-00009-of-00015.safetensors",
|
145 |
+
"model.layers.20.self_attn.q_proj.weight": "model-00009-of-00015.safetensors",
|
146 |
+
"model.layers.20.self_attn.rotary_emb.inv_freq": "model-00009-of-00015.safetensors",
|
147 |
+
"model.layers.20.self_attn.v_proj.weight": "model-00009-of-00015.safetensors",
|
148 |
+
"model.layers.21.input_layernorm.weight": "model-00010-of-00015.safetensors",
|
149 |
+
"model.layers.21.mlp.down_proj.weight": "model-00010-of-00015.safetensors",
|
150 |
+
"model.layers.21.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
|
151 |
+
"model.layers.21.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
|
152 |
+
"model.layers.21.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
|
153 |
+
"model.layers.21.self_attn.k_proj.weight": "model-00010-of-00015.safetensors",
|
154 |
+
"model.layers.21.self_attn.o_proj.weight": "model-00010-of-00015.safetensors",
|
155 |
+
"model.layers.21.self_attn.q_proj.weight": "model-00010-of-00015.safetensors",
|
156 |
+
"model.layers.21.self_attn.rotary_emb.inv_freq": "model-00010-of-00015.safetensors",
|
157 |
+
"model.layers.21.self_attn.v_proj.weight": "model-00010-of-00015.safetensors",
|
158 |
+
"model.layers.22.input_layernorm.weight": "model-00010-of-00015.safetensors",
|
159 |
+
"model.layers.22.mlp.down_proj.weight": "model-00010-of-00015.safetensors",
|
160 |
+
"model.layers.22.mlp.gate_proj.weight": "model-00010-of-00015.safetensors",
|
161 |
+
"model.layers.22.mlp.up_proj.weight": "model-00010-of-00015.safetensors",
|
162 |
+
"model.layers.22.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
|
163 |
+
"model.layers.22.self_attn.k_proj.weight": "model-00010-of-00015.safetensors",
|
164 |
+
"model.layers.22.self_attn.o_proj.weight": "model-00010-of-00015.safetensors",
|
165 |
+
"model.layers.22.self_attn.q_proj.weight": "model-00010-of-00015.safetensors",
|
166 |
+
"model.layers.22.self_attn.rotary_emb.inv_freq": "model-00010-of-00015.safetensors",
|
167 |
+
"model.layers.22.self_attn.v_proj.weight": "model-00010-of-00015.safetensors",
|
168 |
+
"model.layers.23.input_layernorm.weight": "model-00011-of-00015.safetensors",
|
169 |
+
"model.layers.23.mlp.down_proj.weight": "model-00011-of-00015.safetensors",
|
170 |
+
"model.layers.23.mlp.gate_proj.weight": "model-00011-of-00015.safetensors",
|
171 |
+
"model.layers.23.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
|
172 |
+
"model.layers.23.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
|
173 |
+
"model.layers.23.self_attn.k_proj.weight": "model-00011-of-00015.safetensors",
|
174 |
+
"model.layers.23.self_attn.o_proj.weight": "model-00011-of-00015.safetensors",
|
175 |
+
"model.layers.23.self_attn.q_proj.weight": "model-00011-of-00015.safetensors",
|
176 |
+
"model.layers.23.self_attn.rotary_emb.inv_freq": "model-00011-of-00015.safetensors",
|
177 |
+
"model.layers.23.self_attn.v_proj.weight": "model-00011-of-00015.safetensors",
|
178 |
+
"model.layers.24.input_layernorm.weight": "model-00011-of-00015.safetensors",
|
179 |
+
"model.layers.24.mlp.down_proj.weight": "model-00011-of-00015.safetensors",
|
180 |
+
"model.layers.24.mlp.gate_proj.weight": "model-00011-of-00015.safetensors",
|
181 |
+
"model.layers.24.mlp.up_proj.weight": "model-00011-of-00015.safetensors",
|
182 |
+
"model.layers.24.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
|
183 |
+
"model.layers.24.self_attn.k_proj.weight": "model-00011-of-00015.safetensors",
|
184 |
+
"model.layers.24.self_attn.o_proj.weight": "model-00011-of-00015.safetensors",
|
185 |
+
"model.layers.24.self_attn.q_proj.weight": "model-00011-of-00015.safetensors",
|
186 |
+
"model.layers.24.self_attn.rotary_emb.inv_freq": "model-00011-of-00015.safetensors",
|
187 |
+
"model.layers.24.self_attn.v_proj.weight": "model-00011-of-00015.safetensors",
|
188 |
+
"model.layers.25.input_layernorm.weight": "model-00012-of-00015.safetensors",
|
189 |
+
"model.layers.25.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
|
190 |
+
"model.layers.25.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
|
191 |
+
"model.layers.25.mlp.up_proj.weight": "model-00012-of-00015.safetensors",
|
192 |
+
"model.layers.25.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
|
193 |
+
"model.layers.25.self_attn.k_proj.weight": "model-00011-of-00015.safetensors",
|
194 |
+
"model.layers.25.self_attn.o_proj.weight": "model-00011-of-00015.safetensors",
|
195 |
+
"model.layers.25.self_attn.q_proj.weight": "model-00011-of-00015.safetensors",
|
196 |
+
"model.layers.25.self_attn.rotary_emb.inv_freq": "model-00011-of-00015.safetensors",
|
197 |
+
"model.layers.25.self_attn.v_proj.weight": "model-00011-of-00015.safetensors",
|
198 |
+
"model.layers.26.input_layernorm.weight": "model-00012-of-00015.safetensors",
|
199 |
+
"model.layers.26.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
|
200 |
+
"model.layers.26.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
|
201 |
+
"model.layers.26.mlp.up_proj.weight": "model-00012-of-00015.safetensors",
|
202 |
+
"model.layers.26.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
|
203 |
+
"model.layers.26.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
|
204 |
+
"model.layers.26.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
|
205 |
+
"model.layers.26.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
|
206 |
+
"model.layers.26.self_attn.rotary_emb.inv_freq": "model-00012-of-00015.safetensors",
|
207 |
+
"model.layers.26.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
|
208 |
+
"model.layers.27.input_layernorm.weight": "model-00013-of-00015.safetensors",
|
209 |
+
"model.layers.27.mlp.down_proj.weight": "model-00012-of-00015.safetensors",
|
210 |
+
"model.layers.27.mlp.gate_proj.weight": "model-00012-of-00015.safetensors",
|
211 |
+
"model.layers.27.mlp.up_proj.weight": "model-00013-of-00015.safetensors",
|
212 |
+
"model.layers.27.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
|
213 |
+
"model.layers.27.self_attn.k_proj.weight": "model-00012-of-00015.safetensors",
|
214 |
+
"model.layers.27.self_attn.o_proj.weight": "model-00012-of-00015.safetensors",
|
215 |
+
"model.layers.27.self_attn.q_proj.weight": "model-00012-of-00015.safetensors",
|
216 |
+
"model.layers.27.self_attn.rotary_emb.inv_freq": "model-00012-of-00015.safetensors",
|
217 |
+
"model.layers.27.self_attn.v_proj.weight": "model-00012-of-00015.safetensors",
|
218 |
+
"model.layers.28.input_layernorm.weight": "model-00013-of-00015.safetensors",
|
219 |
+
"model.layers.28.mlp.down_proj.weight": "model-00013-of-00015.safetensors",
|
220 |
+
"model.layers.28.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
|
221 |
+
"model.layers.28.mlp.up_proj.weight": "model-00013-of-00015.safetensors",
|
222 |
+
"model.layers.28.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
|
223 |
+
"model.layers.28.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
|
224 |
+
"model.layers.28.self_attn.o_proj.weight": "model-00013-of-00015.safetensors",
|
225 |
+
"model.layers.28.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
|
226 |
+
"model.layers.28.self_attn.rotary_emb.inv_freq": "model-00013-of-00015.safetensors",
|
227 |
+
"model.layers.28.self_attn.v_proj.weight": "model-00013-of-00015.safetensors",
|
228 |
+
"model.layers.29.input_layernorm.weight": "model-00013-of-00015.safetensors",
|
229 |
+
"model.layers.29.mlp.down_proj.weight": "model-00013-of-00015.safetensors",
|
230 |
+
"model.layers.29.mlp.gate_proj.weight": "model-00013-of-00015.safetensors",
|
231 |
+
"model.layers.29.mlp.up_proj.weight": "model-00013-of-00015.safetensors",
|
232 |
+
"model.layers.29.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
|
233 |
+
"model.layers.29.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
|
234 |
+
"model.layers.29.self_attn.o_proj.weight": "model-00013-of-00015.safetensors",
|
235 |
+
"model.layers.29.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
|
236 |
+
"model.layers.29.self_attn.rotary_emb.inv_freq": "model-00013-of-00015.safetensors",
|
237 |
+
"model.layers.29.self_attn.v_proj.weight": "model-00013-of-00015.safetensors",
|
238 |
+
"model.layers.3.input_layernorm.weight": "model-00003-of-00015.safetensors",
|
239 |
+
"model.layers.3.mlp.down_proj.weight": "model-00002-of-00015.safetensors",
|
240 |
+
"model.layers.3.mlp.gate_proj.weight": "model-00002-of-00015.safetensors",
|
241 |
+
"model.layers.3.mlp.up_proj.weight": "model-00003-of-00015.safetensors",
|
242 |
+
"model.layers.3.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
|
243 |
+
"model.layers.3.self_attn.k_proj.weight": "model-00002-of-00015.safetensors",
|
244 |
+
"model.layers.3.self_attn.o_proj.weight": "model-00002-of-00015.safetensors",
|
245 |
+
"model.layers.3.self_attn.q_proj.weight": "model-00002-of-00015.safetensors",
|
246 |
+
"model.layers.3.self_attn.rotary_emb.inv_freq": "model-00002-of-00015.safetensors",
|
247 |
+
"model.layers.3.self_attn.v_proj.weight": "model-00002-of-00015.safetensors",
|
248 |
+
"model.layers.30.input_layernorm.weight": "model-00014-of-00015.safetensors",
|
249 |
+
"model.layers.30.mlp.down_proj.weight": "model-00014-of-00015.safetensors",
|
250 |
+
"model.layers.30.mlp.gate_proj.weight": "model-00014-of-00015.safetensors",
|
251 |
+
"model.layers.30.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
|
252 |
+
"model.layers.30.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
|
253 |
+
"model.layers.30.self_attn.k_proj.weight": "model-00013-of-00015.safetensors",
|
254 |
+
"model.layers.30.self_attn.o_proj.weight": "model-00014-of-00015.safetensors",
|
255 |
+
"model.layers.30.self_attn.q_proj.weight": "model-00013-of-00015.safetensors",
|
256 |
+
"model.layers.30.self_attn.rotary_emb.inv_freq": "model-00014-of-00015.safetensors",
|
257 |
+
"model.layers.30.self_attn.v_proj.weight": "model-00014-of-00015.safetensors",
|
258 |
+
"model.layers.31.input_layernorm.weight": "model-00014-of-00015.safetensors",
|
259 |
+
"model.layers.31.mlp.down_proj.weight": "model-00014-of-00015.safetensors",
|
260 |
+
"model.layers.31.mlp.gate_proj.weight": "model-00014-of-00015.safetensors",
|
261 |
+
"model.layers.31.mlp.up_proj.weight": "model-00014-of-00015.safetensors",
|
262 |
+
"model.layers.31.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
|
263 |
+
"model.layers.31.self_attn.k_proj.weight": "model-00014-of-00015.safetensors",
|
264 |
+
"model.layers.31.self_attn.o_proj.weight": "model-00014-of-00015.safetensors",
|
265 |
+
"model.layers.31.self_attn.q_proj.weight": "model-00014-of-00015.safetensors",
|
266 |
+
"model.layers.31.self_attn.rotary_emb.inv_freq": "model-00014-of-00015.safetensors",
|
267 |
+
"model.layers.31.self_attn.v_proj.weight": "model-00014-of-00015.safetensors",
|
268 |
+
"model.layers.4.input_layernorm.weight": "model-00003-of-00015.safetensors",
|
269 |
+
"model.layers.4.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
|
270 |
+
"model.layers.4.mlp.gate_proj.weight": "model-00003-of-00015.safetensors",
|
271 |
+
"model.layers.4.mlp.up_proj.weight": "model-00003-of-00015.safetensors",
|
272 |
+
"model.layers.4.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
|
273 |
+
"model.layers.4.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
|
274 |
+
"model.layers.4.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
|
275 |
+
"model.layers.4.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
|
276 |
+
"model.layers.4.self_attn.rotary_emb.inv_freq": "model-00003-of-00015.safetensors",
|
277 |
+
"model.layers.4.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
|
278 |
+
"model.layers.5.input_layernorm.weight": "model-00003-of-00015.safetensors",
|
279 |
+
"model.layers.5.mlp.down_proj.weight": "model-00003-of-00015.safetensors",
|
280 |
+
"model.layers.5.mlp.gate_proj.weight": "model-00003-of-00015.safetensors",
|
281 |
+
"model.layers.5.mlp.up_proj.weight": "model-00003-of-00015.safetensors",
|
282 |
+
"model.layers.5.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
|
283 |
+
"model.layers.5.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
|
284 |
+
"model.layers.5.self_attn.o_proj.weight": "model-00003-of-00015.safetensors",
|
285 |
+
"model.layers.5.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
|
286 |
+
"model.layers.5.self_attn.rotary_emb.inv_freq": "model-00003-of-00015.safetensors",
|
287 |
+
"model.layers.5.self_attn.v_proj.weight": "model-00003-of-00015.safetensors",
|
288 |
+
"model.layers.6.input_layernorm.weight": "model-00004-of-00015.safetensors",
|
289 |
+
"model.layers.6.mlp.down_proj.weight": "model-00004-of-00015.safetensors",
|
290 |
+
"model.layers.6.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
|
291 |
+
"model.layers.6.mlp.up_proj.weight": "model-00004-of-00015.safetensors",
|
292 |
+
"model.layers.6.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
|
293 |
+
"model.layers.6.self_attn.k_proj.weight": "model-00003-of-00015.safetensors",
|
294 |
+
"model.layers.6.self_attn.o_proj.weight": "model-00004-of-00015.safetensors",
|
295 |
+
"model.layers.6.self_attn.q_proj.weight": "model-00003-of-00015.safetensors",
|
296 |
+
"model.layers.6.self_attn.rotary_emb.inv_freq": "model-00004-of-00015.safetensors",
|
297 |
+
"model.layers.6.self_attn.v_proj.weight": "model-00004-of-00015.safetensors",
|
298 |
+
"model.layers.7.input_layernorm.weight": "model-00004-of-00015.safetensors",
|
299 |
+
"model.layers.7.mlp.down_proj.weight": "model-00004-of-00015.safetensors",
|
300 |
+
"model.layers.7.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
|
301 |
+
"model.layers.7.mlp.up_proj.weight": "model-00004-of-00015.safetensors",
|
302 |
+
"model.layers.7.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
|
303 |
+
"model.layers.7.self_attn.k_proj.weight": "model-00004-of-00015.safetensors",
|
304 |
+
"model.layers.7.self_attn.o_proj.weight": "model-00004-of-00015.safetensors",
|
305 |
+
"model.layers.7.self_attn.q_proj.weight": "model-00004-of-00015.safetensors",
|
306 |
+
"model.layers.7.self_attn.rotary_emb.inv_freq": "model-00004-of-00015.safetensors",
|
307 |
+
"model.layers.7.self_attn.v_proj.weight": "model-00004-of-00015.safetensors",
|
308 |
+
"model.layers.8.input_layernorm.weight": "model-00005-of-00015.safetensors",
|
309 |
+
"model.layers.8.mlp.down_proj.weight": "model-00005-of-00015.safetensors",
|
310 |
+
"model.layers.8.mlp.gate_proj.weight": "model-00004-of-00015.safetensors",
|
311 |
+
"model.layers.8.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
|
312 |
+
"model.layers.8.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
|
313 |
+
"model.layers.8.self_attn.k_proj.weight": "model-00004-of-00015.safetensors",
|
314 |
+
"model.layers.8.self_attn.o_proj.weight": "model-00004-of-00015.safetensors",
|
315 |
+
"model.layers.8.self_attn.q_proj.weight": "model-00004-of-00015.safetensors",
|
316 |
+
"model.layers.8.self_attn.rotary_emb.inv_freq": "model-00004-of-00015.safetensors",
|
317 |
+
"model.layers.8.self_attn.v_proj.weight": "model-00004-of-00015.safetensors",
|
318 |
+
"model.layers.9.input_layernorm.weight": "model-00005-of-00015.safetensors",
|
319 |
+
"model.layers.9.mlp.down_proj.weight": "model-00005-of-00015.safetensors",
|
320 |
+
"model.layers.9.mlp.gate_proj.weight": "model-00005-of-00015.safetensors",
|
321 |
+
"model.layers.9.mlp.up_proj.weight": "model-00005-of-00015.safetensors",
|
322 |
+
"model.layers.9.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
|
323 |
+
"model.layers.9.self_attn.k_proj.weight": "model-00005-of-00015.safetensors",
|
324 |
+
"model.layers.9.self_attn.o_proj.weight": "model-00005-of-00015.safetensors",
|
325 |
+
"model.layers.9.self_attn.q_proj.weight": "model-00005-of-00015.safetensors",
|
326 |
+
"model.layers.9.self_attn.rotary_emb.inv_freq": "model-00005-of-00015.safetensors",
|
327 |
+
"model.layers.9.self_attn.v_proj.weight": "model-00005-of-00015.safetensors",
|
328 |
+
"model.norm.weight": "model-00014-of-00015.safetensors"
|
329 |
+
}
|
330 |
+
}
|
pytorch_model-00001-of-00015.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f0af62f384154d4e1c1c8aee834acdd00708e15264a1f07e56ce99822063687e
|
3 |
+
size 918575087
|
pytorch_model-00002-of-00015.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf45c96a59d280c62aa5a20897b06880feab737d6823d359169752c68abd3b53
|
3 |
+
size 989896539
|
pytorch_model-00003-of-00015.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e22feb1ea86cb4d9d4f8b5778d3b5eaf1b1d4ca3f6a4d5ac0e462f713f483040
|
3 |
+
size 966845201
|
pytorch_model-00004-of-00015.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:37d23d9d306bb236885bac296af2ea8d4373d660fd987ef9f47df027b8d5c912
|
3 |
+
size 966828735
|
pytorch_model-00005-of-00015.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6c5b72d89af911f376e885d5e0ffa850220e81860db1497a6d174341fc904c1f
|
3 |
+
size 989913535
|
pytorch_model-00006-of-00015.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3a5c9f3a9b85f37257d05cbcd3d3f0334b44eb8c70cf54dbc4f67a58b4b61a89
|
3 |
+
size 943760401
|
pytorch_model-00007-of-00015.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9c9e5032a493764b0044ab2a66a76e51e34f8ae0c73699a23cdd4548d6d11329
|
3 |
+
size 989896539
|
pytorch_model-00008-of-00015.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e5fbe84ade25b11afd7f1282ff5ceab357dc693d177aac59bfd5e6842beca5d4
|
3 |
+
size 966845201
|
pytorch_model-00009-of-00015.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f720fa710126cc21e1da1c39dc6437ac78f96ceedad4e8ba1da1738387ba85c0
|
3 |
+
size 966828799
|
pytorch_model-00010-of-00015.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ebeaa24b28bc22e843aa2c086c8ecb3120c595e4a1a63825f47c53f2bf8d8ced
|
3 |
+
size 989913535
|
pytorch_model-00011-of-00015.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:951565ee409ae885830287f8b06e07d0b3553853513dbdb9c9196cfc9ddd6aa6
|
3 |
+
size 943760401
|
pytorch_model-00012-of-00015.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48a7fb3485138521ea17a243ac2b8878e7d614f7da05610acc17b7345a1e0728
|
3 |
+
size 989896539
|
pytorch_model-00013-of-00015.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0800e4946b434fe0c4f477c1b7bea47fbd2d482c9e97238af6150c6350238a96
|
3 |
+
size 966845201
|
pytorch_model-00014-of-00015.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:de56542a3464c1f988120524f5af891440b7696c84a3246855e9ec84119a94c0
|
3 |
+
size 742439845
|
pytorch_model-00015-of-00015.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d05bef079707f958c8c2689f4d8d945a9e58d5555761df3cb6b0ea21943cf517
|
3 |
+
size 379585450
|
pytorch_model.bin.index.json
ADDED
@@ -0,0 +1,330 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"metadata": {
|
3 |
+
"total_size": 13711720448
|
4 |
+
},
|
5 |
+
"weight_map": {
|
6 |
+
"lm_head.weight": "pytorch_model-00015-of-00015.bin",
|
7 |
+
"model.embed_tokens.weight": "pytorch_model-00001-of-00015.bin",
|
8 |
+
"model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00015.bin",
|
9 |
+
"model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00015.bin",
|
10 |
+
"model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00015.bin",
|
11 |
+
"model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00015.bin",
|
12 |
+
"model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00015.bin",
|
13 |
+
"model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00015.bin",
|
14 |
+
"model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00015.bin",
|
15 |
+
"model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00015.bin",
|
16 |
+
"model.layers.0.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00015.bin",
|
17 |
+
"model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00015.bin",
|
18 |
+
"model.layers.1.input_layernorm.weight": "pytorch_model-00002-of-00015.bin",
|
19 |
+
"model.layers.1.mlp.down_proj.weight": "pytorch_model-00002-of-00015.bin",
|
20 |
+
"model.layers.1.mlp.gate_proj.weight": "pytorch_model-00002-of-00015.bin",
|
21 |
+
"model.layers.1.mlp.up_proj.weight": "pytorch_model-00002-of-00015.bin",
|
22 |
+
"model.layers.1.post_attention_layernorm.weight": "pytorch_model-00002-of-00015.bin",
|
23 |
+
"model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00015.bin",
|
24 |
+
"model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00015.bin",
|
25 |
+
"model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00015.bin",
|
26 |
+
"model.layers.1.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00015.bin",
|
27 |
+
"model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00015.bin",
|
28 |
+
"model.layers.10.input_layernorm.weight": "pytorch_model-00005-of-00015.bin",
|
29 |
+
"model.layers.10.mlp.down_proj.weight": "pytorch_model-00005-of-00015.bin",
|
30 |
+
"model.layers.10.mlp.gate_proj.weight": "pytorch_model-00005-of-00015.bin",
|
31 |
+
"model.layers.10.mlp.up_proj.weight": "pytorch_model-00005-of-00015.bin",
|
32 |
+
"model.layers.10.post_attention_layernorm.weight": "pytorch_model-00005-of-00015.bin",
|
33 |
+
"model.layers.10.self_attn.k_proj.weight": "pytorch_model-00005-of-00015.bin",
|
34 |
+
"model.layers.10.self_attn.o_proj.weight": "pytorch_model-00005-of-00015.bin",
|
35 |
+
"model.layers.10.self_attn.q_proj.weight": "pytorch_model-00005-of-00015.bin",
|
36 |
+
"model.layers.10.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00015.bin",
|
37 |
+
"model.layers.10.self_attn.v_proj.weight": "pytorch_model-00005-of-00015.bin",
|
38 |
+
"model.layers.11.input_layernorm.weight": "pytorch_model-00006-of-00015.bin",
|
39 |
+
"model.layers.11.mlp.down_proj.weight": "pytorch_model-00006-of-00015.bin",
|
40 |
+
"model.layers.11.mlp.gate_proj.weight": "pytorch_model-00006-of-00015.bin",
|
41 |
+
"model.layers.11.mlp.up_proj.weight": "pytorch_model-00006-of-00015.bin",
|
42 |
+
"model.layers.11.post_attention_layernorm.weight": "pytorch_model-00006-of-00015.bin",
|
43 |
+
"model.layers.11.self_attn.k_proj.weight": "pytorch_model-00006-of-00015.bin",
|
44 |
+
"model.layers.11.self_attn.o_proj.weight": "pytorch_model-00006-of-00015.bin",
|
45 |
+
"model.layers.11.self_attn.q_proj.weight": "pytorch_model-00006-of-00015.bin",
|
46 |
+
"model.layers.11.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00015.bin",
|
47 |
+
"model.layers.11.self_attn.v_proj.weight": "pytorch_model-00006-of-00015.bin",
|
48 |
+
"model.layers.12.input_layernorm.weight": "pytorch_model-00006-of-00015.bin",
|
49 |
+
"model.layers.12.mlp.down_proj.weight": "pytorch_model-00006-of-00015.bin",
|
50 |
+
"model.layers.12.mlp.gate_proj.weight": "pytorch_model-00006-of-00015.bin",
|
51 |
+
"model.layers.12.mlp.up_proj.weight": "pytorch_model-00006-of-00015.bin",
|
52 |
+
"model.layers.12.post_attention_layernorm.weight": "pytorch_model-00006-of-00015.bin",
|
53 |
+
"model.layers.12.self_attn.k_proj.weight": "pytorch_model-00006-of-00015.bin",
|
54 |
+
"model.layers.12.self_attn.o_proj.weight": "pytorch_model-00006-of-00015.bin",
|
55 |
+
"model.layers.12.self_attn.q_proj.weight": "pytorch_model-00006-of-00015.bin",
|
56 |
+
"model.layers.12.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00015.bin",
|
57 |
+
"model.layers.12.self_attn.v_proj.weight": "pytorch_model-00006-of-00015.bin",
|
58 |
+
"model.layers.13.input_layernorm.weight": "pytorch_model-00007-of-00015.bin",
|
59 |
+
"model.layers.13.mlp.down_proj.weight": "pytorch_model-00007-of-00015.bin",
|
60 |
+
"model.layers.13.mlp.gate_proj.weight": "pytorch_model-00007-of-00015.bin",
|
61 |
+
"model.layers.13.mlp.up_proj.weight": "pytorch_model-00007-of-00015.bin",
|
62 |
+
"model.layers.13.post_attention_layernorm.weight": "pytorch_model-00007-of-00015.bin",
|
63 |
+
"model.layers.13.self_attn.k_proj.weight": "pytorch_model-00006-of-00015.bin",
|
64 |
+
"model.layers.13.self_attn.o_proj.weight": "pytorch_model-00006-of-00015.bin",
|
65 |
+
"model.layers.13.self_attn.q_proj.weight": "pytorch_model-00006-of-00015.bin",
|
66 |
+
"model.layers.13.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00015.bin",
|
67 |
+
"model.layers.13.self_attn.v_proj.weight": "pytorch_model-00006-of-00015.bin",
|
68 |
+
"model.layers.14.input_layernorm.weight": "pytorch_model-00007-of-00015.bin",
|
69 |
+
"model.layers.14.mlp.down_proj.weight": "pytorch_model-00007-of-00015.bin",
|
70 |
+
"model.layers.14.mlp.gate_proj.weight": "pytorch_model-00007-of-00015.bin",
|
71 |
+
"model.layers.14.mlp.up_proj.weight": "pytorch_model-00007-of-00015.bin",
|
72 |
+
"model.layers.14.post_attention_layernorm.weight": "pytorch_model-00007-of-00015.bin",
|
73 |
+
"model.layers.14.self_attn.k_proj.weight": "pytorch_model-00007-of-00015.bin",
|
74 |
+
"model.layers.14.self_attn.o_proj.weight": "pytorch_model-00007-of-00015.bin",
|
75 |
+
"model.layers.14.self_attn.q_proj.weight": "pytorch_model-00007-of-00015.bin",
|
76 |
+
"model.layers.14.self_attn.rotary_emb.inv_freq": "pytorch_model-00007-of-00015.bin",
|
77 |
+
"model.layers.14.self_attn.v_proj.weight": "pytorch_model-00007-of-00015.bin",
|
78 |
+
"model.layers.15.input_layernorm.weight": "pytorch_model-00008-of-00015.bin",
|
79 |
+
"model.layers.15.mlp.down_proj.weight": "pytorch_model-00007-of-00015.bin",
|
80 |
+
"model.layers.15.mlp.gate_proj.weight": "pytorch_model-00007-of-00015.bin",
|
81 |
+
"model.layers.15.mlp.up_proj.weight": "pytorch_model-00008-of-00015.bin",
|
82 |
+
"model.layers.15.post_attention_layernorm.weight": "pytorch_model-00008-of-00015.bin",
|
83 |
+
"model.layers.15.self_attn.k_proj.weight": "pytorch_model-00007-of-00015.bin",
|
84 |
+
"model.layers.15.self_attn.o_proj.weight": "pytorch_model-00007-of-00015.bin",
|
85 |
+
"model.layers.15.self_attn.q_proj.weight": "pytorch_model-00007-of-00015.bin",
|
86 |
+
"model.layers.15.self_attn.rotary_emb.inv_freq": "pytorch_model-00007-of-00015.bin",
|
87 |
+
"model.layers.15.self_attn.v_proj.weight": "pytorch_model-00007-of-00015.bin",
|
88 |
+
"model.layers.16.input_layernorm.weight": "pytorch_model-00008-of-00015.bin",
|
89 |
+
"model.layers.16.mlp.down_proj.weight": "pytorch_model-00008-of-00015.bin",
|
90 |
+
"model.layers.16.mlp.gate_proj.weight": "pytorch_model-00008-of-00015.bin",
|
91 |
+
"model.layers.16.mlp.up_proj.weight": "pytorch_model-00008-of-00015.bin",
|
92 |
+
"model.layers.16.post_attention_layernorm.weight": "pytorch_model-00008-of-00015.bin",
|
93 |
+
"model.layers.16.self_attn.k_proj.weight": "pytorch_model-00008-of-00015.bin",
|
94 |
+
"model.layers.16.self_attn.o_proj.weight": "pytorch_model-00008-of-00015.bin",
|
95 |
+
"model.layers.16.self_attn.q_proj.weight": "pytorch_model-00008-of-00015.bin",
|
96 |
+
"model.layers.16.self_attn.rotary_emb.inv_freq": "pytorch_model-00008-of-00015.bin",
|
97 |
+
"model.layers.16.self_attn.v_proj.weight": "pytorch_model-00008-of-00015.bin",
|
98 |
+
"model.layers.17.input_layernorm.weight": "pytorch_model-00008-of-00015.bin",
|
99 |
+
"model.layers.17.mlp.down_proj.weight": "pytorch_model-00008-of-00015.bin",
|
100 |
+
"model.layers.17.mlp.gate_proj.weight": "pytorch_model-00008-of-00015.bin",
|
101 |
+
"model.layers.17.mlp.up_proj.weight": "pytorch_model-00008-of-00015.bin",
|
102 |
+
"model.layers.17.post_attention_layernorm.weight": "pytorch_model-00008-of-00015.bin",
|
103 |
+
"model.layers.17.self_attn.k_proj.weight": "pytorch_model-00008-of-00015.bin",
|
104 |
+
"model.layers.17.self_attn.o_proj.weight": "pytorch_model-00008-of-00015.bin",
|
105 |
+
"model.layers.17.self_attn.q_proj.weight": "pytorch_model-00008-of-00015.bin",
|
106 |
+
"model.layers.17.self_attn.rotary_emb.inv_freq": "pytorch_model-00008-of-00015.bin",
|
107 |
+
"model.layers.17.self_attn.v_proj.weight": "pytorch_model-00008-of-00015.bin",
|
108 |
+
"model.layers.18.input_layernorm.weight": "pytorch_model-00009-of-00015.bin",
|
109 |
+
"model.layers.18.mlp.down_proj.weight": "pytorch_model-00009-of-00015.bin",
|
110 |
+
"model.layers.18.mlp.gate_proj.weight": "pytorch_model-00009-of-00015.bin",
|
111 |
+
"model.layers.18.mlp.up_proj.weight": "pytorch_model-00009-of-00015.bin",
|
112 |
+
"model.layers.18.post_attention_layernorm.weight": "pytorch_model-00009-of-00015.bin",
|
113 |
+
"model.layers.18.self_attn.k_proj.weight": "pytorch_model-00008-of-00015.bin",
|
114 |
+
"model.layers.18.self_attn.o_proj.weight": "pytorch_model-00009-of-00015.bin",
|
115 |
+
"model.layers.18.self_attn.q_proj.weight": "pytorch_model-00008-of-00015.bin",
|
116 |
+
"model.layers.18.self_attn.rotary_emb.inv_freq": "pytorch_model-00009-of-00015.bin",
|
117 |
+
"model.layers.18.self_attn.v_proj.weight": "pytorch_model-00009-of-00015.bin",
|
118 |
+
"model.layers.19.input_layernorm.weight": "pytorch_model-00009-of-00015.bin",
|
119 |
+
"model.layers.19.mlp.down_proj.weight": "pytorch_model-00009-of-00015.bin",
|
120 |
+
"model.layers.19.mlp.gate_proj.weight": "pytorch_model-00009-of-00015.bin",
|
121 |
+
"model.layers.19.mlp.up_proj.weight": "pytorch_model-00009-of-00015.bin",
|
122 |
+
"model.layers.19.post_attention_layernorm.weight": "pytorch_model-00009-of-00015.bin",
|
123 |
+
"model.layers.19.self_attn.k_proj.weight": "pytorch_model-00009-of-00015.bin",
|
124 |
+
"model.layers.19.self_attn.o_proj.weight": "pytorch_model-00009-of-00015.bin",
|
125 |
+
"model.layers.19.self_attn.q_proj.weight": "pytorch_model-00009-of-00015.bin",
|
126 |
+
"model.layers.19.self_attn.rotary_emb.inv_freq": "pytorch_model-00009-of-00015.bin",
|
127 |
+
"model.layers.19.self_attn.v_proj.weight": "pytorch_model-00009-of-00015.bin",
|
128 |
+
"model.layers.2.input_layernorm.weight": "pytorch_model-00002-of-00015.bin",
|
129 |
+
"model.layers.2.mlp.down_proj.weight": "pytorch_model-00002-of-00015.bin",
|
130 |
+
"model.layers.2.mlp.gate_proj.weight": "pytorch_model-00002-of-00015.bin",
|
131 |
+
"model.layers.2.mlp.up_proj.weight": "pytorch_model-00002-of-00015.bin",
|
132 |
+
"model.layers.2.post_attention_layernorm.weight": "pytorch_model-00002-of-00015.bin",
|
133 |
+
"model.layers.2.self_attn.k_proj.weight": "pytorch_model-00002-of-00015.bin",
|
134 |
+
"model.layers.2.self_attn.o_proj.weight": "pytorch_model-00002-of-00015.bin",
|
135 |
+
"model.layers.2.self_attn.q_proj.weight": "pytorch_model-00002-of-00015.bin",
|
136 |
+
"model.layers.2.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00015.bin",
|
137 |
+
"model.layers.2.self_attn.v_proj.weight": "pytorch_model-00002-of-00015.bin",
|
138 |
+
"model.layers.20.input_layernorm.weight": "pytorch_model-00010-of-00015.bin",
|
139 |
+
"model.layers.20.mlp.down_proj.weight": "pytorch_model-00010-of-00015.bin",
|
140 |
+
"model.layers.20.mlp.gate_proj.weight": "pytorch_model-00009-of-00015.bin",
|
141 |
+
"model.layers.20.mlp.up_proj.weight": "pytorch_model-00010-of-00015.bin",
|
142 |
+
"model.layers.20.post_attention_layernorm.weight": "pytorch_model-00010-of-00015.bin",
|
143 |
+
"model.layers.20.self_attn.k_proj.weight": "pytorch_model-00009-of-00015.bin",
|
144 |
+
"model.layers.20.self_attn.o_proj.weight": "pytorch_model-00009-of-00015.bin",
|
145 |
+
"model.layers.20.self_attn.q_proj.weight": "pytorch_model-00009-of-00015.bin",
|
146 |
+
"model.layers.20.self_attn.rotary_emb.inv_freq": "pytorch_model-00009-of-00015.bin",
|
147 |
+
"model.layers.20.self_attn.v_proj.weight": "pytorch_model-00009-of-00015.bin",
|
148 |
+
"model.layers.21.input_layernorm.weight": "pytorch_model-00010-of-00015.bin",
|
149 |
+
"model.layers.21.mlp.down_proj.weight": "pytorch_model-00010-of-00015.bin",
|
150 |
+
"model.layers.21.mlp.gate_proj.weight": "pytorch_model-00010-of-00015.bin",
|
151 |
+
"model.layers.21.mlp.up_proj.weight": "pytorch_model-00010-of-00015.bin",
|
152 |
+
"model.layers.21.post_attention_layernorm.weight": "pytorch_model-00010-of-00015.bin",
|
153 |
+
"model.layers.21.self_attn.k_proj.weight": "pytorch_model-00010-of-00015.bin",
|
154 |
+
"model.layers.21.self_attn.o_proj.weight": "pytorch_model-00010-of-00015.bin",
|
155 |
+
"model.layers.21.self_attn.q_proj.weight": "pytorch_model-00010-of-00015.bin",
|
156 |
+
"model.layers.21.self_attn.rotary_emb.inv_freq": "pytorch_model-00010-of-00015.bin",
|
157 |
+
"model.layers.21.self_attn.v_proj.weight": "pytorch_model-00010-of-00015.bin",
|
158 |
+
"model.layers.22.input_layernorm.weight": "pytorch_model-00010-of-00015.bin",
|
159 |
+
"model.layers.22.mlp.down_proj.weight": "pytorch_model-00010-of-00015.bin",
|
160 |
+
"model.layers.22.mlp.gate_proj.weight": "pytorch_model-00010-of-00015.bin",
|
161 |
+
"model.layers.22.mlp.up_proj.weight": "pytorch_model-00010-of-00015.bin",
|
162 |
+
"model.layers.22.post_attention_layernorm.weight": "pytorch_model-00010-of-00015.bin",
|
163 |
+
"model.layers.22.self_attn.k_proj.weight": "pytorch_model-00010-of-00015.bin",
|
164 |
+
"model.layers.22.self_attn.o_proj.weight": "pytorch_model-00010-of-00015.bin",
|
165 |
+
"model.layers.22.self_attn.q_proj.weight": "pytorch_model-00010-of-00015.bin",
|
166 |
+
"model.layers.22.self_attn.rotary_emb.inv_freq": "pytorch_model-00010-of-00015.bin",
|
167 |
+
"model.layers.22.self_attn.v_proj.weight": "pytorch_model-00010-of-00015.bin",
|
168 |
+
"model.layers.23.input_layernorm.weight": "pytorch_model-00011-of-00015.bin",
|
169 |
+
"model.layers.23.mlp.down_proj.weight": "pytorch_model-00011-of-00015.bin",
|
170 |
+
"model.layers.23.mlp.gate_proj.weight": "pytorch_model-00011-of-00015.bin",
|
171 |
+
"model.layers.23.mlp.up_proj.weight": "pytorch_model-00011-of-00015.bin",
|
172 |
+
"model.layers.23.post_attention_layernorm.weight": "pytorch_model-00011-of-00015.bin",
|
173 |
+
"model.layers.23.self_attn.k_proj.weight": "pytorch_model-00011-of-00015.bin",
|
174 |
+
"model.layers.23.self_attn.o_proj.weight": "pytorch_model-00011-of-00015.bin",
|
175 |
+
"model.layers.23.self_attn.q_proj.weight": "pytorch_model-00011-of-00015.bin",
|
176 |
+
"model.layers.23.self_attn.rotary_emb.inv_freq": "pytorch_model-00011-of-00015.bin",
|
177 |
+
"model.layers.23.self_attn.v_proj.weight": "pytorch_model-00011-of-00015.bin",
|
178 |
+
"model.layers.24.input_layernorm.weight": "pytorch_model-00011-of-00015.bin",
|
179 |
+
"model.layers.24.mlp.down_proj.weight": "pytorch_model-00011-of-00015.bin",
|
180 |
+
"model.layers.24.mlp.gate_proj.weight": "pytorch_model-00011-of-00015.bin",
|
181 |
+
"model.layers.24.mlp.up_proj.weight": "pytorch_model-00011-of-00015.bin",
|
182 |
+
"model.layers.24.post_attention_layernorm.weight": "pytorch_model-00011-of-00015.bin",
|
183 |
+
"model.layers.24.self_attn.k_proj.weight": "pytorch_model-00011-of-00015.bin",
|
184 |
+
"model.layers.24.self_attn.o_proj.weight": "pytorch_model-00011-of-00015.bin",
|
185 |
+
"model.layers.24.self_attn.q_proj.weight": "pytorch_model-00011-of-00015.bin",
|
186 |
+
"model.layers.24.self_attn.rotary_emb.inv_freq": "pytorch_model-00011-of-00015.bin",
|
187 |
+
"model.layers.24.self_attn.v_proj.weight": "pytorch_model-00011-of-00015.bin",
|
188 |
+
"model.layers.25.input_layernorm.weight": "pytorch_model-00012-of-00015.bin",
|
189 |
+
"model.layers.25.mlp.down_proj.weight": "pytorch_model-00012-of-00015.bin",
|
190 |
+
"model.layers.25.mlp.gate_proj.weight": "pytorch_model-00012-of-00015.bin",
|
191 |
+
"model.layers.25.mlp.up_proj.weight": "pytorch_model-00012-of-00015.bin",
|
192 |
+
"model.layers.25.post_attention_layernorm.weight": "pytorch_model-00012-of-00015.bin",
|
193 |
+
"model.layers.25.self_attn.k_proj.weight": "pytorch_model-00011-of-00015.bin",
|
194 |
+
"model.layers.25.self_attn.o_proj.weight": "pytorch_model-00011-of-00015.bin",
|
195 |
+
"model.layers.25.self_attn.q_proj.weight": "pytorch_model-00011-of-00015.bin",
|
196 |
+
"model.layers.25.self_attn.rotary_emb.inv_freq": "pytorch_model-00011-of-00015.bin",
|
197 |
+
"model.layers.25.self_attn.v_proj.weight": "pytorch_model-00011-of-00015.bin",
|
198 |
+
"model.layers.26.input_layernorm.weight": "pytorch_model-00012-of-00015.bin",
|
199 |
+
"model.layers.26.mlp.down_proj.weight": "pytorch_model-00012-of-00015.bin",
|
200 |
+
"model.layers.26.mlp.gate_proj.weight": "pytorch_model-00012-of-00015.bin",
|
201 |
+
"model.layers.26.mlp.up_proj.weight": "pytorch_model-00012-of-00015.bin",
|
202 |
+
"model.layers.26.post_attention_layernorm.weight": "pytorch_model-00012-of-00015.bin",
|
203 |
+
"model.layers.26.self_attn.k_proj.weight": "pytorch_model-00012-of-00015.bin",
|
204 |
+
"model.layers.26.self_attn.o_proj.weight": "pytorch_model-00012-of-00015.bin",
|
205 |
+
"model.layers.26.self_attn.q_proj.weight": "pytorch_model-00012-of-00015.bin",
|
206 |
+
"model.layers.26.self_attn.rotary_emb.inv_freq": "pytorch_model-00012-of-00015.bin",
|
207 |
+
"model.layers.26.self_attn.v_proj.weight": "pytorch_model-00012-of-00015.bin",
|
208 |
+
"model.layers.27.input_layernorm.weight": "pytorch_model-00013-of-00015.bin",
|
209 |
+
"model.layers.27.mlp.down_proj.weight": "pytorch_model-00012-of-00015.bin",
|
210 |
+
"model.layers.27.mlp.gate_proj.weight": "pytorch_model-00012-of-00015.bin",
|
211 |
+
"model.layers.27.mlp.up_proj.weight": "pytorch_model-00013-of-00015.bin",
|
212 |
+
"model.layers.27.post_attention_layernorm.weight": "pytorch_model-00013-of-00015.bin",
|
213 |
+
"model.layers.27.self_attn.k_proj.weight": "pytorch_model-00012-of-00015.bin",
|
214 |
+
"model.layers.27.self_attn.o_proj.weight": "pytorch_model-00012-of-00015.bin",
|
215 |
+
"model.layers.27.self_attn.q_proj.weight": "pytorch_model-00012-of-00015.bin",
|
216 |
+
"model.layers.27.self_attn.rotary_emb.inv_freq": "pytorch_model-00012-of-00015.bin",
|
217 |
+
"model.layers.27.self_attn.v_proj.weight": "pytorch_model-00012-of-00015.bin",
|
218 |
+
"model.layers.28.input_layernorm.weight": "pytorch_model-00013-of-00015.bin",
|
219 |
+
"model.layers.28.mlp.down_proj.weight": "pytorch_model-00013-of-00015.bin",
|
220 |
+
"model.layers.28.mlp.gate_proj.weight": "pytorch_model-00013-of-00015.bin",
|
221 |
+
"model.layers.28.mlp.up_proj.weight": "pytorch_model-00013-of-00015.bin",
|
222 |
+
"model.layers.28.post_attention_layernorm.weight": "pytorch_model-00013-of-00015.bin",
|
223 |
+
"model.layers.28.self_attn.k_proj.weight": "pytorch_model-00013-of-00015.bin",
|
224 |
+
"model.layers.28.self_attn.o_proj.weight": "pytorch_model-00013-of-00015.bin",
|
225 |
+
"model.layers.28.self_attn.q_proj.weight": "pytorch_model-00013-of-00015.bin",
|
226 |
+
"model.layers.28.self_attn.rotary_emb.inv_freq": "pytorch_model-00013-of-00015.bin",
|
227 |
+
"model.layers.28.self_attn.v_proj.weight": "pytorch_model-00013-of-00015.bin",
|
228 |
+
"model.layers.29.input_layernorm.weight": "pytorch_model-00013-of-00015.bin",
|
229 |
+
"model.layers.29.mlp.down_proj.weight": "pytorch_model-00013-of-00015.bin",
|
230 |
+
"model.layers.29.mlp.gate_proj.weight": "pytorch_model-00013-of-00015.bin",
|
231 |
+
"model.layers.29.mlp.up_proj.weight": "pytorch_model-00013-of-00015.bin",
|
232 |
+
"model.layers.29.post_attention_layernorm.weight": "pytorch_model-00013-of-00015.bin",
|
233 |
+
"model.layers.29.self_attn.k_proj.weight": "pytorch_model-00013-of-00015.bin",
|
234 |
+
"model.layers.29.self_attn.o_proj.weight": "pytorch_model-00013-of-00015.bin",
|
235 |
+
"model.layers.29.self_attn.q_proj.weight": "pytorch_model-00013-of-00015.bin",
|
236 |
+
"model.layers.29.self_attn.rotary_emb.inv_freq": "pytorch_model-00013-of-00015.bin",
|
237 |
+
"model.layers.29.self_attn.v_proj.weight": "pytorch_model-00013-of-00015.bin",
|
238 |
+
"model.layers.3.input_layernorm.weight": "pytorch_model-00003-of-00015.bin",
|
239 |
+
"model.layers.3.mlp.down_proj.weight": "pytorch_model-00002-of-00015.bin",
|
240 |
+
"model.layers.3.mlp.gate_proj.weight": "pytorch_model-00002-of-00015.bin",
|
241 |
+
"model.layers.3.mlp.up_proj.weight": "pytorch_model-00003-of-00015.bin",
|
242 |
+
"model.layers.3.post_attention_layernorm.weight": "pytorch_model-00003-of-00015.bin",
|
243 |
+
"model.layers.3.self_attn.k_proj.weight": "pytorch_model-00002-of-00015.bin",
|
244 |
+
"model.layers.3.self_attn.o_proj.weight": "pytorch_model-00002-of-00015.bin",
|
245 |
+
"model.layers.3.self_attn.q_proj.weight": "pytorch_model-00002-of-00015.bin",
|
246 |
+
"model.layers.3.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00015.bin",
|
247 |
+
"model.layers.3.self_attn.v_proj.weight": "pytorch_model-00002-of-00015.bin",
|
248 |
+
"model.layers.30.input_layernorm.weight": "pytorch_model-00014-of-00015.bin",
|
249 |
+
"model.layers.30.mlp.down_proj.weight": "pytorch_model-00014-of-00015.bin",
|
250 |
+
"model.layers.30.mlp.gate_proj.weight": "pytorch_model-00014-of-00015.bin",
|
251 |
+
"model.layers.30.mlp.up_proj.weight": "pytorch_model-00014-of-00015.bin",
|
252 |
+
"model.layers.30.post_attention_layernorm.weight": "pytorch_model-00014-of-00015.bin",
|
253 |
+
"model.layers.30.self_attn.k_proj.weight": "pytorch_model-00013-of-00015.bin",
|
254 |
+
"model.layers.30.self_attn.o_proj.weight": "pytorch_model-00014-of-00015.bin",
|
255 |
+
"model.layers.30.self_attn.q_proj.weight": "pytorch_model-00013-of-00015.bin",
|
256 |
+
"model.layers.30.self_attn.rotary_emb.inv_freq": "pytorch_model-00014-of-00015.bin",
|
257 |
+
"model.layers.30.self_attn.v_proj.weight": "pytorch_model-00014-of-00015.bin",
|
258 |
+
"model.layers.31.input_layernorm.weight": "pytorch_model-00014-of-00015.bin",
|
259 |
+
"model.layers.31.mlp.down_proj.weight": "pytorch_model-00014-of-00015.bin",
|
260 |
+
"model.layers.31.mlp.gate_proj.weight": "pytorch_model-00014-of-00015.bin",
|
261 |
+
"model.layers.31.mlp.up_proj.weight": "pytorch_model-00014-of-00015.bin",
|
262 |
+
"model.layers.31.post_attention_layernorm.weight": "pytorch_model-00014-of-00015.bin",
|
263 |
+
"model.layers.31.self_attn.k_proj.weight": "pytorch_model-00014-of-00015.bin",
|
264 |
+
"model.layers.31.self_attn.o_proj.weight": "pytorch_model-00014-of-00015.bin",
|
265 |
+
"model.layers.31.self_attn.q_proj.weight": "pytorch_model-00014-of-00015.bin",
|
266 |
+
"model.layers.31.self_attn.rotary_emb.inv_freq": "pytorch_model-00014-of-00015.bin",
|
267 |
+
"model.layers.31.self_attn.v_proj.weight": "pytorch_model-00014-of-00015.bin",
|
268 |
+
"model.layers.4.input_layernorm.weight": "pytorch_model-00003-of-00015.bin",
|
269 |
+
"model.layers.4.mlp.down_proj.weight": "pytorch_model-00003-of-00015.bin",
|
270 |
+
"model.layers.4.mlp.gate_proj.weight": "pytorch_model-00003-of-00015.bin",
|
271 |
+
"model.layers.4.mlp.up_proj.weight": "pytorch_model-00003-of-00015.bin",
|
272 |
+
"model.layers.4.post_attention_layernorm.weight": "pytorch_model-00003-of-00015.bin",
|
273 |
+
"model.layers.4.self_attn.k_proj.weight": "pytorch_model-00003-of-00015.bin",
|
274 |
+
"model.layers.4.self_attn.o_proj.weight": "pytorch_model-00003-of-00015.bin",
|
275 |
+
"model.layers.4.self_attn.q_proj.weight": "pytorch_model-00003-of-00015.bin",
|
276 |
+
"model.layers.4.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00015.bin",
|
277 |
+
"model.layers.4.self_attn.v_proj.weight": "pytorch_model-00003-of-00015.bin",
|
278 |
+
"model.layers.5.input_layernorm.weight": "pytorch_model-00003-of-00015.bin",
|
279 |
+
"model.layers.5.mlp.down_proj.weight": "pytorch_model-00003-of-00015.bin",
|
280 |
+
"model.layers.5.mlp.gate_proj.weight": "pytorch_model-00003-of-00015.bin",
|
281 |
+
"model.layers.5.mlp.up_proj.weight": "pytorch_model-00003-of-00015.bin",
|
282 |
+
"model.layers.5.post_attention_layernorm.weight": "pytorch_model-00003-of-00015.bin",
|
283 |
+
"model.layers.5.self_attn.k_proj.weight": "pytorch_model-00003-of-00015.bin",
|
284 |
+
"model.layers.5.self_attn.o_proj.weight": "pytorch_model-00003-of-00015.bin",
|
285 |
+
"model.layers.5.self_attn.q_proj.weight": "pytorch_model-00003-of-00015.bin",
|
286 |
+
"model.layers.5.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00015.bin",
|
287 |
+
"model.layers.5.self_attn.v_proj.weight": "pytorch_model-00003-of-00015.bin",
|
288 |
+
"model.layers.6.input_layernorm.weight": "pytorch_model-00004-of-00015.bin",
|
289 |
+
"model.layers.6.mlp.down_proj.weight": "pytorch_model-00004-of-00015.bin",
|
290 |
+
"model.layers.6.mlp.gate_proj.weight": "pytorch_model-00004-of-00015.bin",
|
291 |
+
"model.layers.6.mlp.up_proj.weight": "pytorch_model-00004-of-00015.bin",
|
292 |
+
"model.layers.6.post_attention_layernorm.weight": "pytorch_model-00004-of-00015.bin",
|
293 |
+
"model.layers.6.self_attn.k_proj.weight": "pytorch_model-00003-of-00015.bin",
|
294 |
+
"model.layers.6.self_attn.o_proj.weight": "pytorch_model-00004-of-00015.bin",
|
295 |
+
"model.layers.6.self_attn.q_proj.weight": "pytorch_model-00003-of-00015.bin",
|
296 |
+
"model.layers.6.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00015.bin",
|
297 |
+
"model.layers.6.self_attn.v_proj.weight": "pytorch_model-00004-of-00015.bin",
|
298 |
+
"model.layers.7.input_layernorm.weight": "pytorch_model-00004-of-00015.bin",
|
299 |
+
"model.layers.7.mlp.down_proj.weight": "pytorch_model-00004-of-00015.bin",
|
300 |
+
"model.layers.7.mlp.gate_proj.weight": "pytorch_model-00004-of-00015.bin",
|
301 |
+
"model.layers.7.mlp.up_proj.weight": "pytorch_model-00004-of-00015.bin",
|
302 |
+
"model.layers.7.post_attention_layernorm.weight": "pytorch_model-00004-of-00015.bin",
|
303 |
+
"model.layers.7.self_attn.k_proj.weight": "pytorch_model-00004-of-00015.bin",
|
304 |
+
"model.layers.7.self_attn.o_proj.weight": "pytorch_model-00004-of-00015.bin",
|
305 |
+
"model.layers.7.self_attn.q_proj.weight": "pytorch_model-00004-of-00015.bin",
|
306 |
+
"model.layers.7.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00015.bin",
|
307 |
+
"model.layers.7.self_attn.v_proj.weight": "pytorch_model-00004-of-00015.bin",
|
308 |
+
"model.layers.8.input_layernorm.weight": "pytorch_model-00005-of-00015.bin",
|
309 |
+
"model.layers.8.mlp.down_proj.weight": "pytorch_model-00005-of-00015.bin",
|
310 |
+
"model.layers.8.mlp.gate_proj.weight": "pytorch_model-00004-of-00015.bin",
|
311 |
+
"model.layers.8.mlp.up_proj.weight": "pytorch_model-00005-of-00015.bin",
|
312 |
+
"model.layers.8.post_attention_layernorm.weight": "pytorch_model-00005-of-00015.bin",
|
313 |
+
"model.layers.8.self_attn.k_proj.weight": "pytorch_model-00004-of-00015.bin",
|
314 |
+
"model.layers.8.self_attn.o_proj.weight": "pytorch_model-00004-of-00015.bin",
|
315 |
+
"model.layers.8.self_attn.q_proj.weight": "pytorch_model-00004-of-00015.bin",
|
316 |
+
"model.layers.8.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00015.bin",
|
317 |
+
"model.layers.8.self_attn.v_proj.weight": "pytorch_model-00004-of-00015.bin",
|
318 |
+
"model.layers.9.input_layernorm.weight": "pytorch_model-00005-of-00015.bin",
|
319 |
+
"model.layers.9.mlp.down_proj.weight": "pytorch_model-00005-of-00015.bin",
|
320 |
+
"model.layers.9.mlp.gate_proj.weight": "pytorch_model-00005-of-00015.bin",
|
321 |
+
"model.layers.9.mlp.up_proj.weight": "pytorch_model-00005-of-00015.bin",
|
322 |
+
"model.layers.9.post_attention_layernorm.weight": "pytorch_model-00005-of-00015.bin",
|
323 |
+
"model.layers.9.self_attn.k_proj.weight": "pytorch_model-00005-of-00015.bin",
|
324 |
+
"model.layers.9.self_attn.o_proj.weight": "pytorch_model-00005-of-00015.bin",
|
325 |
+
"model.layers.9.self_attn.q_proj.weight": "pytorch_model-00005-of-00015.bin",
|
326 |
+
"model.layers.9.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00015.bin",
|
327 |
+
"model.layers.9.self_attn.v_proj.weight": "pytorch_model-00005-of-00015.bin",
|
328 |
+
"model.norm.weight": "pytorch_model-00014-of-00015.bin"
|
329 |
+
}
|
330 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"content": "<s>",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"eos_token": {
|
10 |
+
"content": "</s>",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"unk_token": {
|
17 |
+
"content": "<unk>",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
}
|
23 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": {
|
3 |
+
"__type": "AddedToken",
|
4 |
+
"content": "<s>",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false
|
9 |
+
},
|
10 |
+
"clean_up_tokenization_spaces": false,
|
11 |
+
"eos_token": {
|
12 |
+
"__type": "AddedToken",
|
13 |
+
"content": "</s>",
|
14 |
+
"lstrip": false,
|
15 |
+
"normalized": false,
|
16 |
+
"rstrip": false,
|
17 |
+
"single_word": false
|
18 |
+
},
|
19 |
+
"legacy": false,
|
20 |
+
"model_max_length": 1000000000000000019884624838656,
|
21 |
+
"pad_token": {
|
22 |
+
"__type": "AddedToken",
|
23 |
+
"content": "</s>",
|
24 |
+
"lstrip": false,
|
25 |
+
"normalized": false,
|
26 |
+
"rstrip": false,
|
27 |
+
"single_word": false
|
28 |
+
},
|
29 |
+
"sp_model_kwargs": {},
|
30 |
+
"tokenizer_class": "LlamaTokenizer",
|
31 |
+
"unk_token": {
|
32 |
+
"__type": "AddedToken",
|
33 |
+
"content": "<unk>",
|
34 |
+
"lstrip": false,
|
35 |
+
"normalized": false,
|
36 |
+
"rstrip": false,
|
37 |
+
"single_word": false
|
38 |
+
}
|
39 |
+
}
|