Update to latest llama.cpp release.
The model data has not changed, but because the new convert.py
generates a slightly different file (a difference of only 1 byte)
I updated the repo here.
This way the checksums will match if someone converts the
model from scratch using the released toolset.
- .gitignore +1 -0
- Makefile +7 -8
- SHA256SUMS +6 -6
- convert.py.diff +0 -39
- open-llama-3b-f16.bin +1 -1
- open-llama-3b-q4_0.bin +1 -1
- open-llama-3b-q4_1.bin +1 -1
- open-llama-3b-q5_0.bin +1 -1
- open-llama-3b-q5_1.bin +1 -1
- open-llama-3b-q8_0.bin +1 -1
.gitignore
CHANGED
@@ -4,3 +4,4 @@ pytorch_model.bin
|
|
4 |
*.sha
|
5 |
*.tar.gz
|
6 |
tokenizer.model
|
|
|
|
4 |
*.sha
|
5 |
*.tar.gz
|
6 |
tokenizer.model
|
7 |
+
config.json
|
Makefile
CHANGED
@@ -1,12 +1,14 @@
|
|
1 |
MODEL_NAME= open-llama-3b
|
2 |
PYTHON?= python
|
3 |
-
LLAMA_TAG=
|
4 |
LLAMA_TAR= master-$(LLAMA_TAG).tar.gz
|
5 |
LLAMA_DIR= llama.cpp-master-$(LLAMA_TAG)
|
|
|
6 |
HF_REPO= openlm-research/open_llama_3b
|
7 |
HF_REF= main
|
8 |
HF_FILES= pytorch_model.bin \
|
9 |
-
tokenizer.model
|
|
|
10 |
$(HF_FILES): SITE= https://huggingface.co/$(HF_REPO)/resolve/$(HF_REF)
|
11 |
$(LLAMA_TAR): SITE= https://github.com/ggerganov/llama.cpp/archive/refs/tags
|
12 |
|
@@ -25,13 +27,10 @@ $(LLAMA_DIR): | $(LLAMA_TAR)
|
|
25 |
tar -xf $(LLAMA_TAR)
|
26 |
|
27 |
$(LLAMA_DIR)/quantize: | $(LLAMA_DIR)
|
28 |
-
$(MAKE) -C $(LLAMA_DIR)
|
29 |
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
$(MODEL_NAME)-f16.bin: $(HF_FILES) convert.py
|
34 |
-
$(PYTHON) convert.py --outtype f16 --outfile $@ .
|
35 |
|
36 |
$(MODEL_NAME)-q%.bin: $(MODEL_NAME)-f16.bin $(LLAMA_DIR)/quantize
|
37 |
$(LLAMA_DIR)/quantize $< $@ q$*
|
|
|
1 |
MODEL_NAME= open-llama-3b
|
2 |
PYTHON?= python
|
3 |
+
LLAMA_TAG= 7487137
|
4 |
LLAMA_TAR= master-$(LLAMA_TAG).tar.gz
|
5 |
LLAMA_DIR= llama.cpp-master-$(LLAMA_TAG)
|
6 |
+
LLAMA_FLAGS= LLAMA_NO_K_QUANTS=1
|
7 |
HF_REPO= openlm-research/open_llama_3b
|
8 |
HF_REF= main
|
9 |
HF_FILES= pytorch_model.bin \
|
10 |
+
tokenizer.model \
|
11 |
+
config.json
|
12 |
$(HF_FILES): SITE= https://huggingface.co/$(HF_REPO)/resolve/$(HF_REF)
|
13 |
$(LLAMA_TAR): SITE= https://github.com/ggerganov/llama.cpp/archive/refs/tags
|
14 |
|
|
|
27 |
tar -xf $(LLAMA_TAR)
|
28 |
|
29 |
$(LLAMA_DIR)/quantize: | $(LLAMA_DIR)
|
30 |
+
$(MAKE) -C $(LLAMA_DIR) $(LLAMA_FLAGS) quantize
|
31 |
|
32 |
+
$(MODEL_NAME)-f16.bin: $(HF_FILES) | $(LLAMA_DIR)
|
33 |
+
$(PYTHON) $(LLAMA_DIR)/convert.py --outtype f16 --outfile $@ .
|
|
|
|
|
|
|
34 |
|
35 |
$(MODEL_NAME)-q%.bin: $(MODEL_NAME)-f16.bin $(LLAMA_DIR)/quantize
|
36 |
$(LLAMA_DIR)/quantize $< $@ q$*
|
SHA256SUMS
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
|
|
1 |
+
126462bce62f0742227756895b192e9410700f58007a3fdec40125361004d1c9 open-llama-3b-f16.bin
|
2 |
+
57f12972d25bae770e4f38a3f1d843d84c4444b59a9f6218e2a7ed4d63d3b2f9 open-llama-3b-q4_0.bin
|
3 |
+
047967a42b305df792c189887873a87b22effe17fe05e068036a5973c7971356 open-llama-3b-q4_1.bin
|
4 |
+
46cba2efff2cb84626787e54382ffdf4c9c10235a6a37ad2e3c74ccfb0e4b545 open-llama-3b-q5_0.bin
|
5 |
+
3b59291cc0ccc772e7ebe091a9a3e80212eda0b172b86387aa96cdaa1334c32a open-llama-3b-q5_1.bin
|
6 |
+
4631f63a9ec1d0ebabf85d5d675bde51348e702296abb1d5e376301f30a56563 open-llama-3b-q8_0.bin
|
convert.py.diff
DELETED
@@ -1,39 +0,0 @@
|
|
1 |
-
--- a/convert.py 2023-05-30 20:48:07.687486627 +0300
|
2 |
-
+++ b/convert.py 2023-05-30 20:47:55.854142065 +0300
|
3 |
-
@@ -143,12 +143,22 @@
|
4 |
-
def guessed(model: 'LazyModel', file_type: GGMLFileType) -> 'Params':
|
5 |
-
n_vocab, n_embd = model["tok_embeddings.weight"].shape
|
6 |
-
|
7 |
-
+ n_mult=256
|
8 |
-
+ n_head=n_embd // 128
|
9 |
-
+ n_layer=next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model)
|
10 |
-
+
|
11 |
-
+ # TODO: hack for open_llama_3b
|
12 |
-
+ if n_embd == 3200:
|
13 |
-
+ n_mult = 216
|
14 |
-
+ n_head = 32
|
15 |
-
+ n_layer = 26
|
16 |
-
+
|
17 |
-
return Params(
|
18 |
-
n_vocab=n_vocab,
|
19 |
-
n_embd=n_embd,
|
20 |
-
- n_mult=256,
|
21 |
-
- n_head=n_embd // 128,
|
22 |
-
- n_layer=next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model),
|
23 |
-
+ n_mult=n_mult,
|
24 |
-
+ n_head=n_head,
|
25 |
-
+ n_layer=n_layer,
|
26 |
-
file_type=file_type,
|
27 |
-
)
|
28 |
-
|
29 |
-
@@ -597,7 +607,9 @@
|
30 |
-
out["norm.weight"] = model["model.norm.weight"]
|
31 |
-
out["output.weight"] = model["lm_head.weight"]
|
32 |
-
|
33 |
-
- n_head = model["model.layers.0.self_attn.q_proj.weight"].shape[1] // 128
|
34 |
-
+ # TODO: hack for open_llama_3b
|
35 |
-
+ n_embd = model["model.layers.0.self_attn.q_proj.weight"].shape[1]
|
36 |
-
+ n_head = 32 if n_embd == 3200 else n_embd // 128
|
37 |
-
for i in itertools.count():
|
38 |
-
if f"model.layers.{i}.self_attn.q_proj.weight" not in model:
|
39 |
-
break
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
open-llama-3b-f16.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6853758208
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:126462bce62f0742227756895b192e9410700f58007a3fdec40125361004d1c9
|
3 |
size 6853758208
|
open-llama-3b-q4_0.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1928446208
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57f12972d25bae770e4f38a3f1d843d84c4444b59a9f6218e2a7ed4d63d3b2f9
|
3 |
size 1928446208
|
open-llama-3b-q4_1.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2142590208
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:047967a42b305df792c189887873a87b22effe17fe05e068036a5973c7971356
|
3 |
size 2142590208
|
open-llama-3b-q5_0.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2356734208
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:46cba2efff2cb84626787e54382ffdf4c9c10235a6a37ad2e3c74ccfb0e4b545
|
3 |
size 2356734208
|
open-llama-3b-q5_1.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2570878208
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3b59291cc0ccc772e7ebe091a9a3e80212eda0b172b86387aa96cdaa1334c32a
|
3 |
size 2570878208
|
open-llama-3b-q8_0.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3641598208
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4631f63a9ec1d0ebabf85d5d675bde51348e702296abb1d5e376301f30a56563
|
3 |
size 3641598208
|