SlyEcho committed on
Commit
795a194
·
verified ·
1 Parent(s): 1e64051

Update to latest llama.cpp release.

Browse files

The model data has not changed, but because the new convert.py
generates a slightly different file (a difference of only 1 byte)
I updated the repo here.

This is so that the checksums match if someone converts the
model from scratch using the released toolset.

.gitignore CHANGED
@@ -4,3 +4,4 @@ pytorch_model.bin
4
  *.sha
5
  *.tar.gz
6
  tokenizer.model
 
 
4
  *.sha
5
  *.tar.gz
6
  tokenizer.model
7
+ config.json
Makefile CHANGED
@@ -1,12 +1,14 @@
1
  MODEL_NAME= open-llama-3b
2
  PYTHON?= python
3
- LLAMA_TAG= cb40dfc
4
  LLAMA_TAR= master-$(LLAMA_TAG).tar.gz
5
  LLAMA_DIR= llama.cpp-master-$(LLAMA_TAG)
 
6
  HF_REPO= openlm-research/open_llama_3b
7
  HF_REF= main
8
  HF_FILES= pytorch_model.bin \
9
- tokenizer.model
 
10
  $(HF_FILES): SITE= https://huggingface.co/$(HF_REPO)/resolve/$(HF_REF)
11
  $(LLAMA_TAR): SITE= https://github.com/ggerganov/llama.cpp/archive/refs/tags
12
 
@@ -25,13 +27,10 @@ $(LLAMA_DIR): | $(LLAMA_TAR)
25
  tar -xf $(LLAMA_TAR)
26
 
27
  $(LLAMA_DIR)/quantize: | $(LLAMA_DIR)
28
- $(MAKE) -C $(LLAMA_DIR) LLAMA_NO_K_QUANTS=1 quantize
29
 
30
- convert.py: convert.py.diff | $(LLAMA_DIR)
31
- patch -ru $(LLAMA_DIR)/convert.py -i $< -o $@
32
-
33
- $(MODEL_NAME)-f16.bin: $(HF_FILES) convert.py
34
- $(PYTHON) convert.py --outtype f16 --outfile $@ .
35
 
36
  $(MODEL_NAME)-q%.bin: $(MODEL_NAME)-f16.bin $(LLAMA_DIR)/quantize
37
  $(LLAMA_DIR)/quantize $< $@ q$*
 
1
  MODEL_NAME= open-llama-3b
2
  PYTHON?= python
3
+ LLAMA_TAG= 7487137
4
  LLAMA_TAR= master-$(LLAMA_TAG).tar.gz
5
  LLAMA_DIR= llama.cpp-master-$(LLAMA_TAG)
6
+ LLAMA_FLAGS= LLAMA_NO_K_QUANTS=1
7
  HF_REPO= openlm-research/open_llama_3b
8
  HF_REF= main
9
  HF_FILES= pytorch_model.bin \
10
+ tokenizer.model \
11
+ config.json
12
  $(HF_FILES): SITE= https://huggingface.co/$(HF_REPO)/resolve/$(HF_REF)
13
  $(LLAMA_TAR): SITE= https://github.com/ggerganov/llama.cpp/archive/refs/tags
14
 
 
27
  tar -xf $(LLAMA_TAR)
28
 
29
  $(LLAMA_DIR)/quantize: | $(LLAMA_DIR)
30
+ $(MAKE) -C $(LLAMA_DIR) $(LLAMA_FLAGS) quantize
31
 
32
+ $(MODEL_NAME)-f16.bin: $(HF_FILES) | $(LLAMA_DIR)
33
+ $(PYTHON) $(LLAMA_DIR)/convert.py --outtype f16 --outfile $@ .
 
 
 
34
 
35
  $(MODEL_NAME)-q%.bin: $(MODEL_NAME)-f16.bin $(LLAMA_DIR)/quantize
36
  $(LLAMA_DIR)/quantize $< $@ q$*
SHA256SUMS CHANGED
@@ -1,6 +1,6 @@
1
- f123887011114da3a9b3ffb06b6fc695aaecf0a5503c38e1589379f7304f37f6 open-llama-3b-f16.bin
2
- ec0460668c7fa50eee4d96b8687dbb29fa42b91c634151fe9c0c53cace0ab81a open-llama-3b-q4_0.bin
3
- 15325aeae94da8886ad94ac46491cd103111e82d3be910aaaaf7c3fa0bc3c128 open-llama-3b-q4_1.bin
4
- 87d6146b47177611f8c4529fb6186a093c512bd09050e1625819e82a1946e2da open-llama-3b-q5_0.bin
5
- ce5de27bfccd02a34465a6d8a80ac8ad0baff186e600c29f6e3c6740f5a2dfd4 open-llama-3b-q5_1.bin
6
- 9bbe718478161752fcc085f4c7393a6472c0b4b003c43b681208617241884d7d open-llama-3b-q8_0.bin
 
1
+ 126462bce62f0742227756895b192e9410700f58007a3fdec40125361004d1c9 open-llama-3b-f16.bin
2
+ 57f12972d25bae770e4f38a3f1d843d84c4444b59a9f6218e2a7ed4d63d3b2f9 open-llama-3b-q4_0.bin
3
+ 047967a42b305df792c189887873a87b22effe17fe05e068036a5973c7971356 open-llama-3b-q4_1.bin
4
+ 46cba2efff2cb84626787e54382ffdf4c9c10235a6a37ad2e3c74ccfb0e4b545 open-llama-3b-q5_0.bin
5
+ 3b59291cc0ccc772e7ebe091a9a3e80212eda0b172b86387aa96cdaa1334c32a open-llama-3b-q5_1.bin
6
+ 4631f63a9ec1d0ebabf85d5d675bde51348e702296abb1d5e376301f30a56563 open-llama-3b-q8_0.bin
convert.py.diff DELETED
@@ -1,39 +0,0 @@
1
- --- a/convert.py 2023-05-30 20:48:07.687486627 +0300
2
- +++ b/convert.py 2023-05-30 20:47:55.854142065 +0300
3
- @@ -143,12 +143,22 @@
4
- def guessed(model: 'LazyModel', file_type: GGMLFileType) -> 'Params':
5
- n_vocab, n_embd = model["tok_embeddings.weight"].shape
6
-
7
- + n_mult=256
8
- + n_head=n_embd // 128
9
- + n_layer=next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model)
10
- +
11
- + # TODO: hack for open_llama_3b
12
- + if n_embd == 3200:
13
- + n_mult = 216
14
- + n_head = 32
15
- + n_layer = 26
16
- +
17
- return Params(
18
- n_vocab=n_vocab,
19
- n_embd=n_embd,
20
- - n_mult=256,
21
- - n_head=n_embd // 128,
22
- - n_layer=next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model),
23
- + n_mult=n_mult,
24
- + n_head=n_head,
25
- + n_layer=n_layer,
26
- file_type=file_type,
27
- )
28
-
29
- @@ -597,7 +607,9 @@
30
- out["norm.weight"] = model["model.norm.weight"]
31
- out["output.weight"] = model["lm_head.weight"]
32
-
33
- - n_head = model["model.layers.0.self_attn.q_proj.weight"].shape[1] // 128
34
- + # TODO: hack for open_llama_3b
35
- + n_embd = model["model.layers.0.self_attn.q_proj.weight"].shape[1]
36
- + n_head = 32 if n_embd == 3200 else n_embd // 128
37
- for i in itertools.count():
38
- if f"model.layers.{i}.self_attn.q_proj.weight" not in model:
39
- break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
open-llama-3b-f16.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f123887011114da3a9b3ffb06b6fc695aaecf0a5503c38e1589379f7304f37f6
3
  size 6853758208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:126462bce62f0742227756895b192e9410700f58007a3fdec40125361004d1c9
3
  size 6853758208
open-llama-3b-q4_0.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec0460668c7fa50eee4d96b8687dbb29fa42b91c634151fe9c0c53cace0ab81a
3
  size 1928446208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57f12972d25bae770e4f38a3f1d843d84c4444b59a9f6218e2a7ed4d63d3b2f9
3
  size 1928446208
open-llama-3b-q4_1.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15325aeae94da8886ad94ac46491cd103111e82d3be910aaaaf7c3fa0bc3c128
3
  size 2142590208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:047967a42b305df792c189887873a87b22effe17fe05e068036a5973c7971356
3
  size 2142590208
open-llama-3b-q5_0.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87d6146b47177611f8c4529fb6186a093c512bd09050e1625819e82a1946e2da
3
  size 2356734208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46cba2efff2cb84626787e54382ffdf4c9c10235a6a37ad2e3c74ccfb0e4b545
3
  size 2356734208
open-llama-3b-q5_1.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce5de27bfccd02a34465a6d8a80ac8ad0baff186e600c29f6e3c6740f5a2dfd4
3
  size 2570878208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b59291cc0ccc772e7ebe091a9a3e80212eda0b172b86387aa96cdaa1334c32a
3
  size 2570878208
open-llama-3b-q8_0.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bbe718478161752fcc085f4c7393a6472c0b4b003c43b681208617241884d7d
3
  size 3641598208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4631f63a9ec1d0ebabf85d5d675bde51348e702296abb1d5e376301f30a56563
3
  size 3641598208