SlyEcho committed on
Commit
795a194
·
verified ·
1 Parent(s): 1e64051

Update to latest llama.cpp release.

Browse files

The model data has not changed, but because the new convert.py
generates a slightly different file (a difference of only 1 byte)
I updated the repo here.

This is so that the checksums match if someone converts the
model from scratch using the released toolset.

.gitignore CHANGED
@@ -4,3 +4,4 @@ pytorch_model.bin
4
  *.sha
5
  *.tar.gz
6
  tokenizer.model
 
 
4
  *.sha
5
  *.tar.gz
6
  tokenizer.model
7
+ config.json
Makefile CHANGED
@@ -1,12 +1,14 @@
1
  MODEL_NAME= open-llama-3b
2
  PYTHON?= python
3
- LLAMA_TAG= cb40dfc
4
  LLAMA_TAR= master-$(LLAMA_TAG).tar.gz
5
  LLAMA_DIR= llama.cpp-master-$(LLAMA_TAG)
 
6
  HF_REPO= openlm-research/open_llama_3b
7
  HF_REF= main
8
  HF_FILES= pytorch_model.bin \
9
- tokenizer.model
 
10
  $(HF_FILES): SITE= https://huggingface.co/$(HF_REPO)/resolve/$(HF_REF)
11
  $(LLAMA_TAR): SITE= https://github.com/ggerganov/llama.cpp/archive/refs/tags
12
 
@@ -25,13 +27,10 @@ $(LLAMA_DIR): | $(LLAMA_TAR)
25
  tar -xf $(LLAMA_TAR)
26
 
27
  $(LLAMA_DIR)/quantize: | $(LLAMA_DIR)
28
- $(MAKE) -C $(LLAMA_DIR) LLAMA_NO_K_QUANTS=1 quantize
29
 
30
- convert.py: convert.py.diff | $(LLAMA_DIR)
31
- patch -ru $(LLAMA_DIR)/convert.py -i $< -o $@
32
-
33
- $(MODEL_NAME)-f16.bin: $(HF_FILES) convert.py
34
- $(PYTHON) convert.py --outtype f16 --outfile $@ .
35
 
36
  $(MODEL_NAME)-q%.bin: $(MODEL_NAME)-f16.bin $(LLAMA_DIR)/quantize
37
  $(LLAMA_DIR)/quantize $< $@ q$*
 
1
  MODEL_NAME= open-llama-3b
2
  PYTHON?= python
3
+ LLAMA_TAG= 7487137
4
  LLAMA_TAR= master-$(LLAMA_TAG).tar.gz
5
  LLAMA_DIR= llama.cpp-master-$(LLAMA_TAG)
6
+ LLAMA_FLAGS= LLAMA_NO_K_QUANTS=1
7
  HF_REPO= openlm-research/open_llama_3b
8
  HF_REF= main
9
  HF_FILES= pytorch_model.bin \
10
+ tokenizer.model \
11
+ config.json
12
  $(HF_FILES): SITE= https://huggingface.co/$(HF_REPO)/resolve/$(HF_REF)
13
  $(LLAMA_TAR): SITE= https://github.com/ggerganov/llama.cpp/archive/refs/tags
14
 
 
27
  tar -xf $(LLAMA_TAR)
28
 
29
  $(LLAMA_DIR)/quantize: | $(LLAMA_DIR)
30
+ $(MAKE) -C $(LLAMA_DIR) $(LLAMA_FLAGS) quantize
31
 
32
+ $(MODEL_NAME)-f16.bin: $(HF_FILES) | $(LLAMA_DIR)
33
+ $(PYTHON) $(LLAMA_DIR)/convert.py --outtype f16 --outfile $@ .
 
 
 
34
 
35
  $(MODEL_NAME)-q%.bin: $(MODEL_NAME)-f16.bin $(LLAMA_DIR)/quantize
36
  $(LLAMA_DIR)/quantize $< $@ q$*
SHA256SUMS CHANGED
@@ -1,6 +1,6 @@
1
- f123887011114da3a9b3ffb06b6fc695aaecf0a5503c38e1589379f7304f37f6 open-llama-3b-f16.bin
2
- ec0460668c7fa50eee4d96b8687dbb29fa42b91c634151fe9c0c53cace0ab81a open-llama-3b-q4_0.bin
3
- 15325aeae94da8886ad94ac46491cd103111e82d3be910aaaaf7c3fa0bc3c128 open-llama-3b-q4_1.bin
4
- 87d6146b47177611f8c4529fb6186a093c512bd09050e1625819e82a1946e2da open-llama-3b-q5_0.bin
5
- ce5de27bfccd02a34465a6d8a80ac8ad0baff186e600c29f6e3c6740f5a2dfd4 open-llama-3b-q5_1.bin
6
- 9bbe718478161752fcc085f4c7393a6472c0b4b003c43b681208617241884d7d open-llama-3b-q8_0.bin
 
1
+ 126462bce62f0742227756895b192e9410700f58007a3fdec40125361004d1c9 open-llama-3b-f16.bin
2
+ 57f12972d25bae770e4f38a3f1d843d84c4444b59a9f6218e2a7ed4d63d3b2f9 open-llama-3b-q4_0.bin
3
+ 047967a42b305df792c189887873a87b22effe17fe05e068036a5973c7971356 open-llama-3b-q4_1.bin
4
+ 46cba2efff2cb84626787e54382ffdf4c9c10235a6a37ad2e3c74ccfb0e4b545 open-llama-3b-q5_0.bin
5
+ 3b59291cc0ccc772e7ebe091a9a3e80212eda0b172b86387aa96cdaa1334c32a open-llama-3b-q5_1.bin
6
+ 4631f63a9ec1d0ebabf85d5d675bde51348e702296abb1d5e376301f30a56563 open-llama-3b-q8_0.bin
convert.py.diff DELETED
@@ -1,39 +0,0 @@
1
- --- a/convert.py 2023-05-30 20:48:07.687486627 +0300
2
- +++ b/convert.py 2023-05-30 20:47:55.854142065 +0300
3
- @@ -143,12 +143,22 @@
4
- def guessed(model: 'LazyModel', file_type: GGMLFileType) -> 'Params':
5
- n_vocab, n_embd = model["tok_embeddings.weight"].shape
6
-
7
- + n_mult=256
8
- + n_head=n_embd // 128
9
- + n_layer=next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model)
10
- +
11
- + # TODO: hack for open_llama_3b
12
- + if n_embd == 3200:
13
- + n_mult = 216
14
- + n_head = 32
15
- + n_layer = 26
16
- +
17
- return Params(
18
- n_vocab=n_vocab,
19
- n_embd=n_embd,
20
- - n_mult=256,
21
- - n_head=n_embd // 128,
22
- - n_layer=next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model),
23
- + n_mult=n_mult,
24
- + n_head=n_head,
25
- + n_layer=n_layer,
26
- file_type=file_type,
27
- )
28
-
29
- @@ -597,7 +607,9 @@
30
- out["norm.weight"] = model["model.norm.weight"]
31
- out["output.weight"] = model["lm_head.weight"]
32
-
33
- - n_head = model["model.layers.0.self_attn.q_proj.weight"].shape[1] // 128
34
- + # TODO: hack for open_llama_3b
35
- + n_embd = model["model.layers.0.self_attn.q_proj.weight"].shape[1]
36
- + n_head = 32 if n_embd == 3200 else n_embd // 128
37
- for i in itertools.count():
38
- if f"model.layers.{i}.self_attn.q_proj.weight" not in model:
39
- break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
open-llama-3b-f16.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f123887011114da3a9b3ffb06b6fc695aaecf0a5503c38e1589379f7304f37f6
3
  size 6853758208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:126462bce62f0742227756895b192e9410700f58007a3fdec40125361004d1c9
3
  size 6853758208
open-llama-3b-q4_0.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec0460668c7fa50eee4d96b8687dbb29fa42b91c634151fe9c0c53cace0ab81a
3
  size 1928446208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57f12972d25bae770e4f38a3f1d843d84c4444b59a9f6218e2a7ed4d63d3b2f9
3
  size 1928446208
open-llama-3b-q4_1.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15325aeae94da8886ad94ac46491cd103111e82d3be910aaaaf7c3fa0bc3c128
3
  size 2142590208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:047967a42b305df792c189887873a87b22effe17fe05e068036a5973c7971356
3
  size 2142590208
open-llama-3b-q5_0.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87d6146b47177611f8c4529fb6186a093c512bd09050e1625819e82a1946e2da
3
  size 2356734208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46cba2efff2cb84626787e54382ffdf4c9c10235a6a37ad2e3c74ccfb0e4b545
3
  size 2356734208
open-llama-3b-q5_1.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce5de27bfccd02a34465a6d8a80ac8ad0baff186e600c29f6e3c6740f5a2dfd4
3
  size 2570878208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b59291cc0ccc772e7ebe091a9a3e80212eda0b172b86387aa96cdaa1334c32a
3
  size 2570878208
open-llama-3b-q8_0.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bbe718478161752fcc085f4c7393a6472c0b4b003c43b681208617241884d7d
3
  size 3641598208
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4631f63a9ec1d0ebabf85d5d675bde51348e702296abb1d5e376301f30a56563
3
  size 3641598208