pipeline_tag: text-generation
---

## How to use
Please download the code from [LiqunMa/FBI-LLM](https://github.com/LiqunMa/FBI-LLM) first; the snippet below imports `qat.replace_module` from that repository, so run it from the repository root.
```python
import json
from pathlib import Path

import torch
from transformers import AutoTokenizer, LlamaConfig, LlamaForCausalLM
from qat.replace_module import replace_with_learnable_binarylinear


def load_model(model_size, model_dir):
    assert model_size in ["130M", "1.3B", "7B"]

    # Build an empty LLaMA model from the FBI-LLM config for the requested size
    model_dir = Path(model_dir)
    with Path(f'FBI-LLM_configs/FBI-LLM_llama2_{model_size}.json').open('r') as r_f:
        config = json.load(r_f)
    llama_config = LlamaConfig(**config)
    model = LlamaForCausalLM(llama_config).to('cuda')
    tokenizer = AutoTokenizer.from_pretrained('meta-llama/Llama-2-7b-hf', padding_side="right", use_fast=False)

    # Replace the linear layers with learnable binary linear layers;
    # only the lm_head is kept in full precision. The FBI-LLM checkpoints
    # are fully binarized, so this replacement is always applied.
    model = replace_with_learnable_binarylinear(model, scaling_pattern="column", keep_parts=["lm_head"])

    # Merge all checkpoint shards (*.bin) into a single state dict,
    # skipping the rotary-embedding buffers
    weight_dict = {}
    ckpt_plist = [p for p in model_dir.iterdir() if p.suffix == '.bin']
    for p in ckpt_plist:
        _weight_dict = torch.load(p)
        for k, v in _weight_dict.items():
            if 'self_attn.rotary_emb.inv_freq' not in k:
                weight_dict[k] = v

    model.load_state_dict(weight_dict)

    # Cast the remaining full-precision parameters to float16
    for param in model.parameters():
        param.data = param.data.to(torch.float16)

    return model, tokenizer
```
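
Once the function is defined, loading a checkpoint and generating text follows the usual `transformers` pattern. A minimal sketch, assuming the checkpoint shards have been downloaded into a local `FBI-LLM-7B` directory (the directory name is illustrative):

```python
# "FBI-LLM-7B" is a hypothetical local directory holding the
# downloaded *.bin checkpoint shards and sits next to FBI-LLM_configs/.
model, tokenizer = load_model("7B", "FBI-LLM-7B")

inputs = tokenizer("FBI-LLM is", return_tensors="pt").to('cuda')
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```

Note that `load_model` builds the architecture from the local config and loads the merged shards directly, so only the Llama-2 tokenizer is fetched from the Hub; the model weights themselves never go through `from_pretrained`.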