LiqunMa committed
Commit 69d5187 · verified · 1 Parent(s): 8b8d414

Update README.md

Files changed (1)
  1. README.md +35 -0
README.md CHANGED
@@ -10,3 +10,38 @@ pipeline_tag: text-generation
---

## How to use
Please download the code from [LiqunMa/FBI-LLM](https://github.com/LiqunMa/FBI-LLM) first; the loader below imports `qat.replace_module` from that repository (a sketch of making the clone importable follows).
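
A minimal sketch of making the cloned code importable, assuming the repository has been cloned into the current working directory (the clone path is an assumption; adjust it to your setup):

```python
import sys
from pathlib import Path

# Assumed location of the local clone of LiqunMa/FBI-LLM; adjust to your setup.
repo_root = Path("FBI-LLM")
sys.path.insert(0, str(repo_root.resolve()))  # lets `from qat.replace_module import ...` resolve
```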
```python
import json
from pathlib import Path

import torch
from transformers import AutoTokenizer, LlamaConfig, LlamaForCausalLM
from qat.replace_module import replace_with_learnable_binarylinear


def load_model(model_size, model_dir, exist_extra_para=True):
    assert model_size in ["130M", "1.3B", "7B"]

    # Build an uninitialized LLaMA-2 model from the FBI-LLM config for this size.
    model_dir = Path(model_dir)
    with Path(f'FBI-LLM_configs/FBI-LLM_llama2_{model_size}.json').open('r') as r_f:
        config = json.load(r_f)
    llama_config = LlamaConfig(**config)
    model = LlamaForCausalLM(llama_config).to('cuda')
    tokenizer = AutoTokenizer.from_pretrained('meta-llama/Llama-2-7b-hf', padding_side="right", use_fast=False)

    # Swap in the learnable binarized linear layers before loading weights (the LM head stays full precision).
    if exist_extra_para:
        model = replace_with_learnable_binarylinear(model, scaling_pattern="column", keep_parts=["lm_head"])

    # Merge all checkpoint shards (*.bin) in model_dir into a single state dict.
    weight_dict = {}
    ckpt_plist = [p for p in model_dir.iterdir() if p.suffix == '.bin']
    for p in ckpt_plist:
        _weight_dict = torch.load(p)
        for k, v in _weight_dict.items():
            if 'self_attn.rotary_emb.inv_freq' not in k:
                weight_dict[k] = v

    model.load_state_dict(weight_dict)
    # Cast parameters to fp16 for inference.
    for param in model.parameters():
        param.data = param.data.to(torch.float16)

    return model, tokenizer
```
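
A minimal usage sketch, assuming the checkpoint `.bin` shards for the chosen size have been downloaded locally (the directory path and prompt below are illustrative):

```python
import torch

# Illustrative size and local path; point model_dir at the directory holding the *.bin shards.
model, tokenizer = load_model("1.3B", "checkpoints/FBI-LLM_1.3B")

prompt = "Fully binarized large language models are"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

with torch.no_grad():
    output_ids = model.generate(**inputs, max_new_tokens=64, do_sample=False)

print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```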