jhao committed on
Commit
8b53fcf
1 Parent(s): 3dfbdbc

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +17 -0
README.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from transformers import AutoTokenizer
3
+ from auto_gptq import AutoGPTQForCausalLM
4
+ model_path = 'efficient-llm/llama-2-13b-chat-gptq'
5
+ tokenizer_path = 'meta-llama/Llama-2-7b-hf'
6
+ model = AutoGPTQForCausalLM.from_quantized(
7
+ model_path,
8
+ # inject_fused_attention=False,  # alternative: use this instead of the disable_exllama=True line below
9
+ disable_exllama=True,
10
+ device_map='auto',
11
+ revision='3bit_128g',
12
+ )
13
+ from transformers import AutoTokenizer
14
+ tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, trust_remote_code=True)
15
+ input_ids = tokenizer('How are you?', return_tensors='pt').input_ids.to('cuda')
16
+ outputs = model.generate(input_ids=input_ids, max_length=128)
17
+ print(tokenizer.decode(outputs[0]))