|
Please use the following code sample to load these AWQ models: |
|
```python
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig
import torch

model_path = 'efficient-llm/vicuna-13b-v1.3-awq'

# The 'revision' argument selects the branch holding the desired quantization configuration.
config = AutoConfig.from_pretrained(model_path, revision='3bit_128g', trust_remote_code=True)

# Vicuna uses the LLaMA tokenizer, so the Llama-2 tokenizer is loaded here.
enc = AutoTokenizer.from_pretrained('meta-llama/Llama-2-7b-hf', trust_remote_code=True)

kwargs = {"torch_dtype": torch.float16, "low_cpu_mem_usage": True}
model = AutoModelForCausalLM.from_pretrained(
    model_path, config=config, trust_remote_code=True, device_map='auto',
    revision='3bit_128g', **kwargs)
model.eval()

input_ids = enc('How are you today?', return_tensors='pt').input_ids.to('cuda')
outputs = model.generate(input_ids=input_ids, max_length=128)
print(enc.decode(outputs[0]))
```
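
The revision name `3bit_128g` follows the usual AWQ naming scheme, presumably denoting 3-bit weights with a quantization group size of 128. `trust_remote_code=True` is needed because the quantized checkpoints rely on custom modeling code shipped with the repository. Note that `max_length=128` caps the combined prompt-plus-generation length; pass `max_new_tokens` instead if you want to bound only the generated tokens.

If the repository publishes other quantization branches (an assumption to verify against the repo's branch list), loading a different variant is just a matter of changing `revision` in both calls. A minimal sketch, assuming a hypothetical `4bit_128g` branch exists:

```python
# Hypothetical: assumes the repo has a '4bit_128g' branch; check the branch list first.
config = AutoConfig.from_pretrained(model_path, revision='4bit_128g', trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_path, config=config, trust_remote_code=True, device_map='auto',
    revision='4bit_128g', torch_dtype=torch.float16, low_cpu_mem_usage=True)
```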