RuntimeError: Unknown layout
#8
by
AkshatDogra
- opened
Hi, I am getting the following error:
AutoAWQ/awq/modules/linear/gemm.py", line 46, in forward
out = awq_ext.gemm_forward_cuda(
^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: Unknown layout
when I am running the code:
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer
import torch
model_name_or_path = "TheBloke/Mistral-7B-OpenOrca-AWQ"
# # Load model
model = AutoAWQForCausalLM.from_quantized(
model_name_or_path,
fuse_layers=True,
device_map="auto",
trust_remote_code=False,
safetensors=True,
low_cpu_mem_usage=True,
)
tokenizer = AutoTokenizer.from_pretrained(
model_name_or_path,
trust_remote_code=False,
device_map="auto",
low_cpu_mem_usage=True,
)
system_message = f"""[INST] <<SYS>>
You are a good and accurate assistant.
# """
prompt = "Who is answer to life,universe and everything?"
prompt_template = f"""<|im_start|>system
# {system_message}<|im_end|>
# <|im_start|>user
# {prompt}<|im_end|>
# <|im_start|>assistant
put's `attention_mask` to obtain reliable results.
# """
print("\n\n*** Generate:")
tokens = tokenizer(prompt_template, return_tensors="pt").input_ids.cuda()
# Generate output
generation_output = model.generate(
tokens, do_sample=True, temperature=0.7, top_p=0.95, top_k=40, max_new_tokens=512
)
print("Output: ", tokenizer.decode(generation_output[0]))