RuntimeError: Unknown layout

#8
by AkshatDogra - opened

Hi, I am getting the error:

AutoAWQ/awq/modules/linear/gemm.py", line 46, in forward
    out = awq_ext.gemm_forward_cuda(
          ^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: Unknown layout

when I am running the code:

from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer
import torch

# Reproduction script: load an AWQ-quantized checkpoint, build a prompt,
# sample a completion, and print the decoded result.
model_name_or_path = "TheBloke/Mistral-7B-OpenOrca-AWQ"


# Load the quantized model (fused layers requested, safetensors weights).
model = AutoAWQForCausalLM.from_quantized(
    model_name_or_path,
    fuse_layers=True,
    device_map="auto",
    trust_remote_code=False,
    safetensors=True,
    low_cpu_mem_usage=True,
)
# Load the tokenizer from the same checkpoint name.
# NOTE(review): device_map / low_cpu_mem_usage are passed to the tokenizer
# loader as well — presumably copied from the model call; confirm they are
# meaningful here.
tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path,
    trust_remote_code=False,
    device_map="auto",
    low_cpu_mem_usage=True,
)

# System prompt text. This is an f-string with no placeholders, and the
# trailing "# " before the closing quotes is part of the string value, not a
# comment.
system_message = f"""[INST] <<SYS>>
You are a good and accurate assistant.
# """


prompt = "Who is answer to life,universe and everything?"
# Full prompt with system / user / assistant turns delimited by
# <|im_start|> and <|im_end|> markers.
# NOTE(review): every "# " prefix inside this literal is sent to the
# tokenizer as prompt text (they look like leftover comment markers), and the
# "put's `attention_mask` ..." line looks like a pasted fragment of a warning
# message rather than intended prompt content — confirm against the script
# that was actually run.
prompt_template = f"""<|im_start|>system
# {system_message}<|im_end|>
# <|im_start|>user
# {prompt}<|im_end|>
# <|im_start|>assistant
put's `attention_mask` to obtain reliable results.
# """

print("\n\n*** Generate:")

# Tokenize the prompt and keep only the input ids, moved with .cuda().
# The attention mask returned by the tokenizer is not used.
tokens = tokenizer(prompt_template, return_tensors="pt").input_ids.cuda()
# Generate output with sampling enabled, producing at most 512 new tokens.
# Only the input ids are passed; no attention_mask is supplied.
generation_output = model.generate(
    tokens, do_sample=True, temperature=0.7, top_p=0.95, top_k=40, max_new_tokens=512
)

# Decode the first returned sequence and print it.
print("Output: ", tokenizer.decode(generation_output[0]))

Sign up or log in to comment