Spaces:
Runtime error
Runtime error
import torch | |
from ctransformers import AutoModelForCausalLM, AutoTokenizer | |
from loguru import logger | |
import spaces | |
def models(): | |
return ["openhermes-2.5-mistral-7b.Q4_K_M.gguf"] | |
def load(): | |
# torch.set_default_device("cuda") | |
model = AutoModelForCausalLM.from_pretrained("TheBloke/OpenHermes-2.5-Mistral-7B-GGUF", model_file="openhermes-2.5-mistral-7b.Q4_K_M.gguf", model_type="mistral", gpu_layers=50) | |
# tokenizer = AutoTokenizer.from_pretrained(models()[0], trust_remote_code=True).to("cuda") | |
return (model, tokenizer) | |
model, tokenizer = load() | |
def ask(_, system_prompt, pre_prompt, question): | |
messages = [ | |
{ | |
'role': 'system', | |
'content': f"{system_prompt} {pre_prompt}", | |
}, | |
{ | |
'role': 'user', | |
'content': f"{question}", | |
}, | |
] | |
logger.debug(f"<< openhermes << {question}") | |
# inputs = tokenizer(question, return_tensors="pt", return_attention_mask=False) | |
# outputs = model.generate(**inputs, max_length=200) | |
# answer = tokenizer.batch_decode(outputs)[0] | |
answer = model(question) | |
logger.debug(f">> openhermes >> {answer}") | |
return answer | |