Spaces:
Runtime error
Runtime error
File size: 1,175 Bytes
8a8ca58 8841f45 8a8ca58 a7fd8f6 8a8ca58 a7fd8f6 8a8ca58 ca6382b 8a8ca58 beb6978 a7fd8f6 beb6978 8a8ca58 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
import torch
from ctransformers import AutoModelForCausalLM, AutoTokenizer
from loguru import logger
import spaces
def models():
return ["openhermes-2.5-mistral-7b.Q4_K_M.gguf"]
def load():
# torch.set_default_device("cuda")
model = AutoModelForCausalLM.from_pretrained("TheBloke/OpenHermes-2.5-Mistral-7B-GGUF", model_file="openhermes-2.5-mistral-7b.Q4_K_M.gguf", model_type="mistral", gpu_layers=50)
# tokenizer = AutoTokenizer.from_pretrained(models()[0], trust_remote_code=True).to("cuda")
return (model, tokenizer)
model, tokenizer = load()
def ask(_, system_prompt, pre_prompt, question):
messages = [
{
'role': 'system',
'content': f"{system_prompt} {pre_prompt}",
},
{
'role': 'user',
'content': f"{question}",
},
]
logger.debug(f"<< openhermes << {question}")
# inputs = tokenizer(question, return_tensors="pt", return_attention_mask=False)
# outputs = model.generate(**inputs, max_length=200)
# answer = tokenizer.batch_decode(outputs)[0]
answer = model(question)
logger.debug(f">> openhermes >> {answer}")
return answer
|