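"""Hugging Face Space backend: loads TheBloke's OpenHermes-2.5-Mistral-7B
GGUF quantization with ctransformers and exposes ask() for chat-style answers."""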
import torch
from ctransformers import AutoModelForCausalLM, AutoTokenizer
from loguru import logger
import spaces


def models():
    return ["openhermes-2.5-mistral-7b.Q4_K_M.gguf"]


def load():
    # torch.set_default_device("cuda")
    model = AutoModelForCausalLM.from_pretrained(
        "TheBloke/OpenHermes-2.5-Mistral-7B-GGUF",
        model_file="openhermes-2.5-mistral-7b.Q4_K_M.gguf",
        model_type="mistral",
        gpu_layers=50,  # offload up to 50 layers to the GPU
    )
    # The separate HF tokenizer is disabled; ctransformers tokenizes GGUF
    # models internally, so None is returned in its place to keep the
    # (model, tokenizer) unpacking below working.
    # tokenizer = AutoTokenizer.from_pretrained(models()[0], trust_remote_code=True).to("cuda")
    return (model, None)


model, tokenizer = load()


def ask(_, system_prompt, pre_prompt, question):
    # OpenHermes-2.5 is trained on the ChatML format, so the system and user
    # turns are rendered as a ChatML prompt instead of being collected into an
    # unused messages list and passing only the raw question to the model.
    prompt = (
        f"<|im_start|>system\n{system_prompt} {pre_prompt}<|im_end|>\n"
        f"<|im_start|>user\n{question}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )
    logger.debug(f"<< openhermes << {question}")
    answer = model(prompt, stop=["<|im_end|>"])
    logger.debug(f">> openhermes >> {answer}")
    return answer
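

# Minimal local smoke test (assumed usage: in the Space this module is
# imported by a UI handler, which supplies the ignored first argument).
if __name__ == "__main__":
    print(ask(None, "You are a helpful assistant.", "Answer briefly.", "What is a GGUF file?"))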