File size: 857 Bytes
97fdf26 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
import re
PROMPT = "YOUR PROMPT HERE"
MAX_LENGTH = 32768 # Do not change
DEVICE = "cuda"
model_id = "agarkovv/Ministral-8B-Instruct-2410-LoRA-trading"
base_model_id = "mistralai/Ministral-8B-Instruct-2410"
model = AutoPeftModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
model = model.to(DEVICE)
model.eval()
inputs = tokenizer(
PROMPT, return_tensors="pt", padding=False, max_length=MAX_LENGTH, truncation=True
)
inputs = {key: value.to(model.device) for key, value in inputs.items()}
res = model.generate(
**inputs,
use_cache=True,
max_new_tokens=MAX_LENGTH,
)
output = tokenizer.decode(res[0], skip_special_tokens=True)
answer = re.sub(r".*\[/INST\]\s*", "", output, flags=re.DOTALL)
print(answer)
|