from ctransformers import AutoModelForCausalLM, AutoTokenizer
from transformers import pipeline
import streamlit as st

# Simple inference example
# output = llm(
#     "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant",  # Prompt
#     max_tokens=512,  # Generate up to 512 tokens
#     stop=["</s>"],   # Example stop token - not necessarily correct for this specific model! Please check before using.
#     echo=True,       # Whether to echo the prompt
# )

# ChatML template: system message, then the user turn, then an open assistant
# turn for the model to complete.
prompt_format = "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant"
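
# For illustration only: filling the template with placeholder values (these
# are assumptions, not values from the original Space) yields a prompt like:
#   <|im_start|>system
#   You are a helpful assistant.<|im_end|>
#   <|im_start|>user
#   What is the capital of France?<|im_end|>
#   <|im_start|>assistant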

def get_llm_response(repo, filename, model_type, gpu_layers, prompt):
    """Load a GGUF model from the Hugging Face Hub with ctransformers and run one completion."""
    print("Loading model")
    model = AutoModelForCausalLM.from_pretrained(
        repo,
        model_file=filename,
        model_type=model_type,
        gpu_layers=gpu_layers,
    )
    print("Model loaded")
    # llm_prompt = prompt_format.format(system_message=system_prompt, prompt=prompt)
    print(f"LLM prompt: {prompt}")
    response = model(prompt, stop=["</s>"])
    print(f"Response: {response}")
    return response
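
# A minimal Streamlit front end wired to get_llm_response above. This is a
# sketch, not code from the original Space: the default repo, file name, and
# model_type passed below are assumed example values and should be replaced.
repo = st.text_input("Hub repo", "TheBloke/Mistral-7B-Instruct-v0.1-GGUF")  # assumed example repo
filename = st.text_input("Model file", "mistral-7b-instruct-v0.1.Q4_K_M.gguf")  # assumed example file
user_prompt = st.text_area("Prompt")

if st.button("Generate") and user_prompt:
    # Fill the ChatML template; the system message here is a placeholder.
    llm_prompt = prompt_format.format(
        system_message="You are a helpful assistant.",
        prompt=user_prompt,
    )
    with st.spinner("Running inference..."):
        # model_type="mistral" and gpu_layers=0 (CPU-only) are assumptions.
        st.write(get_llm_response(repo, filename, "mistral", 0, llm_prompt))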