Spaces:
Paused
Paused
File size: 1,325 Bytes
74044e0 90d439d 74044e0 90d439d 74044e0 90d439d 74044e0 90d439d 74044e0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
from ctransformers import AutoModelForCausalLM, AutoTokenizer
from transformers import pipeline
import streamlit as st
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
# Simple inference example
# output = llm(
# "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", # Prompt
# max_tokens=512, # Generate up to 512 tokens
# stop=["</s>"], # Example stop token - not necessarily correct for this specific model! Please check before using.
# echo=True # Whether to echo the prompt
#)
# Chat-style prompt layout: system, user and assistant turns delimited by
# <|im_start|> / <|im_end|> markers. Placeholders: {system_message}, {prompt}.
# NOTE(review): only referenced from commented-out code in the visible part
# of this file.
prompt_format = (
    "<|im_start|>system\n{system_message}<|im_end|>\n"
    "<|im_start|>user\n{prompt}<|im_end|>\n"
    "<|im_start|>assistant"
)

# Minimal question/answer template; the single placeholder is {question}.
template = "Question: {question}\nAnswer:"
def get_llm_response(repo, filename, model_type, gpu_layers, prompt):
    """Load a model and answer *prompt* through a question/answer chain.

    The model is loaded on every call via
    ``AutoModelForCausalLM.from_pretrained``; the question is then inserted
    into the module-level ``template`` and run through an ``LLMChain``.

    Args:
        repo: Model location passed as the first argument to
            ``from_pretrained``.
        filename: Forwarded to ``from_pretrained`` as ``model_file``.
        model_type: Forwarded to ``from_pretrained`` as ``model_type``.
        gpu_layers: Forwarded to ``from_pretrained`` as ``gpu_layers``.
        prompt: The user's question; substituted for ``{question}`` in
            ``template``.

    Returns:
        Whatever ``LLMChain.run`` returns for the question.
    """
    print("Loading model")
    llm = AutoModelForCausalLM.from_pretrained(
        repo,
        model_file=filename,
        model_type=model_type,
        gpu_layers=gpu_layers,
    )
    print("Model loaded")
    print(f"LLM prompt: {prompt}")

    # Keep the template in its own name: rebinding `prompt` here would discard
    # the user's question and feed the PromptTemplate object to the chain.
    prompt_template = PromptTemplate(
        template=template, input_variables=["question"]
    )
    # NOTE(review): LLMChain may require a LangChain LLM wrapper (e.g.
    # langchain's CTransformers) rather than the raw ctransformers model --
    # confirm against the installed langchain version.
    llm_chain = LLMChain(prompt=prompt_template, llm=llm)

    # The chain has a single input variable, so the question can be passed
    # positionally.
    return llm_chain.run(prompt)
|