File size: 1,325 Bytes
74044e0
 
 
90d439d
 
74044e0
 
 
 
 
 
 
 
 
 
 
90d439d
 
 
 
 
 
 
 
74044e0
 
90d439d
74044e0
 
 
 
90d439d
 
 
 
 
74044e0
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from ctransformers import AutoModelForCausalLM, AutoTokenizer
from transformers import pipeline
import streamlit as st
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Simple inference example
# output = llm(
#  "<|im_start|>system\n{system_message}<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant", # Prompt
#  max_tokens=512,  # Generate up to 512 tokens
#  stop=["</s>"],   # Example stop token - not necessarily correct for this specific model! Please check before using.
#  echo=True        # Whether to echo the prompt
#)

# ChatML-style chat layout with ``{system_message}`` and ``{prompt}`` slots.
prompt_format = (
    "<|im_start|>system\n{system_message}<|im_end|>\n"
    "<|im_start|>user\n{prompt}<|im_end|>\n"
    "<|im_start|>assistant"
)

# Question/answer prompt with a single ``{question}`` slot.
template = "Question: {question}\n\nAnswer:"




def get_llm_response(repo, filename, model_type, gpu_layers, prompt):
    """Load a model and answer ``prompt`` via the question/answer template.

    The model is loaded on every call; nothing is cached between calls.

    Args:
        repo: Model repository or path, passed as the first argument to
            ``AutoModelForCausalLM.from_pretrained``.
        filename: Forwarded as ``model_file``.
        model_type: Forwarded as ``model_type``.
        gpu_layers: Forwarded as ``gpu_layers``.
        prompt: The user's question; it fills the ``{question}`` slot of the
            module-level ``template``.

    Returns:
        The value returned by ``LLMChain.run`` for the rendered prompt.
    """
    print("Loading model")
    llm = AutoModelForCausalLM.from_pretrained(
        repo,
        model_file=filename,
        model_type=model_type,
        gpu_layers=gpu_layers,
    )
    print("Model loaded")

    print(f"LLM prompt: {prompt}")

    # Keep the template under its own name. It used to be assigned to
    # ``prompt``, which replaced the caller's question, so the chain was run
    # with the PromptTemplate object instead of the question text.
    prompt_template = PromptTemplate(template=template, input_variables=["question"])

    # NOTE(review): ``llm`` is the raw ctransformers model; LLMChain presumably
    # expects a LangChain LLM wrapper (e.g. LangChain's CTransformers
    # integration) - confirm against the installed LangChain version.
    llm_chain = LLMChain(prompt=prompt_template, llm=llm)

    # Pass the question by keyword so it is bound to the ``question`` variable.
    return llm_chain.run(question=prompt)