import pathlib

import streamlit as st
from huggingface_hub import hf_hub_download
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.llms import LlamaCpp
from langchain_core.prompts import ChatPromptTemplate

@st.cache_resource()
def load_llm(repo_id, filename):
    """Download a GGUF model from the Hugging Face Hub and load it with llama.cpp."""
    models_folder = pathlib.Path("models")
    models_folder.mkdir(exist_ok=True)

    # hf_hub_download returns the local file path and skips the download
    # when the file already exists under models/.
    model_path = hf_hub_download(
        repo_id=repo_id, filename=filename, local_dir=models_folder
    )

    # LlamaCpp loads the model from the local path returned above; repo_id
    # and filename are hf_hub_download arguments, not LlamaCpp parameters,
    # so they are not passed again here.
    llm = LlamaCpp(
        model_path=model_path,
        verbose=False,
        use_mmap=True,   # memory-map the model file instead of copying it into RAM
        use_mlock=True,  # lock model pages in memory to avoid swapping
        n_threads=4,        # threads used for generation
        n_threads_batch=4,  # threads used for prompt/batch processing
        n_ctx=8000,         # context window size in tokens
    )
    print(f"{repo_id} loaded successfully. ✅")
    return llm
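
# A minimal usage sketch. The repo_id and filename below are hypothetical
# placeholders -- any GGUF checkpoint on the Hugging Face Hub should work:
#
#     llm = load_llm(
#         repo_id="bartowski/Llama-3.2-3B-Instruct-GGUF",
#         filename="Llama-3.2-3B-Instruct-Q4_K_M.gguf",
#     )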


def response_generator(llm, messages, question, retriever):
    """Answer a question with a retrieval-augmented generation (RAG) chain.

    `messages` (the chat history) is accepted for API symmetry but is not
    consulted by this single-turn chain.
    """
    system_prompt = (
        "You are an assistant for question-answering tasks. "
        "Use the following pieces of retrieved context to answer "
        "the question. If you don't know the answer, say that you "
        "don't know. Use three sentences maximum and keep the "
        "answer concise."
        "\n\n"
        "{context}"
    )

    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("user", "{input}"),
        ]
    )

    # "Stuff" every retrieved document into the prompt's {context} slot,
    # then put the retriever in front of that question-answering chain.
    question_answer_chain = create_stuff_documents_chain(llm, prompt)
    rag_chain = create_retrieval_chain(retriever, question_answer_chain)

    # The result is a dict containing "input", "context" (the retrieved
    # documents), and "answer".
    results = rag_chain.invoke({"input": question})
    return results
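
# Sketch of how a retriever could be built and passed in. The FAISS index,
# embedding model, and sample text are assumptions for illustration; any
# LangChain retriever works:
#
#     from langchain_community.embeddings import HuggingFaceEmbeddings
#     from langchain_community.vectorstores import FAISS
#
#     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
#     vector_store = FAISS.from_texts(["LlamaCpp runs GGUF models locally."], embeddings)
#     retriever = vector_store.as_retriever()
#
#     results = response_generator(llm, [], "What does LlamaCpp run?", retriever)
#     print(results["answer"])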
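
# And how it might sit inside the Streamlit chat UI, assuming the page keeps
# its history in st.session_state.messages (an assumption; the surrounding
# app may wire this differently):
#
#     if "messages" not in st.session_state:
#         st.session_state.messages = []
#
#     if question := st.chat_input("Ask a question"):
#         st.session_state.messages.append({"role": "user", "content": question})
#         results = response_generator(llm, st.session_state.messages, question, retriever)
#         answer = results["answer"]
#         st.session_state.messages.append({"role": "assistant", "content": answer})
#         with st.chat_message("assistant"):
#             st.write(answer)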