import pathlib

import streamlit as st
from huggingface_hub import hf_hub_download
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.llms import LlamaCpp
from langchain_core.prompts import ChatPromptTemplate

@st.cache_resource()
def load_llm(repo_id, filename):
    """Download a GGUF model from the Hugging Face Hub and load it with llama.cpp."""
    models_folder = pathlib.Path("models")
    models_folder.mkdir(exist_ok=True)

    # hf_hub_download returns the local file path and skips the download
    # when the file already exists under models/.
    model_path = hf_hub_download(
        repo_id=repo_id, filename=filename, local_dir=models_folder
    )

    # LlamaCpp loads the model from the local path returned above; repo_id
    # and filename are hf_hub_download arguments, not LlamaCpp parameters,
    # so they are not passed again here.
    llm = LlamaCpp(
        model_path=model_path,
        verbose=False,
        use_mmap=True,   # memory-map the model file instead of copying it into RAM
        use_mlock=True,  # lock model pages in memory to avoid swapping
        n_threads=4,        # threads used for generation
        n_threads_batch=4,  # threads used for prompt/batch processing
        n_ctx=8000,         # context window size in tokens
    )
    print(f"{repo_id} loaded successfully. ✅")
    return llm
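
# A minimal usage sketch. The repo_id and filename below are hypothetical
# placeholders -- any GGUF checkpoint on the Hugging Face Hub should work:
#
#     llm = load_llm(
#         repo_id="bartowski/Llama-3.2-3B-Instruct-GGUF",
#         filename="Llama-3.2-3B-Instruct-Q4_K_M.gguf",
#     )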


def response_generator(llm, messages, question, retriever):
    """Answer a question with a retrieval-augmented generation (RAG) chain.

    `messages` (the chat history) is accepted for API symmetry but is not
    consulted by this single-turn chain.
    """
    system_prompt = (
        "You are an assistant for question-answering tasks. "
        "Use the following pieces of retrieved context to answer "
        "the question. If you don't know the answer, say that you "
        "don't know. Use three sentences maximum and keep the "
        "answer concise."
        "\n\n"
        "{context}"
    )

    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("user", "{input}"),
        ]
    )

    # "Stuff" every retrieved document into the prompt's {context} slot,
    # then put the retriever in front of that question-answering chain.
    question_answer_chain = create_stuff_documents_chain(llm, prompt)
    rag_chain = create_retrieval_chain(retriever, question_answer_chain)

    # The result is a dict containing "input", "context" (the retrieved
    # documents), and "answer".
    results = rag_chain.invoke({"input": question})
    return results
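
# Sketch of how a retriever could be built and passed in. The FAISS index,
# embedding model, and sample text are assumptions for illustration; any
# LangChain retriever works:
#
#     from langchain_community.embeddings import HuggingFaceEmbeddings
#     from langchain_community.vectorstores import FAISS
#
#     embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
#     vector_store = FAISS.from_texts(["LlamaCpp runs GGUF models locally."], embeddings)
#     retriever = vector_store.as_retriever()
#
#     results = response_generator(llm, [], "What does LlamaCpp run?", retriever)
#     print(results["answer"])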
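
# And how it might sit inside the Streamlit chat UI, assuming the page keeps
# its history in st.session_state.messages (an assumption; the surrounding
# app may wire this differently):
#
#     if "messages" not in st.session_state:
#         st.session_state.messages = []
#
#     if question := st.chat_input("Ask a question"):
#         st.session_state.messages.append({"role": "user", "content": question})
#         results = response_generator(llm, st.session_state.messages, question, retriever)
#         answer = results["answer"]
#         st.session_state.messages.append({"role": "assistant", "content": answer})
#         with st.chat_message("assistant"):
#             st.write(answer)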