# BudgetBuddyPDFChat/LoadLLM.py
from langchain_community.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler

# Path to the quantized Llama 2 7B chat model in GGUF format.
model_path = 'llama-2-7b-chat.Q4_K_M.gguf'


class Loadllm:
    @staticmethod
    def load_llm():
        # Stream generated tokens to stdout as they arrive.
        callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
        # Prepare the LLM
        llm = LlamaCpp(
            model_path=model_path,
            n_gpu_layers=40,  # number of layers to offload to the GPU
            n_batch=512,      # tokens processed per batch
            n_ctx=1024,       # context window size in tokens
            f16_kv=True,      # MUST be True, otherwise you will run into problems after a couple of calls
            callback_manager=callback_manager,
            verbose=True,
        )
        return llm
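

# A minimal usage sketch, assuming the GGUF file above sits next to this
# script and llama-cpp-python is installed; the prompt is purely
# illustrative and not part of the original module.
if __name__ == '__main__':
    llm = Loadllm.load_llm()
    response = llm.invoke("Summarize my grocery spending for March in one sentence.")
    print(response)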