from langchain_community.llms import LlamaCpp
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler

model_path = 'llama-2-7b-chat.Q4_K_M.gguf'

class Loadllm:
    @staticmethod
    def load_llm():
        # Stream generated tokens to stdout as they are produced
        callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
        # Prepare the LLM
        llm = LlamaCpp(
            model_path=model_path,
            n_gpu_layers=40,  # number of model layers offloaded to the GPU
            n_batch=512,      # tokens processed in parallel per batch
            n_ctx=1024,       # context window size in tokens
            f16_kv=True,      # MUST be True, otherwise you will run into problems after a couple of calls
            callback_manager=callback_manager,
            verbose=True,
        )
        return llm
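

# A minimal usage sketch (an assumption, not part of the original loader):
# it calls the loader once and runs a single prompt. invoke() comes from
# LangChain's Runnable interface on LLM objects; tokens also stream to
# stdout via the StreamingStdOutCallbackHandler configured above. It
# assumes the .gguf file named in model_path is present locally and that
# the llama-cpp-python package (which LlamaCpp wraps) is installed.
if __name__ == '__main__':
    llm = Loadllm.load_llm()
    response = llm.invoke('Summarize what a quantized GGUF model is in one sentence.')
    print(response)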