Spaces:
Sleeping
Sleeping
File size: 732 Bytes
68668af 01217f6 68668af 2eabd29 68668af |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
from langchain_community.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
model_path = 'llama-2-7b-chat.Q4_K_M.gguf'
class Loadllm:
    """Factory for a locally hosted Llama 2 chat model served via llama.cpp."""

    @staticmethod
    def load_llm():
        """Build and return a streaming LlamaCpp LLM instance.

        The model file is taken from the module-level ``model_path``
        ('llama-2-7b-chat.Q4_K_M.gguf'). Generated tokens are streamed
        to stdout as they are produced.

        Returns:
            LlamaCpp: a configured llama.cpp-backed LLM ready for inference.
        """
        # Stream each generated token to stdout as it arrives.
        callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
        # Prepare the LLM
        llm = LlamaCpp(
            model_path=model_path,
            n_gpu_layers=40,  # layers offloaded to the GPU
            n_batch=512,      # tokens processed per batch
            n_ctx=1024,       # context window size in tokens
            f16_kv=True,      # MUST set to True, otherwise you will run into problem after a couple of calls
            callback_manager=callback_manager,
            verbose=True,
        )
        return llm