Fawaz0ibra committed on
Commit
9189cc1
·
verified ·
1 Parent(s): 43d659b

Update chain_setup.py

Browse files
Files changed (1) hide show
  1. chain_setup.py +7 -9
chain_setup.py CHANGED
@@ -18,15 +18,13 @@ def load_llm():
18
 
19
  # 2) Load the model with llama-cpp via LangChain’s LlamaCpp
20
  llm = LlamaCpp(
21
- model_path=model_file,
22
- # If you have a GPU that supports flash attention, set flash to True
23
- flash_attn=False,
24
- n_ctx=8192, # Large context if you have enough RAM
25
- n_batch=1024, # Adjust based on your system’s memory
26
- # Qwen typically uses ChatML (<|im_start|> / <|im_end|> tokens)
27
- # Setting chat_format='chatml' helps the model handle chat roles
28
- chat_format='chatml'
29
- )
30
 
31
  return llm
32
 
 
18
 
19
  # 2) Load the model with llama-cpp via LangChain’s LlamaCpp
20
  llm = LlamaCpp(
21
+ model_path=model_file,
22
+ flash_attn=False,
23
+ n_ctx=2048, # or 4096
24
+ n_batch=512, # or even 256
25
+ chat_format='chatml'
26
+ )
27
+
 
 
28
 
29
  return llm
30