Fawaz0ibra committed on
Commit
9189cc1
·
verified ·
1 Parent(s): 43d659b

Update chain_setup.py

Browse files
Files changed (1) hide show
  1. chain_setup.py +7 -9
chain_setup.py CHANGED
@@ -18,15 +18,13 @@ def load_llm():
18
 
19
  # 2) Load the model with llama-cpp via LangChain’s LlamaCpp
20
  llm = LlamaCpp(
21
- model_path=model_file,
22
- # If you have a GPU that supports flash attention, set flash to True
23
- flash_attn=False,
24
- n_ctx=8192, # Large context if you have enough RAM
25
- n_batch=1024, # Adjust based on your system’s memory
26
- # Qwen typically uses ChatML (<|im_start|> / <|im_end|> tokens)
27
- # Setting chat_format='chatml' helps the model handle chat roles
28
- chat_format='chatml'
29
- )
30
 
31
  return llm
32
 
 
18
 
19
  # 2) Load the model with llama-cpp via LangChain’s LlamaCpp
20
  llm = LlamaCpp(
21
+ model_path=model_file,
22
+ flash_attn=False,
23
+ n_ctx=2048, # or 4096
24
+ n_batch=512, # or even 256
25
+ chat_format='chatml'
26
+ )
27
+
 
 
28
 
29
  return llm
30