Fawaz0ibra committed (verified)
Commit 3436310 · Parent(s): 60a6571

Update chain_setup.py: replace the local LlamaCpp GGUF loader with a transformers-based HuggingFacePipeline LLM

Files changed (1): chain_setup.py (+32 -6)
chain_setup.py CHANGED
@@ -1,23 +1,49 @@
 from langchain.chains import ConversationalRetrievalChain
-from langchain.llms import LlamaCpp
+from langchain.llms import HuggingFacePipeline
 from langchain.memory import ConversationBufferMemory
+import transformers
+import torch
+import os
 
 def load_llm():
-    model_path = "qwen2.5-7b-instruct-q4_k_m.gguf"  # path to your GGUF file
-    # Adjust parameters like n_ctx as needed
-    llm = LlamaCpp(model_path=model_path, n_ctx=2048)
-    return llm
+    model_id = "redhat6/Qwen2.5-7B-Instruct-Q4_K_M-GGUF"
+    tokenizer = transformers.AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+
+    offload_folder = "offload"
+    os.makedirs(offload_folder, exist_ok=True)
+
+    model = transformers.AutoModelForCausalLM.from_pretrained(
+        model_id,
+        trust_remote_code=True,
+        device_map="auto",
+        offload_folder=offload_folder
+    )
+
+    pipe = transformers.pipeline(
+        "text-generation",
+        model=model,
+        tokenizer=tokenizer,
+        max_new_tokens=512
+    )
+    return HuggingFacePipeline(pipeline=pipe)  # wrap the raw pipeline so LangChain gets an LLM
 
 def build_conversational_chain(vectorstore):
+    """
+    Creates a ConversationalRetrievalChain using the HuggingFacePipeline-based LLM
+    and a ConversationBufferMemory for multi-turn Q&A.
+    """
     llm = load_llm()
+
     memory = ConversationBufferMemory(
         memory_key="chat_history",
         return_messages=True
     )
+
     qa_chain = ConversationalRetrievalChain.from_llm(
         llm=llm,
         retriever=vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5}),
         memory=memory,
-        verbose=True
+        verbose=True  # optional: enables debug logs
     )
+
     return qa_chain
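
A note on the new loader: the model id points at a GGUF quantization. Recent transformers releases can read GGUF checkpoints, but only when from_pretrained is given an explicit gguf_file argument; if the repo ships just a .gguf file, the plain calls in load_llm may fail to find a config. Below is a hedged sketch of the GGUF-aware variant; the filename is carried over from the previous revision's model_path and should be verified against the repo's file listing.

import transformers

# Sketch under assumptions: the repo is GGUF-only and the file keeps the
# name used by the previous revision of this script.
model_id = "redhat6/Qwen2.5-7B-Instruct-Q4_K_M-GGUF"
gguf_file = "qwen2.5-7b-instruct-q4_k_m.gguf"  # assumed filename; check the Hub listing

tokenizer = transformers.AutoTokenizer.from_pretrained(model_id, gguf_file=gguf_file)
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    gguf_file=gguf_file,
    device_map="auto",
)

Keep in mind that transformers dequantizes GGUF weights into regular torch tensors at load time, so the memory footprint is that of the unquantized model; the previous llama-cpp-python approach remains the lighter option if quantized inference is the goal.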
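
For context, a minimal end-to-end sketch of how build_conversational_chain might be exercised. The FAISS store, the embedding model, and the sample texts are illustrative assumptions, not part of this commit; any vectorstore exposing as_retriever would do.

# Hypothetical usage of chain_setup.py; the embedding model and texts
# below are assumptions for illustration only.
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

from chain_setup import build_conversational_chain

texts = [
    "Qwen2.5-7B-Instruct is an instruction-tuned language model.",
    "GGUF is a single-file format for quantized model checkpoints.",
]
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = FAISS.from_texts(texts, embeddings)

qa_chain = build_conversational_chain(vectorstore)

# ConversationBufferMemory keeps chat_history between calls, so the
# follow-up question can refer back to the first exchange.
first = qa_chain({"question": "What kind of model is Qwen2.5-7B-Instruct?"})
print(first["answer"])
second = qa_chain({"question": "And what checkpoint format does it ship in here?"})
print(second["answer"])

Because memory is attached, each call only needs the question key; the chain reads and updates chat_history through the buffer.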