Fawaz0ibra committed
Commit 60a6571 · verified · 1 Parent(s): 35e65b4

Update chain_setup.py

Files changed (1):
  1. chain_setup.py +7 -34
chain_setup.py CHANGED
@@ -1,50 +1,23 @@
 from langchain.chains import ConversationalRetrievalChain
-from langchain.llms import HuggingFacePipeline
+from langchain.llms import LlamaCpp
 from langchain.memory import ConversationBufferMemory
-import transformers
-import torch
-import os
 
 def load_llm():
-    model_id = "redhat6/Qwen2.5-7B-Instruct-Q4_K_M-GGUF"  # Verify the exact model ID on Hugging Face Hub
-    tokenizer = transformers.AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
-
-    # Create an offload folder if it doesn't exist
-    offload_folder = "offload"
-    os.makedirs(offload_folder, exist_ok=True)
-
-    model = transformers.AutoModelForCausalLM.from_pretrained(
-        model_id,
-        trust_remote_code=True,
-        device_map="auto",  # Use "cpu" if no GPU is available
-        offload_folder=offload_folder
-    )
-
-    pipe = transformers.pipeline(
-        "text-generation",
-        model=model,
-        tokenizer=tokenizer,
-        max_new_tokens=512  # Adjust as needed
-    )
-    return HuggingFacePipeline(pipeline=pipe)
-
+    model_path = "qwen2.5-7b-instruct-q4_k_m.gguf"  # path to your GGUF file
+    # Adjust parameters like n_ctx as needed
+    llm = LlamaCpp(model_path=model_path, n_ctx=2048)
+    return llm
+
 def build_conversational_chain(vectorstore):
-    """
-    Creates a ConversationalRetrievalChain using the HuggingFacePipeline based LLM
-    and a ConversationBufferMemory for multi-turn Q&A.
-    """
     llm = load_llm()
-
     memory = ConversationBufferMemory(
         memory_key="chat_history",
         return_messages=True
     )
-
     qa_chain = ConversationalRetrievalChain.from_llm(
         llm=llm,
         retriever=vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5}),
         memory=memory,
-        verbose=True  # optional: enables debug logs
+        verbose=True
     )
-
     return qa_chain
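
Note: the commit swaps the transformers/HuggingFacePipeline loader for LangChain's LlamaCpp wrapper (backed by llama-cpp-python), which reads the quantized GGUF file straight from disk instead of instantiating the full model with transformers and offloading weights. A minimal usage sketch of the updated module follows; the embedding model, index folder, and question are illustrative assumptions, not part of this commit:

    # Hypothetical usage of the updated chain_setup.py (legacy LangChain API).
    # "faiss_index" and the embedding model below are assumptions for this
    # sketch; only build_conversational_chain comes from this file.
    from langchain.embeddings import HuggingFaceEmbeddings
    from langchain.vectorstores import FAISS
    from chain_setup import build_conversational_chain

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vectorstore = FAISS.load_local("faiss_index", embeddings)  # index built elsewhere

    qa_chain = build_conversational_chain(vectorstore)

    # ConversationBufferMemory tracks chat_history, so repeated calls are multi-turn.
    result = qa_chain({"question": "What do the indexed documents cover?"})
    print(result["answer"])

One thing to watch: n_ctx=2048 covers the prompt plus generated tokens, which gets tight once five retrieved chunks and the chat history are stuffed into the prompt; raising n_ctx (Qwen2.5 supports much longer contexts) costs RAM but avoids truncation.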