Fawaz0ibra committed (verified)
Commit f49be22 · 1 Parent(s): f83b9cd

Update chain_setup.py

Files changed (1):
  chain_setup.py (+44 −26)
chain_setup.py CHANGED
@@ -1,26 +1,44 @@
- # chain_setup.py
-
- from langchain.chains import ConversationalRetrievalChain
- from langchain_community.chat_models import ChatOllama
- from langchain.memory import ConversationBufferMemory
-
- def build_conversational_chain(vectorstore):
-     """
-     Creates a ConversationalRetrievalChain with a ChatOllama LLM and
-     a ConversationBufferMemory for multi-turn Q&A.
-     """
-     llm = ChatOllama(model="qwen2.5:7b")
-
-     memory = ConversationBufferMemory(
-         memory_key="chat_history",
-         return_messages=True
-     )
-
-     qa_chain = ConversationalRetrievalChain.from_llm(
-         llm=llm,
-         retriever=vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5}),
-         memory=memory,
-         verbose=True  # optional debug logs
-     )
-
-     return qa_chain
+ from langchain.chains import ConversationalRetrievalChain
+ from langchain_community.llms import HuggingFacePipeline
+ from langchain.memory import ConversationBufferMemory
+ import transformers
+ import torch
+
+ def load_llm():
+     model_id = "Qwen/Qwen2.5-7B-Instruct"  # full Hub identifier; plain "qwen2.5-7b" does not resolve on the Hub
+     # Some Qwen releases require trust_remote_code
+     tokenizer = transformers.AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+     # Place layers on GPU when one is available (e.g. on a GPU Space); otherwise fall back to CPU
+     model = transformers.AutoModelForCausalLM.from_pretrained(
+         model_id,
+         trust_remote_code=True,
+         device_map="auto" if torch.cuda.is_available() else "cpu"
+     )
+     pipe = transformers.pipeline(
+         "text-generation",
+         model=model,
+         tokenizer=tokenizer,
+         max_new_tokens=512  # adjust based on your needs
+     )
+     return HuggingFacePipeline(pipeline=pipe)
+
+ def build_conversational_chain(vectorstore):
+     """
+     Creates a ConversationalRetrievalChain using the HuggingFacePipeline-based LLM
+     and a ConversationBufferMemory for multi-turn Q&A.
+     """
+     llm = load_llm()
+
+     memory = ConversationBufferMemory(
+         memory_key="chat_history",
+         return_messages=True
+     )
+
+     qa_chain = ConversationalRetrievalChain.from_llm(
+         llm=llm,
+         retriever=vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5}),
+         memory=memory,
+         verbose=True  # optional: enables debug logs
+     )
+
+     return qa_chain