Fawaz0ibra committed (verified)
Commit f49be22 · 1 Parent(s): f83b9cd

Update chain_setup.py

Files changed (1):
  chain_setup.py (+44 −26)
chain_setup.py CHANGED
@@ -1,26 +1,44 @@
- # chain_setup.py
-
- from langchain.chains import ConversationalRetrievalChain
- from langchain_community.chat_models import ChatOllama
- from langchain.memory import ConversationBufferMemory
-
- def build_conversational_chain(vectorstore):
-     """
-     Creates a ConversationalRetrievalChain with a ChatOllama LLM and
-     a ConversationBufferMemory for multi-turn Q&A.
-     """
-     llm = ChatOllama(model="qwen2.5:7b")
-
-     memory = ConversationBufferMemory(
-         memory_key="chat_history",
-         return_messages=True
-     )
-
-     qa_chain = ConversationalRetrievalChain.from_llm(
-         llm=llm,
-         retriever=vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5}),
-         memory=memory,
-         verbose=True  # optional debug logs
-     )
-
-     return qa_chain
+ from langchain.chains import ConversationalRetrievalChain
+ from langchain_community.llms import HuggingFacePipeline
+ from langchain.memory import ConversationBufferMemory
+ import transformers
+ import torch
+
+ def load_llm():
+     model_id = "Qwen/Qwen2.5-7B-Instruct"  # full Hub identifier; plain "qwen2.5-7b" does not resolve on the Hub
+     # Some Qwen releases require trust_remote_code
+     tokenizer = transformers.AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
+     # Place layers on GPU when one is available (e.g. on a GPU Space); otherwise fall back to CPU
+     model = transformers.AutoModelForCausalLM.from_pretrained(
+         model_id,
+         trust_remote_code=True,
+         device_map="auto" if torch.cuda.is_available() else "cpu"
+     )
+     pipe = transformers.pipeline(
+         "text-generation",
+         model=model,
+         tokenizer=tokenizer,
+         max_new_tokens=512  # adjust based on your needs
+     )
+     return HuggingFacePipeline(pipeline=pipe)
+
+ def build_conversational_chain(vectorstore):
+     """
+     Creates a ConversationalRetrievalChain using the HuggingFacePipeline-based LLM
+     and a ConversationBufferMemory for multi-turn Q&A.
+     """
+     llm = load_llm()
+
+     memory = ConversationBufferMemory(
+         memory_key="chat_history",
+         return_messages=True
+     )
+
+     qa_chain = ConversationalRetrievalChain.from_llm(
+         llm=llm,
+         retriever=vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5}),
+         memory=memory,
+         verbose=True  # optional: enables debug logs
+     )
+
+     return qa_chain