Spaces:

Somnath3570
/

PDF_based_knowledge_management_system

Sleeping

App Files Files Community

Somnath3570 committed on Feb 13

Commit

4f260fc

verified ·

1 Parent(s): a6def53

Upload 3 files

Browse files

Files changed (3) hide show

connect_memory_with_llm.py +63 -0
create_memory_for_llm.py +46 -0
medibot.py +102 -0

connect_memory_with_llm.py ADDED Viewed

	@@ -0,0 +1,63 @@

+import os
+from langchain_huggingface import HuggingFaceEndpoint
+from langchain_core.prompts import PromptTemplate
+from langchain.chains import RetrievalQA
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_community.vectorstores import FAISS
+## Load environment variables from a .env file (needed if you're not using pipenv as your virtual environment manager)
+from dotenv import load_dotenv, find_dotenv
+load_dotenv(find_dotenv())
+# Step 1: Setup LLM (Mistral with HuggingFace)
+# Access token read from the HF_TOKEN environment variable; None when the variable is unset.
+HF_TOKEN=os.environ.get("HF_TOKEN")
+# Hugging Face Hub repository id passed to load_llm() when the QA chain is created.
+HUGGINGFACE_REPO_ID="mistralai/Mistral-7B-Instruct-v0.3"
+def load_llm(huggingface_repo_id):
+    llm=HuggingFaceEndpoint(
+        repo_id=huggingface_repo_id,
+        task="text-generation",
+        temperature=0.5,
+        model_kwargs={"token":HF_TOKEN,
+                      "max_length":512}
+    )
+    return llm
+# Step 2: Connect LLM with FAISS and Create chain
+# Prompt text for the QA chain. {context} and {question} are the two placeholders that
+# set_custom_prompt() declares as the template's input variables.
+CUSTOM_PROMPT_TEMPLATE = """
+Use the pieces of information provided in the context to answer user's question.
+If you dont know the answer, just say that you dont know, dont try to make up an answer.
+Dont provide anything out of the given context
+Context: {context}
+Question: {question}
+Start the answer directly. No small talk please.
+"""
+def set_custom_prompt(custom_prompt_template):
+    prompt=PromptTemplate(template=custom_prompt_template, input_variables=["context", "question"])
+    return prompt
+# Load Database
+# Relative path of the saved FAISS index directory.
+DB_FAISS_PATH="vectorstore/db_faiss"
+embedding_model=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+# NOTE(review): allow_dangerous_deserialization=True opts in to loading a pickled index;
+# only point DB_FAISS_PATH at an index you built yourself - confirm that always holds.
+db=FAISS.load_local(DB_FAISS_PATH, embedding_model, allow_dangerous_deserialization=True)
+# Create QA chain
+# "stuff" chain using the custom prompt and a retriever configured with search_kwargs k=3;
+# return_source_documents=True makes the response carry the retrieved documents as well.
+qa_chain=RetrievalQA.from_chain_type(
+    llm=load_llm(HUGGINGFACE_REPO_ID),
+    chain_type="stuff",
+    retriever=db.as_retriever(search_kwargs={'k':3}),
+    return_source_documents=True,
+    chain_type_kwargs={'prompt':set_custom_prompt(CUSTOM_PROMPT_TEMPLATE)}
+)
+# Now invoke with a single query
+# Runs at module level: reads one query from stdin, invokes the chain once, and prints the
+# answer followed by the source documents.
+user_query=input("Write Query Here: ")
+response=qa_chain.invoke({'query': user_query})
+print("RESULT: ", response["result"])
+print("SOURCE DOCUMENTS: ", response["source_documents"])

create_memory_for_llm.py ADDED Viewed

	@@ -0,0 +1,46 @@

+from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_huggingface import HuggingFaceEmbeddings
+from langchain_community.vectorstores import FAISS
+## Load environment variables from a .env file (needed if you're not using pipenv as your virtual environment manager)
+from dotenv import load_dotenv, find_dotenv
+load_dotenv(find_dotenv())
+# Step 1: Load raw PDF(s)
+# Directory passed to load_pdf_files() as the location of the source PDFs.
+DATA_PATH="data/"
+def load_pdf_files(data):
+    loader = DirectoryLoader(data,
+                             glob='*.pdf',
+                             loader_cls=PyPDFLoader)
+    documents=loader.load()
+    return documents
+# Runs at module level: load the PDFs found under DATA_PATH.
+documents=load_pdf_files(data=DATA_PATH)
+#print("Length of PDF pages: ", len(documents))
+# Step 2: Create Chunks
+def create_chunks(extracted_data):
+    text_splitter=RecursiveCharacterTextSplitter(chunk_size=500,
+                                                 chunk_overlap=50)
+    text_chunks=text_splitter.split_documents(extracted_data)
+    return text_chunks
+# Runs at module level: split the loaded documents into the chunks that get embedded.
+text_chunks=create_chunks(extracted_data=documents)
+#print("Length of Text Chunks: ", len(text_chunks))
+# Step 3: Create Vector Embeddings
+def get_embedding_model():
+    embedding_model=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
+    return embedding_model
+embedding_model=get_embedding_model()
+# Step 4: Store embeddings in FAISS
+# Output directory for the index; connect_memory_with_llm.py and medibot.py load from this
+# same relative path.
+DB_FAISS_PATH="vectorstore/db_faiss"
+# Build the FAISS store from the chunks with the embedding model, then save it to DB_FAISS_PATH.
+db=FAISS.from_documents(text_chunks, embedding_model)
+db.save_local(DB_FAISS_PATH)

medibot.py ADDED Viewed

	@@ -0,0 +1,102 @@

+import os
+import streamlit as st
+# Update these imports
+from langchain_community.embeddings import HuggingFaceEmbeddings
+from langchain.chains import RetrievalQA
+from langchain_community.vectorstores import FAISS
+from langchain_core.prompts import PromptTemplate
+from langchain_huggingface import HuggingFaceEndpoint
+from dotenv import load_dotenv, find_dotenv
+load_dotenv(find_dotenv())
+# Relative path of the saved FAISS index directory read by get_vectorstore().
+DB_FAISS_PATH = "vectorstore/db_faiss"
+@st.cache_resource
+def get_vectorstore():
+    embedding_model = HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')
+    db = FAISS.load_local(DB_FAISS_PATH, embedding_model, allow_dangerous_deserialization=True)
+    return db
+def set_custom_prompt(custom_prompt_template):
+    prompt = PromptTemplate(template=custom_prompt_template, input_variables=["context", "question"])
+    return prompt
+def load_llm(huggingface_repo_id, HF_TOKEN):
+    llm = HuggingFaceEndpoint(
+        repo_id=huggingface_repo_id,
+        task="text-generation",  # Add this line
+        temperature=0.5,
+        model_kwargs={
+            "token": HF_TOKEN,
+            "max_length": 512  # Changed to integer
+        }
+    )
+    return llm
+def main():
+    st.title("Ask Chatbot!")
+    if 'messages' not in st.session_state:
+        st.session_state.messages = []
+    for message in st.session_state.messages:
+        st.chat_message(message['role']).markdown(message['content'])
+    prompt = st.chat_input("Pass your prompt here")
+    if prompt:
+        st.chat_message('user').markdown(prompt)
+        st.session_state.messages.append({'role': 'user', 'content': prompt})
+        CUSTOM_PROMPT_TEMPLATE = """
+        Use the pieces of information provided in the context to answer user's question.
+        If you dont know the answer, just say that you dont know, dont try to make up an answer.
+        Dont provide anything out of the given context
+        Context: {context}
+        Question: {question}
+        Start the answer directly. No small talk please.
+        """
+        HUGGINGFACE_REPO_ID = "mistralai/Mistral-7B-Instruct-v0.3"
+        HF_TOKEN = os.environ.get("HF_TOKEN")
+        try:
+            with st.spinner("Thinking..."):  # Add loading indicator
+                vectorstore = get_vectorstore()
+                if vectorstore is None:
+                    st.error("Failed to load the vector store")
+                    return
+                qa_chain = RetrievalQA.from_chain_type(
+                    llm=load_llm(huggingface_repo_id=HUGGINGFACE_REPO_ID, HF_TOKEN=HF_TOKEN),
+                    chain_type="stuff",
+                    retriever=vectorstore.as_retriever(search_kwargs={'k': 3}),
+                    return_source_documents=True,
+                    chain_type_kwargs={'prompt': set_custom_prompt(CUSTOM_PROMPT_TEMPLATE)}
+                )
+                response = qa_chain.invoke({'query': prompt})
+                result = response["result"]
+                source_documents = response["source_documents"]
+                # Format source documents more cleanly
+                source_docs_text = "\n\n**Source Documents:**\n"
+                for i, doc in enumerate(source_documents, 1):
+                    source_docs_text += f"{i}. Page {doc.metadata.get('page', 'N/A')}: {doc.page_content[:200]}...\n\n"
+                result_to_show = f"{result}\n{source_docs_text}"
+                st.chat_message('assistant').markdown(result_to_show)
+                st.session_state.messages.append({'role': 'assistant', 'content': result_to_show})
+        except Exception as e:
+            st.error(f"Error: {str(e)}")
+            st.error("Please check your HuggingFace token and model access permissions")
+# Run main() only when this file is executed as the main module, not when it is imported.
+if __name__ == "__main__":
+    main()