Update app.py
app.py CHANGED
@@ -4,14 +4,11 @@ from langchain.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.embeddings import OpenAIEmbeddings
 from langchain.vectorstores import FAISS
-from langchain.chains import ConversationalRetrievalChain
+from langchain.chains import ConversationalRetrievalChain, Chain
 from langchain.chat_models import ChatOpenAI
 from langchain.memory import ConversationBufferMemory
-
 from langchain.prompts import PromptTemplate

-
-
 openai_api_key = os.environ.get("OPENAI_API_KEY")

 class AdvancedPdfChatbot:
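A note on the new import line: `LLMChain` is used later in this commit but never imported, and in classic (0.0.x) LangChain releases the `Chain` base class is exported from `langchain.chains.base` rather than from `langchain.chains`, so the added import may fail depending on the installed version. A minimal sketch of an import block that should resolve under those assumptions; `import os` is presumed to already sit in the unshown first lines of the file, since `os.environ` is used below:

```python
import os  # presumed already present in the unshown lines 1-3 of app.py

from langchain.chains import ConversationalRetrievalChain, LLMChain  # LLMChain added: used below but not imported in the diff
from langchain.chains.base import Chain  # base-class location in classic LangChain
```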
@@ -19,33 +16,30 @@ class AdvancedPdfChatbot:
         os.environ["OPENAI_API_KEY"] = openai_api_key
         self.embeddings = OpenAIEmbeddings()
         self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
-        self.llm =
+        self.llm = ChatOpenAI(temperature=0, model_name='gpt-4')  # Corrected model name
+        self.refinement_llm = ChatOpenAI(temperature=0, model_name='gpt-3.5-turbo')

         self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
-        self.
-        self.
+        self.overall_chain = None
+        self.db = None
+
+        self.refinement_prompt = PromptTemplate(
+            input_variables=['query', 'chat_history'],
+            template="""Given the user's query and the conversation history, refine the query to be more specific and detailed.
+            If the query is too vague, make reasonable assumptions based on the conversation context.
+            Output the refined query."""
+        )
+
         self.template = """
-        You are a study partner assistant, students give you pdfs
-        and you help them to answer their questions.
+        You are a study partner assistant, students give you pdfs and you help them to answer their questions.

         Answer the question based on the most recent provided resources only.
         Give the most relevant answer.
-        Instructions:
-
-        Use given source for Context: Generate responses using only the provided content.
-        Cite Sources: Reference content using [page: paragraph] or [page: line] format.
-        Address Multiple Subjects: If the query relates to multiple subjects with the same name, provide distinct responses for each.
-        Relevance Only: Exclude irrelevant or outlier information.
-        Keep it Concise: Provide clear, direct, and descriptive answers, answer in great details when needed and keep short responses when needed.
-        No Guesswork: Do not generate information beyond the given content.
-        No Match: If no relevant content is found, reply with: "No relevant information found.
-        Add comprehensive details and break down the responses into parts whenever needed.

         Context: {context}
         Question: {question}
         Answer:
-
-        (Note :YOUR OUTPUT IS RENDERED IN PROPER PARAGRAPHS or BULLET POINTS when needed, modify the response formats as needed, only choose the formats based on the type of question asked)
+        (Note: YOUR OUTPUT IS RENDERED IN PROPER PARAGRAPHS or BULLET POINTS when needed, modify the response formats as needed, only choose the formats based on the type of question asked)
         """
         self.prompt = PromptTemplate(template=self.template, input_variables=["context", "question"])

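One catch with the refinement prompt added above: classic `PromptTemplate` validates the template by default (`validate_template=True` in 0.0.x releases), and this template declares `query` and `chat_history` as input variables without interpolating either, so construction should fail. A sketch that threads the declared variables through; the exact wording is illustrative only:

```python
from langchain.prompts import PromptTemplate

# Sketch only: placeholders added so the declared input_variables actually
# appear in the template, which classic PromptTemplate validation requires.
refinement_prompt = PromptTemplate(
    input_variables=["query", "chat_history"],
    template="""Given the user's query and the conversation history, refine the query
to be more specific and detailed. If the query is too vague, make reasonable
assumptions based on the conversation context. Output only the refined query.

Chat history:
{chat_history}

Query: {query}

Refined query:""",
)
```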
@@ -54,27 +48,64 @@ class AdvancedPdfChatbot:
         documents = loader.load()
         texts = self.text_splitter.split_documents(documents)
         self.db = FAISS.from_documents(texts, self.embeddings)
-        self.pdf_path = pdf_path
         self.setup_conversation_chain()

     def setup_conversation_chain(self):
-
-
+        class CustomChain(Chain):
+            refinement_chain: Chain
+            qa_chain: Chain
+
+            @classmethod
+            def from_llms(cls, refinement_llm, qa_llm, retriever, memory, prompt):
+                refinement_chain = Chain(
+                    llm_chain=LLMChain(
+                        llm=refinement_llm,
+                        prompt=self.refinement_prompt,
+                        output_key='refined_query'
+                    )
+                )
+                qa_chain = ConversationalRetrievalChain.from_llm(
+                    qa_llm,
+                    retriever=retriever,
+                    memory=memory,
+                    combine_docs_chain_kwargs={"prompt": prompt}
+                )
+                return cls(refinement_chain=refinement_chain, qa_chain=qa_chain)
+
+            def _call(self, inputs):
+                query = inputs['query']
+                chat_history = inputs.get('chat_history', [])
+                refined_query = self.refinement_chain.run(query=query, chat_history=chat_history)
+                response = self.qa_chain({"question": refined_query, "chat_history": chat_history})
+                self.qa_chain.memory.save_context({"input": query}, {"output": response['answer']})
+                return {"answer": response['answer']}
+
+            @property
+            def input_keys(self):
+                return ['query', 'chat_history']
+
+            @property
+            def output_keys(self):
+                return ['answer']
+
+        self.overall_chain = CustomChain.from_llms(
+            refinement_llm=self.refinement_llm,
+            qa_llm=self.llm,
             retriever=self.db.as_retriever(),
             memory=self.memory,
-
+            prompt=self.prompt
         )

     def chat(self, query):
-        if not self.
+        if not self.overall_chain:
             return "Please upload a PDF first."
-
+        chat_history = self.memory.load_memory_variables({})['chat_history']
+        result = self.overall_chain({'query': query, 'chat_history': chat_history})
        return result['answer']

     def get_pdf_path(self):
-
-
-        return self.pdf_path
+        if self.db:
+            return self.db.path
         else:
             return "No PDF uploaded yet."

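The `CustomChain` added above has three construction-time problems: `Chain(llm_chain=...)` instantiates the abstract base class, `self.refinement_prompt` is referenced inside a `@classmethod` where no `self` exists, and `LLMChain` is never imported. At call time, `_call` also saves the turn to memory even though `qa_chain` was built with that same memory and already records each exchange, so every turn would be stored twice. A sketch of a version that should construct, assuming classic LangChain; the `refinement_prompt` parameter is an addition not present in the diff:

```python
from typing import Any, Dict, List

from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.chains.base import Chain


class CustomChain(Chain):
    """Refine the user's query, then answer it over the retriever."""

    refinement_chain: LLMChain
    qa_chain: ConversationalRetrievalChain

    @classmethod
    def from_llms(cls, refinement_llm, qa_llm, retriever, memory, prompt,
                  refinement_prompt):
        # The refinement step is a plain LLMChain; Chain itself is abstract
        # and cannot be instantiated the way the diff attempts.
        refinement_chain = LLMChain(
            llm=refinement_llm,
            prompt=refinement_prompt,  # passed in; a classmethod has no `self`
            output_key="refined_query",
        )
        qa_chain = ConversationalRetrievalChain.from_llm(
            qa_llm,
            retriever=retriever,
            memory=memory,
            combine_docs_chain_kwargs={"prompt": prompt},
        )
        return cls(refinement_chain=refinement_chain, qa_chain=qa_chain)

    def _call(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
        refined_query = self.refinement_chain.run(
            query=inputs["query"],
            chat_history=inputs.get("chat_history", []),
        )
        # qa_chain loads and saves chat_history through its own memory, so no
        # extra save_context call is needed here.
        response = self.qa_chain({"question": refined_query})
        return {"answer": response["answer"]}

    @property
    def input_keys(self) -> List[str]:
        return ["query", "chat_history"]

    @property
    def output_keys(self) -> List[str]:
        return ["answer"]
```

One behavioral difference in this sketch: the shared memory then records the refined question rather than the raw one. Separately, `get_pdf_path` now returns `self.db.path`, but the FAISS vectorstore does not expose a `path` attribute, and this commit deletes the `self.pdf_path = pdf_path` assignment the method previously relied on; keeping that assignment and returning `self.pdf_path` looks like the simpler fix.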
@@ -98,7 +129,6 @@ def clear_chatbot():
     return []

 def get_pdf_path():
-    # Call the method to return the current PDF path
     return pdf_chatbot.get_pdf_path()

 # Create the Gradio interface
@@ -122,4 +152,4 @@ with gr.Blocks() as demo:
     path_button.click(get_pdf_path, outputs=[pdf_path_display])

 if __name__ == "__main__":
-    demo.launch()
+    demo.launch()
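For completeness, a minimal smoke test of the revised class under the fixes sketched above. Neither the constructor signature nor the PDF-loading method name is visible in these hunks, so the single-argument constructor, `setup_qa_chain`, and `notes.pdf` are all assumptions:

```python
import os

from app import AdvancedPdfChatbot

# Assumed constructor and loader names; neither signature is shown in the diff.
chatbot = AdvancedPdfChatbot(os.environ["OPENAI_API_KEY"])
print(chatbot.chat("What is this PDF about?"))  # expected: "Please upload a PDF first."

chatbot.setup_qa_chain("notes.pdf")             # hypothetical method name and path
print(chatbot.chat("Summarize the introduction."))
```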