"""Gradio web app for chatting with an uploaded PDF.

A PDF is loaded, split into chunks, embedded with OpenAI embeddings and
indexed in a FAISS vector store; questions are then answered by a
ConversationalRetrievalChain (GPT-4o) with conversation memory.
"""

import os

import gradio as gr
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

# May be None if the variable is not set; AdvancedPdfChatbot.__init__ handles that.
openai_api_key = os.environ.get("OPENAI_API_KEY")


class AdvancedPdfChatbot:
    """Conversational question-answering over a single uploaded PDF.

    Workflow: call ``load_and_process_pdf`` once per document, then call
    ``chat`` repeatedly. Chat history is kept in ``self.memory`` until
    cleared by the caller.
    """

    def __init__(self, openai_api_key):
        """Configure models, splitter, memory, and the answer prompt.

        Args:
            openai_api_key: OpenAI API key, exported to the environment so
                the LangChain OpenAI clients pick it up. May be None/empty,
                in which case the environment is left untouched (the
                clients will then rely on whatever is already configured).
        """
        # BUG FIX: assigning None to os.environ raises TypeError at import
        # time when OPENAI_API_KEY is unset — only export a real value.
        if openai_api_key:
            os.environ["OPENAI_API_KEY"] = openai_api_key
        self.embeddings = OpenAIEmbeddings()
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=200
        )
        self.llm = ChatOpenAI(temperature=0.5, model_name="gpt-4o")
        self.memory = ConversationBufferMemory(
            memory_key="chat_history", return_messages=True
        )
        self.qa_chain = None  # built lazily by setup_conversation_chain()
        self.db = None  # FAISS index; created by load_and_process_pdf()
        self.pdf_path = None  # path of the most recently processed PDF
        # NOTE: template text (including its original wording) is a runtime
        # string sent to the model — kept verbatim.
        self.template = """
        I want you to act as a chat assistant for research more similar to ChatGPT itself, specifically focusing on knowledge retrieval. Provide detailed responses with great attention to context understanding and structured format. Be specific and detail-oriented in your responses, ensuring accuracy and depth in information provided. If you dont know the answer from the given knowledge base say you dont know it. Use Relevant formatting such as Headlines, subtexts or ordered lists when required but not always. 
        Context: {context}
        Question: {question}
        Answer: """
        self.prompt = PromptTemplate(
            template=self.template, input_variables=["context", "question"]
        )

    def load_and_process_pdf(self, pdf_path):
        """Load a PDF, chunk it, build the FAISS index, and (re)build the chain.

        Args:
            pdf_path: Filesystem path to the PDF to index.
        """
        loader = PyPDFLoader(pdf_path)
        documents = loader.load()
        texts = self.text_splitter.split_documents(documents)
        self.db = FAISS.from_documents(texts, self.embeddings)
        self.pdf_path = pdf_path
        self.setup_conversation_chain()

    def setup_conversation_chain(self):
        """Create the retrieval chain over the current FAISS index.

        Requires ``load_and_process_pdf`` to have populated ``self.db``.
        """
        self.qa_chain = ConversationalRetrievalChain.from_llm(
            self.llm,
            retriever=self.db.as_retriever(),
            memory=self.memory,
            combine_docs_chain_kwargs={"prompt": self.prompt},
        )

    def chat(self, query):
        """Answer ``query`` against the indexed PDF.

        Returns the model's answer string, or a prompt to upload a PDF if
        no document has been processed yet.
        """
        if not self.qa_chain:
            return "Please upload a PDF first."
        result = self.qa_chain({"question": query})
        return result["answer"]

    def get_pdf_path(self):
        """Return the path of the currently loaded PDF, or a notice if none."""
        if self.pdf_path:
            return self.pdf_path
        return "No PDF uploaded yet."


# Single app-wide chatbot instance shared by all Gradio callbacks.
pdf_chatbot = AdvancedPdfChatbot(openai_api_key)


def upload_pdf(pdf_file):
    """Gradio callback: index the uploaded file and report its path as status."""
    if pdf_file is None:
        return "Please upload a PDF file."
    file_path = pdf_file.name
    pdf_chatbot.load_and_process_pdf(file_path)
    return file_path


def respond(message, history):
    """Gradio callback: answer ``message``, append the turn, clear the textbox."""
    bot_message = pdf_chatbot.chat(message)
    history.append((message, bot_message))
    return "", history


def clear_chatbot():
    """Gradio callback: wipe conversation memory and empty the chat display."""
    pdf_chatbot.memory.clear()
    return []


def get_pdf_path():
    """Gradio callback: expose the chatbot's current PDF path."""
    return pdf_chatbot.get_pdf_path()


# ---- Gradio UI -------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# PDF Chatbot")

    with gr.Row():
        pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_button = gr.Button("Process PDF")
        upload_status = gr.Textbox(label="Upload Status")
        upload_button.click(upload_pdf, inputs=[pdf_upload], outputs=[upload_status])

    path_button = gr.Button("Get PDF Path")
    pdf_path_display = gr.Textbox(label="Current PDF Path")

    chatbot_interface = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    msg.submit(respond, inputs=[msg, chatbot_interface], outputs=[msg, chatbot_interface])
    clear.click(clear_chatbot, outputs=[chatbot_interface])
    path_button.click(get_pdf_path, outputs=[pdf_path_display])


if __name__ == "__main__":
    demo.launch()