import logging
import os

import gradio as gr
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from PyPDF2 import PdfReader


class ContextAwareResponseGenerator:
    """Generate answers with an LLM, folding document context and chat history into one prompt."""

    def __init__(self, llm):
        """Build the prompt template and chain around the supplied LangChain chat model."""
        self.llm = llm
        self.response_prompt = PromptTemplate(
            input_variables=['context', 'query', 'chat_history'],
            template="""Given the context, query, and chat history, generate the best response that is clear and helpful.

Context: {context}
Query: {query}
Chat History: {chat_history}

Choose the most appropriate response structure and generate the response directly, without explicit guidance on which format to use. Your response should be based on the query and context provided.""",
        )
        self.response_chain = LLMChain(llm=self.llm, prompt=self.response_prompt)

    def generate_response(self, context, query, chat_history=''):
        """Run the chain and return the stripped answer text.

        Falls back to a canned apology message on any failure so the UI
        never surfaces a raw traceback to the user.
        """
        try:
            response = self.response_chain.run({
                'context': context,
                'query': query,
                # The template still needs *some* history text on the first turn.
                'chat_history': chat_history or "No previous context",
            })
            return response.strip()
        except Exception as e:
            logging.error(f"Response generation error: {e}")
            return self._default_response(query)

    def _default_response(self, query):
        """Fallback answer used when the LLM call fails."""
        return f"I couldn't generate a response for: {query}"


class AdvancedPdfChatbot:
    """Chatbot that indexes an uploaded PDF into FAISS and answers queries about it."""

    def __init__(self, openai_api_key):
        """Wire up the LLM, embeddings, splitter, memory, and response generator.

        Args:
            openai_api_key: OpenAI key; exported to the environment so the
                LangChain clients can pick it up. May be None/empty, in which
                case the already-set environment (if any) is used.
        """
        # BUG FIX: the original assigned unconditionally; os.environ[...] = None
        # raises TypeError when OPENAI_API_KEY is not set at module import time.
        if openai_api_key:
            os.environ["OPENAI_API_KEY"] = openai_api_key
        self.llm = ChatOpenAI(temperature=0.2, model_name='gpt-4o')
        self.embeddings = OpenAIEmbeddings()
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        self.response_generator = ContextAwareResponseGenerator(self.llm)
        self.db = None               # FAISS index; None until a PDF is processed
        self.document_context = ""   # one-line "Document: <title> by <author>" summary

    def load_and_process_pdf(self, pdf_path):
        """Index a PDF: extract metadata, split pages, and build the FAISS store.

        Returns True on success, False on any failure (logged, not raised).
        """
        try:
            reader = PdfReader(pdf_path)
            # BUG FIX: PdfReader.metadata is None for PDFs without an info
            # dictionary; calling .get on it raised AttributeError.
            info = reader.metadata or {}
            metadata = {
                "title": info.get("/Title", "Untitled") or "Untitled",
                "author": info.get("/Author", "Unknown") or "Unknown",
            }
            loader = PyPDFLoader(pdf_path)
            documents = loader.load()
            texts = self.text_splitter.split_documents(documents)
            # Cap at 50 chunks to bound embedding cost/latency for large PDFs.
            self.db = FAISS.from_documents(texts[:50], self.embeddings)
            self.document_context = f"Document: {metadata['title']} by {metadata['author']}"
            return True
        except Exception as e:
            logging.error(f"PDF processing error: {e}")
            return False

    def chat(self, query, is_new_question=False):
        """Answer `query` against the loaded document, updating conversation memory.

        Args:
            query: the user's question.
            is_new_question: when True, the conversation history is discarded
                so the answer is not biased by earlier turns.
        """
        if not self.db:
            return "Please upload a PDF first."

        if is_new_question:
            # BUG FIX: the original only reset a local copy of the history,
            # leaving stale turns in ConversationBufferMemory for later calls.
            self.memory.clear()
            chat_history = []
        else:
            chat_history = self.memory.load_memory_variables({}).get('chat_history', [])

        response = self.response_generator.generate_response(
            context=self.document_context,
            query=query,
            chat_history=str(chat_history),
        )

        # Persist this turn so follow-up questions see it.
        self.memory.save_context({"input": query}, {"output": response})
        return response


# Module-level singleton used by the Gradio callbacks below.
pdf_chatbot = AdvancedPdfChatbot(os.environ.get("OPENAI_API_KEY"))


def upload_pdf(pdf_file):
    """Gradio callback: index the uploaded PDF and report a status string."""
    if not pdf_file:
        return "Upload a PDF file."
    # gr.File may hand us a tempfile-like object (with .name) or a plain path.
    file_path = pdf_file.name if hasattr(pdf_file, 'name') else pdf_file
    return "PDF processed successfully" if pdf_chatbot.load_and_process_pdf(file_path) else "Processing failed"


def respond(message, history):
    """Gradio callback: answer `message`, append the turn to `history`.

    Returns ("", history) so the textbox clears on success; on error the
    error text is placed in the textbox instead.
    """
    try:
        is_new_question = len(history) == 0  # empty UI history == fresh conversation
        bot_message = pdf_chatbot.chat(message, is_new_question)
        history.append((message, bot_message))
        return "", history
    except Exception as e:
        return f"Error: {e}", history


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# Advanced PDF Chatbot")
    with gr.Row():
        pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_button = gr.Button("Process PDF")
        upload_status = gr.Textbox(label="Upload Status")
        upload_button.click(upload_pdf, inputs=[pdf_upload], outputs=[upload_status])
    chatbot_interface = gr.Chatbot()
    msg = gr.Textbox(placeholder="Enter your query...")
    msg.submit(respond, inputs=[msg, chatbot_interface], outputs=[msg, chatbot_interface])

if __name__ == "__main__":
    demo.launch()