|
import logging
import os
import re

import gradio as gr
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from PyPDF2 import PdfReader
|
|
|
class ContextAwareResponseGenerator:
    """Generate chat answers via an LLM prompt that first selects one of
    five response structures, then writes the answer in that structure.

    The chain's raw output may lead with the chosen structure number
    (e.g. "3. ..."); that marker is stripped before returning.
    """

    def __init__(self, llm):
        """Build the prompt and LLM chain around the given `llm`."""
        self.llm = llm
        self.response_prompt = PromptTemplate(
            input_variables=['context', 'query', 'chat_history'],
            template="""Analyze the context, query, and chat history to generate an optimal response:



Context: {context}

Query: {query}

Chat History: {chat_history}



Response Structure Selection Criteria:

1. Technical academic breakdown

2. Concise summary with key points

3. Markdown with hierarchical insights

4. Narrative explanation

5. Comparative analysis



Choose the most appropriate response structure (1-5) and generate the response accordingly:"""
        )
        self.response_chain = LLMChain(llm=self.llm, prompt=self.response_prompt)

    def generate_response(self, context, query, chat_history=''):
        """Run the response chain and return the cleaned answer text.

        Args:
            context: document/context string injected into the prompt.
            query: the user's question.
            chat_history: prior conversation; empty/falsy values are
                replaced by a "No previous context" placeholder.

        Returns:
            The model's answer, or a fallback message on any error
            (the error is logged, never raised to the caller).
        """
        try:
            full_response = self.response_chain.run({
                'context': context,
                'query': query,
                'chat_history': chat_history or "No previous context"
            })

            # BUG FIX: the original used `full_response[1:]`, which
            # unconditionally dropped the FIRST character of every answer.
            # The prompt asks the model to lead with a structure number
            # (1-5), so remove that marker only when it is actually there.
            return re.sub(r"^\s*[1-5][.):\-]?\s*", "", full_response, count=1).strip()
        except Exception as e:
            logging.error(f"Response generation error: {e}")
            return f"I couldn't generate a response for: {query}"
|
|
|
class AdvancedPdfChatbot:
    """Chatbot over a single PDF.

    Splits the document into chunks, embeds them into a FAISS index,
    and answers queries using retrieved passages plus conversation
    memory held in a ConversationBufferMemory.
    """

    def __init__(self, openai_api_key):
        """Wire up the LLM, embeddings, splitter, memory and generator.

        Args:
            openai_api_key: OpenAI key; may be None when the env var is
                unset — only a real key is written to os.environ
                (assigning None would raise TypeError).
        """
        if openai_api_key:
            os.environ["OPENAI_API_KEY"] = openai_api_key
        self.llm = ChatOpenAI(temperature=0.2, model_name='gpt-4o')

        self.embeddings = OpenAIEmbeddings()
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

        self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        self.response_generator = ContextAwareResponseGenerator(self.llm)

        self.db = None  # FAISS index; populated by load_and_process_pdf
        self.document_context = ""  # "Document: <title> by <author>" summary line

    def load_and_process_pdf(self, pdf_path):
        """Split and embed the PDF at `pdf_path` into a FAISS index.

        Returns:
            True on success, False on any failure (error is logged).
        """
        try:
            reader = PdfReader(pdf_path)
            # PdfReader.metadata can be None for PDFs without an info
            # dictionary — fall back to an empty mapping.
            info = reader.metadata or {}
            metadata = {
                "title": info.get("/Title") or "Untitled",
                "author": info.get("/Author") or "Unknown",
            }

            loader = PyPDFLoader(pdf_path)
            documents = loader.load()
            texts = self.text_splitter.split_documents(documents)

            # Only the first 50 chunks are embedded, capping embedding
            # cost — NOTE(review): long PDFs are silently truncated here.
            self.db = FAISS.from_documents(texts[:50], self.embeddings)
            self.document_context = f"Document: {metadata['title']} by {metadata['author']}"

            return True
        except Exception as e:
            logging.error(f"PDF processing error: {e}")
            return False

    def chat(self, query):
        """Answer `query` using retrieved chunks plus chat memory.

        Returns the generated response string, or a prompt to upload a
        PDF when no index has been built yet.
        """
        if not self.db:
            return "Please upload a PDF first."

        chat_history = self.memory.load_memory_variables({}).get('chat_history', [])

        # BUG FIX: the FAISS index was built but never queried — the
        # generator only ever saw the title/author line. Retrieve the
        # most relevant chunks and include them in the context.
        docs = self.db.similarity_search(query, k=3)
        excerpts = "\n\n".join(doc.page_content for doc in docs)
        context = f"{self.document_context}\n\nRelevant excerpts:\n{excerpts}"

        response = self.response_generator.generate_response(
            context=context,
            query=query,
            chat_history=str(chat_history)
        )

        self.memory.save_context({"input": query}, {"output": response})

        return response
|
|
|
|
|
# Module-level singleton shared by the gradio handlers below.
# NOTE(review): OPENAI_API_KEY may be unset, in which case None is
# passed to the constructor — verify the constructor tolerates that.
pdf_chatbot = AdvancedPdfChatbot(os.environ.get("OPENAI_API_KEY"))
|
|
|
def upload_pdf(pdf_file):
    """Gradio handler: index the uploaded PDF and report the outcome."""
    # Guard clause: nothing selected in the file widget.
    if not pdf_file:
        return "Upload a PDF file."
    # Gradio may hand us a tempfile-like object (with .name) or a raw path.
    path = getattr(pdf_file, 'name', pdf_file)
    if pdf_chatbot.load_and_process_pdf(path):
        return "PDF processed successfully"
    return "Processing failed"
|
|
|
def respond(message, history):
    """Gradio handler: answer `message`, append the turn to `history`.

    Returns ("", updated history) on success so the input box clears;
    on failure returns the error text and the unmodified history.
    """
    try:
        history.append((message, pdf_chatbot.chat(message)))
        return "", history
    except Exception as err:
        return f"Error: {err}", history
|
|
|
|
|
# Gradio UI: components must be created inside the Blocks context so
# they register with `demo`; wiring order below follows the layout.
with gr.Blocks() as demo:
    gr.Markdown("# Advanced PDF Chatbot")
    with gr.Row():
        pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_button = gr.Button("Process PDF")

    # Shows "PDF processed successfully" / "Processing failed" from upload_pdf.
    upload_status = gr.Textbox(label="Upload Status")
    upload_button.click(upload_pdf, inputs=[pdf_upload], outputs=[upload_status])

    chatbot_interface = gr.Chatbot()
    msg = gr.Textbox(placeholder="Enter your query...")
    # respond returns ("", history): clears the textbox and refreshes the chat.
    msg.submit(respond, inputs=[msg, chatbot_interface], outputs=[msg, chatbot_interface])

if __name__ == "__main__":
    demo.launch()