langchain-chat-with-pdf-openai-MU

Paused

App Files Files Community

langchain-chat-with-pdf-openai-MU / app.py

Pavan178

Update app.py

b10e9f4 verified 4 months ago

raw

history blame

5.29 kB

	import os
	import gradio as gr
	import logging
	from langchain.document_loaders import PyPDFLoader
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.embeddings import OpenAIEmbeddings
	from langchain.vectorstores import FAISS
	from langchain.chat_models import ChatOpenAI
	from langchain.chains import ConversationalRetrievalChain, LLMChain
	from langchain.memory import ConversationBufferMemory
	from langchain.prompts import PromptTemplate
	from PyPDF2 import PdfReader

	class ContextAwareResponseGenerator:
	    """Generate one answer from a context, a query, and the chat history.

	    Wraps an ``LLMChain`` built from a fixed prompt template. Any failure
	    while running the chain is logged and replaced by a fallback message, so
	    ``generate_response`` always returns a string.
	    """

	    def __init__(self, llm):
	        """Build the prompt template and the chain that runs it on *llm*."""
	        self.llm = llm
	        # The numbered structure list is guidance for the model only; the
	        # prompt tells it to show just the final response to the user.
	        self.response_prompt = PromptTemplate(
	            input_variables=['context', 'query', 'chat_history'],
	            template="""Based on the context, query, and chat history, generate a clear, concise, and helpful response.

	Context: {context}
	Query: {query}
	Chat History: {chat_history}

	Response Structure Selection Criteria (internal):
	1. Technical academic breakdown
	2. Concise summary with key points
	3. Markdown with hierarchical insights
	4. Narrative explanation
	5. Comparative analysis

	Generate the response based on the appropriate structure, but do not display structure selection to the user. Only show the final response.""",
	        )
	        self.response_chain = LLMChain(llm=self.llm, prompt=self.response_prompt)

	    def generate_response(self, context, query, chat_history=''):
	        """Return the chain's answer for *query* with outer whitespace removed.

	        Args:
	            context: Text placed in the prompt's ``{context}`` slot.
	            query: The user's question.
	            chat_history: Prior conversation as text; an empty value is sent
	                to the model as ``"No previous context"``.

	        Returns:
	            The stripped model output, or the fallback message from
	            ``_default_response`` if running the chain raised.
	        """
	        try:
	            response = self.response_chain.run({
	                'context': context,
	                'query': query,
	                'chat_history': chat_history or "No previous context",
	            })
	            return response.strip()
	        except Exception as e:
	            # Boundary handler: the caller must always get a string back.
	            # logging.exception keeps the traceback, unlike logging.error.
	            logging.exception("Response generation error: %s", e)
	            return self._default_response(query)

	    def _default_response(self, query):
	        """Return the fallback message used when generation fails."""
	        return f"I couldn't generate a response for: {query}"

	class AdvancedPdfChatbot:
	    """Answer questions about an uploaded PDF with OpenAI models and FAISS.

	    ``load_and_process_pdf`` splits the PDF into chunks and indexes them;
	    ``chat`` retrieves the chunks most similar to the question, passes them
	    (plus the document title/author line and the conversation so far) to the
	    response generator, and records the exchange in memory.
	    """

	    # Upper bound on the number of chunks embedded per PDF; chunks beyond
	    # this are not indexed (presumably to bound embedding cost - confirm).
	    MAX_INDEXED_CHUNKS = 50
	    # Number of chunks retrieved from the index for each question.
	    RETRIEVAL_K = 4

	    def __init__(self, openai_api_key):
	        """Configure the LLM, embeddings, splitter, memory, and generator.

	        Args:
	            openai_api_key: OpenAI API key; exported as ``OPENAI_API_KEY``.

	        Raises:
	            ValueError: If *openai_api_key* is empty or ``None``.
	        """
	        if not openai_api_key:
	            # os.environ only accepts strings; fail with a clear message
	            # instead of an opaque TypeError from the assignment below.
	            raise ValueError(
	                "An OpenAI API key is required; set the OPENAI_API_KEY "
	                "environment variable."
	            )
	        os.environ["OPENAI_API_KEY"] = openai_api_key
	        self.llm = ChatOpenAI(temperature=0.2, model_name='gpt-4o')

	        self.embeddings = OpenAIEmbeddings()
	        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

	        self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
	        self.response_generator = ContextAwareResponseGenerator(self.llm)

	        self.db = None
	        self.document_context = ""

	    def load_and_process_pdf(self, pdf_path):
	        """Index the PDF at *pdf_path* and remember its title and author.

	        Returns:
	            ``True`` when the index was built, ``False`` on any failure
	            (the failure is logged).
	        """
	        try:
	            reader = PdfReader(pdf_path)
	            # A PDF without an info dictionary yields metadata of None.
	            info = reader.metadata or {}
	            title = info.get("/Title") or "Untitled"
	            author = info.get("/Author") or "Unknown"

	            documents = PyPDFLoader(pdf_path).load()
	            texts = self.text_splitter.split_documents(documents)
	            if not texts:
	                logging.error("PDF processing error: no text extracted from %s", pdf_path)
	                return False
	            if len(texts) > self.MAX_INDEXED_CHUNKS:
	                logging.warning(
	                    "Indexing only the first %d of %d chunks",
	                    self.MAX_INDEXED_CHUNKS,
	                    len(texts),
	                )

	            self.db = FAISS.from_documents(texts[:self.MAX_INDEXED_CHUNKS], self.embeddings)
	            self.document_context = f"Document: {title} by {author}"
	            return True
	        except Exception as e:
	            # Boundary handler: the UI only needs a success flag.
	            logging.exception("PDF processing error: %s", e)
	            return False

	    def chat(self, query):
	        """Answer *query* from the indexed PDF and store the exchange.

	        Returns:
	            The generated answer, or a prompt to upload a PDF when no
	            document has been indexed yet.
	        """
	        if self.db is None:
	            return "Please upload a PDF first."

	        stored = self.memory.load_memory_variables({}).get('chat_history', [])

	        response = self.response_generator.generate_response(
	            context=self._build_context(query),
	            query=query,
	            chat_history=self._format_history(stored),
	        )

	        self.memory.save_context({"input": query}, {"output": response})
	        return response

	    def _build_context(self, query):
	        """Return the document line followed by the chunks closest to *query*."""
	        try:
	            hits = self.db.similarity_search(query, k=self.RETRIEVAL_K)
	        except Exception as e:
	            # Best effort: fall back to the title/author line alone.
	            logging.exception("Retrieval error: %s", e)
	            hits = []
	        return "\n\n".join([self.document_context, *(doc.page_content for doc in hits)])

	    @staticmethod
	    def _format_history(messages):
	        """Render stored messages as ``role: content`` lines ('' when empty)."""
	        if not messages:
	            return ""
	        if isinstance(messages, str):
	            return messages
	        # Assumes message objects expose .type and .content - TODO confirm
	        # against the installed LangChain version; other objects are
	        # rendered with str().
	        return "\n".join(
	            f"{getattr(m, 'type', 'message')}: {getattr(m, 'content', m)}"
	            for m in messages
	        )

	# Gradio Interface
	# Module-level chatbot instance shared by the upload_pdf and respond callbacks.
	# The API key is read from the OPENAI_API_KEY environment variable;
	# os.environ.get yields None when it is unset, so the variable must be set
	# before this line runs or construction will raise.
	pdf_chatbot = AdvancedPdfChatbot(os.environ.get("OPENAI_API_KEY"))

	def upload_pdf(pdf_file):
	    """Index the uploaded PDF and return a status message for the UI.

	    Args:
	        pdf_file: The value delivered by the file input: either an object
	            with a ``name`` attribute holding the path, or the path itself.
	            A falsy value means nothing was uploaded.

	    Returns:
	        A human-readable status string.
	    """
	    if not pdf_file:
	        return "Upload a PDF file."
	    # Use the object's .name when present, otherwise the value itself.
	    file_path = pdf_file.name if hasattr(pdf_file, 'name') else pdf_file
	    if pdf_chatbot.load_and_process_pdf(file_path):
	        return "PDF processed successfully"
	    return "Processing failed"

	def respond(message, history):
	    """Handle one submitted chat message for the Gradio chat widget.

	    Args:
	        message: Text entered by the user.
	        history: List of ``(user, bot)`` pairs shown so far; ``None`` is
	            treated as an empty conversation.

	    Returns:
	        A ``(textbox_value, history)`` pair. On success the textbox is
	        cleared and the new exchange is appended to *history*; on failure
	        the textbox receives the error text and *history* is unchanged.
	    """
	    if history is None:
	        history = []
	    # Nothing to ask: do not spend a model call on a blank submission.
	    if not message or not message.strip():
	        return "", history
	    try:
	        bot_message = pdf_chatbot.chat(message)
	        history.append((message, bot_message))
	        return "", history
	    except Exception as e:
	        # Boundary handler for the UI callback; keep the traceback in logs.
	        logging.exception("Chat error: %s", e)
	        return f"Error: {e}", history

	# Gradio UI
	# Layout: a title, a row holding the file input and its "Process PDF"
	# button, a status box, then the chat history and the query box.
	with gr.Blocks() as demo:
	    gr.Markdown("# Advanced PDF Chatbot")
	    with gr.Row():
	        pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
	        upload_button = gr.Button("Process PDF")

	    upload_status = gr.Textbox(label="Upload Status")
	    # Clicking the button indexes the selected file and reports the result.
	    upload_button.click(upload_pdf, inputs=[pdf_upload], outputs=[upload_status])

	    chatbot_interface = gr.Chatbot()
	    msg = gr.Textbox(placeholder="Enter your query...")
	    # Pressing Enter sends the query; respond returns the new textbox value
	    # and the updated chat history, in that order.
	    msg.submit(respond, inputs=[msg, chatbot_interface], outputs=[msg, chatbot_interface])

	if __name__ == "__main__":
	    demo.launch()