langchain-chat-with-pdf-openai-MU

Paused

App Files Files Community

langchain-chat-with-pdf-openai-MU / app.py

Pavan178

Update app.py

741c69d verified 7 months ago

raw

history blame

4.84 kB

	import os
	import gradio as gr
	from langchain.document_loaders import PyPDFLoader
	from langchain.text_splitter import RecursiveCharacterTextSplitter
	from langchain.embeddings import OpenAIEmbeddings
	from langchain.vectorstores import FAISS
	from langchain.chains import ConversationalRetrievalChain
	from langchain.chat_models import ChatOpenAI
	from langchain.memory import ConversationBufferMemory

	from langchain.prompts import PromptTemplate



	# OpenAI credential taken from the process environment; None when the
	# OPENAI_API_KEY variable is not defined.
	openai_api_key = os.getenv("OPENAI_API_KEY")

	class AdvancedPdfChatbot:
	    """Conversational question answering over one PDF at a time.

	    A loaded PDF is split into overlapping chunks, embedded with
	    ``OpenAIEmbeddings`` and stored in a FAISS index. Questions are answered
	    by a ``ConversationalRetrievalChain`` built on that index, using
	    ``PROMPT_TEMPLATE`` as the answer prompt.

	    The same ``ConversationBufferMemory`` instance is reused every time a
	    PDF is loaded; call ``memory.clear()`` to reset the conversation.
	    """

	    # Answer prompt handed to the chain; it must expose the {context} and
	    # {question} placeholders declared in the PromptTemplate below.
	    PROMPT_TEMPLATE = """
	You are a study partner assistant, students give you pdfs
	and you help them to answer their questions.

	Answer the question based on the most recent provided resources only.
	Give the most relevant answer.
	Instructions:

	Use given source for Context: Generate responses using only the provided content.
	Cite Sources: Reference content using [page: paragraph] or [page: line] format.
	Address Multiple Subjects: If the query relates to multiple subjects with the same name, provide distinct responses for each.
	Relevance Only: Exclude irrelevant or outlier information.
	Keep it Concise: Provide clear, direct, and descriptive answers, answer in great details when needed and keep short responses when needed.
	No Guesswork: Do not generate information beyond the given content.
	No Match: If no relevant content is found, reply with: "No relevant information found."
	Add comprehensive details and break down the responses into parts whenever needed.

	Context: {context}
	Question: {question}
	Answer:

	(Note :YOUR OUTPUT IS RENDERED IN PROPER PARAGRAPHS or BULLET POINTS when needed, modify the response formats as needed, only choose the formats based on the type of question asked)
	"""

	    def __init__(self, openai_api_key):
	        """Create the embedding model, splitter, LLM, memory and prompt.

	        Args:
	            openai_api_key: OpenAI API key. It is exported to the
	                ``OPENAI_API_KEY`` environment variable before the OpenAI
	                clients are constructed.

	        Raises:
	            ValueError: If ``openai_api_key`` is ``None`` or empty.
	        """
	        # Assigning None to os.environ raises an opaque TypeError, so fail
	        # early with a message that names the missing setting.
	        if not openai_api_key:
	            raise ValueError(
	                "An OpenAI API key is required; set the OPENAI_API_KEY "
	                "environment variable."
	            )
	        os.environ["OPENAI_API_KEY"] = openai_api_key
	        self.embeddings = OpenAIEmbeddings()
	        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
	        self.llm = ChatOpenAI(temperature=0, model_name='gpt-4o-mini')

	        self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
	        # Vector store, chain and path stay None until a PDF is loaded.
	        self.db = None
	        self.qa_chain = None
	        self.pdf_path = None
	        self.template = self.PROMPT_TEMPLATE
	        self.prompt = PromptTemplate(template=self.template, input_variables=["context", "question"])

	    def load_and_process_pdf(self, pdf_path):
	        """Load a PDF, index its chunks and rebuild the conversation chain.

	        Args:
	            pdf_path: Path of the PDF file to load.

	        Raises:
	            ValueError: If splitting the loaded PDF yields no text chunks.
	        """
	        loader = PyPDFLoader(pdf_path)
	        documents = loader.load()
	        texts = self.text_splitter.split_documents(documents)
	        # An index cannot be built from zero chunks; report that clearly
	        # instead of failing inside the vector store.
	        if not texts:
	            raise ValueError(f"No extractable text found in PDF: {pdf_path}")
	        self.db = FAISS.from_documents(texts, self.embeddings)
	        self.pdf_path = pdf_path
	        self.setup_conversation_chain()

	    def setup_conversation_chain(self):
	        """Build the retrieval chain on top of the current vector store.

	        Raises:
	            RuntimeError: If no PDF has been loaded yet.
	        """
	        if self.db is None:
	            raise RuntimeError("No PDF has been loaded; call load_and_process_pdf first.")
	        self.qa_chain = ConversationalRetrievalChain.from_llm(
	            self.llm,
	            retriever=self.db.as_retriever(),
	            memory=self.memory,
	            combine_docs_chain_kwargs={"prompt": self.prompt},
	        )

	    def chat(self, query):
	        """Answer ``query`` with the chain, or ask for a PDF if none is loaded.

	        Args:
	            query: The user's question.

	        Returns:
	            The chain's ``'answer'`` value, or the string
	            ``"Please upload a PDF first."`` when no chain exists yet.
	        """
	        if not self.qa_chain:
	            return "Please upload a PDF first."
	        result = self.qa_chain({"question": query})
	        return result['answer']

	    def get_pdf_path(self):
	        """Return the stored PDF path, or a placeholder message if none is set."""
	        if self.pdf_path:
	            return self.pdf_path
	        return "No PDF uploaded yet."

	# Single module-level chatbot instance, built from the API key read above.
	pdf_chatbot = AdvancedPdfChatbot(openai_api_key=openai_api_key)

	def upload_pdf(pdf_file):
	    """Index the uploaded PDF and return its path as the status text.

	    Args:
	        pdf_file: The uploaded file, either a path string or an object
	            whose ``name`` attribute holds the path; ``None`` when nothing
	            was uploaded.

	    Returns:
	        The path that was processed, or ``"Please upload a PDF file."``
	        when ``pdf_file`` is ``None``.
	    """
	    if pdf_file is None:
	        return "Please upload a PDF file."
	    # The upload may arrive as a bare path string or as a file wrapper
	    # exposing the path through .name; accept both.
	    file_path = pdf_file if isinstance(pdf_file, str) else pdf_file.name
	    pdf_chatbot.load_and_process_pdf(file_path)
	    return file_path

	def respond(message, history):
	    """Answer a chat message and append the exchange to the history.

	    Args:
	        message: Text the user submitted.
	        history: List of ``(user_message, bot_message)`` tuples shown so
	            far; ``None`` is treated as an empty list.

	    Returns:
	        A ``("", history)`` tuple: the empty string clears the input box
	        and ``history`` is the updated conversation.
	    """
	    if history is None:
	        history = []
	    # Skip blank submissions so no empty question is sent to the chatbot.
	    if not message or not message.strip():
	        return "", history
	    bot_message = pdf_chatbot.chat(message)
	    history.append((message, bot_message))
	    return "", history

	def clear_chatbot():
	    """Wipe the chatbot's conversation memory and return an empty history."""
	    conversation_memory = pdf_chatbot.memory
	    conversation_memory.clear()
	    return []

	def get_pdf_path():
	    """Return whatever the shared chatbot reports as its current PDF path."""
	    current_path = pdf_chatbot.get_pdf_path()
	    return current_path

	# Assemble the Gradio UI: upload controls, path display, and the chat area.
	with gr.Blocks() as demo:
	    gr.Markdown(value="# PDF Chatbot")

	    # NOTE(review): the row is assumed to hold only the uploader and its
	    # button (the statements before the blank line) - confirm the layout.
	    with gr.Row():
	        pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
	        upload_button = gr.Button(value="Process PDF")

	    upload_status = gr.Textbox(label="Upload Status")
	    # Processing a PDF writes its result into the status box.
	    upload_button.click(
	        fn=upload_pdf,
	        inputs=[pdf_upload],
	        outputs=[upload_status],
	    )
	    path_button = gr.Button(value="Get PDF Path")
	    pdf_path_display = gr.Textbox(label="Current PDF Path")
	    chatbot_interface = gr.Chatbot()
	    msg = gr.Textbox()
	    clear = gr.Button(value="Clear")

	    # Submitting text updates both the input box and the chat history.
	    msg.submit(
	        fn=respond,
	        inputs=[msg, chatbot_interface],
	        outputs=[msg, chatbot_interface],
	    )
	    clear.click(fn=clear_chatbot, outputs=[chatbot_interface])
	    path_button.click(fn=get_pdf_path, outputs=[pdf_path_display])

	# Start the web server only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()