|
import os |
|
import gradio as gr |
|
from huggingface_hub import HfApi, whoami |
|
from langchain.document_loaders import PyPDFLoader |
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
from langchain.embeddings import OpenAIEmbeddings |
|
from langchain.vectorstores import FAISS |
|
from langchain.chains import ConversationalRetrievalChain |
|
from langchain.chat_models import ChatOpenAI |
|
from langchain.memory import ConversationBufferMemory |
|
from langchain.prompts import PromptTemplate |
|
|
|
|
|
from langchain.prompts import PromptTemplate |
|
|
|
|
|
|
|
# OpenAI key read from the environment; os.environ.get returns None when
# the variable is unset, so downstream consumers must tolerate None.
openai_api_key = os.environ.get("OPENAI_API_KEY")

# Hugging Face Hub client (used alongside whoami() for login-aware features).
hf_api = HfApi()
|
class AdvancedPdfChatbot:
    """Conversational question-answering over a single uploaded PDF.

    Pipeline: PyPDFLoader -> RecursiveCharacterTextSplitter -> FAISS
    vector store -> ConversationalRetrievalChain (gpt-4o) with a
    conversation-buffer memory.  Call ``load_and_process_pdf`` before
    ``chat``; until then ``chat`` returns a friendly prompt to upload.
    """

    def __init__(self, openai_api_key):
        """Build the embedding/LLM stack.

        Args:
            openai_api_key: API key to export for the OpenAI clients.
                May be None/empty (e.g. env var unset); in that case the
                environment is left untouched instead of crashing.
        """
        # BUG FIX: assigning None into os.environ raises
        # "TypeError: str expected, not NoneType" — only export a real key.
        if openai_api_key:
            os.environ["OPENAI_API_KEY"] = openai_api_key
        self.embeddings = OpenAIEmbeddings()
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        self.llm = ChatOpenAI(temperature=0.5, model_name='gpt-4o', max_tokens=3000)

        self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        self.qa_chain = None  # built by setup_conversation_chain() after a PDF loads
        self.db = None        # FAISS index; set by load_and_process_pdf()
        self.pdf_path = None  # path of the most recently processed PDF
        self.template = """
You are a file-based knowledge assistant that interacts with users like ChatGPT. Your primary source of knowledge comes from user-uploaded files, such as PDFs. You do not rely on general knowledge or the internet. Instead, you extract, analyze, and synthesize information directly from the content of the provided file(s).
**1. Personality and Tone**
- Be polite, clear, and professional.
- Use formal, academic language when the context requires it.
- Provide concise, well-structured responses, and maintain a helpful and supportive tone.
**2. Core Capabilities**
- Extract and summarize key information from the provided file.
- Answer user questions based on the content of the file.
- Provide in-depth analysis, explanations, and references to the file's content.
- Suggest relevant sections, chapters, or pages where specific information can be found.
- Offer guidance on how users can interpret and understand the file's contents.
**3. Knowledge and Scope**
- Your knowledge is limited to the content found in the uploaded file(s).
- You should not answer questions unrelated to the file's content unless explicitly requested.
- If a user asks a question that is not found in the file, inform them that the information is not available.
**4. Interaction Rules**
- Respond with specific references to the document's content, including page numbers, sections, or headings, if available.
- If the user asks for clarification, politely request more details.
- Provide short, clear explanations for user queries, but be ready to offer more depth if asked.
- Never "make up" information. If something is not in the file, clearly state that it cannot be found.
**5. Context Awareness**
- Remember the content of the file for the duration of the session.
- Use file-specific knowledge to provide logical and evidence-backed responses.
- If multiple files are uploaded, clarify which file is being referenced and specify which file the information is from.
**6. Technical Details**
- Summarize content into concise answers and organize information using bullet points, lists, or structured paragraphs.
- If asked to provide a summary, focus on key points, main arguments, and essential takeaways.
- When a user asks for a section or heading, search for relevant text within the file.
- Do not offer answers beyond the scope of the file, and avoid guessing.
**7. Example Usage**
User: "Can you summarize the main argument from the introduction of the file?"
Response: "Sure! The introduction discusses [key points] and highlights the central argument that [main idea]. This can be found on page 2 under the heading 'Introduction'."
User: "Where can I find the definition of 'symbolic interactionism' in the document?"
Response: "The definition of 'symbolic interactionism' appears on page 12 under the subheading 'Key Theoretical Concepts'."
User: "Explain the concept of 'cognitive dissonance' as it is presented in the document."
Response: "In the document, 'cognitive dissonance' is defined as [definition from the file]. It appears in the context of [brief explanation] and can be found on page 15 under the section 'Theoretical Foundations'."

NOTE : DESCRIBE/SUMMARY should always return the overall summary of the documents in well documented and descriptions of the topic in great details.
**End of Prompt**
Context: {context}
Question: {question}
Answer:
"""

        self.prompt = PromptTemplate(template=self.template, input_variables=["context", "question"])

    def load_and_process_pdf(self, pdf_path):
        """Load *pdf_path*, chunk it, index it into FAISS, and (re)build the QA chain.

        NOTE(review): the conversation memory is intentionally left intact
        across reloads, matching original behavior; clearing it here may be
        desirable but would change user-visible behavior — confirm first.
        """
        loader = PyPDFLoader(pdf_path)
        documents = loader.load()
        texts = self.text_splitter.split_documents(documents)
        self.db = FAISS.from_documents(texts, self.embeddings)
        self.pdf_path = pdf_path
        self.setup_conversation_chain()

    def setup_conversation_chain(self):
        """Wire the retriever, LLM, memory, and custom prompt into one chain.

        Requires ``self.db`` to be populated (call load_and_process_pdf first).
        """
        self.qa_chain = ConversationalRetrievalChain.from_llm(
            self.llm,
            retriever=self.db.as_retriever(),
            memory=self.memory,
            combine_docs_chain_kwargs={"prompt": self.prompt}
        )

    def chat(self, query):
        """Answer *query* from the indexed PDF; prompt for an upload if none yet."""
        if not self.qa_chain:
            return "Please upload a PDF first."
        result = self.qa_chain({"question": query})
        return result['answer']

    def get_pdf_path(self):
        """Return the current PDF's path, or a placeholder message if none is loaded."""
        if self.pdf_path:
            return self.pdf_path
        else:
            return "No PDF uploaded yet."
|
|
|
|
|
# Single shared chatbot instance used by all Gradio callbacks below.
# NOTE(review): openai_api_key can be None here if OPENAI_API_KEY is unset.
pdf_chatbot = AdvancedPdfChatbot(openai_api_key)
|
|
|
def get_user_folder():
    """Return a per-user data directory, creating it on first use.

    The directory name comes from the Hugging Face identity of the
    logged-in user (``whoami()['name']``).  Returns None when the call
    fails for any reason — typically because no user is authenticated —
    so callers can treat None as "not logged in".
    """
    try:
        profile = whoami()
        folder = os.path.join("user_data", profile['name'])
        os.makedirs(folder, exist_ok=True)
        return folder
    except Exception:
        # Deliberate best-effort: any failure means "no usable folder".
        return None
|
|
|
def upload_pdf(pdf_file):
    """Persist an uploaded PDF into the user's folder and index it.

    Args:
        pdf_file: the Gradio File component value (a temp-file wrapper
            whose ``.name`` holds the absolute path of the uploaded file).

    Returns:
        The stored file path on success, or a human-readable error
        message when no file was given / no user is logged in.
    """
    if pdf_file is None:
        return "Please upload a PDF file."
    user_folder = get_user_folder()
    if user_folder is None:
        return "Please log in to upload a PDF."
    # BUG FIX: pdf_file.name is an *absolute* temp path; os.path.join
    # discards user_folder entirely when its second argument is absolute,
    # so the file was never saved inside the user's folder.  Keep only
    # the base filename.
    file_path = os.path.join(user_folder, os.path.basename(pdf_file.name))
    # Copy from the temp file on disk rather than pdf_file.read():
    # Gradio's temp-file wrapper is typically already closed when the
    # handler runs, making read() unreliable.
    import shutil
    shutil.copyfile(pdf_file.name, file_path)
    pdf_chatbot.load_and_process_pdf(file_path)
    return file_path
|
|
|
def respond(message, history):
    """Chat callback: answer *message*, clear the textbox, extend *history*.

    Returns ("", updated_history) so Gradio empties the input box and
    refreshes the Chatbot component in one go.
    """
    reply = pdf_chatbot.chat(message)
    history.append((message, reply))
    return "", history
|
|
|
def clear_chatbot():
    """Reset the conversation: wipe the LLM memory and empty the chat display."""
    pdf_chatbot.memory.clear()
    # An empty list clears the gr.Chatbot component.
    return []
|
|
|
def get_pdf_path():
    """UI callback: expose the chatbot's current PDF path (or its placeholder message)."""
    return pdf_chatbot.get_pdf_path()
|
|
|
|
|
# --- Gradio UI wiring -------------------------------------------------
# Layout: login row, upload row, status/path widgets, then the chat area.
with gr.Blocks() as demo:
    gr.Markdown("# PDF Chatbot")

    # Hugging Face login; enables whoami() inside get_user_folder().
    with gr.Row():
        login_button = gr.LoginButton()
        user_info = gr.Markdown()  # placeholder; toggled visible on load below

    with gr.Row():
        pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_button = gr.Button("Process PDF")

    upload_status = gr.Textbox(label="Upload Status")
    # Processing returns either the stored file path or an error message.
    upload_button.click(upload_pdf, inputs=[pdf_upload], outputs=[upload_status])
    path_button = gr.Button("Get PDF Path")
    pdf_path_display = gr.Textbox(label="Current PDF Path")
    chatbot_interface = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    # Enter in the textbox sends the message; respond() clears the box
    # and returns the extended history for the Chatbot component.
    msg.submit(respond, inputs=[msg, chatbot_interface], outputs=[msg, chatbot_interface])
    clear.click(clear_chatbot, outputs=[chatbot_interface])
    path_button.click(get_pdf_path, outputs=[pdf_path_display])

    # On page load, simply make the (currently empty) user_info markdown visible.
    demo.load(lambda: gr.update(visible=True), outputs=[user_info], inputs=None)

if __name__ == "__main__":
    demo.launch()
|
|