File size: 4,844 Bytes
c2af1e5
d2e3c7f
 
 
 
 
28f9d4d
d2e3c7f
 
bd0ebd6
d43bb1b
633ac28
d43bb1b
 
aff3a65
a526ade
d2e3c7f
 
 
 
 
0a080de
355b657
d2e3c7f
 
ff0e62c
355b657
 
 
 
 
 
80803b0
 
 
 
 
 
741c69d
80803b0
741c69d
 
355b657
 
 
 
33831c4
f40cac0
355b657
 
5e8e8f0
d2e3c7f
 
 
 
f74eb2e
ff0e62c
d2e3c7f
 
 
 
 
 
510767a
355b657
d2e3c7f
 
 
 
 
 
 
 
ff0e62c
 
 
 
 
 
 
d2e3c7f
 
 
 
 
 
 
 
5afc751
d2e3c7f
 
 
 
 
 
f74eb2e
 
 
 
ff0e62c
 
 
 
d2e3c7f
 
 
f74eb2e
d2e3c7f
 
 
5e8e8f0
d2e3c7f
 
596dcf4
0ae5df7
d2e3c7f
 
 
5e8e8f0
d2e3c7f
f74eb2e
ff0e62c
5e8e8f0
d2e3c7f
bd0ebd6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import os
import gradio as gr
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory

from langchain.prompts import PromptTemplate



# API key pulled from the environment; None when unset (the chatbot class
# handles exporting it back into the environment for the langchain clients).
openai_api_key = os.environ.get("OPENAI_API_KEY")

class AdvancedPdfChatbot:
    """Conversational Q&A assistant over a single uploaded PDF.

    The PDF is split into overlapping chunks, embedded with OpenAI
    embeddings into an in-memory FAISS index, and queried through a
    ConversationalRetrievalChain that keeps chat history in buffer memory.
    """

    def __init__(self, openai_api_key):
        """Build the embedding model, splitter, LLM, memory and prompt.

        Args:
            openai_api_key: OpenAI API key. Exported to the environment so
                the langchain OpenAI clients can discover it. May be None
                (e.g. env var unset); in that case nothing is exported.
        """
        # os.environ values must be strings — assigning None raises
        # TypeError at import time, so only export a real key.
        if openai_api_key:
            os.environ["OPENAI_API_KEY"] = openai_api_key
        self.embeddings = OpenAIEmbeddings()
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        self.llm = ChatOpenAI(temperature=0, model_name='gpt-4o-mini')

        self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        # Both stay None until load_and_process_pdf() runs; previously
        # self.db was simply undefined, causing AttributeError instead of a
        # clear "not loaded yet" state.
        self.qa_chain = None
        self.db = None
        self.pdf_path = None
        self.template = """
        You are a study partner assistant, students give you pdfs
        and you help them to answer their questions.
        
        Answer the question based on the most recent provided resources only.
        Give the most relevant answer.
        Instructions:

        Use given source for Context: Generate responses using only the provided content.
        Cite Sources: Reference content using [page: paragraph] or [page: line] format.
        Address Multiple Subjects: If the query relates to multiple subjects with the same name, provide distinct responses for each.
        Relevance Only: Exclude irrelevant or outlier information.
        Keep it Concise: Provide clear, direct, and descriptive answers, answer in great details when needed and keep short responses when needed. 
        No Guesswork: Do not generate information beyond the given content.
        No Match: If no relevant content is found, reply with: "No relevant information found."
        Add comprehensive details and break down the responses into parts whenever needed.
        
        Context: {context}
        Question: {question}
        Answer:

        (Note :YOUR OUTPUT IS RENDERED IN PROPER PARAGRAPHS or BULLET POINTS when needed, modify the response formats as needed, only choose the formats based on the type of question asked)
        """
        self.prompt = PromptTemplate(template=self.template, input_variables=["context", "question"])

    def load_and_process_pdf(self, pdf_path):
        """Load *pdf_path*, chunk it, index it in FAISS and (re)build the chain.

        Args:
            pdf_path: Filesystem path to the PDF to index.
        """
        loader = PyPDFLoader(pdf_path)
        documents = loader.load()
        texts = self.text_splitter.split_documents(documents)
        self.db = FAISS.from_documents(texts, self.embeddings)
        self.pdf_path = pdf_path
        self.setup_conversation_chain()

    def setup_conversation_chain(self):
        """Create the retrieval chain over the current vector store.

        Raises:
            ValueError: If no PDF has been processed yet (no vector store).
        """
        if self.db is None:
            raise ValueError("No PDF processed yet; call load_and_process_pdf() first.")
        self.qa_chain = ConversationalRetrievalChain.from_llm(
            self.llm,
            retriever=self.db.as_retriever(),
            memory=self.memory,
            combine_docs_chain_kwargs={"prompt": self.prompt}
        )

    def chat(self, query):
        """Answer *query* against the indexed PDF, using chat memory.

        Returns the model's answer string, or an instruction to upload a
        PDF when no document has been processed yet.
        """
        if not self.qa_chain:
            return "Please upload a PDF first."
        result = self.qa_chain({"question": query})
        return result['answer']

    def get_pdf_path(self):
        """Return the stored PDF path, or a placeholder message if none."""
        if self.pdf_path:
            return self.pdf_path
        else:
            return "No PDF uploaded yet."

# Initialize the chatbot
# Module-level singleton shared by every Gradio callback below; holds the
# vector store and conversation memory for the lifetime of the process.
pdf_chatbot = AdvancedPdfChatbot(openai_api_key)

def upload_pdf(pdf_file):
    """Gradio handler: index the uploaded PDF and report a status string.

    Args:
        pdf_file: Gradio file object (has a ``.name`` temp path) or None.

    Returns:
        The processed file's path on success, otherwise an error message
        suitable for the status textbox.
    """
    if pdf_file is None:
        return "Please upload a PDF file."
    file_path = pdf_file.name
    # A corrupt/unreadable PDF previously crashed the callback with no
    # feedback; surface the failure in the status textbox instead.
    try:
        pdf_chatbot.load_and_process_pdf(file_path)
    except Exception as e:
        return f"Failed to process PDF: {e}"
    return file_path

def respond(message, history):
    """Gradio handler: answer *message* and append the exchange to *history*.

    Returns an empty string (clears the input box) and the updated history.
    """
    reply = pdf_chatbot.chat(message)
    history.append((message, reply))
    return "", history

def clear_chatbot():
    """Wipe the conversation memory and reset the chat display to empty."""
    pdf_chatbot.memory.clear()
    return list()

def get_pdf_path():
    """Expose the chatbot's current PDF path (or placeholder) to the UI."""
    return pdf_chatbot.get_pdf_path()

# Create the Gradio interface
# NOTE: component creation order determines the rendered layout, so the
# statement order below is load-bearing.
with gr.Blocks() as demo:
    gr.Markdown("# PDF Chatbot")
    
    with gr.Row():
        pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_button = gr.Button("Process PDF")

    upload_status = gr.Textbox(label="Upload Status")
    # Index the chosen PDF and show the resulting path/error in the status box.
    upload_button.click(upload_pdf, inputs=[pdf_upload], outputs=[upload_status])
    path_button = gr.Button("Get PDF Path")
    pdf_path_display = gr.Textbox(label="Current PDF Path")
    chatbot_interface = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    # Enter in the textbox sends the question; the handler clears the box
    # and appends the (question, answer) pair to the chat display.
    msg.submit(respond, inputs=[msg, chatbot_interface], outputs=[msg, chatbot_interface])
    # Clears both the display and the chain's conversation memory.
    clear.click(clear_chatbot, outputs=[chatbot_interface])
    path_button.click(get_pdf_path, outputs=[pdf_path_display])

if __name__ == "__main__":
    demo.launch()