# NOTE(review): removed non-Python extraction artifacts from the top of the
# file (a file-size banner, a row of git blame hashes, and a line-number
# gutter) — they were not code and would be syntax errors.
import os
import gradio as gr
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
class AdvancedPdfChatbot:
    """Conversational Q&A assistant over a single uploaded PDF.

    Splits the PDF into overlapping text chunks, indexes them in a FAISS
    vector store, and answers questions via a ConversationalRetrievalChain
    backed by GPT-4 with buffered chat history.
    """

    def __init__(self, openai_api_key):
        """Configure embeddings, splitter, LLM, and conversation memory.

        Args:
            openai_api_key: OpenAI API key, exported to the environment so
                the langchain OpenAI clients can pick it up. May be None
                (e.g. key not set yet); the export is skipped in that case
                because os.environ only accepts strings.
        """
        # Fix: writing None into os.environ raises TypeError; only export
        # the key when one was actually provided.
        if openai_api_key:
            os.environ["OPENAI_API_KEY"] = openai_api_key
        self.embeddings = OpenAIEmbeddings()
        # 200-char overlap preserves context continuity across chunk edges.
        self.text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=200
        )
        self.llm = ChatOpenAI(temperature=0, model_name='gpt-4')
        self.memory = ConversationBufferMemory(
            memory_key="chat_history", return_messages=True
        )
        self.db = None     # FAISS index; populated by load_and_process_pdf
        self.chain = None  # retrieval chain; populated by load_and_process_pdf
        self.template = """
You are a study partner assistant helping students analyze PDF documents.
Answer the question based only on the most recent provided resources.
Provide the most relevant and concise answer possible.
Context: {context}
Question: {question}
Answer:
"""
        self.qa_prompt = PromptTemplate(
            template=self.template,
            input_variables=["context", "question"]
        )

    def load_and_process_pdf(self, pdf_path):
        """Index the PDF at *pdf_path* and (re)build the retrieval chain.

        Args:
            pdf_path: Filesystem path to the PDF to load.
        """
        loader = PyPDFLoader(pdf_path)
        documents = loader.load()
        texts = self.text_splitter.split_documents(documents)
        self.db = FAISS.from_documents(texts, self.embeddings)
        # Fix: the prompt promises answers based only on the most recent
        # document, so stale history about a previously loaded PDF must not
        # leak into the new conversation.
        self.memory.clear()
        self.chain = ConversationalRetrievalChain.from_llm(
            llm=self.llm,
            retriever=self.db.as_retriever(),
            memory=self.memory,
            combine_docs_chain_kwargs={"prompt": self.qa_prompt}
        )

    def chat(self, query):
        """Answer *query* against the indexed PDF.

        Returns:
            The model's answer string, or a fixed reminder string when no
            PDF has been processed yet.
        """
        if not self.chain:
            return "Please upload a PDF first."
        result = self.chain({"question": query})
        return result['answer']

    def clear_memory(self):
        """Forget the buffered conversation history."""
        self.memory.clear()
# Gradio interface setup remains mostly the same
# Single module-level chatbot instance shared by all Gradio callbacks below.
# NOTE(review): if OPENAI_API_KEY is unset, this passes None into the
# constructor, which then assigns None into os.environ — confirm that the
# key is always present in the deployment environment.
pdf_chatbot = AdvancedPdfChatbot(os.environ.get("OPENAI_API_KEY"))
def upload_pdf(pdf_file):
    """Process an uploaded PDF and return a human-readable status string.

    Args:
        pdf_file: The Gradio file value — either a tempfile-like object
            exposing ``.name`` or a plain path string; may be None when
            nothing was uploaded.

    Returns:
        A status message: a prompt to upload, a success note with the
        resolved path, or an error description.
    """
    if pdf_file is None:
        return "Please upload a PDF file."
    # Gradio may hand us a wrapper object with a .name attribute, or a
    # bare path string — normalize to a path either way.
    file_path = getattr(pdf_file, 'name', pdf_file)
    try:
        pdf_chatbot.load_and_process_pdf(file_path)
    except Exception as e:
        return f"Error processing PDF: {str(e)}"
    return f"PDF processed successfully: {file_path}"
def respond(message, history):
    """Chat callback: answer *message* and append the exchange to *history*.

    Args:
        message: The user's question; falsy values are ignored.
        history: The running list of (user, bot) message pairs.

    Returns:
        A ("", history) pair on success (clearing the input box), or an
        error string paired with the unchanged history on failure.
    """
    if not message:
        return "", history
    try:
        answer = pdf_chatbot.chat(message)
    except Exception as e:
        return f"Error: {str(e)}", history
    history.append((message, answer))
    return "", history
def clear_chatbot():
    """Wipe the chatbot's conversation memory and empty the chat display."""
    pdf_chatbot.clear_memory()
    # An empty list clears the gr.Chatbot component it is wired to.
    return []
# Gradio interface: upload controls on top, chat window below.
with gr.Blocks() as demo:
    gr.Markdown("# PDF Chatbot")
    with gr.Row():
        pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_button = gr.Button("Process PDF")
    upload_status = gr.Textbox(label="Upload Status")
    upload_button.click(upload_pdf, inputs=[pdf_upload], outputs=[upload_status])
    chatbot_interface = gr.Chatbot()
    msg = gr.Textbox()
    msg.submit(respond, inputs=[msg, chatbot_interface],
               outputs=[msg, chatbot_interface])
    # Fix: clear_chatbot was defined but never wired into the UI — expose
    # it so users can actually reset the conversation.
    clear_button = gr.Button("Clear Chat")
    clear_button.click(clear_chatbot, inputs=[], outputs=[chatbot_interface])

if __name__ == "__main__":
    # Fix: the original final line carried a stray trailing "|" artifact,
    # which was a syntax error.
    demo.launch()