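# app.py: a Gradio chatbot that answers questions about an uploaded PDF
# using LangChain retrieval over a FAISS index and an OpenAI chat model.
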
import os
import gradio as gr
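# NOTE: these langchain.* import paths match pre-0.2 releases; newer versions
# move most of them to langchain_community (and langchain_openai).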
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate


class AdvancedPdfChatbot:
    """Conversational question-answering over a single PDF document."""

    def __init__(self, openai_api_key):
        os.environ["OPENAI_API_KEY"] = openai_api_key
        self.embeddings = OpenAIEmbeddings()
        # Overlapping chunks preserve context across chunk boundaries.
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        # temperature=0 keeps answers deterministic and grounded in the context.
        self.llm = ChatOpenAI(temperature=0, model_name='gpt-4')
        self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        self.db = None
        self.chain = None
        self.template = """
        You are a study partner assistant helping students analyze PDF documents.
        Answer the question based only on the most recently provided resources.
        Provide the most relevant and concise answer possible.

        Context: {context}
        Question: {question}
        Answer:
        """
        self.qa_prompt = PromptTemplate(
            template=self.template,
            input_variables=["context", "question"]
        )
    def load_and_process_pdf(self, pdf_path):
        """Load a PDF, split it into chunks, index the chunks, and build the QA chain."""
        loader = PyPDFLoader(pdf_path)
        documents = loader.load()
        texts = self.text_splitter.split_documents(documents)
        # Build an in-memory FAISS index over the chunk embeddings.
        self.db = FAISS.from_documents(texts, self.embeddings)
        self.chain = ConversationalRetrievalChain.from_llm(
            llm=self.llm,
            retriever=self.db.as_retriever(),
            memory=self.memory,
            combine_docs_chain_kwargs={"prompt": self.qa_prompt}
        )
    def chat(self, query):
        """Answer a question against the indexed PDF, using conversation memory."""
        if not self.chain:
            return "Please upload a PDF first."
        result = self.chain({"question": query})
        return result['answer']

    def clear_memory(self):
        """Reset the conversation history."""
        self.memory.clear()
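
# Example usage (a sketch; assumes OPENAI_API_KEY is set and "notes.pdf" exists):
#   bot = AdvancedPdfChatbot(os.environ["OPENAI_API_KEY"])
#   bot.load_and_process_pdf("notes.pdf")
#   print(bot.chat("Summarize the key points."))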

# Single chatbot instance shared by all Gradio callbacks; the API key is read
# from the environment.
pdf_chatbot = AdvancedPdfChatbot(os.environ.get("OPENAI_API_KEY"))

def upload_pdf(pdf_file):
    """Gradio callback: index the uploaded PDF and report status."""
    if pdf_file is None:
        return "Please upload a PDF file."
    # gr.File may hand back a tempfile wrapper or a plain path string.
    file_path = pdf_file.name if hasattr(pdf_file, 'name') else pdf_file
    try:
        pdf_chatbot.load_and_process_pdf(file_path)
        return f"PDF processed successfully: {file_path}"
    except Exception as e:
        return f"Error processing PDF: {str(e)}"

def respond(message, history):
    """Gradio callback: answer the user's message and append the turn to history."""
    if not message:
        return "", history
    try:
        bot_message = pdf_chatbot.chat(message)
        history.append((message, bot_message))
        return "", history
    except Exception as e:
        # Surface the error in the chat window rather than in the input box.
        history.append((message, f"Error: {str(e)}"))
        return "", history

def clear_chatbot():
    """Reset both the chain's memory and the visible chat history."""
    pdf_chatbot.clear_memory()
    return []

# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# PDF Chatbot")

    with gr.Row():
        pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_button = gr.Button("Process PDF")

    upload_status = gr.Textbox(label="Upload Status")
    upload_button.click(upload_pdf, inputs=[pdf_upload], outputs=[upload_status])

    chatbot_interface = gr.Chatbot()
    msg = gr.Textbox(label="Ask a question about the PDF")
    msg.submit(respond, inputs=[msg, chatbot_interface], outputs=[msg, chatbot_interface])

    # Clear both the conversation memory and the chat window.
    clear_button = gr.Button("Clear Conversation")
    clear_button.click(clear_chatbot, outputs=[chatbot_interface])

if __name__ == "__main__":
    demo.launch()