Last commit not found
import os | |
import gradio as gr | |
from langchain.document_loaders import PyPDFLoader | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.embeddings import OpenAIEmbeddings | |
from langchain.vectorstores import FAISS | |
from langchain.chains import ConversationalRetrievalChain | |
from langchain.chat_models import ChatOpenAI | |
from langchain.memory import ConversationBufferMemory | |
from langchain.prompts import PromptTemplate | |
# API key read from the process environment; None when the variable is unset.
openai_api_key = os.getenv("OPENAI_API_KEY")
class AdvancedPdfChatbot:
    """Conversational question answering over a single uploaded PDF.

    A PDF is loaded, split into overlapping chunks, embedded and indexed in a
    FAISS vector store; questions are then answered through a
    ``ConversationalRetrievalChain`` that shares one conversation memory.
    """

    def __init__(self, openai_api_key):
        """Set up the embeddings, splitter, LLM, memory and prompt.

        Args:
            openai_api_key: OpenAI API key. It is exported to the
                ``OPENAI_API_KEY`` environment variable; the OpenAI clients
                below are built without an explicit key, so they presumably
                read it from there.

        Raises:
            ValueError: If ``openai_api_key`` is ``None`` or empty.
        """
        # Fail fast with an actionable message: assigning None into
        # os.environ would otherwise raise an opaque TypeError.
        if not openai_api_key:
            raise ValueError(
                "An OpenAI API key is required; set the OPENAI_API_KEY "
                "environment variable."
            )
        os.environ["OPENAI_API_KEY"] = openai_api_key
        self.embeddings = OpenAIEmbeddings()
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        self.llm = ChatOpenAI(temperature=0,model_name='gpt-4o-mini')
        self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        # Vector store and chain only exist once a PDF has been processed.
        self.db = None
        self.qa_chain = None
        self.pdf_path = None
        self.template = """
You are a study partner assistant, students give you pdfs
and you help them to answer their questions.
Answer the question based on the most recent provided resources only.
Give the most relevant answer.
Context: {context}
Question: {question}
Answer:
(Note :YOUR OUTPUT IS RENDERED IN PROPER PARAGRAPHS, BULLET POINTS, HEADLINES AND SUBTEXTS IN CLEAN PROFESSIONAL MANNER - this is very important)
"""
        self.prompt = PromptTemplate(template=self.template, input_variables=["context", "question"])

    def load_and_process_pdf(self, pdf_path):
        """Index the PDF at ``pdf_path`` and rebuild the conversation chain.

        Args:
            pdf_path: Filesystem path of the PDF to load.
        """
        loader = PyPDFLoader(pdf_path)
        documents = loader.load()
        texts = self.text_splitter.split_documents(documents)
        # A fresh index replaces any previous one, so retrieval only sees
        # the PDF loaded most recently.
        self.db = FAISS.from_documents(texts, self.embeddings)
        self.pdf_path = pdf_path
        # NOTE(review): self.memory is not cleared here, so chat history from
        # an earlier PDF is still passed to the new chain.
        self.setup_conversation_chain()

    def setup_conversation_chain(self):
        """Build the retrieval chain on top of the current vector store.

        Raises:
            RuntimeError: If no PDF has been indexed yet.
        """
        if self.db is None:
            raise RuntimeError(
                "No PDF has been loaded; call load_and_process_pdf() first."
            )
        self.qa_chain = ConversationalRetrievalChain.from_llm(
            self.llm,
            retriever=self.db.as_retriever(),
            memory=self.memory,
            combine_docs_chain_kwargs={"prompt": self.prompt}
        )

    def chat(self, query):
        """Answer ``query`` using the loaded PDF.

        Args:
            query: The user's question.

        Returns:
            The chain's answer text, or a prompt to upload a PDF when no
            chain has been built yet.
        """
        if not self.qa_chain:
            return "Please upload a PDF first."
        result = self.qa_chain({"question": query})
        return result['answer']

    def get_pdf_path(self):
        """Return the stored PDF path, or a placeholder message if none is set."""
        if self.pdf_path:
            return self.pdf_path
        return "No PDF uploaded yet."
# Single chatbot instance shared by the Gradio callbacks in this module.
pdf_chatbot = AdvancedPdfChatbot(openai_api_key=openai_api_key)
def upload_pdf(pdf_file):
    """Process the uploaded PDF and report its path.

    Args:
        pdf_file: The value supplied by the file-upload component, or
            ``None`` when nothing was uploaded. Its ``name`` attribute is
            used as the path of the PDF.

    Returns:
        The path of the processed PDF, or a message asking for a file when
        ``pdf_file`` is ``None``.
    """
    if pdf_file is not None:
        uploaded_path = pdf_file.name
        pdf_chatbot.load_and_process_pdf(uploaded_path)
        return uploaded_path
    return "Please upload a PDF file."
def respond(message, history):
    """Answer one chat message and add the exchange to the chat history.

    Args:
        message: Text submitted by the user.
        history: List of ``(user_message, bot_message)`` pairs already shown
            in the chat; ``None`` is treated as an empty history.

    Returns:
        A ``("", history)`` tuple. The empty string is sent back to the
        input textbox and the second item is the updated history.
    """
    # Work on a copy so the caller's list is never mutated, and so a missing
    # history does not crash on append.
    updated_history = list(history) if history else []
    # Ignore blank submissions instead of spending an LLM call on them and
    # recording an empty turn.
    if not message or not message.strip():
        return "", updated_history
    bot_message = pdf_chatbot.chat(message)
    updated_history.append((message, bot_message))
    return "", updated_history
def clear_chatbot():
    """Clear the chatbot's conversation memory and return an empty history."""
    conversation_memory = pdf_chatbot.memory
    conversation_memory.clear()
    return []
def get_pdf_path():
    """Return whatever the shared chatbot reports as its current PDF path."""
    current_path = pdf_chatbot.get_pdf_path()
    return current_path
# Create the Gradio interface: PDF upload controls, a path display, and the
# chat area, each wired to the callbacks defined above.
with gr.Blocks() as demo:
    gr.Markdown("# PDF Chatbot")
    # NOTE(review): the original indentation was lost; the upload widget and
    # its button are assumed to be the only children of this row -- confirm.
    with gr.Row():
        pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_button = gr.Button("Process PDF")
    upload_status = gr.Textbox(label="Upload Status")
    # upload_pdf's return value (file path or a prompt message) is shown in
    # the status box.
    upload_button.click(upload_pdf, inputs=[pdf_upload], outputs=[upload_status])
    path_button = gr.Button("Get PDF Path")
    pdf_path_display = gr.Textbox(label="Current PDF Path")
    chatbot_interface = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.Button("Clear")
    # respond returns ("", history): the first output resets the textbox and
    # the second refreshes the chat display.
    msg.submit(respond, inputs=[msg, chatbot_interface], outputs=[msg, chatbot_interface])
    # clear_chatbot clears the conversation memory and returns an empty history.
    clear.click(clear_chatbot, outputs=[chatbot_interface])
    path_button.click(get_pdf_path, outputs=[pdf_path_display])
# Launch the Gradio app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()