File size: 4,844 Bytes
c2af1e5 d2e3c7f 28f9d4d d2e3c7f bd0ebd6 d43bb1b 633ac28 d43bb1b aff3a65 a526ade d2e3c7f 0a080de 355b657 d2e3c7f ff0e62c 355b657 80803b0 741c69d 80803b0 741c69d 355b657 33831c4 f40cac0 355b657 5e8e8f0 d2e3c7f f74eb2e ff0e62c d2e3c7f 510767a 355b657 d2e3c7f ff0e62c d2e3c7f 5afc751 d2e3c7f f74eb2e ff0e62c d2e3c7f f74eb2e d2e3c7f 5e8e8f0 d2e3c7f 596dcf4 0ae5df7 d2e3c7f 5e8e8f0 d2e3c7f f74eb2e ff0e62c 5e8e8f0 d2e3c7f bd0ebd6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
import os
import gradio as gr
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
openai_api_key = os.environ.get("OPENAI_API_KEY")
class AdvancedPdfChatbot:
def __init__(self, openai_api_key):
os.environ["OPENAI_API_KEY"] = openai_api_key
self.embeddings = OpenAIEmbeddings()
self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
self.llm = ChatOpenAI(temperature=0,model_name='gpt-4o-mini')
self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
self.qa_chain = None
self.pdf_path = None
self.template = """
You are a study partner assistant, students give you pdfs
and you help them to answer their questions.
Answer the question based on the most recent provided resources only.
Give the most relevant answer.
Instructions:
Use given source for Context: Generate responses using only the provided content.
Cite Sources: Reference content using [page: paragraph] or [page: line] format.
Address Multiple Subjects: If the query relates to multiple subjects with the same name, provide distinct responses for each.
Relevance Only: Exclude irrelevant or outlier information.
Keep it Concise: Provide clear, direct, and descriptive answers, answer in great details when needed and keep short responses when needed.
No Guesswork: Do not generate information beyond the given content.
No Match: If no relevant content is found, reply with: "No relevant information found.
Add comprehensive details and break down the responses into parts whenever needed.
Context: {context}
Question: {question}
Answer:
(Note :YOUR OUTPUT IS RENDERED IN PROPER PARAGRAPHS or BULLET POINTS when needed, modify the response formats as needed, only choose the formats based on the type of question asked)
"""
self.prompt = PromptTemplate(template=self.template, input_variables=["context", "question"])
def load_and_process_pdf(self, pdf_path):
loader = PyPDFLoader(pdf_path)
documents = loader.load()
texts = self.text_splitter.split_documents(documents)
self.db = FAISS.from_documents(texts, self.embeddings)
self.pdf_path = pdf_path
self.setup_conversation_chain()
def setup_conversation_chain(self):
self.qa_chain = ConversationalRetrievalChain.from_llm(
self.llm,
retriever=self.db.as_retriever(),
memory=self.memory,
combine_docs_chain_kwargs={"prompt": self.prompt}
)
def chat(self, query):
if not self.qa_chain:
return "Please upload a PDF first."
result = self.qa_chain({"question": query})
return result['answer']
def get_pdf_path(self):
# Return the stored PDF path
if self.pdf_path:
return self.pdf_path
else:
return "No PDF uploaded yet."
# Initialize the chatbot
pdf_chatbot = AdvancedPdfChatbot(openai_api_key)
def upload_pdf(pdf_file):
if pdf_file is None:
return "Please upload a PDF file."
file_path = pdf_file.name
pdf_chatbot.load_and_process_pdf(file_path)
return file_path
def respond(message, history):
bot_message = pdf_chatbot.chat(message)
history.append((message, bot_message))
return "", history
def clear_chatbot():
pdf_chatbot.memory.clear()
return []
def get_pdf_path():
# Call the method to return the current PDF path
return pdf_chatbot.get_pdf_path()
# Create the Gradio interface
with gr.Blocks() as demo:
gr.Markdown("# PDF Chatbot")
with gr.Row():
pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
upload_button = gr.Button("Process PDF")
upload_status = gr.Textbox(label="Upload Status")
upload_button.click(upload_pdf, inputs=[pdf_upload], outputs=[upload_status])
path_button = gr.Button("Get PDF Path")
pdf_path_display = gr.Textbox(label="Current PDF Path")
chatbot_interface = gr.Chatbot()
msg = gr.Textbox()
clear = gr.Button("Clear")
msg.submit(respond, inputs=[msg, chatbot_interface], outputs=[msg, chatbot_interface])
clear.click(clear_chatbot, outputs=[chatbot_interface])
path_button.click(get_pdf_path, outputs=[pdf_path_display])
if __name__ == "__main__":
demo.launch()
|