|
import os
import logging

import gradio as gr
import spaces
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from PyPDF2 import PdfReader


class ContextAwareResponseGenerator:
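    """Generates a structured answer from context, query, and chat history via an LLMChain."""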
|
def __init__(self, llm): |
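        """Store the LLM and build the prompt and chain used for responses."""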
|
        self.llm = llm
        self.response_prompt = PromptTemplate(
            input_variables=['context', 'query', 'chat_history'],
            template="""Given the context, query, and chat history, generate a clear and helpful response. Structure it in whatever format fits best: paragraphs, headlines with subtext, bullet points, or sections.

Context: {context}
Query: {query}
Chat History: {chat_history}

Choose the most appropriate structure and generate the response directly, without commenting on which format you chose. Base the response on the query and the context provided."""
        )
        self.response_chain = LLMChain(llm=self.llm, prompt=self.response_prompt)
|
    @spaces.GPU  # Requests a ZeroGPU slot on Hugging Face Spaces for the duration of this call.
    def generate_response(self, context, query, chat_history=''):
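        """Run the response chain; fall back to a default message on any error."""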
|
        try:
            response = self.response_chain.run({
                'context': context,
                'query': query,
                'chat_history': chat_history or "No previous context"
            })
            return response.strip()
        except Exception as e:
            logging.error(f"Response generation error: {e}")
            return self._default_response(query)

    def _default_response(self, query):
        return f"I couldn't generate a response for: {query}"


class AdvancedPdfChatbot:
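    """Chats over an uploaded PDF: indexes it with FAISS and answers via the response generator."""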
|
    @spaces.GPU
    def __init__(self, openai_api_key):
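        """Wire up the LLM, embeddings, text splitter, memory, and response generator."""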
|
        # Only set the env var when a key is provided; ChatOpenAI also reads
        # OPENAI_API_KEY from the environment if it is already set there.
        if openai_api_key:
            os.environ["OPENAI_API_KEY"] = openai_api_key
        self.llm = ChatOpenAI(temperature=0.2, model_name='gpt-4o')
        self.embeddings = OpenAIEmbeddings()
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

        self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        self.response_generator = ContextAwareResponseGenerator(self.llm)

        self.db = None
        self.document_context = ""
|
    @spaces.GPU
    def load_and_process_pdf(self, pdf_path):
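        """Read PDF metadata, split the document into chunks, and build the FAISS index."""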
|
        try:
            reader = PdfReader(pdf_path)
            # reader.metadata can be None for PDFs without an info dictionary.
            info = reader.metadata or {}
            metadata = {
                "title": info.get("/Title", "Untitled"),
                "author": info.get("/Author", "Unknown")
            }

            loader = PyPDFLoader(pdf_path)
            documents = loader.load()
            texts = self.text_splitter.split_documents(documents)

            # Index only the first 50 chunks to bound embedding cost on large files.
            self.db = FAISS.from_documents(texts[:50], self.embeddings)
            self.document_context = f"Document: {metadata['title']} by {metadata['author']}"

            return True
        except Exception as e:
            logging.error(f"PDF processing error: {e}")
            return False
|
    @spaces.GPU
    def chat(self, query, is_new_question=False):
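        """Answer a query against the loaded document, tracking conversation memory."""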
|
        if not self.db:
            return "Please upload a PDF first."

        chat_history = self.memory.load_memory_variables({}).get('chat_history', [])
        if is_new_question:
            chat_history = []

        response = self.response_generator.generate_response(
            context=self.document_context,
            query=query,
            chat_history=str(chat_history)
        )

        self.memory.save_context({"input": query}, {"output": response})
        return response
|
|
# One shared chatbot instance; the key may also come from the Space's secrets.
pdf_chatbot = AdvancedPdfChatbot(os.environ.get("OPENAI_API_KEY"))
|
|
def upload_pdf(pdf_file): |
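    """Gradio handler: process the uploaded file and report success or failure."""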
|
    if not pdf_file:
        return "Upload a PDF file."
    # gr.File may return a tempfile-like object or a plain path, depending on the Gradio version.
    file_path = pdf_file.name if hasattr(pdf_file, 'name') else pdf_file
    return "PDF processed successfully" if pdf_chatbot.load_and_process_pdf(file_path) else "Processing failed"
|
|
def respond(message, history): |
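    """Gradio handler: append the bot's reply to the chat transcript and clear the input box."""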
|
    try:
        is_new_question = len(history) == 0
        bot_message = pdf_chatbot.chat(message, is_new_question)
        history.append((message, bot_message))
    except Exception as e:
        # Surface errors in the chat transcript rather than leaving them in the input box.
        history.append((message, f"Error: {e}"))
    return "", history
|
|
with gr.Blocks() as demo:
    gr.Markdown("# Advanced PDF Chatbot")
    with gr.Row():
        pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_button = gr.Button("Process PDF")

    upload_status = gr.Textbox(label="Upload Status")
    upload_button.click(upload_pdf, inputs=[pdf_upload], outputs=[upload_status])

    chatbot_interface = gr.Chatbot()
    msg = gr.Textbox(placeholder="Enter your query...")
    msg.submit(respond, inputs=[msg, chatbot_interface], outputs=[msg, chatbot_interface])


if __name__ == "__main__":
    demo.launch()
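# Assumed dependencies, not pinned by the source: gradio, spaces, an older
# langchain (<0.2) for these legacy import paths, openai, faiss-cpu,
# pypdf (used by PyPDFLoader), and PyPDF2.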
|
|