Pavan178's picture
Update app.py
831b4a5 verified
raw
history blame
5.26 kB
import os
import gradio as gr
import logging
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from PyPDF2 import PdfReader
class ContextAwareResponseGenerator:
    """Wraps an LLM in a prompt/chain that lets the model choose its own
    response structure from the context, query, and chat history."""

    def __init__(self, llm):
        """Build the prompt template and LLM chain around the given chat model."""
        self.llm = llm
        self.response_prompt = PromptTemplate(
            input_variables=['context', 'query', 'chat_history'],
            template="""Given the context, query, and chat history, generate the best response that is clear and helpful.
Context: {context}
Query: {query}
Chat History: {chat_history}
Choose the most appropriate response structure and generate the response directly, without explicit guidance on which format to use. Your response should be based on the query and context provided."""
        )
        self.response_chain = LLMChain(llm=self.llm, prompt=self.response_prompt)

    def generate_response(self, context, query, chat_history=''):
        """Run the chain; fall back to a canned reply if the LLM call fails."""
        payload = {
            'context': context,
            'query': query,
            'chat_history': chat_history if chat_history else "No previous context",
        }
        try:
            # The LLM itself decides how to structure the answer.
            return self.response_chain.run(payload).strip()
        except Exception as exc:
            logging.error(f"Response generation error: {exc}")
            return self._default_response(query)

    def _default_response(self, query):
        """Canned reply used when response generation fails."""
        return f"I couldn't generate a response for: {query}"
class AdvancedPdfChatbot:
    """PDF question-answering chatbot: indexes a PDF into FAISS and answers
    queries through a context-aware LLM chain with conversation memory."""

    def __init__(self, openai_api_key):
        """Configure the LLM, embeddings, splitter, memory, and response chain.

        Args:
            openai_api_key: OpenAI key; exported to the environment for langchain.
        """
        os.environ["OPENAI_API_KEY"] = openai_api_key
        self.llm = ChatOpenAI(temperature=0.2, model_name='gpt-4o')
        self.embeddings = OpenAIEmbeddings()
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        self.response_generator = ContextAwareResponseGenerator(self.llm)
        self.db = None                # FAISS index; set once a PDF is processed
        self.document_context = ""    # short "title by author" summary of the loaded PDF

    def load_and_process_pdf(self, pdf_path):
        """Index a PDF: read metadata, split into chunks, embed into FAISS.

        Returns:
            True on success, False on any processing error (logged).
        """
        try:
            reader = PdfReader(pdf_path)
            # BUGFIX: reader.metadata is None for PDFs without an info
            # dictionary; the old code crashed on them and reported failure
            # even though the document itself was readable.
            info = reader.metadata or {}
            metadata = {
                "title": info.get("/Title", "Untitled"),
                "author": info.get("/Author", "Unknown"),
            }
            loader = PyPDFLoader(pdf_path)
            documents = loader.load()
            texts = self.text_splitter.split_documents(documents)
            # Cap at 50 chunks to bound embedding cost on very large PDFs.
            self.db = FAISS.from_documents(texts[:50], self.embeddings)
            self.document_context = f"Document: {metadata['title']} by {metadata['author']}"
            return True
        except Exception as e:
            logging.error(f"PDF processing error: {e}")
            return False

    def chat(self, query, is_new_question=False):
        """Answer a query about the loaded PDF, grounding the LLM in retrieved chunks.

        Args:
            query: the user's question.
            is_new_question: when True, prior chat history is ignored.
        """
        if not self.db:
            return "Please upload a PDF first."

        chat_history = self.memory.load_memory_variables({}).get('chat_history', [])
        if is_new_question:
            chat_history = []  # fresh question: don't condition on prior turns

        # BUGFIX: the FAISS index was built but never queried, so responses
        # ignored the document body entirely (the context was only the
        # title/author line). Retrieve the most relevant chunks and pass
        # them to the LLM alongside the document summary.
        try:
            relevant_docs = self.db.similarity_search(query, k=3)
            retrieved = "\n\n".join(doc.page_content for doc in relevant_docs)
        except Exception as e:
            logging.error(f"Retrieval error: {e}")
            retrieved = ""  # best-effort: fall back to summary-only context
        context = f"{self.document_context}\n\n{retrieved}" if retrieved else self.document_context

        response = self.response_generator.generate_response(
            context=context,
            query=query,
            chat_history=str(chat_history)
        )
        # Persist the turn so follow-up questions can reference it.
        self.memory.save_context({"input": query}, {"output": response})
        return response
# Gradio Interface
# Module-level singleton shared by the Gradio callbacks below.
# NOTE(review): if OPENAI_API_KEY is unset, .get() passes None into the
# constructor — presumably the key is always set in deployment; verify.
pdf_chatbot = AdvancedPdfChatbot(os.environ.get("OPENAI_API_KEY"))
def upload_pdf(pdf_file):
    """Gradio callback: process the uploaded PDF and return a status string."""
    if not pdf_file:
        return "Upload a PDF file."
    # Gradio may hand us a tempfile-like object (with .name) or a plain path.
    path = getattr(pdf_file, 'name', pdf_file)
    if pdf_chatbot.load_and_process_pdf(path):
        return "PDF processed successfully"
    return "Processing failed"
def respond(message, history):
    """Gradio callback: answer `message` and append the turn to `history`.

    Returns:
        (textbox_value, updated_history) — the textbox is always cleared.
    """
    try:
        # An empty history means this is the first, fresh question.
        is_new_question = len(history) == 0
        bot_message = pdf_chatbot.chat(message, is_new_question)
        history.append((message, bot_message))
    except Exception as e:
        # BUGFIX: the old code returned the error text in the first output
        # slot, which the UI wires to the input textbox — the error replaced
        # the user's draft and never appeared in the chat. Show it as a bot
        # turn instead.
        history.append((message, f"Error: {e}"))
    return "", history
# Gradio UI wiring: upload controls on top, chat area below.
with gr.Blocks() as demo:
    gr.Markdown("# Advanced PDF Chatbot")

    # Upload row: file picker plus an explicit "process" trigger.
    with gr.Row():
        pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        process_btn = gr.Button("Process PDF")
    status_box = gr.Textbox(label="Upload Status")
    process_btn.click(upload_pdf, inputs=[pdf_input], outputs=[status_box])

    # Chat area: submitting the textbox clears it and updates the chat log.
    chat_log = gr.Chatbot()
    query_box = gr.Textbox(placeholder="Enter your query...")
    query_box.submit(respond, inputs=[query_box, chat_log], outputs=[query_box, chat_log])
# Launch the Gradio app only when run as a script, not on import.
if __name__ == "__main__":
    demo.launch()