# app.py — Advanced PDF Chatbot (Hugging Face Space; commit 45bb9f0)
import os
import gradio as gr
import logging
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
import concurrent.futures
import timeout_decorator
# Configure logging
# Module-level logger used by QueryRefiner and the Gradio callbacks below;
# basicConfig at INFO so refinement/PDF/chat errors show up in the Space logs.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class QueryRefiner:
    """Rewrites a raw user question into a sharper retrieval query via an LLM."""

    def __init__(self):
        # Low temperature keeps the rewrite close to the user's original intent.
        self.refinement_llm = ChatOpenAI(temperature=0.2, model_name='gpt-3.5-turbo', request_timeout=30)
        self.refinement_prompt = PromptTemplate(
            input_variables=['query', 'context'],
            template="""Refine and enhance the following query for maximum clarity and precision:
Original Query: {query}
Document Context: {context}
Enhanced Query Requirements:
- Clarify any ambiguous terms
- Add specific context-driven details
- Ensure precise information retrieval
- Restructure for optimal comprehension
Refined Query:"""
        )
        self.refinement_chain = LLMChain(
            llm=self.refinement_llm,
            prompt=self.refinement_prompt
        )

    def refine_query(self, original_query, context_hints=''):
        """Return an LLM-refined version of *original_query*.

        Falls back to the untouched query if the refinement call fails for
        any reason, so a refinement outage never blocks the chat flow.
        """
        payload = {
            'query': original_query,
            'context': context_hints or "General academic document",
        }
        try:
            return self.refinement_chain.run(payload).strip()
        except Exception as e:
            logger.error(f"Query refinement error: {e}")
            return original_query
class AdvancedPdfChatbot:
    """Conversational QA over a single PDF, backed by a FAISS vector store.

    Workflow: load_and_process_pdf() builds the index and retrieval chain;
    chat() then answers questions, first refining them with QueryRefiner.
    """

    def __init__(self, openai_api_key):
        """Set up embeddings, splitter, LLM, memory and the query refiner.

        Args:
            openai_api_key: OpenAI key, exported to the environment so the
                langchain clients pick it up. May be None (e.g. the env var
                is unset at module import), in which case the environment is
                left untouched instead of raising TypeError.
        """
        # Fix: os.environ[...] = None raises TypeError; only export a real key.
        if openai_api_key:
            os.environ["OPENAI_API_KEY"] = openai_api_key
        self.embeddings = OpenAIEmbeddings(request_timeout=30)
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        self.llm = ChatOpenAI(temperature=0, model_name='gpt-4', request_timeout=30)
        self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        self.query_refiner = QueryRefiner()
        self.db = None     # FAISS index; set by load_and_process_pdf()
        self.chain = None  # ConversationalRetrievalChain; set alongside db
        self.qa_prompt = PromptTemplate(
            template="""You are an expert academic assistant analyzing a document.
Context: {context}
Question: {question}
Provide a comprehensive, precise answer based strictly on the document's content.
If the answer isn't directly available, explain why. Try to structure your response according to context such as paragraphs or bullet points, headlines and subtexts""",
            input_variables=["context", "question"]
        )

    def load_and_process_pdf(self, pdf_path):
        """Load *pdf_path*, split it, embed it into FAISS and build the chain."""
        loader = PyPDFLoader(pdf_path)
        documents = loader.load()
        texts = self.text_splitter.split_documents(documents)
        self.db = FAISS.from_documents(texts, self.embeddings)
        # k=3: retrieve the three most similar chunks per question.
        self.chain = ConversationalRetrievalChain.from_llm(
            llm=self.llm,
            retriever=self.db.as_retriever(search_kwargs={"k": 3}),
            memory=self.memory,
            combine_docs_chain_kwargs={"prompt": self.qa_prompt}
        )

    def chat(self, query):
        """Answer *query* against the loaded PDF; prompt for upload if none."""
        if not self.chain:
            return "Please upload a PDF first."
        context_hints = self._extract_document_type()
        refined_query = self.query_refiner.refine_query(query, context_hints)
        result = self.chain({"question": refined_query})
        return result['answer']

    def _extract_document_type(self):
        """Extract basic document characteristics"""
        if not self.db:
            return ""
        try:
            # Peek at the first stored chunk to give the refiner a topic hint.
            first_doc = list(self.db.docstore._dict.values())[0].page_content[:500]
            return f"Document appears to cover: {first_doc[:100]}..."
        except Exception:
            # Fix: was a bare `except:`, which also swallowed SystemExit and
            # KeyboardInterrupt. The generic fallback hint is kept on purpose.
            return "Academic/technical document"

    def clear_memory(self):
        """Drop the accumulated conversation history."""
        self.memory.clear()
# Gradio Interface
# Single module-level chatbot instance shared by every UI callback below.
# NOTE(review): os.environ.get may return None here when the env var is unset;
# verify AdvancedPdfChatbot tolerates a missing key before deploying.
pdf_chatbot = AdvancedPdfChatbot(os.environ.get("OPENAI_API_KEY"))
def upload_pdf(pdf_file):
    """Gradio callback: ingest the uploaded PDF and report a status string."""
    if pdf_file is None:
        return "Please upload a PDF file."
    # Gradio may hand us a tempfile-like object (with .name) or a plain path.
    file_path = getattr(pdf_file, 'name', pdf_file)
    try:
        pdf_chatbot.load_and_process_pdf(file_path)
    except Exception as e:
        logger.error(f"PDF processing error: {e}")
        return f"Error processing PDF: {str(e)}"
    return f"PDF processed successfully: {file_path}"
def respond(message, history):
    """Gradio callback: answer *message*, append the turn, clear the textbox."""
    if not message:
        # Empty/None input: nothing to do, keep the history untouched.
        return "", history
    try:
        history.append((message, pdf_chatbot.chat(message)))
        return "", history
    except Exception as e:
        logger.error(f"Chat response error: {e}")
        return f"Error: {str(e)}", history
def clear_chatbot():
    """Gradio callback: wipe the LLM conversation memory and empty the chat window."""
    pdf_chatbot.clear_memory()
    return []
# Gradio UI
# Layout: an upload row (file picker + process button + status box) above the
# chat window, its input textbox, and a clear button.
with gr.Blocks() as demo:
    gr.Markdown("# Advanced PDF Chatbot")
    with gr.Row():
        pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_button = gr.Button("Process PDF")
        upload_status = gr.Textbox(label="Upload Status")
    # Clicking "Process PDF" ingests the file; the status box shows success/error.
    upload_button.click(upload_pdf, inputs=[pdf_upload], outputs=[upload_status])
    chatbot_interface = gr.Chatbot()
    msg = gr.Textbox(placeholder="Enter your query...")
    # Submitting the textbox sends the question and clears the input field.
    msg.submit(respond, inputs=[msg, chatbot_interface], outputs=[msg, chatbot_interface])
    clear_button = gr.Button("Clear Conversation")
    # Clearing resets both the visible chat window and the LLM memory.
    clear_button.click(clear_chatbot, outputs=[chatbot_interface])

if __name__ == "__main__":
    demo.launch()