File size: 5,079 Bytes
d8c3a88 d2e3c7f 4277202 d2e3c7f 6c5c0ad 6a6fbcd d2e3c7f d43bb1b 75fd4bb 4b219d0 58bf31d 75fd4bb 58bf31d 75fd4bb 58bf31d 75fd4bb 58bf31d 75fd4bb 58bf31d 75fd4bb 58bf31d 75fd4bb 58bf31d 75fd4bb 58bf31d e8434ee 58bf31d e8434ee 58bf31d e8434ee 6a6fbcd 58bf31d e8434ee 6a6fbcd d2e3c7f 58bf31d 75fd4bb 1e82c8e 58bf31d 355b657 d2e3c7f 58bf31d 75fd4bb 3f31c68 58bf31d 5e8e8f0 d2e3c7f 7f36a98 75fd4bb 58bf31d 75fd4bb 58bf31d 7f36a98 58bf31d 7f36a98 75fd4bb 7f36a98 58bf31d 75fd4bb f8d8d78 d2e3c7f 58bf31d 75fd4bb 58bf31d 75fd4bb 58bf31d ccff99d 58bf31d 75fd4bb 58bf31d d2e3c7f 58bf31d 75fd4bb 6a6fbcd 75fd4bb 7f36a98 75fd4bb 7f36a98 6a6fbcd d2e3c7f 6a6fbcd d2e3c7f 5e8e8f0 d2e3c7f 58bf31d d2e3c7f 6a6fbcd d2e3c7f 5e8e8f0 d2e3c7f 58bf31d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
import logging
import os
import re

import gradio as gr
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from PyPDF2 import PdfReader
class ContextAwareResponseGenerator:
    """Generate LLM responses whose structure (summary, markdown, narrative,
    etc.) is selected by the model itself from the prompt's five options."""

    # Matches a leading structure selection such as "3", "3.", "3)" or "3:"
    # (optionally followed by whitespace) at the start of the model output.
    _STRUCTURE_PREFIX = re.compile(r'^\s*[1-5][\.\):\-]?\s*')

    def __init__(self, llm):
        self.llm = llm
        self.response_prompt = PromptTemplate(
            input_variables=['context', 'query', 'chat_history'],
            template="""Analyze the context, query, and chat history to generate an optimal response:
Context: {context}
Query: {query}
Chat History: {chat_history}
Response Structure Selection Criteria:
1. Technical academic breakdown
2. Concise summary with key points
3. Markdown with hierarchical insights
4. Narrative explanation
5. Comparative analysis
Choose the most appropriate response structure (1-5) and generate the response accordingly:"""
        )
        self.response_chain = LLMChain(llm=self.llm, prompt=self.response_prompt)

    def generate_response(self, context, query, chat_history=''):
        """Run the response chain and return the cleaned response text.

        BUG FIX: the original did ``full_response[1:]``, which unconditionally
        dropped the first character of every reply — corrupting any response
        that did not start with a bare structure digit.  Now a leading
        "N." / "N)" marker is stripped only when one is actually present.

        Returns a fallback message (never raises) if the chain fails.
        """
        try:
            full_response = self.response_chain.run({
                'context': context,
                'query': query,
                'chat_history': chat_history or "No previous context"
            })
            # Remove at most one structure-selection prefix (e.g. "2. "),
            # then trim surrounding whitespace.
            return self._STRUCTURE_PREFIX.sub('', full_response, count=1).strip()
        except Exception as e:
            logging.error(f"Response generation error: {e}")
            return f"I couldn't generate a response for: {query}"
class AdvancedPdfChatbot:
    """PDF chatbot: splits and embeds an uploaded PDF into a FAISS index,
    then answers queries using retrieved chunks plus conversation memory."""

    # Cap on the number of chunks embedded per document (embedding-cost control).
    MAX_CHUNKS = 50
    # Number of chunks retrieved from the vector store per query.
    TOP_K = 4

    def __init__(self, openai_api_key):
        # Only export the key when one was supplied: assigning None into
        # os.environ raises TypeError.
        if openai_api_key:
            os.environ["OPENAI_API_KEY"] = openai_api_key
        self.llm = ChatOpenAI(temperature=0.2, model_name='gpt-4o')
        self.embeddings = OpenAIEmbeddings()
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        self.response_generator = ContextAwareResponseGenerator(self.llm)
        self.db = None            # FAISS index; set after a PDF is processed
        self.document_context = ""  # "Document: <title> by <author>" header

    def load_and_process_pdf(self, pdf_path):
        """Split, embed, and index the PDF at `pdf_path`.

        Returns True on success, False on any failure (error is logged).
        """
        try:
            reader = PdfReader(pdf_path)
            # PyPDF2 returns None when the PDF has no metadata dictionary,
            # and individual values may also be None — guard both cases.
            meta = reader.metadata or {}
            title = meta.get("/Title") or "Untitled"
            author = meta.get("/Author") or "Unknown"
            loader = PyPDFLoader(pdf_path)
            documents = loader.load()
            texts = self.text_splitter.split_documents(documents)
            self.db = FAISS.from_documents(texts[:self.MAX_CHUNKS], self.embeddings)
            self.document_context = f"Document: {title} by {author}"
            return True
        except Exception as e:
            logging.error(f"PDF processing error: {e}")
            return False

    def chat(self, query):
        """Answer `query` using retrieved document chunks and chat history."""
        if not self.db:
            return "Please upload a PDF first."
        chat_history = self.memory.load_memory_variables({}).get('chat_history', [])
        # BUG FIX: the vector store was built but never queried — responses
        # were generated from the title/author line alone.  Retrieve the most
        # relevant chunks and include them in the context.
        docs = self.db.similarity_search(query, k=self.TOP_K)
        retrieved = "\n\n".join(doc.page_content for doc in docs)
        context = (
            f"{self.document_context}\n\n{retrieved}" if retrieved
            else self.document_context
        )
        response = self.response_generator.generate_response(
            context=context,
            query=query,
            chat_history=str(chat_history)
        )
        # Persist the turn so later queries see it in chat_history.
        self.memory.save_context({"input": query}, {"output": response})
        return response
# Gradio Interface — single shared chatbot instance.
# NOTE(review): instantiated at import time; if OPENAI_API_KEY is unset this
# passes None straight into AdvancedPdfChatbot — confirm the key is exported
# before launching.
pdf_chatbot = AdvancedPdfChatbot(os.environ.get("OPENAI_API_KEY"))
def upload_pdf(pdf_file):
    """Index an uploaded PDF and return a status string for the UI."""
    if not pdf_file:
        return "Upload a PDF file."
    # Gradio may hand us either a file-like object (with .name) or a path.
    path = getattr(pdf_file, 'name', pdf_file)
    if pdf_chatbot.load_and_process_pdf(path):
        return "PDF processed successfully"
    return "Processing failed"
def respond(message, history):
    """Chat callback: append (message, reply) to history and clear the box.

    On failure the error text is shown in the input slot and the history is
    returned unchanged.
    """
    try:
        reply = pdf_chatbot.chat(message)
        history.append((message, reply))
        return "", history
    except Exception as exc:
        return f"Error: {exc}", history
# Gradio UI: an upload row (file picker + process button + status box)
# above a chat window with a query input.
with gr.Blocks() as demo:
    gr.Markdown("# Advanced PDF Chatbot")

    with gr.Row():
        pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        process_btn = gr.Button("Process PDF")
    status_box = gr.Textbox(label="Upload Status")
    process_btn.click(upload_pdf, inputs=[pdf_input], outputs=[status_box])

    chat_window = gr.Chatbot()
    query_box = gr.Textbox(placeholder="Enter your query...")
    query_box.submit(respond, inputs=[query_box, chat_window],
                     outputs=[query_box, chat_window])

if __name__ == "__main__":
    demo.launch()