import logging
import os

import gradio as gr
import spaces
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from PyPDF2 import PdfReader
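
# `spaces` supplies the @spaces.GPU decorator used below; on Hugging Face
# ZeroGPU Spaces it requests GPU allocation for the decorated function and is
# designed to be a no-op elsewhere.
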
class ContextAwareResponseGenerator:
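    """Wraps an LLMChain whose prompt asks the model to choose its own
    response structure (paragraphs, headings, bullet points, or sections)
    based on the query, document context, and chat history."""
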
    def __init__(self, llm):
        self.llm = llm
        self.response_prompt = PromptTemplate(
            input_variables=['context', 'query', 'chat_history'],
            template="""Given the context, query, and chat history, generate the clearest, most helpful response. Use a structured format where appropriate, such as paragraphs, headings with subtext, bullet points, or sections.

Context: {context}
Query: {query}
Chat History: {chat_history}

Choose the most appropriate structure and generate the response directly, without explicit guidance on which format to use. Base your response on the query and context provided."""
        )
        self.response_chain = LLMChain(llm=self.llm, prompt=self.response_prompt)

    @spaces.GPU
    def generate_response(self, context, query, chat_history=''):
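        """Run the response chain; fall back to a canned message on error."""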
        try:
            # The LLM decides the response structure itself; we only supply inputs.
            response = self.response_chain.run({
                'context': context,
                'query': query,
                'chat_history': chat_history or "No previous context"
            })
            return response.strip()
        except Exception as e:
            logging.error(f"Response generation error: {e}")
            return self._default_response(query)

    def _default_response(self, query):
        return f"I couldn't generate a response for: {query}"

class AdvancedPdfChatbot:
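    """End-to-end PDF chatbot: extracts metadata with PyPDF2, chunks and
    indexes the document in a FAISS vector store, and answers queries via
    ContextAwareResponseGenerator while tracking conversation memory."""
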
    @spaces.GPU
    def __init__(self, openai_api_key):
        if openai_api_key:  # avoid setting os.environ to None when the key is absent
            os.environ["OPENAI_API_KEY"] = openai_api_key
        self.llm = ChatOpenAI(temperature=0.2, model_name='gpt-4o')
        self.embeddings = OpenAIEmbeddings()
        self.text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        self.memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
        self.response_generator = ContextAwareResponseGenerator(self.llm)
        self.db = None
        self.document_context = ""

    @spaces.GPU
    def load_and_process_pdf(self, pdf_path):
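        """Index the PDF at pdf_path into FAISS; return True on success."""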
        try:
            reader = PdfReader(pdf_path)
            info = reader.metadata or {}  # metadata can be None for some PDFs
            metadata = {
                "title": info.get("/Title", "Untitled"),
                "author": info.get("/Author", "Unknown")
            }
            loader = PyPDFLoader(pdf_path)
            documents = loader.load()
            texts = self.text_splitter.split_documents(documents)
            # Cap indexing at the first 50 chunks.
            self.db = FAISS.from_documents(texts[:50], self.embeddings)
            self.document_context = f"Document: {metadata['title']} by {metadata['author']}"
            return True
        except Exception as e:
            logging.error(f"PDF processing error: {e}")
            return False

    @spaces.GPU
    def chat(self, query, is_new_question=False):
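        """Answer a query against the loaded document, using stored history
        unless is_new_question is True."""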
        if not self.db:
            return "Please upload a PDF first."

        # Retrieve stored chat history, or reset it for a fresh question
        chat_history = self.memory.load_memory_variables({}).get('chat_history', [])
        if is_new_question:
            chat_history = []

        # Generate a context-aware response
        response = self.response_generator.generate_response(
            context=self.document_context,
            query=query,
            chat_history=str(chat_history)
        )

        # Store the turn in conversation memory
        self.memory.save_context({"input": query}, {"output": response})
        return response
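

# A minimal programmatic usage sketch (no UI), assuming OPENAI_API_KEY is set
# and "paper.pdf" stands in for a real local file:
#   bot = AdvancedPdfChatbot(os.environ["OPENAI_API_KEY"])
#   if bot.load_and_process_pdf("paper.pdf"):
#       print(bot.chat("Summarize the main findings."))
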
# Gradio Interface
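# The chatbot is created once at import time; the key is read from the
# environment (e.g., a Space secret) and may be None, in which case the
# constructor leaves any already-configured key untouched.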
pdf_chatbot = AdvancedPdfChatbot(os.environ.get("OPENAI_API_KEY"))
def upload_pdf(pdf_file):
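    """Gradio callback: process the uploaded PDF and report the outcome."""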
    if not pdf_file:
        return "Upload a PDF file."
    file_path = pdf_file.name if hasattr(pdf_file, 'name') else pdf_file
    return "PDF processed successfully" if pdf_chatbot.load_and_process_pdf(file_path) else "Processing failed"

def respond(message, history):
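    """Gradio callback: answer the message and append the turn to history."""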
    try:
        is_new_question = len(history) == 0  # an empty history means a fresh question
        bot_message = pdf_chatbot.chat(message, is_new_question)
        history.append((message, bot_message))
    except Exception as e:
        # Surface the error in the chat rather than overwriting the input box
        history.append((message, f"Error: {e}"))
    return "", history

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# Advanced PDF Chatbot")

    with gr.Row():
        pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_button = gr.Button("Process PDF")
        upload_status = gr.Textbox(label="Upload Status")

    upload_button.click(upload_pdf, inputs=[pdf_upload], outputs=[upload_status])

    chatbot_interface = gr.Chatbot()
    msg = gr.Textbox(placeholder="Enter your query...")
    msg.submit(respond, inputs=[msg, chatbot_interface], outputs=[msg, chatbot_interface])

if __name__ == "__main__":
    demo.launch()