Update app.py
Browse files
app.py
CHANGED
@@ -10,7 +10,7 @@ from langchain.chains import ConversationalRetrievalChain, LLMChain
|
|
10 |
from langchain.memory import ConversationBufferMemory
|
11 |
from langchain.prompts import PromptTemplate
|
12 |
import concurrent.futures
|
13 |
-
|
14 |
from PyPDF2 import PdfReader # New import for PDF metadata extraction
|
15 |
|
16 |
# Configure logging
|
@@ -92,7 +92,9 @@ If the answer isn't directly available, explain why. """,
|
|
92 |
# Extract document context and store it in memory
|
93 |
document_context = self._extract_document_type()
|
94 |
logger.info(f"Extracted document context: {document_context}")
|
95 |
-
|
|
|
|
|
96 |
|
97 |
except Exception as e:
|
98 |
logger.error(f"PDF processing error: {e}")
|
@@ -144,7 +146,6 @@ If the answer isn't directly available, explain why. """,
|
|
144 |
def _extract_headings(self, text):
|
145 |
"""Extract headings from the first document's content"""
|
146 |
try:
|
147 |
-
# Simple heuristic: treat title-cased lines (as judged by str.istitle()) as headings; all-uppercase lines are not matched
|
148 |
headings = [line for line in text.split("\n") if line.strip().istitle()]
|
149 |
return ', '.join(headings[:5]) # Return the first 5 headings
|
150 |
except Exception as e:
|
@@ -186,18 +187,15 @@ def clear_chatbot():
|
|
186 |
# Gradio UI
|
187 |
with gr.Blocks() as demo:
|
188 |
gr.Markdown("# Advanced PDF Chatbot")
|
189 |
-
|
190 |
with gr.Row():
|
191 |
pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
|
192 |
upload_button = gr.Button("Process PDF")
|
193 |
|
194 |
upload_status = gr.Textbox(label="Upload Status")
|
195 |
upload_button.click(upload_pdf, inputs=[pdf_upload], outputs=[upload_status])
|
196 |
-
|
197 |
chatbot_interface = gr.Chatbot()
|
198 |
msg = gr.Textbox(placeholder="Enter your query...")
|
199 |
msg.submit(respond, inputs=[msg, chatbot_interface], outputs=[msg, chatbot_interface])
|
200 |
-
|
201 |
clear_button = gr.Button("Clear Conversation")
|
202 |
clear_button.click(clear_chatbot, outputs=[chatbot_interface])
|
203 |
|
|
|
10 |
from langchain.memory import ConversationBufferMemory
|
11 |
from langchain.prompts import PromptTemplate
|
12 |
import concurrent.futures
|
13 |
+
import timeout_decorator
|
14 |
from PyPDF2 import PdfReader # New import for PDF metadata extraction
|
15 |
|
16 |
# Configure logging
|
|
|
92 |
# Extract document context and store it in memory
|
93 |
document_context = self._extract_document_type()
|
94 |
logger.info(f"Extracted document context: {document_context}")
|
95 |
+
|
96 |
+
# Save the document context to self.memory as a synthetic exchange ("System" input, "Document context: ..." output)
|
97 |
+
self.memory.save_context({"input": "System"}, {"output": f"Document context: {document_context}"})
|
98 |
|
99 |
except Exception as e:
|
100 |
logger.error(f"PDF processing error: {e}")
|
|
|
146 |
def _extract_headings(self, text):
|
147 |
"""Extract headings from the first document's content"""
|
148 |
try:
|
|
|
149 |
headings = [line for line in text.split("\n") if line.strip().istitle()]
|
150 |
return ', '.join(headings[:5]) # Return the first 5 headings
|
151 |
except Exception as e:
|
|
|
187 |
# Gradio UI
|
188 |
with gr.Blocks() as demo:
|
189 |
gr.Markdown("# Advanced PDF Chatbot")
|
|
|
190 |
with gr.Row():
|
191 |
pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
|
192 |
upload_button = gr.Button("Process PDF")
|
193 |
|
194 |
upload_status = gr.Textbox(label="Upload Status")
|
195 |
upload_button.click(upload_pdf, inputs=[pdf_upload], outputs=[upload_status])
|
|
|
196 |
chatbot_interface = gr.Chatbot()
|
197 |
msg = gr.Textbox(placeholder="Enter your query...")
|
198 |
msg.submit(respond, inputs=[msg, chatbot_interface], outputs=[msg, chatbot_interface])
|
|
|
199 |
clear_button = gr.Button("Clear Conversation")
|
200 |
clear_button.click(clear_chatbot, outputs=[chatbot_interface])
|
201 |
|