Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -66,11 +66,11 @@ class AdvancedRAGSystem:
|
|
66 |
self.context = None
|
67 |
self.source_documents = 0
|
68 |
|
69 |
-
def _validate_file(self, file_path: Path) -> bool:
|
70 |
"""Validate if the file is of supported format and exists"""
|
71 |
return file_path.suffix.lower() == DocumentFormat.PDF.value and file_path.exists()
|
72 |
|
73 |
-
def _extract_text_from_pdf(self, pdf_path: Path)
|
74 |
"""Extract text from a PDF file with proper error handling"""
|
75 |
try:
|
76 |
with open(pdf_path, 'rb') as file:
|
@@ -83,7 +83,7 @@ class AdvancedRAGSystem:
|
|
83 |
logger.error(f"Error processing PDF {pdf_path}: {str(e)}")
|
84 |
raise ValueError(f"Failed to process PDF {pdf_path}: {str(e)}")
|
85 |
|
86 |
-
def _create_document_chunks(self, texts: List[str])
|
87 |
"""Split documents into chunks using the configured parameters"""
|
88 |
text_splitter = RecursiveCharacterTextSplitter(
|
89 |
chunk_size=self.config.chunk_size,
|
@@ -93,7 +93,7 @@ class AdvancedRAGSystem:
|
|
93 |
)
|
94 |
return text_splitter.create_documents(texts)
|
95 |
|
96 |
-
def process_pdfs(self, pdf_files: List[str])
|
97 |
"""Process and index PDF documents with improved error handling"""
|
98 |
try:
|
99 |
# Convert to Path objects and validate
|
@@ -127,17 +127,17 @@ class AdvancedRAGSystem:
|
|
127 |
logger.error(error_msg)
|
128 |
raise RuntimeError(error_msg)
|
129 |
|
130 |
-
def get_retriever(self)
|
131 |
"""Get the document retriever with current configuration"""
|
132 |
if not self.vector_store:
|
133 |
raise RuntimeError("Vector store not initialized. Please process documents first.")
|
134 |
return self.vector_store.as_retriever(search_kwargs={"k": self.config.retriever_k})
|
135 |
|
136 |
-
def _format_context(self, documents: List[Any]) -> str:
|
137 |
"""Format retrieved documents into a single context string"""
|
138 |
return "\n\n".join(doc.page_content for doc in documents)
|
139 |
|
140 |
-
def query(self, question: str)
|
141 |
"""Query the RAG system with improved error handling and response formatting"""
|
142 |
try:
|
143 |
if not self.vector_store:
|
@@ -186,10 +186,10 @@ Context:
|
|
186 |
|
187 |
|
188 |
|
189 |
-
def create_gradio_interface(rag_system: AdvancedRAGSystem) -> gr.Blocks:
|
190 |
"""Create an improved Gradio interface for the RAG system"""
|
191 |
|
192 |
-
def process_files(files: List[Any], chunk_size: int, overlap: int)
|
193 |
"""Process uploaded files with updated configuration"""
|
194 |
if not files:
|
195 |
return "Please upload PDF files"
|
@@ -203,7 +203,7 @@ def create_gradio_interface(rag_system: AdvancedRAGSystem) -> gr.Blocks:
|
|
203 |
except Exception as e:
|
204 |
return f"Error: {str(e)}"
|
205 |
|
206 |
-
def query_streaming(question: str)
|
207 |
try:
|
208 |
for response in rag_system.query(question):
|
209 |
yield response
|
|
|
66 |
self.context = None
|
67 |
self.source_documents = 0
|
68 |
|
69 |
+
def _validate_file(self, file_path: Path) :
|
70 |
"""Validate if the file is of supported format and exists"""
|
71 |
return file_path.suffix.lower() == DocumentFormat.PDF.value and file_path.exists()
|
72 |
|
73 |
+
def _extract_text_from_pdf(self, pdf_path: Path) :
|
74 |
"""Extract text from a PDF file with proper error handling"""
|
75 |
try:
|
76 |
with open(pdf_path, 'rb') as file:
|
|
|
83 |
logger.error(f"Error processing PDF {pdf_path}: {str(e)}")
|
84 |
raise ValueError(f"Failed to process PDF {pdf_path}: {str(e)}")
|
85 |
|
86 |
+
def _create_document_chunks(self, texts: List[str]) :
|
87 |
"""Split documents into chunks using the configured parameters"""
|
88 |
text_splitter = RecursiveCharacterTextSplitter(
|
89 |
chunk_size=self.config.chunk_size,
|
|
|
93 |
)
|
94 |
return text_splitter.create_documents(texts)
|
95 |
|
96 |
+
def process_pdfs(self, pdf_files: List[str]) :
|
97 |
"""Process and index PDF documents with improved error handling"""
|
98 |
try:
|
99 |
# Convert to Path objects and validate
|
|
|
127 |
logger.error(error_msg)
|
128 |
raise RuntimeError(error_msg)
|
129 |
|
130 |
+
def get_retriever(self) :
|
131 |
"""Get the document retriever with current configuration"""
|
132 |
if not self.vector_store:
|
133 |
raise RuntimeError("Vector store not initialized. Please process documents first.")
|
134 |
return self.vector_store.as_retriever(search_kwargs={"k": self.config.retriever_k})
|
135 |
|
136 |
+
def _format_context(self, documents: List[Any]) :
|
137 |
"""Format retrieved documents into a single context string"""
|
138 |
return "\n\n".join(doc.page_content for doc in documents)
|
139 |
|
140 |
+
def query(self, question: str) :
|
141 |
"""Query the RAG system with improved error handling and response formatting"""
|
142 |
try:
|
143 |
if not self.vector_store:
|
|
|
186 |
|
187 |
|
188 |
|
189 |
+
def create_gradio_interface(rag_system: AdvancedRAGSystem) :
|
190 |
"""Create an improved Gradio interface for the RAG system"""
|
191 |
|
192 |
+
def process_files(files: List[Any], chunk_size: int, overlap: int) :
|
193 |
"""Process uploaded files with updated configuration"""
|
194 |
if not files:
|
195 |
return "Please upload PDF files"
|
|
|
203 |
except Exception as e:
|
204 |
return f"Error: {str(e)}"
|
205 |
|
206 |
+
def query_streaming(question: str) :
|
207 |
try:
|
208 |
for response in rag_system.query(question):
|
209 |
yield response
|