bhuvanmdev committed on
Commit
f21ea57
·
verified ·
1 Parent(s): ada214d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -10
app.py CHANGED
@@ -66,11 +66,11 @@ class AdvancedRAGSystem:
66
  self.context = None
67
  self.source_documents = 0
68
 
69
- def _validate_file(self, file_path: Path) -> bool:
70
  """Validate if the file is of supported format and exists"""
71
  return file_path.suffix.lower() == DocumentFormat.PDF.value and file_path.exists()
72
 
73
- def _extract_text_from_pdf(self, pdf_path: Path) -> str:
74
  """Extract text from a PDF file with proper error handling"""
75
  try:
76
  with open(pdf_path, 'rb') as file:
@@ -83,7 +83,7 @@ class AdvancedRAGSystem:
83
  logger.error(f"Error processing PDF {pdf_path}: {str(e)}")
84
  raise ValueError(f"Failed to process PDF {pdf_path}: {str(e)}")
85
 
86
- def _create_document_chunks(self, texts: List[str]) -> List[Any]:
87
  """Split documents into chunks using the configured parameters"""
88
  text_splitter = RecursiveCharacterTextSplitter(
89
  chunk_size=self.config.chunk_size,
@@ -93,7 +93,7 @@ class AdvancedRAGSystem:
93
  )
94
  return text_splitter.create_documents(texts)
95
 
96
- def process_pdfs(self, pdf_files: List[str]) -> str:
97
  """Process and index PDF documents with improved error handling"""
98
  try:
99
  # Convert to Path objects and validate
@@ -127,17 +127,17 @@ class AdvancedRAGSystem:
127
  logger.error(error_msg)
128
  raise RuntimeError(error_msg)
129
 
130
- def get_retriever(self) -> BaseRetriever:
131
  """Get the document retriever with current configuration"""
132
  if not self.vector_store:
133
  raise RuntimeError("Vector store not initialized. Please process documents first.")
134
  return self.vector_store.as_retriever(search_kwargs={"k": self.config.retriever_k})
135
 
136
- def _format_context(self, documents: List[Any]) -> str:
137
  """Format retrieved documents into a single context string"""
138
  return "\n\n".join(doc.page_content for doc in documents)
139
 
140
- def query(self, question: str) -> Dict[str, str]:
141
  """Query the RAG system with improved error handling and response formatting"""
142
  try:
143
  if not self.vector_store:
@@ -186,10 +186,10 @@ Context:
186
 
187
 
188
 
189
- def create_gradio_interface(rag_system: AdvancedRAGSystem) -> gr.Blocks:
190
  """Create an improved Gradio interface for the RAG system"""
191
 
192
- def process_files(files: List[Any], chunk_size: int, overlap: int) -> str:
193
  """Process uploaded files with updated configuration"""
194
  if not files:
195
  return "Please upload PDF files"
@@ -203,7 +203,7 @@ def create_gradio_interface(rag_system: AdvancedRAGSystem) -> gr.Blocks:
203
  except Exception as e:
204
  return f"Error: {str(e)}"
205
 
206
- def query_streaming(question: str) -> Generator[str, None, None]:
207
  try:
208
  for response in rag_system.query(question):
209
  yield response
 
66
  self.context = None
67
  self.source_documents = 0
68
 
69
+ def _validate_file(self, file_path: Path) :
70
  """Validate if the file is of supported format and exists"""
71
  return file_path.suffix.lower() == DocumentFormat.PDF.value and file_path.exists()
72
 
73
+ def _extract_text_from_pdf(self, pdf_path: Path) :
74
  """Extract text from a PDF file with proper error handling"""
75
  try:
76
  with open(pdf_path, 'rb') as file:
 
83
  logger.error(f"Error processing PDF {pdf_path}: {str(e)}")
84
  raise ValueError(f"Failed to process PDF {pdf_path}: {str(e)}")
85
 
86
+ def _create_document_chunks(self, texts: List[str]) :
87
  """Split documents into chunks using the configured parameters"""
88
  text_splitter = RecursiveCharacterTextSplitter(
89
  chunk_size=self.config.chunk_size,
 
93
  )
94
  return text_splitter.create_documents(texts)
95
 
96
+ def process_pdfs(self, pdf_files: List[str]) :
97
  """Process and index PDF documents with improved error handling"""
98
  try:
99
  # Convert to Path objects and validate
 
127
  logger.error(error_msg)
128
  raise RuntimeError(error_msg)
129
 
130
+ def get_retriever(self) :
131
  """Get the document retriever with current configuration"""
132
  if not self.vector_store:
133
  raise RuntimeError("Vector store not initialized. Please process documents first.")
134
  return self.vector_store.as_retriever(search_kwargs={"k": self.config.retriever_k})
135
 
136
+ def _format_context(self, documents: List[Any]) :
137
  """Format retrieved documents into a single context string"""
138
  return "\n\n".join(doc.page_content for doc in documents)
139
 
140
+ def query(self, question: str) :
141
  """Query the RAG system with improved error handling and response formatting"""
142
  try:
143
  if not self.vector_store:
 
186
 
187
 
188
 
189
+ def create_gradio_interface(rag_system: AdvancedRAGSystem) :
190
  """Create an improved Gradio interface for the RAG system"""
191
 
192
+ def process_files(files: List[Any], chunk_size: int, overlap: int) :
193
  """Process uploaded files with updated configuration"""
194
  if not files:
195
  return "Please upload PDF files"
 
203
  except Exception as e:
204
  return f"Error: {str(e)}"
205
 
206
+ def query_streaming(question: str) :
207
  try:
208
  for response in rag_system.query(question):
209
  yield response