Spaces:

bhuvanmdev
/

QA_document

Sleeping

App Files Community

bhuvanmdev committed on Nov 24, 2024

Commit

f21ea57

verified ·

1 Parent(s): ada214d

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -10

app.py CHANGED Viewed

@@ -66,11 +66,11 @@ class AdvancedRAGSystem:
         self.context = None
         self.source_documents = 0
-    def _validate_file(self, file_path: Path) -> bool:
         """Validate if the file is of supported format and exists"""
         return file_path.suffix.lower() == DocumentFormat.PDF.value and file_path.exists()
-    def _extract_text_from_pdf(self, pdf_path: Path) -> str:
         """Extract text from a PDF file with proper error handling"""
         try:
             with open(pdf_path, 'rb') as file:
@@ -83,7 +83,7 @@ class AdvancedRAGSystem:
             logger.error(f"Error processing PDF {pdf_path}: {str(e)}")
             raise ValueError(f"Failed to process PDF {pdf_path}: {str(e)}")
-    def _create_document_chunks(self, texts: List[str]) -> List[Any]:
         """Split documents into chunks using the configured parameters"""
         text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=self.config.chunk_size,
@@ -93,7 +93,7 @@ class AdvancedRAGSystem:
         )
         return text_splitter.create_documents(texts)
-    def process_pdfs(self, pdf_files: List[str]) -> str:
         """Process and index PDF documents with improved error handling"""
         try:
             # Convert to Path objects and validate
@@ -127,17 +127,17 @@ class AdvancedRAGSystem:
             logger.error(error_msg)
             raise RuntimeError(error_msg)
-    def get_retriever(self) -> BaseRetriever:
         """Get the document retriever with current configuration"""
         if not self.vector_store:
             raise RuntimeError("Vector store not initialized. Please process documents first.")
         return self.vector_store.as_retriever(search_kwargs={"k": self.config.retriever_k})
-    def _format_context(self, documents: List[Any]) -> str:
         """Format retrieved documents into a single context string"""
         return "\n\n".join(doc.page_content for doc in documents)
-    def query(self, question: str) -> Dict[str, str]:
         """Query the RAG system with improved error handling and response formatting"""
         try:
             if not self.vector_store:
@@ -186,10 +186,10 @@ Context:
-def create_gradio_interface(rag_system: AdvancedRAGSystem) -> gr.Blocks:
     """Create an improved Gradio interface for the RAG system"""
-    def process_files(files: List[Any], chunk_size: int, overlap: int) -> str:
         """Process uploaded files with updated configuration"""
         if not files:
             return "Please upload PDF files"
@@ -203,7 +203,7 @@ def create_gradio_interface(rag_system: AdvancedRAGSystem) -> gr.Blocks:
         except Exception as e:
             return f"Error: {str(e)}"
-    def query_streaming(question: str) -> Generator[str, None, None]:
         try:
             for response in rag_system.query(question):
                 yield response

         self.context = None
         self.source_documents = 0
+    def _validate_file(self, file_path: Path) :
         """Validate if the file is of supported format and exists"""
         return file_path.suffix.lower() == DocumentFormat.PDF.value and file_path.exists()
+    def _extract_text_from_pdf(self, pdf_path: Path) :
         """Extract text from a PDF file with proper error handling"""
         try:
             with open(pdf_path, 'rb') as file:
             logger.error(f"Error processing PDF {pdf_path}: {str(e)}")
             raise ValueError(f"Failed to process PDF {pdf_path}: {str(e)}")
+    def _create_document_chunks(self, texts: List[str]) :
         """Split documents into chunks using the configured parameters"""
         text_splitter = RecursiveCharacterTextSplitter(
             chunk_size=self.config.chunk_size,
         )
         return text_splitter.create_documents(texts)
+    def process_pdfs(self, pdf_files: List[str]) :
         """Process and index PDF documents with improved error handling"""
         try:
             # Convert to Path objects and validate
             logger.error(error_msg)
             raise RuntimeError(error_msg)
+    def get_retriever(self) :
         """Get the document retriever with current configuration"""
         if not self.vector_store:
             raise RuntimeError("Vector store not initialized. Please process documents first.")
         return self.vector_store.as_retriever(search_kwargs={"k": self.config.retriever_k})
+    def _format_context(self, documents: List[Any]) :
         """Format retrieved documents into a single context string"""
         return "\n\n".join(doc.page_content for doc in documents)
+    def query(self, question: str) :
         """Query the RAG system with improved error handling and response formatting"""
         try:
             if not self.vector_store:
+def create_gradio_interface(rag_system: AdvancedRAGSystem) :
     """Create an improved Gradio interface for the RAG system"""
+    def process_files(files: List[Any], chunk_size: int, overlap: int) :
         """Process uploaded files with updated configuration"""
         if not files:
             return "Please upload PDF files"
         except Exception as e:
             return f"Error: {str(e)}"
+    def query_streaming(question: str) :
         try:
             for response in rag_system.query(question):
                 yield response