TalatMasood committed
Commit: be32fd8 · Parent: aee2bfd

Changes to be committed:


modified: chroma/5c23c332-257c-4409-8a58-767cdd7c3dea/length.bin
modified: chroma/chroma.sqlite3
modified: config/__pycache__/config.cpython-312.pyc
modified: src/__pycache__/main.cpython-312.pyc
modified: src/agents/__pycache__/rag_agent.cpython-312.pyc
modified: src/agents/__pycache__/system_instructions_rag.cpython-312.pyc
modified: src/agents/rag_agent.py
modified: src/agents/system_instructions_rag.py
modified: src/utils/__pycache__/drive_document_processor.cpython-312.pyc
modified: src/utils/__pycache__/enhanced_excel_processor.cpython-312.pyc
modified: src/utils/__pycache__/google_drive_service.cpython-312.pyc
modified: src/utils/drive_document_processor.py
modified: src/utils/enhanced_excel_processor.py
modified: src/utils/google_drive_service.py
modified: src/vectorstores/__pycache__/chroma_vectorstore.cpython-312.pyc
modified: src/vectorstores/chroma_vectorstore.py
new file: temp_downloads/17he27jN4louYr1xOYASf4BP2e-tGTICt.xlsx
new file: temp_downloads/1K608-Qr03M6nf5FhB6AajbHm8kjQujx1.xlsx

Enhanced support for Excel sheets

config/__pycache__/config.cpython-312.pyc CHANGED
Binary files a/config/__pycache__/config.cpython-312.pyc and b/config/__pycache__/config.cpython-312.pyc differ
 
src/__pycache__/main.cpython-312.pyc CHANGED
Binary files a/src/__pycache__/main.cpython-312.pyc and b/src/__pycache__/main.cpython-312.pyc differ
 
src/agents/__pycache__/rag_agent.cpython-312.pyc CHANGED
Binary files a/src/agents/__pycache__/rag_agent.cpython-312.pyc and b/src/agents/__pycache__/rag_agent.cpython-312.pyc differ
 
src/agents/__pycache__/system_instructions_rag.cpython-312.pyc CHANGED
Binary files a/src/agents/__pycache__/system_instructions_rag.cpython-312.pyc and b/src/agents/__pycache__/system_instructions_rag.cpython-312.pyc differ
 
src/agents/rag_agent.py CHANGED
@@ -45,85 +45,156 @@ class RAGAgent(ExcelAwareRAGAgent):
     async def generate_response(
         self,
         query: str,
-        conversation_id: Optional[str] = None,
-        temperature: float = 0.7,
+        conversation_id: Optional[str],
+        temperature: float,
         max_tokens: Optional[int] = None,
         context_docs: Optional[List[str]] = None
     ) -> RAGResponse:
-        """Generate a response using RAG with conversation history"""
+        """Generate response with specific handling for different query types"""
         try:
-            # Create new conversation if no ID provided
-            if not conversation_id:
-                conversation_id = str(uuid.uuid4())
-                await self.mongodb.create_conversation(conversation_id)
-
-            # Get conversation history
-            history = await self.mongodb.get_recent_messages(
-                conversation_id,
-                limit=self.conversation_manager.max_messages
-            )
-
-            # Get relevant history within token limits
-            relevant_history = self.conversation_manager.get_relevant_history(
-                messages=history,
-                current_query=query
-            ) if history else []
+            # First, check if this is an introduction/welcome message query
+            is_introduction = (
+                "wants support" in query and
+                "This is Introduction" in query and
+                ("A new user with name:" in query or "An old user with name:" in query)
+            )
+
+            if is_introduction:
+                # Handle introduction message - no context needed
+                welcome_message = self._handle_contact_query(query)
+                return RAGResponse(
+                    response=welcome_message,
+                    context_docs=[],
+                    sources=[],
+                    scores=None
+                )
+
+            # Get conversation history if conversation_id exists
+            history = []
+            if conversation_id:
+                history = await self.mongodb.get_recent_messages(
+                    conversation_id,
+                    limit=self.conversation_manager.max_messages
+                )
+
+            # Get relevant history within token limits
+            history = self.conversation_manager.get_relevant_history(
+                messages=history,
+                current_query=query
+            )
 
             # Retrieve context if not provided
             if not context_docs:
                 context_docs, sources, scores = await self.retrieve_context(
-                    query,
-                    conversation_history=relevant_history
+                    query=query,
+                    conversation_history=history
                 )
             else:
                 sources = None
                 scores = None
 
-            # Check if this is an Excel-related query and enhance context if needed
-            has_excel_content = any('Sheet:' in doc for doc in (context_docs or []))
+            # Check if we have any relevant context
+            if not context_docs:
+                return RAGResponse(
+                    response="Information about this is not available, do you want to inquire about something else?",
+                    context_docs=[],
+                    sources=[],
+                    scores=None
+                )
+
+            # Check if this is an Excel-related query
+            has_excel_content = any('Sheet:' in doc for doc in context_docs)
             if has_excel_content:
                 try:
                     context_docs = self._process_excel_context(context_docs, query)
                 except Exception as e:
                     logger.warning(f"Error processing Excel context: {str(e)}")
-                    # Continue with original context if Excel processing fails
 
             # Generate prompt with context and history
             augmented_prompt = self.conversation_manager.generate_prompt_with_history(
                 current_query=query,
-                history=relevant_history,
+                history=history,
                 context_docs=context_docs
             )
 
-            # Generate initial response using LLM
+            # Generate initial response
             response = self.llm.generate(
-                augmented_prompt,
+                prompt=augmented_prompt,
                 temperature=temperature,
                 max_tokens=max_tokens
             )
 
-            # Enhance response for Excel queries if applicable
+            # Clean the response
+            cleaned_response = self._clean_response(response)
+
+            # For Excel queries, enhance the response
             if has_excel_content:
                 try:
-                    response = await self.enhance_excel_response(
+                    enhanced_response = await self.enhance_excel_response(
                         query=query,
-                        response=response,
+                        response=cleaned_response,
                         context_docs=context_docs
                     )
+                    if enhanced_response:
+                        cleaned_response = enhanced_response
                 except Exception as e:
                     logger.warning(f"Error enhancing Excel response: {str(e)}")
-                    # Continue with original response if enhancement fails
 
+            # Return the final response
             return RAGResponse(
-                response=response,
+                response=cleaned_response,
                 context_docs=context_docs,
                 sources=sources,
                 scores=scores
             )
 
         except Exception as e:
-            logger.error(f"Error generating response: {str(e)}")
+            logger.error(f"Error in SystemInstructionsRAGAgent: {str(e)}")
             raise
+
+    def _create_response_prompt(self, query: str, context_docs: List[str]) -> str:
+        """
+        Create prompt for generating response from context
+
+        Args:
+            query (str): User query
+            context_docs (List[str]): Retrieved context documents
+
+        Returns:
+            str: Formatted prompt for the LLM
+        """
+        if not context_docs:
+            return f"Query: {query}\nResponse: Information about this is not available, do you want to inquire about something else?"
+
+        # Format context documents
+        formatted_context = "\n\n".join(
+            f"Context {i+1}:\n{doc.strip()}"
+            for i, doc in enumerate(context_docs)
+            if doc and doc.strip()
+        )
+
+        # Build the prompt with detailed instructions
+        prompt = f"""You are a knowledgeable assistant. Use the following context to answer the query accurately and informatively.
+
+Context Information:
+{formatted_context}
+
+Query: {query}
+
+Instructions:
+1. Base your response ONLY on the information provided in the context above
+2. If the context contains numbers, statistics, or specific details, include them in your response
+3. Keep your response focused and relevant to the query
+4. Use clear and professional language
+5. If the context includes technical terms, explain them appropriately
+6. Do not make assumptions or add information not present in the context
+7. If specific sections of a report are mentioned, maintain their original structure
+8. Format the response in a clear, readable manner
+9. If the context includes chronological information, maintain the proper sequence
+
+Response:"""
+
+        return prompt
 
     async def retrieve_context(
         self,
@@ -133,15 +204,6 @@ class RAGAgent(ExcelAwareRAGAgent):
     ) -> Tuple[List[str], List[Dict], Optional[List[float]]]:
         """
         Retrieve context with conversation history enhancement
-
-        Args:
-            query (str): Current query
-            conversation_history (Optional[List[Dict]]): Recent conversation history
-            top_k (int): Number of documents to retrieve
-
-        Returns:
-            Tuple[List[str], List[Dict], Optional[List[float]]]:
-                Retrieved documents, sources, and scores
         """
         # Enhance query with conversation history
         if conversation_history:
@@ -153,8 +215,14 @@ class RAGAgent(ExcelAwareRAGAgent):
         else:
             enhanced_query = query
 
+        # Debug log the enhanced query
+        logger.info(f"Enhanced query: {enhanced_query}")
+
         # Embed the enhanced query
         query_embedding = self.embedding.embed_query(enhanced_query)
+
+        # Debug log embedding shape
+        logger.info(f"Query embedding shape: {len(query_embedding)}")
 
         # Retrieve similar documents
         results = self.vector_store.similarity_search(
@@ -162,6 +230,12 @@ class RAGAgent(ExcelAwareRAGAgent):
             top_k=top_k
         )
 
+        # Debug log search results
+        logger.info(f"Number of search results: {len(results)}")
+        for i, result in enumerate(results):
+            logger.info(f"Result {i} score: {result.get('score', 'N/A')}")
+            logger.info(f"Result {i} text preview: {result.get('text', '')[:100]}...")
+
         # Process results
         documents = [doc['text'] for doc in results]
         sources = [self._convert_metadata_to_strings(doc['metadata'])
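
Note: the introduction check added to generate_response is plain substring matching. A minimal standalone sketch of the same predicate (the matched substrings come from the diff; the sample queries are hypothetical):

    # Standalone sketch of the introduction-detection predicate above.
    def is_introduction(query: str) -> bool:
        return (
            "wants support" in query
            and "This is Introduction" in query
            and ("A new user with name:" in query or "An old user with name:" in query)
        )

    if __name__ == "__main__":
        samples = [
            "A new user with name: Ali wants support. This is Introduction",
            "What were Q3 sales?",
        ]
        for q in samples:
            print(q, "->", is_introduction(q))  # True, then False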
src/agents/system_instructions_rag.py CHANGED
@@ -103,19 +103,36 @@ class SystemInstructionsRAGAgent(RAGAgent):
         if not context_docs:
             return False
 
-        # Extract key terms from query
+        # Extract key terms from query (keeping important words)
         query_words = query.lower().split()
-        stop_words = {'share', 'me', 'a', 'about', 'information', 'what', 'is', 'are', 'the', 'in', 'how', 'why', 'when', 'where'}
+        stop_words = {'me', 'a', 'about', 'what', 'is', 'are', 'the', 'in', 'how', 'why', 'when', 'where'}
+
+        # Remove only basic stop words, keep important terms like "report", "share", etc.
         query_terms = {word for word in query_words if word not in stop_words}
 
+        # Add additional relevant terms that might appear in the content
+        related_terms = {
+            'comprehensive',
+            'report',
+            'overview',
+            'summary',
+            'details',
+            'information'
+        }
+        query_terms.update(word for word in query_words if word in related_terms)
+
         # Check each context document for relevance
         for doc in context_docs:
             if not doc:
                 continue
             doc_lower = doc.lower()
-            if any(term in doc_lower for term in query_terms):
-                # Found relevant content
+
+            # Consider document relevant if it contains any query terms
+            # or if it starts with common report headers
+            if any(term in doc_lower for term in query_terms) or \
+               any(header in doc_lower for header in ['overview', 'comprehensive report', 'summary']):
                 return True
+
         return False
 
     def _create_response_prompt(self, query: str, context_docs: List[str]) -> str:
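
Note: the relaxed relevance check above is a term-overlap heuristic. A minimal sketch of the same logic, runnable outside the agent class (the sample document is hypothetical):

    # Sketch of the relaxed relevance check introduced in this file.
    STOP_WORDS = {'me', 'a', 'about', 'what', 'is', 'are', 'the', 'in', 'how', 'why', 'when', 'where'}
    REPORT_HEADERS = ['overview', 'comprehensive report', 'summary']

    def has_relevant_context(query: str, context_docs: list) -> bool:
        query_terms = {w for w in query.lower().split() if w not in STOP_WORDS}
        for doc in context_docs:
            if not doc:
                continue
            doc_lower = doc.lower()
            if any(t in doc_lower for t in query_terms) or \
               any(h in doc_lower for h in REPORT_HEADERS):
                return True
        return False

    print(has_relevant_context("share the report", ["Comprehensive Report: 2024 results"]))  # True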
src/utils/__pycache__/drive_document_processor.cpython-312.pyc CHANGED
Binary files a/src/utils/__pycache__/drive_document_processor.cpython-312.pyc and b/src/utils/__pycache__/drive_document_processor.cpython-312.pyc differ
 
src/utils/__pycache__/enhanced_excel_processor.cpython-312.pyc CHANGED
Binary files a/src/utils/__pycache__/enhanced_excel_processor.cpython-312.pyc and b/src/utils/__pycache__/enhanced_excel_processor.cpython-312.pyc differ
 
src/utils/__pycache__/google_drive_service.cpython-312.pyc CHANGED
Binary files a/src/utils/__pycache__/google_drive_service.cpython-312.pyc and b/src/utils/__pycache__/google_drive_service.cpython-312.pyc differ
 
src/utils/drive_document_processor.py CHANGED
@@ -37,7 +37,7 @@ class DriveDocumentProcessor:
         # Define supported MIME types
         self.supported_mime_types = {
             # Google Docs
-            'application/vnd.google-apps.document': '.docx',  # Export Google Docs as DOCX
+            'application/vnd.google-apps.document': '.docx',
 
             # Microsoft Word Documents
             'application/vnd.openxmlformats-officedocument.wordprocessingml.document': '.docx',
@@ -60,33 +60,45 @@
             'application/pdf': '.pdf'
         }
 
-        # Define export MIME types for Google Docs formats
         self.google_docs_export_types = {
             'application/vnd.google-apps.document': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
         }
 
     async def process_documents(
         self,
-        vector_store: ChromaVectorStore
+        vector_store: ChromaVectorStore,
+        include_subfolders: bool = True  # New parameter with default True for backward compatibility
     ) -> Dict[str, Any]:
         """
         Process all documents in the specified Drive folder
 
         Args:
             vector_store (ChromaVectorStore): Vector store instance
+            include_subfolders (bool): Whether to process documents in subfolders
 
         Returns:
             Dict[str, Any]: Processing results
         """
         try:
             # Get documents from folder
-            files = self.google_drive_service.get_folder_contents(self.folder_id)
+            files = self.google_drive_service.get_folder_contents(
+                self.folder_id,
+                include_subfolders=include_subfolders
+            )
 
             processed_files = []
             skipped_files = []
             errors = []
 
             for file in files:
+                # Skip if it's a folder
+                if file.get('mimeType') == 'application/vnd.google-apps.folder':
+                    continue
+
+                # Get file path (including folder structure if available)
+                file_path = self._get_file_path(file)
+                file['display_path'] = file_path
+
                 result = await self._process_single_file(file, vector_store)
 
                 if result['status'] == 'processed':
@@ -122,13 +134,31 @@
                 detail=f"Failed to process drive documents: {str(e)}"
             )
 
+    def _get_file_path(self, file: Dict[str, Any]) -> str:
+        """
+        Get the full path for a file including its folder structure
+
+        Args:
+            file (Dict[str, Any]): File metadata
+
+        Returns:
+            str: Display path of the file
+        """
+        path_parts = [file['name']]
+
+        # Add folder path if available (new structure)
+        if folder_path := file.get('folder_path', []):
+            for folder in reversed(folder_path):
+                path_parts.insert(0, folder['name'])
+
+        return '/'.join(path_parts)
+
     async def _process_single_file(
         self,
         file: Dict[str, Any],
         vector_store: ChromaVectorStore
     ) -> Dict[str, Any]:
         """Process a single Drive file"""
-
         mime_type = file.get('mimeType', '')
 
         # Skip if mime type not supported
@@ -137,13 +167,14 @@
                 'status': 'skipped',
                 'data': {
                     'name': file['name'],
+                    'path': file.get('display_path', file['name']),
                     'reason': f'Unsupported mime type: {mime_type}'
                 }
             }
 
         try:
             document_id = file['id']
-            modified_time = file.get('modifiedTime', 'N/A')  # Get last modified time
+            modified_time = file.get('modifiedTime', 'N/A')
 
             # Check if document should be processed
             if self.save_document(document_id, vector_store, modified_time):
@@ -159,7 +190,7 @@
                     str(temp_file_path)
                 )
 
-                # Add to vector store
+                # Add to vector store with path information
                 self._add_to_vector_store(
                     processed_doc['chunks'],
                     file,
@@ -171,6 +202,7 @@
                     'status': 'processed',
                     'data': {
                         'name': file['name'],
+                        'path': file.get('display_path', file['name']),
                         'id': file['id'],
                         'chunks_processed': len(processed_doc['chunks'])
                     }
@@ -181,11 +213,11 @@
                 if temp_file_path.exists():
                     temp_file_path.unlink()
             else:
-                # Return skipped status if document already exists and is up to date
                 return {
                     'status': 'skipped',
                     'data': {
                         'name': file['name'],
+                        'path': file.get('display_path', file['name']),
                         'reason': 'Document already exists in the memory.'
                     }
                 }
@@ -196,46 +228,10 @@
                 'status': 'error',
                 'data': {
                     'file_name': file['name'],
+                    'path': file.get('display_path', file['name']),
                     'error': str(e)
                 }
             }
-
-        except Exception as e:
-            logger.error(f"Error processing file {file['name']}: {str(e)}")
-            return {
-                'status': 'error',
-                'data': {
-                    'file_name': file['name'],
-                    'error': str(e)
-                }
-            }
-
-    async def _download_and_save_file(
-        self,
-        file_id: str,
-        mime_type: str
-    ) -> Path:
-        """Download and save file to temporary location"""
-        extension = self.supported_mime_types[mime_type]
-        temp_file_path = self.temp_dir / f"{file_id}{extension}"
-
-        if mime_type in self.google_docs_export_types:
-            # Download Google Doc in the specified export format
-            content = self.google_drive_service.export_file(
-                file_id,
-                self.google_docs_export_types[mime_type]
-            )
-        else:
-            # Download regular file
-            content = self.google_drive_service.download_file(file_id)
-
-        with open(temp_file_path, 'wb') as f:
-            if isinstance(content, str):
-                f.write(content.encode('utf-8'))
-            else:
-                f.write(content)
-
-        return temp_file_path
 
     def _add_to_vector_store(
         self,
@@ -244,20 +240,18 @@
         mime_type: str,
         vector_store: ChromaVectorStore
     ) -> None:
-        """Add processed chunks to vector store"""
+        """Add processed chunks to vector store with path information"""
        chunk_metadatas = []
        chunk_ids = []
 
-        # document_id = file['id']
-        modified_time = file.get('modifiedTime', 'N/A')  # Get last modified time
-        # self.delete_updated_document(document_id, vector_store, modified_time)
-
+        modified_time = file.get('modifiedTime', 'N/A')
+        file_path = file.get('display_path', file['name'])
 
        for i, chunk in enumerate(chunks):
            chunk_id = f"{file['id']}-chunk-{i}"
            chunk_ids.append(chunk_id)
            chunk_metadatas.append({
-                "source": file['name'],
+                "source": file_path,  # Use full path instead of just name
                "document_id": file['id'],
                "chunk_index": i,
                "mime_type": mime_type,
@@ -272,44 +266,81 @@
             metadatas=chunk_metadatas,
             ids=chunk_ids
         )
-
-    def save_document(self, document_id: str, vector_store: ChromaVectorStore, modified_date: str) -> bool:
-        """
-        Deletes all chunks of a document if the modified_time does not match the given modified_date.
 
+    async def _download_and_save_file(
+        self,
+        file_id: str,
+        mime_type: str
+    ) -> Path:
+        """Download and save file to temporary location"""
+        extension = self.supported_mime_types[mime_type]
+        temp_file_path = self.temp_dir / f"{file_id}{extension}"
+
+        if mime_type in self.google_docs_export_types:
+            # Download Google Doc in the specified export format
+            content = self.google_drive_service.export_file(
+                file_id,
+                self.google_docs_export_types[mime_type]
+            )
+        else:
+            # Download regular file
+            content = self.google_drive_service.download_file(file_id)
+
+        with open(temp_file_path, 'wb') as f:
+            if isinstance(content, str):
+                f.write(content.encode('utf-8'))
+            else:
+                f.write(content)
+
+        return temp_file_path
+
+    def save_document(
+        self,
+        document_id: str,
+        vector_store: ChromaVectorStore,
+        modified_date: str
+    ) -> bool:
+        """
+        Check if document needs to be processed based on modification date
+
         Args:
-            document_id (str): The ID of the document.
-            vector_store (ChromaVectorStore): The Chroma vector store instance.
-            modified_date (str): The expected modification date.
+            document_id (str): ID of the document to check
+            vector_store (ChromaVectorStore): Vector store instance
+            modified_date (str): Modified date to compare against
+
+        Returns:
+            bool: True if document should be processed, False otherwise
         """
         try:
             # Retrieve all chunks for the given document_id
             chunks = vector_store.get_document_chunks(document_id)
 
             if not chunks:
-                logging.warning(f"No chunks found for document_id: {document_id}. Nothing to delete.")
+                # Document doesn't exist in vector store
                 return True
 
             # Check the modified_time of the first chunk
             first_chunk_metadata = chunks[0].get("metadata", {})
 
             if first_chunk_metadata.get("modified_time") != modified_date:
-                # If modified_time doesn't match, delete all chunks
+                # If modified_time doesn't match, delete existing chunks
                 vector_store.delete_document(document_id)
-                logging.info(f"Deleted all chunks for document_id: {document_id} due to modified_time mismatch.")
+                logger.info(f"Document {document_id} has been modified, will reprocess")
                 return True
-            else:
-                logging.info(f"No deletion needed for document_id: {document_id}, modified_time is unchanged.")
-                return False
-
+
+            logger.info(f"Document {document_id} is up to date, skipping")
+            return False
+
         except Exception as e:
-            logging.error(f"Error while deleting chunks for document_id {document_id}: {str(e)}")
+            logger.error(f"Error checking document status: {str(e)}")
+            # In case of error, process the document to be safe
             return True
 
-
-
     def _cleanup_temp_dir(self) -> None:
         """Clean up temporary directory if empty"""
-        if self.temp_dir.exists() and not any(self.temp_dir.iterdir()):
-            self.temp_dir.rmdir()
+        try:
+            if self.temp_dir.exists() and not any(self.temp_dir.iterdir()):
+                self.temp_dir.rmdir()
+        except Exception as e:
+            logger.error(f"Error cleaning up temp directory: {str(e)}")
+            # Don't raise the error as this is a cleanup operation
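
Note: the new _get_file_path relies on the folder_path list that get_folder_contents now attaches to each file (ordered root to leaf). A minimal sketch of the same path assembly, with a hypothetical file dict mirroring that structure:

    # Sketch of display-path assembly from folder_path metadata.
    def get_file_path(file: dict) -> str:
        path_parts = [file['name']]
        for folder in reversed(file.get('folder_path', [])):
            path_parts.insert(0, folder['name'])
        return '/'.join(path_parts)

    file = {
        'name': 'budget.xlsx',
        'folder_path': [
            {'id': '1', 'name': 'Finance'},
            {'id': '2', 'name': '2024'},
        ],
    }
    print(get_file_path(file))  # Finance/2024/budget.xlsx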
src/utils/enhanced_excel_processor.py CHANGED
@@ -2,7 +2,9 @@ from typing import Dict, List, Any, Optional
 import pandas as pd
 import numpy as np
 from pathlib import Path
-import json
+import logging
+from openpyxl import load_workbook
+from openpyxl.utils.cell import get_column_letter
 
 class EnhancedExcelProcessor:
     def __init__(self):
@@ -13,7 +15,7 @@ class EnhancedExcelProcessor:
 
     def process_excel(self, file_path: Path) -> str:
         """
-        Process Excel file with enhanced multi-sheet handling
+        Process Excel file with enhanced data extraction
 
         Args:
             file_path (Path): Path to Excel file
@@ -21,97 +23,146 @@ class EnhancedExcelProcessor:
         Returns:
             str: Structured text representation of Excel content
         """
-        # Read all sheets
+        # Read all sheets with improved handling
         excel_file = pd.ExcelFile(file_path)
         sheets_data = {}
 
+        # Load workbook for additional metadata
+        workbook = load_workbook(file_path, data_only=True)
+
         for sheet_name in excel_file.sheet_names:
-            df = pd.read_excel(excel_file, sheet_name=sheet_name)
+            # Read with pandas for data structure
+            df = pd.read_excel(
+                excel_file,
+                sheet_name=sheet_name,
+                header=None  # Read without assuming header to capture all data
+            )
+
+            # Clean column names
+            if df.iloc[0].notna().any():  # If first row has any data
+                df.columns = [f"Column_{i}" if pd.isna(x) else str(x).strip()
+                              for i, x in enumerate(df.iloc[0])]
+                df = df.iloc[1:]  # Remove header row from data
+
             sheets_data[sheet_name] = df
 
-            # Generate sheet summary
-            self.sheet_summaries[sheet_name] = self._generate_sheet_summary(df)
+            # Generate enhanced sheet summary
+            self.sheet_summaries[sheet_name] = self._generate_enhanced_sheet_summary(
+                df,
+                workbook[sheet_name]
+            )
 
-            # Extract sheet metadata
-            self.sheet_metadata[sheet_name] = {
-                'columns': list(df.columns),
-                'rows': len(df),
-                'numeric_columns': df.select_dtypes(include=[np.number]).columns.tolist(),
-                'date_columns': df.select_dtypes(include=['datetime64']).columns.tolist(),
-                'categorical_columns': df.select_dtypes(include=['object']).columns.tolist()
-            }
+            # Extract enhanced sheet metadata
+            self.sheet_metadata[sheet_name] = self._extract_enhanced_metadata(
+                df,
+                workbook[sheet_name]
+            )
 
         # Detect relationships between sheets
         self.relationships = self._detect_relationships(sheets_data)
 
         # Generate structured text representation
-        return self._generate_structured_text(sheets_data)
+        return self._generate_enhanced_structured_text(sheets_data, workbook)
 
-    def _generate_sheet_summary(self, df: pd.DataFrame) -> Dict:
-        """Generate statistical summary for a sheet"""
+    def _generate_enhanced_sheet_summary(self, df: pd.DataFrame, ws) -> Dict:
+        """Generate comprehensive statistical summary for a sheet"""
         summary = {
             'total_rows': len(df),
             'total_columns': len(df.columns),
             'column_types': {},
             'numeric_summaries': {},
             'categorical_summaries': {},
-            'null_counts': df.isnull().sum().to_dict()
+            'null_counts': df.isnull().sum().to_dict(),
+            'merged_cells': self._get_merged_cells_info(ws),
+            'formulas': self._get_formulas_info(ws)
         }
 
-        # Process numeric columns
+        # Process numeric columns with enhanced detection
         numeric_cols = df.select_dtypes(include=[np.number]).columns
         for col in numeric_cols:
+            col_data = pd.to_numeric(df[col], errors='coerce')
             summary['numeric_summaries'][col] = {
-                'mean': float(df[col].mean()),
-                'median': float(df[col].median()),
-                'std': float(df[col].std()),
-                'min': float(df[col].min()),
-                'max': float(df[col].max())
+                'mean': float(col_data.mean()) if not col_data.empty else None,
+                'median': float(col_data.median()) if not col_data.empty else None,
+                'std': float(col_data.std()) if not col_data.empty else None,
+                'min': float(col_data.min()) if not col_data.empty else None,
+                'max': float(col_data.max()) if not col_data.empty else None,
+                'sum': float(col_data.sum()) if not col_data.empty else None
             }
             summary['column_types'][col] = 'numeric'
 
-        # Process categorical columns
+        # Process categorical and text columns with enhanced analysis
         categorical_cols = df.select_dtypes(include=['object']).columns
         for col in categorical_cols:
-            value_counts = df[col].value_counts()
-            summary['categorical_summaries'][col] = {
-                'unique_values': int(len(value_counts)),
-                'top_values': value_counts.head(5).to_dict()
-            }
+            # Clean and process values
+            values = df[col].astype(str).replace('nan', pd.NA).dropna()
+            if not values.empty:
+                value_counts = values.value_counts()
+                summary['categorical_summaries'][col] = {
+                    'unique_values': int(len(value_counts)),
+                    'top_values': value_counts.head(5).to_dict(),
+                    'contains_currency': self._detect_currency(values),
+                    'contains_dates': self._detect_dates(values)
+                }
             summary['column_types'][col] = 'categorical'
 
         return summary
 
-    def _detect_relationships(self, sheets_data: Dict[str, pd.DataFrame]) -> Dict:
-        """Detect potential relationships between sheets"""
-        relationships = {}
-        sheet_names = list(sheets_data.keys())
-
-        for i, sheet1 in enumerate(sheet_names):
-            for sheet2 in sheet_names[i+1:]:
-                common_cols = set(sheets_data[sheet1].columns) & set(sheets_data[sheet2].columns)
-                if common_cols:
-                    relationships[f"{sheet1}__{sheet2}"] = {
-                        'common_columns': list(common_cols),
-                        'type': 'potential_join'
-                    }
-
-                # Check for foreign key relationships
-                for col1 in sheets_data[sheet1].columns:
-                    for col2 in sheets_data[sheet2].columns:
-                        if (col1.lower().endswith('_id') or col2.lower().endswith('_id')):
-                            unique_vals1 = set(sheets_data[sheet1][col1].dropna())
-                            unique_vals2 = set(sheets_data[sheet2][col2].dropna())
-                            if unique_vals1 & unique_vals2:
-                                relationships[f"{sheet1}__{sheet2}__{col1}__{col2}"] = {
-                                    'type': 'foreign_key',
-                                    'columns': [col1, col2]
-                                }
-
-        return relationships
+    def _extract_enhanced_metadata(self, df: pd.DataFrame, ws) -> Dict:
+        """Extract comprehensive metadata including Excel-specific features"""
+        metadata = {
+            'columns': list(df.columns),
+            'rows': len(df),
+            'numeric_columns': df.select_dtypes(include=[np.number]).columns.tolist(),
+            'date_columns': df.select_dtypes(include=['datetime64']).columns.tolist(),
+            'categorical_columns': df.select_dtypes(include=['object']).columns.tolist(),
+            'column_widths': {get_column_letter(i+1): ws.column_dimensions[get_column_letter(i+1)].width
+                              for i in range(len(df.columns))
+                              if get_column_letter(i+1) in ws.column_dimensions},
+            'hidden_rows': [idx for idx in range(1, ws.max_row + 1) if ws.row_dimensions[idx].hidden],
+            'hidden_columns': [get_column_letter(idx) for idx in range(1, ws.max_column + 1)
+                               if ws.column_dimensions[get_column_letter(idx)].hidden],
+            'has_charts': bool(ws._charts),
+            'has_images': bool(ws._images),
+            'frozen_panes': ws.freeze_panes is not None
+        }
+        return metadata
+
+    def _get_merged_cells_info(self, ws) -> List[Dict]:
+        """Extract information about merged cells"""
+        merged_cells = []
+        for merged_range in ws.merged_cells.ranges:
+            merged_cells.append({
+                'range': str(merged_range),
+                'start_cell': merged_range.start_cell.coordinate,
+                'end_cell': merged_range.end_cell.coordinate
+            })
+        return merged_cells
 
-    def _generate_structured_text(self, sheets_data: Dict[str, pd.DataFrame]) -> str:
-        """Generate structured text representation of Excel content"""
+    def _get_formulas_info(self, ws) -> Dict[str, str]:
+        """Extract formulas from the worksheet"""
+        formulas = {}
+        for row in ws.iter_rows():
+            for cell in row:
+                if cell.formula:
+                    formulas[cell.coordinate] = cell.formula
+        return formulas
+
+    def _detect_currency(self, series: pd.Series) -> bool:
+        """Detect if a series contains currency values"""
+        currency_patterns = ['$', '€', '£', '¥']
+        return any(series.astype(str).str.contains('|'.join(currency_patterns)).any())
+
+    def _detect_dates(self, series: pd.Series) -> bool:
+        """Detect if a series contains date values"""
+        try:
+            pd.to_datetime(series, errors='raise')
+            return True
+        except:
+            return False
+
+    def _generate_enhanced_structured_text(self, sheets_data: Dict[str, pd.DataFrame], workbook) -> str:
+        """Generate detailed structured text representation of Excel content"""
         output_parts = []
 
         # Overall summary
@@ -130,36 +181,59 @@ class EnhancedExcelProcessor:
             # Basic info
             output_parts.append(f"Rows: {metadata['rows']}")
             output_parts.append(f"Columns: {', '.join(metadata['columns'])}")
-            output_parts.append("")
 
-            # Column summaries
+            # Add information about hidden elements
+            if metadata['hidden_rows']:
+                output_parts.append(f"Hidden Rows: {len(metadata['hidden_rows'])}")
+            if metadata['hidden_columns']:
+                output_parts.append(f"Hidden Columns: {len(metadata['hidden_columns'])}")
+
+            # Add information about merged cells
+            if summary['merged_cells']:
+                output_parts.append("\nMerged Cells:")
+                for merge_info in summary['merged_cells'][:5]:  # Show first 5 merged ranges
+                    output_parts.append(f"  - Range: {merge_info['range']}")
+
+            # Numeric columns summary
             if metadata['numeric_columns']:
-                output_parts.append("Numeric Columns Summary:")
+                output_parts.append("\nNumeric Columns Summary:")
                 for col in metadata['numeric_columns']:
                     stats = summary['numeric_summaries'][col]
                     output_parts.append(f"  {col}:")
                     output_parts.append(f"    Range: {stats['min']} to {stats['max']}")
                     output_parts.append(f"    Average: {stats['mean']:.2f}")
-                output_parts.append("")
+                    output_parts.append(f"    Sum: {stats['sum']:.2f}")
 
+            # Categorical columns summary
             if metadata['categorical_columns']:
-                output_parts.append("Categorical Columns Summary:")
+                output_parts.append("\nCategorical Columns Summary:")
                 for col in metadata['categorical_columns']:
-                    cats = summary['categorical_summaries'][col]
-                    output_parts.append(f"  {col}:")
-                    output_parts.append(f"    Unique Values: {cats['unique_values']}")
-                    if cats['top_values']:
-                        output_parts.append("    Top Values: " +
-                            ", ".join(f"{k} ({v})" for k, v in
-                                list(cats['top_values'].items())[:3]))
-                output_parts.append("")
+                    if col in summary['categorical_summaries']:
+                        cats = summary['categorical_summaries'][col]
+                        output_parts.append(f"  {col}:")
+                        output_parts.append(f"    Unique Values: {cats['unique_values']}")
+                        if cats['top_values']:
+                            output_parts.append("    Top Values: " +
+                                ", ".join(f"{k} ({v})" for k, v in
+                                    list(cats['top_values'].items())[:3]))
+                        if cats['contains_currency']:
+                            output_parts.append("    Contains Currency Values")
+                        if cats['contains_dates']:
+                            output_parts.append("    Contains Date Values")
+
+            # Add formula information
+            if summary['formulas']:
+                output_parts.append("\nFormulas Present:")
+                for cell, formula in list(summary['formulas'].items())[:5]:  # Show first 5 formulas
+                    output_parts.append(f"  {cell}: {formula}")
 
-            # Sample data
-            output_parts.append("Sample Data:")
-            output_parts.append(df.head(3).to_string())
+            # Sample data with improved formatting
+            output_parts.append("\nSample Data:")
+            sample_data = df.head(5).fillna("").to_string(index=False)
+            output_parts.append(sample_data)
             output_parts.append("\n")
 
-            # Relationships
+            # Sheet relationships
             if self.relationships:
                 output_parts.append("Sheet Relationships:")
                 for rel_key, rel_info in self.relationships.items():
@@ -173,7 +247,7 @@ class EnhancedExcelProcessor:
                             f"{parts[0]}.{parts[2]} and {parts[1]}.{parts[3]}")
 
         return "\n".join(output_parts)
-
+
     def get_sheet_summary(self, sheet_name: str) -> Optional[Dict]:
         """Get summary for a specific sheet"""
         return self.sheet_summaries.get(sheet_name)
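
Note: two of the new helpers rely on behavior worth flagging. openpyxl cells have no .formula attribute (formula text is exposed as a string cell value starting with '=' only when the workbook is loaded with data_only=False, whereas this commit loads with data_only=True), and str.contains('|'.join(['$', ...])) compiles '$' as a regex end-of-string anchor, so every non-empty value matches; the outer any() over a scalar boolean also raises a TypeError. Corrected sketches of both helpers, under those assumptions:

    import pandas as pd

    def get_formulas_info(ws) -> dict:
        # Requires load_workbook(path, data_only=False) so formula strings survive.
        return {
            cell.coordinate: cell.value
            for row in ws.iter_rows()
            for cell in row
            if isinstance(cell.value, str) and cell.value.startswith('=')
        }

    def detect_currency(series: pd.Series) -> bool:
        # Literal (non-regex) matching avoids '$' acting as a regex anchor.
        text = series.astype(str)
        return any(text.str.contains(sym, regex=False).any() for sym in ['$', '€', '£', '¥'])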
src/utils/google_drive_service.py CHANGED
@@ -3,7 +3,8 @@ from google.oauth2 import service_account
 from googleapiclient.discovery import build
 from googleapiclient.http import MediaIoBaseDownload
 import io
-import os
+from typing import List, Dict, Any
+import logging
 
 class GoogleDriveService:
     def __init__(self, credentials_path: str):
@@ -19,24 +20,61 @@ class GoogleDriveService:
         )
         self.service = build('drive', 'v3', credentials=self.credentials)
 
-    def get_folder_contents(self, folder_id: str):
+    def get_folder_contents(self, folder_id: str, include_subfolders: bool = False) -> List[Dict[str, Any]]:
         """
-        Get contents of a Drive folder
+        Get contents of a Drive folder including subfolders if specified
 
         Args:
             folder_id (str): ID of the folder to process
+            include_subfolders (bool): Whether to include contents of subfolders (default: False)
 
         Returns:
             List[Dict]: List of file metadata
         """
-        query = f"'{folder_id}' in parents and trashed=false"
-        results = self.service.files().list(
-            q=query,
-            fields="files(id, name, mimeType, modifiedTime)",
-            supportsAllDrives=True,
-            includeItemsFromAllDrives=True
-        ).execute()
-        return results.get('files', [])
+        all_files = []
+        try:
+            # Get all items in the current folder
+            query = f"'{folder_id}' in parents and trashed=false"
+            results = self.service.files().list(
+                q=query,
+                fields="files(id, name, mimeType, modifiedTime, parents)",
+                supportsAllDrives=True,
+                includeItemsFromAllDrives=True
+            ).execute()
+
+            items = results.get('files', [])
+
+            for item in items:
+                if item['mimeType'] == 'application/vnd.google-apps.folder' and include_subfolders:
+                    # Recursively get contents of subfolder
+                    try:
+                        subfolder_files = self.get_folder_contents(
+                            item['id'],
+                            include_subfolders=True
+                        )
+                        # Add folder path information to each file
+                        for file in subfolder_files:
+                            if not file.get('folder_path'):
+                                file['folder_path'] = []
+                            file['folder_path'].insert(0, {
+                                'id': item['id'],
+                                'name': item['name']
+                            })
+                        all_files.extend(subfolder_files)
+                    except Exception as e:
+                        logging.error(f"Error processing subfolder {item['name']}: {str(e)}")
+                        continue
+                else:
+                    # For backward compatibility, maintain original structure
+                    # but add folder path information
+                    item['folder_path'] = []
+                    all_files.append(item)
+
+            return all_files
+
+        except Exception as e:
+            logging.error(f"Error getting folder contents for folder {folder_id}: {str(e)}")
+            return []  # Return empty list for backward compatibility
 
     def download_file(self, file_id: str) -> bytes:
         """
src/vectorstores/__pycache__/chroma_vectorstore.cpython-312.pyc CHANGED
Binary files a/src/vectorstores/__pycache__/chroma_vectorstore.cpython-312.pyc and b/src/vectorstores/__pycache__/chroma_vectorstore.cpython-312.pyc differ
 
src/vectorstores/chroma_vectorstore.py CHANGED
@@ -93,42 +93,62 @@ class ChromaVectorStore(BaseVectorStore):
         **kwargs
     ) -> List[Dict[str, Any]]:
         """
-        Perform similarity search
-
-        Args:
-            query_embedding (List[float]): Embedding of the query
-            top_k (int): Number of top similar documents to retrieve
-            **kwargs: Additional search parameters
-
-        Returns:
-            List[Dict[str, Any]]: List of documents with their text, metadata, and scores
+        Perform similarity search with improved matching
         """
         try:
+            # Increase n_results to get more potential matches
             results = self.collection.query(
                 query_embeddings=[query_embedding],
-                n_results=top_k,
+                n_results=10,  # Get more initial results
                 include=['documents', 'metadatas', 'distances']
             )
 
-            # Handle the case where no results are found
             if not results or 'documents' not in results or not results['documents']:
+                logging.warning("No results found in similarity search")
                 return []
 
-            # Format results to include text, metadata, and scores
             formatted_results = []
             documents = results['documents'][0]  # First query's results
             metadatas = results['metadatas'][0] if results.get('metadatas') else [None] * len(documents)
             distances = results['distances'][0] if results.get('distances') else [None] * len(documents)
 
+            # Process all results
             for doc, meta, dist in zip(documents, metadatas, distances):
-                formatted_results.append({
-                    'text': doc,
-                    'metadata': meta or {},
-                    'score': 1.0 - (dist or 0.0) if dist is not None else None  # Convert distance to similarity score
-                })
+                # Convert distance to similarity score (1 is most similar, 0 is least)
+                similarity_score = 1.0 - (dist or 0.0) if dist is not None else None
+
+                # More permissive threshold and include all results for filtering
+                if similarity_score is not None and similarity_score > 0.2:  # Lower threshold
+                    formatted_results.append({
+                        'text': doc,
+                        'metadata': meta or {},
+                        'score': similarity_score
+                    })
+
+            # Sort by score and get top_k results
+            formatted_results.sort(key=lambda x: x['score'] or 0, reverse=True)
+
+            # Check if results are from same document and get consecutive chunks
+            if formatted_results:
+                first_doc_id = formatted_results[0]['metadata'].get('document_id')
+                all_chunks_same_doc = []
+
+                # Get all chunks from the same document
+                for result in formatted_results:
+                    if result['metadata'].get('document_id') == first_doc_id:
+                        all_chunks_same_doc.append(result)
+
+                # Sort chunks by their index to maintain document flow
+                all_chunks_same_doc.sort(
+                    key=lambda x: x['metadata'].get('chunk_index', 0)
+                )
+
+                # Return either all chunks from same document or top_k results
+                if len(all_chunks_same_doc) > 0:
+                    return all_chunks_same_doc[:top_k]
+
+            return formatted_results[:top_k]
 
-            return formatted_results
-
         except Exception as e:
             logging.error(f"Error performing similarity search in ChromaDB: {str(e)}")
             raise
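
Note: the post-filtering above caps candidates at a fixed n_results=10 (so a top_k above 10 is effectively ignored) and, when the best hit's document also supplies other hits, returns that document's chunks in chunk_index order instead of pure score order. A minimal sketch of the same re-ranking on mock results (scores and metadata invented for illustration):

    # Sketch of the threshold + same-document chunk grouping logic above.
    def rerank(results: list, top_k: int = 3, threshold: float = 0.2) -> list:
        kept = [r for r in results if r['score'] is not None and r['score'] > threshold]
        kept.sort(key=lambda r: r['score'] or 0, reverse=True)
        if kept:
            first_doc = kept[0]['metadata'].get('document_id')
            same_doc = [r for r in kept if r['metadata'].get('document_id') == first_doc]
            same_doc.sort(key=lambda r: r['metadata'].get('chunk_index', 0))
            if same_doc:
                return same_doc[:top_k]
        return kept[:top_k]

    mock = [
        {'text': 'chunk B', 'score': 0.9, 'metadata': {'document_id': 'doc1', 'chunk_index': 1}},
        {'text': 'chunk A', 'score': 0.7, 'metadata': {'document_id': 'doc1', 'chunk_index': 0}},
        {'text': 'other',   'score': 0.1, 'metadata': {'document_id': 'doc2', 'chunk_index': 0}},
    ]
    print([r['text'] for r in rerank(mock)])  # ['chunk A', 'chunk B']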
temp_downloads/17he27jN4louYr1xOYASf4BP2e-tGTICt.xlsx ADDED
Binary file (9.81 kB).
 
temp_downloads/1K608-Qr03M6nf5FhB6AajbHm8kjQujx1.xlsx ADDED
Binary file (30.4 kB).