"""Unit tests for ``core.rag_engine.RAGPrep``.

Pinecone, the OpenAI embeddings client and the settings lookup are all
replaced with mocks, so the suite does not need real credentials or any
network access.
"""
import os
import sys
import unittest
from typing import Dict, List, Optional
from unittest.mock import MagicMock, mock_open, patch

# The project root must be on sys.path before the project-local imports
# below (``core`` and ``config``) can be resolved.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

import pinecone
from langchain.schema import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader
from langchain_openai import OpenAIEmbeddings
from tqdm.auto import tqdm

from core.rag_engine import RAGPrep
from config import get_settings


class TestRAGPrep(unittest.TestCase):
    """Exercise RAGPrep initialisation, PDF loading, upload and index cleanup."""

    def _start_patch(self, target, **kwargs):
        """Start a patch on *target* and guarantee that it is undone.

        The stop callback is registered with ``addCleanup`` so the patch is
        reverted even when a later statement in ``setUp`` raises (in which
        case ``tearDown`` would never run).

        Returns:
            The mock object installed in place of *target*.
        """
        patcher = patch(target, **kwargs)
        installed = patcher.start()
        self.addCleanup(patcher.stop)
        return installed

    def setUp(self):
        """Build fake settings and patch RAGPrep's external dependencies."""
        # Fixed dummy values: everything that consumes them is mocked, so the
        # tests must not depend on (or expose) the real configuration.
        self.mock_settings = MagicMock()
        self.mock_settings.INDEX_NAME = "test-index"
        self.mock_settings.PINECONE_API_KEY = "test-pinecone-api-key"
        self.mock_settings.CLOUD = "aws"
        self.mock_settings.REGION = "us-east-1"
        self.mock_settings.PDF_DIRECTORY = "test-pdf-directory"
        self.mock_settings.CHUNK_SIZE = 1000
        self.mock_settings.CHUNK_OVERLAP = 200
        self.mock_settings.DIMENSIONS = 1536
        self.mock_settings.OPENAI_API_KEY = "test-openai-api-key"

        # Patch the names as they are looked up from core.rag_engine.
        self.mock_get_settings = self._start_patch(
            'core.rag_engine.get_settings',
            return_value=self.mock_settings,
        )
        self.mock_embeddings = self._start_patch(
            'core.rag_engine.OpenAIEmbeddings'
        )
        self.mock_pinecone = self._start_patch(
            'core.rag_engine.pinecone.Pinecone'
        )

    def test_init(self):
        """RAGPrep stores the settings and builds its clients from them."""
        rag_prep = RAGPrep()

        self.assertEqual(rag_prep.index_name, "test-index")
        self.assertEqual(rag_prep.settings, self.mock_settings)
        self.mock_pinecone.assert_called_once_with(
            self.mock_settings.PINECONE_API_KEY
        )
        self.mock_embeddings.assert_called_once_with(
            openai_api_key=self.mock_settings.OPENAI_API_KEY
        )

    @patch('core.rag_engine.DirectoryLoader')
    def test_load_and_split_pdfs(self, mock_loader_class):
        """load_and_split_pdfs loads PDFs via DirectoryLoader and returns a list."""
        mock_docs = [
            Document(
                page_content="Test content 1",
                metadata={"source": "test1.pdf", "page": 1},
            ),
            Document(
                page_content="Test content 2",
                metadata={"source": "test2.pdf", "page": 1},
            ),
        ]

        # The loader class returns an instance whose load() yields our docs.
        mock_loader_instance = MagicMock()
        mock_loader_instance.load.return_value = mock_docs
        mock_loader_class.return_value = mock_loader_instance

        rag_prep = RAGPrep()
        chunks = rag_prep.load_and_split_pdfs()

        self.assertIsInstance(chunks, list)
        mock_loader_class.assert_called_once_with(
            self.mock_settings.PDF_DIRECTORY,
            glob="**/*.pdf",
            loader_cls=PyPDFLoader,
        )
        mock_loader_instance.load.assert_called_once()

    def test_process_and_upload(self):
        """process_and_upload embeds the chunks and upserts one vector per chunk."""
        mock_docs = [
            Document(
                page_content="Test 1",
                metadata={"source": "test.pdf", "page": 1},
            ),
            Document(
                page_content="Test 2",
                metadata={"source": "test.pdf", "page": 2},
            ),
        ]

        # One 1536-dimensional embedding per mock document.
        mock_embeddings_instance = MagicMock()
        mock_embeddings_instance.embed_documents.return_value = [
            [0.1] * 1536,
            [0.2] * 1536,
        ]
        self.mock_embeddings.return_value = mock_embeddings_instance

        mock_index = MagicMock()
        self.mock_pinecone.return_value.Index.return_value = mock_index

        # Skip real PDF loading; feed the prepared documents straight in.
        with patch.object(
            RAGPrep, 'load_and_split_pdfs', return_value=mock_docs
        ):
            rag_prep = RAGPrep()
            rag_prep.process_and_upload()

        mock_embeddings_instance.embed_documents.assert_called_once()
        mock_index.upsert.assert_called()

        # Inspect the most recent upsert: vectors are passed by keyword and
        # the second element of each entry is checked as the embedding.
        upserted = mock_index.upsert.call_args.kwargs['vectors']
        self.assertEqual(len(upserted), 2)  # Two documents
        self.assertTrue(all(len(vector[1]) == 1536 for vector in upserted))

    def test_cleanup_index_success(self):
        """cleanup_index deletes all vectors when the index exists."""
        mock_pc = self.mock_pinecone.return_value
        mock_pc.list_indexes.return_value.names.return_value = ["test-index"]
        mock_index = MagicMock()
        mock_pc.Index.return_value = mock_index

        rag_prep = RAGPrep()
        result = rag_prep.cleanup_index()

        self.assertTrue(result)
        mock_index.delete.assert_called_once_with(delete_all=True)

    def test_cleanup_index_no_index(self):
        """cleanup_index succeeds without touching Index when none exists."""
        mock_pc = self.mock_pinecone.return_value
        mock_pc.list_indexes.return_value.names.return_value = []

        rag_prep = RAGPrep()
        result = rag_prep.cleanup_index()

        self.assertTrue(result)
        mock_pc.Index.assert_not_called()

    def test_cleanup_index_error(self):
        """cleanup_index reports failure when accessing the index raises."""
        mock_pc = self.mock_pinecone.return_value
        mock_pc.list_indexes.return_value.names.return_value = ["test-index"]
        mock_pc.Index.side_effect = Exception("Test error")

        rag_prep = RAGPrep()
        result = rag_prep.cleanup_index()

        self.assertFalse(result)


if __name__ == '__main__':
    unittest.main()