File size: 6,764 Bytes
965ac15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import unittest
from unittest.mock import MagicMock, patch, mock_open
import pinecone
from langchain.schema import Document
from core.rag_engine import RAGPrep
from typing import List, Dict, Optional
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_openai import OpenAIEmbeddings
import pinecone
from tqdm.auto import tqdm
from langchain.schema import Document
from config import get_settings


class TestRAGPrep(unittest.TestCase):
    """Unit tests for ``RAGPrep``.

    Every test runs with ``core.rag_engine.get_settings``,
    ``core.rag_engine.OpenAIEmbeddings`` and
    ``core.rag_engine.pinecone.Pinecone`` replaced by mocks (see ``setUp``).
    """

    def setUp(self):
        """Build the mocked settings object and start the shared patchers.

        Each patcher's ``stop`` is registered with ``addCleanup`` as soon as
        the patcher is started, so the patches are undone even when a later
        step of ``setUp`` raises (``tearDown`` is not run in that case).
        """
        # NOTE(review): the real settings are loaded here only to copy the API
        # keys and the PDF directory into the mock; this presumably requires a
        # loadable configuration on the machine running the tests - confirm
        # whether placeholder values would be sufficient.
        self.settings = get_settings()
        self.mock_settings = MagicMock()
        self.mock_settings.INDEX_NAME = "test-index"
        self.mock_settings.PINECONE_API_KEY = self.settings.PINECONE_API_KEY
        self.mock_settings.CLOUD = "aws"
        self.mock_settings.REGION = "us-east-1"
        self.mock_settings.PDF_DIRECTORY = self.settings.PDF_DIRECTORY
        self.mock_settings.CHUNK_SIZE = 1000
        self.mock_settings.CHUNK_OVERLAP = 200
        self.mock_settings.DIMENSIONS = 1536
        self.mock_settings.OPENAI_API_KEY = self.settings.OPENAI_API_KEY

        # Create patchers for get_settings and the external service clients
        self.settings_patcher = patch('core.rag_engine.get_settings', return_value=self.mock_settings)
        self.embeddings_patcher = patch('core.rag_engine.OpenAIEmbeddings')
        self.pinecone_patcher = patch('core.rag_engine.pinecone.Pinecone')

        # Start all patchers; each one is stopped automatically via addCleanup
        self.mock_get_settings = self._start_patcher(self.settings_patcher)
        self.mock_embeddings = self._start_patcher(self.embeddings_patcher)
        self.mock_pinecone = self._start_patcher(self.pinecone_patcher)

    def _start_patcher(self, patcher):
        """Start ``patcher``, schedule its ``stop`` as a cleanup, return the mock."""
        started_mock = patcher.start()
        self.addCleanup(patcher.stop)
        return started_mock

    def test_init(self):
        """RAGPrep() stores the settings and builds its clients from them."""
        # Create instance
        rag_prep = RAGPrep()

        # Assert initialization
        self.assertEqual(rag_prep.index_name, "test-index")
        self.assertEqual(rag_prep.settings, self.mock_settings)
        self.mock_pinecone.assert_called_once_with(self.mock_settings.PINECONE_API_KEY)
        self.mock_embeddings.assert_called_once_with(openai_api_key=self.mock_settings.OPENAI_API_KEY)

    @patch('core.rag_engine.DirectoryLoader')
    def test_load_and_split_pdfs(self, mock_loader_class):
        """load_and_split_pdfs() loads PDFs through DirectoryLoader and returns a list."""
        # Setup mock documents
        mock_docs = [
            Document(page_content="Test content 1", metadata={"source": "test1.pdf", "page": 1}),
            Document(page_content="Test content 2", metadata={"source": "test2.pdf", "page": 1})
        ]

        # Configure the mock loader so that load() yields the documents above
        mock_loader_instance = MagicMock()
        mock_loader_instance.load.return_value = mock_docs
        mock_loader_class.return_value = mock_loader_instance

        # Create instance and test
        rag_prep = RAGPrep()
        chunks = rag_prep.load_and_split_pdfs()

        # Assertions
        self.assertIsInstance(chunks, list)
        mock_loader_class.assert_called_once_with(
            self.mock_settings.PDF_DIRECTORY,
            glob="**/*.pdf",
            loader_cls=PyPDFLoader
        )
        mock_loader_instance.load.assert_called_once()

    def test_process_and_upload(self):
        """process_and_upload() embeds the chunks and upserts them to the index."""
        # Setup mock documents
        mock_docs = [
            Document(page_content="Test 1", metadata={"source": "test.pdf", "page": 1}),
            Document(page_content="Test 2", metadata={"source": "test.pdf", "page": 2})
        ]

        # Create mock embeddings instance returning one 1536-dim vector per document
        mock_embeddings_instance = MagicMock()
        mock_embeddings_instance.embed_documents.return_value = [[0.1] * 1536, [0.2] * 1536]
        self.mock_embeddings.return_value = mock_embeddings_instance

        # Mock the index handed out by the patched Pinecone client
        mock_index = MagicMock()
        self.mock_pinecone.return_value.Index.return_value = mock_index

        # Mock load_and_split_pdfs so no PDF files are read
        with patch.object(RAGPrep, 'load_and_split_pdfs', return_value=mock_docs):
            # Create instance and test
            rag_prep = RAGPrep()
            rag_prep.process_and_upload()

            # Assertions
            mock_embeddings_instance.embed_documents.assert_called_once()
            self.assertTrue(mock_index.upsert.called)
            # Verify the format of the (last) upsert call: the keyword argument
            # 'vectors' must hold one entry per document, and the second
            # element of every entry must have 1536 items.
            called_args = mock_index.upsert.call_args[1]['vectors']
            self.assertEqual(len(called_args), 2)  # Two documents
            self.assertTrue(all(len(v[1]) == 1536 for v in called_args))

    def test_cleanup_index_success(self):
        """cleanup_index() deletes all vectors when the index is listed."""
        # NOTE(review): 'pinecone.Pinecone' presumably names the same attribute
        # that setUp patches via 'core.rag_engine.pinecone.Pinecone'; this
        # inner patch temporarily replaces it with a fresh mock.
        with patch('pinecone.Pinecone') as mock_pinecone:
            # Setup mock
            mock_pc = mock_pinecone.return_value
            mock_pc.list_indexes.return_value.names.return_value = ["test-index"]
            mock_index = MagicMock()
            mock_pc.Index.return_value = mock_index

            # Create instance and test
            rag_prep = RAGPrep()
            result = rag_prep.cleanup_index()

            # Assertions
            self.assertTrue(result)
            mock_index.delete.assert_called_once_with(delete_all=True)

    def test_cleanup_index_no_index(self):
        """cleanup_index() succeeds without touching Index when nothing is listed."""
        with patch('pinecone.Pinecone') as mock_pinecone:
            # Setup mock: the client reports no existing indexes
            mock_pc = mock_pinecone.return_value
            mock_pc.list_indexes.return_value.names.return_value = []

            # Create instance and test
            rag_prep = RAGPrep()
            result = rag_prep.cleanup_index()

            # Assertions
            self.assertTrue(result)
            mock_pc.Index.assert_not_called()

    def test_cleanup_index_error(self):
        """cleanup_index() returns False when obtaining the index raises."""
        with patch('pinecone.Pinecone') as mock_pinecone:
            # Setup mock to raise exception
            mock_pc = mock_pinecone.return_value
            mock_pc.list_indexes.return_value.names.return_value = ["test-index"]
            mock_pc.Index.side_effect = Exception("Test error")

            # Create instance and test
            rag_prep = RAGPrep()
            result = rag_prep.cleanup_index()

            # Assertions
            self.assertFalse(result)

# Run the tests via unittest's command-line runner when this file is executed
# directly as a script.
if __name__ == '__main__':
    unittest.main()