# Spaces: Build error
# File size: 6,764 Bytes
# 965ac15
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import unittest
from unittest.mock import MagicMock, patch, mock_open
import pinecone
from langchain.schema import Document
from core.rag_engine import RAGPrep
from typing import List, Dict, Optional
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_openai import OpenAIEmbeddings
import pinecone
from tqdm.auto import tqdm
from langchain.schema import Document
from config import get_settings
class TestRAGPrep(unittest.TestCase):
    """Unit tests for ``RAGPrep``.

    ``get_settings``, ``OpenAIEmbeddings`` and ``pinecone.Pinecone`` are
    patched for every test, so the tests talk to mocks rather than to the
    real services.
    """

    def setUp(self):
        """Build the mock settings object and start the shared patchers.

        Every patcher's ``stop`` is registered with ``addCleanup`` right after
        it is started.  Cleanups run even when ``setUp`` itself raises, whereas
        ``tearDown`` does not, so a failure part-way through cannot leave a
        patch active for the following tests.
        """
        # API keys and the PDF directory are copied from the real settings;
        # all other fields are fixed test values.
        self.settings = get_settings()
        self.mock_settings = MagicMock()
        self.mock_settings.INDEX_NAME = "test-index"
        self.mock_settings.PINECONE_API_KEY = self.settings.PINECONE_API_KEY
        self.mock_settings.CLOUD = "aws"
        self.mock_settings.REGION = "us-east-1"
        self.mock_settings.PDF_DIRECTORY = self.settings.PDF_DIRECTORY
        self.mock_settings.CHUNK_SIZE = 1000
        self.mock_settings.CHUNK_OVERLAP = 200
        self.mock_settings.DIMENSIONS = 1536
        self.mock_settings.OPENAI_API_KEY = self.settings.OPENAI_API_KEY

        # Create patchers for get_settings and the other dependencies.
        self.settings_patcher = patch(
            'core.rag_engine.get_settings',
            return_value=self.mock_settings,
        )
        self.embeddings_patcher = patch('core.rag_engine.OpenAIEmbeddings')
        self.pinecone_patcher = patch('core.rag_engine.pinecone.Pinecone')

        # Start all patchers; each one is stopped automatically via addCleanup.
        self.mock_get_settings = self._start_patcher(self.settings_patcher)
        self.mock_embeddings = self._start_patcher(self.embeddings_patcher)
        self.mock_pinecone = self._start_patcher(self.pinecone_patcher)

    def _start_patcher(self, patcher):
        """Start ``patcher``, schedule its ``stop`` as a cleanup, return the mock."""
        started_mock = patcher.start()
        # Registered only after a successful start, so stop() is never called
        # on a patcher that was not started.
        self.addCleanup(patcher.stop)
        return started_mock

    def test_init(self):
        """RAGPrep() stores the settings and constructs its clients from them."""
        rag_prep = RAGPrep()

        self.assertEqual(rag_prep.index_name, "test-index")
        self.assertEqual(rag_prep.settings, self.mock_settings)
        self.mock_pinecone.assert_called_once_with(
            self.mock_settings.PINECONE_API_KEY
        )
        self.mock_embeddings.assert_called_once_with(
            openai_api_key=self.mock_settings.OPENAI_API_KEY
        )

    @patch('core.rag_engine.DirectoryLoader')
    def test_load_and_split_pdfs(self, mock_loader_class):
        """load_and_split_pdfs() loads PDFs through DirectoryLoader and returns a list."""
        # Documents the mocked loader hands back.
        mock_docs = [
            Document(
                page_content="Test content 1",
                metadata={"source": "test1.pdf", "page": 1},
            ),
            Document(
                page_content="Test content 2",
                metadata={"source": "test2.pdf", "page": 1},
            ),
        ]

        # Configure the mock loader.
        mock_loader_instance = MagicMock()
        mock_loader_instance.load.return_value = mock_docs
        mock_loader_class.return_value = mock_loader_instance

        rag_prep = RAGPrep()
        chunks = rag_prep.load_and_split_pdfs()

        self.assertIsInstance(chunks, list)
        mock_loader_class.assert_called_once_with(
            self.mock_settings.PDF_DIRECTORY,
            glob="**/*.pdf",
            loader_cls=PyPDFLoader,
        )
        mock_loader_instance.load.assert_called_once()

    def test_process_and_upload(self):
        """process_and_upload() embeds the chunks and upserts one vector per chunk."""
        dimensions = self.mock_settings.DIMENSIONS
        mock_docs = [
            Document(page_content="Test 1", metadata={"source": "test.pdf", "page": 1}),
            Document(page_content="Test 2", metadata={"source": "test.pdf", "page": 2}),
        ]

        # Embeddings instance returning one vector per document.
        mock_embeddings_instance = MagicMock()
        mock_embeddings_instance.embed_documents.return_value = [
            [0.1] * dimensions,
            [0.2] * dimensions,
        ]
        self.mock_embeddings.return_value = mock_embeddings_instance

        # Index object handed out by the mocked Pinecone client.
        mock_index = MagicMock()
        self.mock_pinecone.return_value.Index.return_value = mock_index

        # Bypass real PDF loading; only the upload path is under test.
        with patch.object(RAGPrep, 'load_and_split_pdfs', return_value=mock_docs):
            rag_prep = RAGPrep()
            rag_prep.process_and_upload()

            mock_embeddings_instance.embed_documents.assert_called_once()
            mock_index.upsert.assert_called()

            # call_args[1] holds the keyword arguments of the most recent
            # upsert call; the vectors are expected under the 'vectors' keyword.
            upserted_vectors = mock_index.upsert.call_args[1]['vectors']
            self.assertEqual(len(upserted_vectors), 2)  # Two documents
            for vector in upserted_vectors:
                # Element 1 of each upserted entry is the embedding values.
                self.assertEqual(len(vector[1]), dimensions)

    def test_cleanup_index_success(self):
        """cleanup_index() returns True and deletes all vectors of an existing index."""
        with patch('pinecone.Pinecone') as mock_pinecone:
            mock_pc = mock_pinecone.return_value
            mock_pc.list_indexes.return_value.names.return_value = ["test-index"]
            mock_index = MagicMock()
            mock_pc.Index.return_value = mock_index

            rag_prep = RAGPrep()
            result = rag_prep.cleanup_index()

            self.assertTrue(result)
            mock_index.delete.assert_called_once_with(delete_all=True)

    def test_cleanup_index_no_index(self):
        """cleanup_index() returns True without opening an index when none exists."""
        with patch('pinecone.Pinecone') as mock_pinecone:
            mock_pc = mock_pinecone.return_value
            mock_pc.list_indexes.return_value.names.return_value = []

            rag_prep = RAGPrep()
            result = rag_prep.cleanup_index()

            self.assertTrue(result)
            mock_pc.Index.assert_not_called()

    def test_cleanup_index_error(self):
        """cleanup_index() returns False when opening the index raises."""
        with patch('pinecone.Pinecone') as mock_pinecone:
            mock_pc = mock_pinecone.return_value
            mock_pc.list_indexes.return_value.names.return_value = ["test-index"]
            mock_pc.Index.side_effect = Exception("Test error")

            rag_prep = RAGPrep()
            result = rag_prep.cleanup_index()

            self.assertFalse(result)
# Run the test suite when this file is executed directly.
if __name__ == '__main__':
    unittest.main()