Spaces:
Build error
Build error
import sys | |
import os | |
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
import unittest | |
from unittest.mock import MagicMock, patch, mock_open | |
import pinecone | |
from langchain.schema import Document | |
from core.rag_engine import RAGPrep | |
from typing import List, Dict, Optional | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader | |
from langchain_openai import OpenAIEmbeddings | |
import pinecone | |
from tqdm.auto import tqdm | |
from langchain.schema import Document | |
from config import get_settings | |
class TestRAGPrep(unittest.TestCase):
    """Unit tests for ``RAGPrep`` from ``core.rag_engine``.

    ``setUp`` patches ``get_settings``, ``OpenAIEmbeddings`` and
    ``pinecone.Pinecone`` as seen from ``core.rag_engine``, so each test
    works against ``MagicMock`` stand-ins instead of the real collaborators.
    """

    def setUp(self):
        """Build the mocked settings object and install the shared patches."""
        # NOTE(review): the real settings are still read here to source the API
        # keys and the PDF directory, so the suite needs a loadable
        # configuration -- consider fixed placeholder values if RAGPrep does
        # not depend on them being real.
        self.settings = get_settings()

        self.mock_settings = MagicMock()
        self.mock_settings.INDEX_NAME = "test-index"
        self.mock_settings.PINECONE_API_KEY = self.settings.PINECONE_API_KEY
        self.mock_settings.CLOUD = "aws"
        self.mock_settings.REGION = "us-east-1"
        self.mock_settings.PDF_DIRECTORY = self.settings.PDF_DIRECTORY
        self.mock_settings.CHUNK_SIZE = 1000
        self.mock_settings.CHUNK_OVERLAP = 200
        self.mock_settings.DIMENSIONS = 1536
        self.mock_settings.OPENAI_API_KEY = self.settings.OPENAI_API_KEY

        # Each patcher is registered with addCleanup immediately after it is
        # started: cleanups run even when a later line of setUp raises, whereas
        # tearDown would be skipped and leave the earlier patches active.
        self.settings_patcher = patch(
            'core.rag_engine.get_settings', return_value=self.mock_settings
        )
        self.mock_get_settings = self.settings_patcher.start()
        self.addCleanup(self.settings_patcher.stop)

        self.embeddings_patcher = patch('core.rag_engine.OpenAIEmbeddings')
        self.mock_embeddings = self.embeddings_patcher.start()
        self.addCleanup(self.embeddings_patcher.stop)

        self.pinecone_patcher = patch('core.rag_engine.pinecone.Pinecone')
        self.mock_pinecone = self.pinecone_patcher.start()
        self.addCleanup(self.pinecone_patcher.stop)

    def test_init(self):
        """RAGPrep() picks up the patched settings and builds its clients."""
        rag_prep = RAGPrep()

        self.assertEqual(rag_prep.index_name, "test-index")
        self.assertEqual(rag_prep.settings, self.mock_settings)
        self.mock_pinecone.assert_called_once_with(
            self.mock_settings.PINECONE_API_KEY
        )
        self.mock_embeddings.assert_called_once_with(
            openai_api_key=self.mock_settings.OPENAI_API_KEY
        )

    # The decorator supplies ``mock_loader_class``; without it the test runner
    # calls this method with ``self`` only and it fails with a TypeError.
    # NOTE(review): assumes core.rag_engine imports DirectoryLoader by name
    # (the assertions below use DirectoryLoader's call shape) -- confirm.
    @patch('core.rag_engine.DirectoryLoader')
    def test_load_and_split_pdfs(self, mock_loader_class):
        """load_and_split_pdfs() loads via the directory loader and returns a list."""
        loaded_docs = [
            Document(
                page_content="Test content 1",
                metadata={"source": "test1.pdf", "page": 1},
            ),
            Document(
                page_content="Test content 2",
                metadata={"source": "test2.pdf", "page": 1},
            ),
        ]

        # The loader class mock returns an instance whose load() yields the docs.
        mock_loader_instance = MagicMock()
        mock_loader_instance.load.return_value = loaded_docs
        mock_loader_class.return_value = mock_loader_instance

        rag_prep = RAGPrep()
        chunks = rag_prep.load_and_split_pdfs()

        self.assertIsInstance(chunks, list)
        mock_loader_class.assert_called_once_with(
            self.mock_settings.PDF_DIRECTORY,
            glob="**/*.pdf",
            loader_cls=PyPDFLoader,
        )
        mock_loader_instance.load.assert_called_once()

    def test_process_and_upload(self):
        """process_and_upload() embeds the chunks once and upserts one vector each."""
        split_docs = [
            Document(page_content="Test 1", metadata={"source": "test.pdf", "page": 1}),
            Document(page_content="Test 2", metadata={"source": "test.pdf", "page": 2}),
        ]

        # Embeddings client returns one 1536-dimensional vector per document.
        mock_embeddings_instance = MagicMock()
        mock_embeddings_instance.embed_documents.return_value = [
            [0.1] * 1536,
            [0.2] * 1536,
        ]
        self.mock_embeddings.return_value = mock_embeddings_instance

        # Index handle returned by the patched Pinecone client.
        mock_index = MagicMock()
        self.mock_pinecone.return_value.Index.return_value = mock_index

        # Bypass PDF loading so only the embed/upsert path is exercised.
        with patch.object(RAGPrep, 'load_and_split_pdfs', return_value=split_docs):
            rag_prep = RAGPrep()
            rag_prep.process_and_upload()

        mock_embeddings_instance.embed_documents.assert_called_once()
        mock_index.upsert.assert_called()

        # Inspect the keyword arguments of the most recent upsert call.
        _, upsert_kwargs = mock_index.upsert.call_args
        vectors = upsert_kwargs['vectors']
        self.assertEqual(len(vectors), 2)  # Two documents
        # The second element of each upserted entry is checked for 1536 values.
        self.assertTrue(all(len(v[1]) == 1536 for v in vectors))

    def test_cleanup_index_success(self):
        """cleanup_index() deletes all vectors when the index is listed."""
        # NOTE(review): setUp already patches core.rag_engine.pinecone.Pinecone;
        # this nested patch is kept as written in case the two targets differ.
        with patch('pinecone.Pinecone') as mock_pinecone:
            mock_pc = mock_pinecone.return_value
            mock_pc.list_indexes.return_value.names.return_value = ["test-index"]
            mock_index = MagicMock()
            mock_pc.Index.return_value = mock_index

            rag_prep = RAGPrep()
            result = rag_prep.cleanup_index()

            self.assertTrue(result)
            mock_index.delete.assert_called_once_with(delete_all=True)

    def test_cleanup_index_no_index(self):
        """cleanup_index() succeeds without opening an index when none is listed."""
        with patch('pinecone.Pinecone') as mock_pinecone:
            mock_pc = mock_pinecone.return_value
            mock_pc.list_indexes.return_value.names.return_value = []

            rag_prep = RAGPrep()
            result = rag_prep.cleanup_index()

            self.assertTrue(result)
            mock_pc.Index.assert_not_called()

    def test_cleanup_index_error(self):
        """cleanup_index() returns a falsy result when opening the index raises."""
        with patch('pinecone.Pinecone') as mock_pinecone:
            mock_pc = mock_pinecone.return_value
            mock_pc.list_indexes.return_value.names.return_value = ["test-index"]
            mock_pc.Index.side_effect = Exception("Test error")

            rag_prep = RAGPrep()
            result = rag_prep.cleanup_index()

            self.assertFalse(result)
# Run the test suite when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()