# sehatech-demo/tests/test_pinecone_rag.py
# larawehbe's picture
# Upload folder using huggingface_hub
# 965ac15 verified
import sys
import os
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import unittest
from unittest.mock import MagicMock, patch, mock_open
import pinecone
from langchain.schema import Document
from core.rag_engine import RAGPrep
from typing import List, Dict, Optional
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_openai import OpenAIEmbeddings
import pinecone
from tqdm.auto import tqdm
from langchain.schema import Document
from config import get_settings
class TestRAGPrep(unittest.TestCase):
    """Unit tests for ``RAGPrep``.

    ``get_settings``, ``OpenAIEmbeddings`` and ``pinecone.Pinecone`` are
    replaced with mocks (as looked up through ``core.rag_engine``) for the
    duration of every test.
    """

    def _start_patcher(self, patcher):
        """Start ``patcher``, schedule its ``stop`` as a cleanup, return the mock.

        Registering ``stop`` through ``addCleanup`` straight after ``start``
        guarantees the patch is undone even if a later statement in ``setUp``
        raises; ``tearDown`` is not run when ``setUp`` fails.
        """
        mocked = patcher.start()
        self.addCleanup(patcher.stop)
        return mocked

    def setUp(self):
        """Build the mock settings object and install the dependency patches."""
        # This is the real ``get_settings`` imported from ``config``; the patch
        # below only replaces the name inside ``core.rag_engine``.
        self.settings = get_settings()

        # NOTE(review): the API keys and PDF directory are copied from the real
        # settings, so running these tests presumably needs a valid local
        # configuration -- consider fixed dummy values instead.
        self.mock_settings = MagicMock()
        self.mock_settings.INDEX_NAME = "test-index"
        self.mock_settings.PINECONE_API_KEY = self.settings.PINECONE_API_KEY
        self.mock_settings.CLOUD = "aws"
        self.mock_settings.REGION = "us-east-1"
        self.mock_settings.PDF_DIRECTORY = self.settings.PDF_DIRECTORY
        self.mock_settings.CHUNK_SIZE = 1000
        self.mock_settings.CHUNK_OVERLAP = 200
        self.mock_settings.DIMENSIONS = 1536
        self.mock_settings.OPENAI_API_KEY = self.settings.OPENAI_API_KEY

        # Patchers for get_settings and the other external dependencies.
        self.settings_patcher = patch(
            'core.rag_engine.get_settings', return_value=self.mock_settings
        )
        self.embeddings_patcher = patch('core.rag_engine.OpenAIEmbeddings')
        self.pinecone_patcher = patch('core.rag_engine.pinecone.Pinecone')

        # Start every patcher; each one is stopped automatically via addCleanup.
        self.mock_get_settings = self._start_patcher(self.settings_patcher)
        self.mock_embeddings = self._start_patcher(self.embeddings_patcher)
        self.mock_pinecone = self._start_patcher(self.pinecone_patcher)

    def test_init(self):
        """RAGPrep() stores the settings and builds its clients from them."""
        rag_prep = RAGPrep()

        self.assertEqual(rag_prep.index_name, "test-index")
        self.assertEqual(rag_prep.settings, self.mock_settings)
        self.mock_pinecone.assert_called_once_with(
            self.mock_settings.PINECONE_API_KEY
        )
        self.mock_embeddings.assert_called_once_with(
            openai_api_key=self.mock_settings.OPENAI_API_KEY
        )

    @patch('core.rag_engine.DirectoryLoader')
    def test_load_and_split_pdfs(self, mock_loader_class):
        """load_and_split_pdfs() loads PDFs via DirectoryLoader and returns a list."""
        mock_docs = [
            Document(
                page_content="Test content 1",
                metadata={"source": "test1.pdf", "page": 1},
            ),
            Document(
                page_content="Test content 2",
                metadata={"source": "test2.pdf", "page": 1},
            ),
        ]

        # The patched DirectoryLoader class hands back a loader whose load()
        # yields the two documents above.
        mock_loader_instance = MagicMock()
        mock_loader_instance.load.return_value = mock_docs
        mock_loader_class.return_value = mock_loader_instance

        rag_prep = RAGPrep()
        chunks = rag_prep.load_and_split_pdfs()

        self.assertIsInstance(chunks, list)
        mock_loader_class.assert_called_once_with(
            self.mock_settings.PDF_DIRECTORY,
            glob="**/*.pdf",
            loader_cls=PyPDFLoader,
        )
        mock_loader_instance.load.assert_called_once()

    def test_process_and_upload(self):
        """process_and_upload() embeds the chunks once and upserts one vector each."""
        dimensions = 1536
        mock_docs = [
            Document(page_content="Test 1", metadata={"source": "test.pdf", "page": 1}),
            Document(page_content="Test 2", metadata={"source": "test.pdf", "page": 2}),
        ]

        # Embeddings client returned by the patched OpenAIEmbeddings class.
        mock_embeddings_instance = MagicMock()
        mock_embeddings_instance.embed_documents.return_value = [
            [0.1] * dimensions,
            [0.2] * dimensions,
        ]
        self.mock_embeddings.return_value = mock_embeddings_instance

        # Index handle returned by the patched Pinecone client.
        mock_index = MagicMock()
        self.mock_pinecone.return_value.Index.return_value = mock_index

        # Bypass real PDF loading; the two documents above act as the chunks.
        with patch.object(RAGPrep, 'load_and_split_pdfs', return_value=mock_docs):
            rag_prep = RAGPrep()
            rag_prep.process_and_upload()

            mock_embeddings_instance.embed_documents.assert_called_once()
            mock_index.upsert.assert_called()

            # call_args[1] is the keyword-argument dict of the last upsert call.
            called_args = mock_index.upsert.call_args[1]['vectors']
            self.assertEqual(len(called_args), len(mock_docs))
            # Element 1 of every upserted entry is checked for the embedding
            # length.
            self.assertTrue(all(len(v[1]) == dimensions for v in called_args))

    def test_cleanup_index_success(self):
        """cleanup_index() returns True and deletes all vectors of a listed index."""
        # NOTE(review): setUp already patches core.rag_engine.pinecone.Pinecone;
        # this nested patch presumably replaces the same attribute -- confirm.
        with patch('pinecone.Pinecone') as mock_pinecone:
            mock_pc = mock_pinecone.return_value
            mock_pc.list_indexes.return_value.names.return_value = ["test-index"]
            mock_index = MagicMock()
            mock_pc.Index.return_value = mock_index

            rag_prep = RAGPrep()
            result = rag_prep.cleanup_index()

            self.assertTrue(result)
            mock_index.delete.assert_called_once_with(delete_all=True)

    def test_cleanup_index_no_index(self):
        """cleanup_index() returns True without opening an index when none is listed."""
        with patch('pinecone.Pinecone') as mock_pinecone:
            mock_pc = mock_pinecone.return_value
            mock_pc.list_indexes.return_value.names.return_value = []

            rag_prep = RAGPrep()
            result = rag_prep.cleanup_index()

            self.assertTrue(result)
            mock_pc.Index.assert_not_called()

    def test_cleanup_index_error(self):
        """cleanup_index() returns False when opening the index raises."""
        with patch('pinecone.Pinecone') as mock_pinecone:
            mock_pc = mock_pinecone.return_value
            mock_pc.list_indexes.return_value.names.return_value = ["test-index"]
            mock_pc.Index.side_effect = Exception("Test error")

            rag_prep = RAGPrep()
            result = rag_prep.cleanup_index()

            self.assertFalse(result)
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()