"""Tests for ``Splitter.load_and_split_document`` using a PDF fixture."""

import os

import pytest

from _utils.splitters.Splitter_class import Splitter
from _utils.models.gerar_relatorio import (
    DocumentChunk,
)

base_dir = os.path.dirname(os.path.abspath(__file__))

chunk_size = 1000
chunk_overlap = 200

# NOTE(review): the fixture path is resolved from the current working
# directory, so these tests presumably expect pytest to be launched from the
# directory that contains ``tests/`` -- confirm.
cwd = os.getcwd()
pdf_file = os.path.join(cwd, "tests", "fixtures", "_pdf-uma-pagina.pdf")


def _assert_split_output(chunks, strings):
    """Check the container and element types of a split result.

    Both values must be non-empty lists; ``strings`` must hold only ``str``
    items and ``chunks`` only ``DocumentChunk`` items.
    """
    assert isinstance(chunks, list)
    assert isinstance(strings, list)
    assert len(chunks) > 0
    assert len(strings) > 0
    assert all(isinstance(text, str) for text in strings)
    assert all(isinstance(chunk, DocumentChunk) for chunk in chunks)


def _assert_chunk_lengths(chunks, target, tolerance):
    """Check every chunk's content length is strictly within ``tolerance`` of ``target``."""
    lower = target - tolerance
    upper = target + tolerance
    assert all(lower < len(chunk.content) < upper for chunk in chunks)


class TestSplitters:
    """Async tests for ``Splitter.load_and_split_document``."""

    # Shared splitter built from the module-level chunk size and overlap.
    splitter = Splitter(chunk_size, chunk_overlap)

    @pytest.mark.asyncio
    async def test_load_and_split_document_No_llama_parse_No_Bubble(self, monkeypatch):
        """Split without llama parse and without Bubble, default chunk size."""
        use_llama_parse = False
        is_bubble = False

        result_chunks, result_strings = await self.splitter.load_and_split_document(
            pdf_file, use_llama_parse, is_bubble
        )

        _assert_split_output(result_chunks, result_strings)
        _assert_chunk_lengths(result_chunks, chunk_size, 100)

    @pytest.mark.asyncio
    async def test_load_and_split_document_No_llama_parse_No_Bubble_with_bigger_chunk(
        self, monkeypatch
    ):
        """Split without llama parse and without Bubble, using 3500-char chunks."""
        use_llama_parse = False
        is_bubble = False
        bigger_chunk_size = 3500
        splitter_temp = Splitter(bigger_chunk_size, chunk_overlap)

        result_chunks, result_strings = await splitter_temp.load_and_split_document(
            pdf_file, use_llama_parse, is_bubble
        )

        _assert_split_output(result_chunks, result_strings)
        _assert_chunk_lengths(result_chunks, bigger_chunk_size, 200)

    @pytest.mark.asyncio
    async def test_load_and_split_document_With_llama_parse_No_Bubble(
        self, monkeypatch
    ):
        """Split with llama parse enabled and without Bubble."""
        use_llama_parse = True
        is_bubble = False

        result_chunks, result_strings = await self.splitter.load_and_split_document(
            pdf_file, use_llama_parse, is_bubble
        )

        _assert_split_output(result_chunks, result_strings)
        # The check below does not pass yet --> it will be fixed in the future
        # assert all(
        #     (chunk_size - 100) < len(item.content) < (chunk_size + 100)
        #     for item in result_chunks
        # )