from dataclasses import dataclass
from typing import List, Tuple

from _utils.langchain_utils.Splitter_class import Splitter
from _utils.models.gerar_documento import DocumentChunk


@dataclass
class HandleFilesClass:
    """Helper that runs a ``Splitter`` over a list of PDFs and merges the results."""

    async def get_full_text_and_all_PDFs_chunks(
        self,
        listaPDFs: List[str],
        splitterObject: Splitter,
        should_use_llama_parse: bool,
        isBubble: bool,
    ) -> Tuple[List[DocumentChunk], List[str]]:
        """Split every PDF in *listaPDFs* and return the combined chunks and pages.

        Each entry is passed, one at a time and in order, to
        ``splitterObject.load_and_split_document`` together with the two
        flags; the ``(chunks, pages)`` pairs it returns are concatenated.

        Args:
            listaPDFs: Identifiers of the PDFs to process, forwarded as-is to
                the splitter.
            splitterObject: Object whose awaitable ``load_and_split_document``
                does the actual loading and splitting.
            should_use_llama_parse: Forwarded unchanged to the splitter.
            isBubble: Forwarded unchanged to the splitter.

        Returns:
            A ``(all_chunks, all_pages)`` tuple holding the chunks and the
            pages of all PDFs, in input order. Both lists are empty when
            *listaPDFs* is empty.
        """
        all_PDFs_chunks: List[DocumentChunk] = []
        all_pages: List[str] = []

        # Load and process each document sequentially so the output order
        # follows the order of listaPDFs.
        for pdf_path in listaPDFs:
            chunks, pages = await splitterObject.load_and_split_document(
                pdf_path, should_use_llama_parse, isBubble
            )
            all_PDFs_chunks.extend(chunks)
            # Accumulate instead of rebinding: previously only the pages of
            # the last PDF survived the loop.
            # NOTE(review): assumes `pages` is a list of strings, as the
            # return annotation declares - confirm against Splitter.
            all_pages.extend(pages)

        return all_PDFs_chunks, all_pages