File size: 849 Bytes
d514965
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from dataclasses import dataclass
from typing import List, Tuple

from _utils.langchain_utils.Splitter_class import Splitter
from _utils.models.gerar_documento import DocumentChunk


@dataclass
class HandleFilesClass:
    """Helper that loads several PDFs through a splitter and merges the results."""

    async def get_full_text_and_all_PDFs_chunks(
        self,
        listaPDFs: List[str],
        splitterObject: Splitter,
        should_use_llama_parse: bool,
        isBubble: bool,
    ) -> Tuple[List[DocumentChunk], List[str]]:
        """Load and split every PDF, returning the combined chunks and pages.

        Each path is awaited sequentially via
        ``splitterObject.load_and_split_document``; results are concatenated
        in the order the paths appear in ``listaPDFs``.

        Args:
            listaPDFs: Paths of the PDFs to process.
            splitterObject: Object exposing the awaitable
                ``load_and_split_document(path, should_use_llama_parse, isBubble)``
                which yields a ``(chunks, pages)`` pair.
            should_use_llama_parse: Forwarded unchanged to the splitter.
            isBubble: Forwarded unchanged to the splitter.

        Returns:
            A ``(all_chunks, all_pages)`` tuple. Both lists are empty when
            ``listaPDFs`` is empty.
        """
        all_PDFs_chunks: List[DocumentChunk] = []
        all_pages: List[str] = []

        for pdf_path in listaPDFs:
            chunks, pages = await splitterObject.load_and_split_document(
                pdf_path, should_use_llama_parse, isBubble
            )
            # Extend in place: avoids re-copying the accumulated list on
            # every iteration.
            all_PDFs_chunks.extend(chunks)
            # Accumulate pages as well; previously each iteration overwrote
            # the pages, so only the last PDF's pages were returned.
            # NOTE(review): assumes the splitter returns pages as a list of
            # strings, as the return annotation states — confirm.
            all_pages.extend(pages)

        return all_PDFs_chunks, all_pages