Spaces:
Running
Running
File size: 849 Bytes
d514965 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
from dataclasses import dataclass
from typing import List, Tuple
from _utils.langchain_utils.Splitter_class import Splitter
from _utils.models.gerar_documento import DocumentChunk
@dataclass
class HandleFilesClass:
    """Gather chunks and page texts from several PDFs through a splitter."""

    async def get_full_text_and_all_PDFs_chunks(
        self,
        listaPDFs: List[str],
        splitterObject: Splitter,
        should_use_llama_parse: bool,
        isBubble: bool,
    ) -> Tuple[List[DocumentChunk], List[str]]:
        """Load and split every PDF, returning the combined chunks and pages.

        Each entry of ``listaPDFs`` is awaited in order through
        ``splitterObject.load_and_split_document``; the two flags are passed
        through to that call unchanged.

        Args:
            listaPDFs: Identifiers of the PDFs to process, in order.
            splitterObject: Object exposing an awaitable
                ``load_and_split_document(pdf, should_use_llama_parse,
                isBubble)`` that yields a ``(chunks, pages)`` pair.
            should_use_llama_parse: Forwarded to the splitter.
            isBubble: Forwarded to the splitter.

        Returns:
            A ``(chunks, pages)`` tuple holding the chunks and the pages of
            all processed PDFs, concatenated in input order. Both lists are
            empty when ``listaPDFs`` is empty.
        """
        all_PDFs_chunks: List[DocumentChunk] = []
        pages: List[str] = []

        # PDFs are processed one at a time so the output keeps the input order.
        for pdf_path in listaPDFs:
            chunks, pdf_pages = await splitterObject.load_and_split_document(
                pdf_path, should_use_llama_parse, isBubble
            )
            all_PDFs_chunks.extend(chunks)
            # Accumulate instead of rebinding `pages`; rebinding kept only
            # the pages of the last PDF and dropped all earlier ones.
            pages.extend(pdf_pages)

        return all_PDFs_chunks, pages
|