Spaces:
Sleeping
Sleeping
from .asg_loader import DocumentLoading | |
from langchain_text_splitters import RecursiveCharacterTextSplitter | |
class TextSplitting: | |
def mineru_recursive_splitter(self, file_path, survey_id, mode): | |
docs = DocumentLoading().load_pdf(file_path, survey_id, mode) | |
text_splitter = RecursiveCharacterTextSplitter( | |
chunk_size=400, | |
chunk_overlap=30, | |
length_function=len, | |
is_separator_regex=False, | |
) | |
texts = text_splitter.create_documents([docs]) | |
return texts | |
def pypdf_recursive_splitter(self, file_path, survey_id): | |
docs = DocumentLoading().pypdf_loader(file_path, survey_id) | |
text_splitter = RecursiveCharacterTextSplitter( | |
chunk_size=300, | |
chunk_overlap=20, | |
length_function=len, | |
is_separator_regex=False, | |
) | |
texts = text_splitter.create_documents([docs]) | |
return texts |