from typing import Generator, List


def _split_to_max(text: str, max_size: int) -> Generator[str, None, None]:
    """Yield consecutive slices of *text*, each at most *max_size* long."""
    for start in range(0, len(text), max_size):
        yield text[start:start + max_size]


def chunk_document(
    doc: str,
    desired_chunk_size: int,
    max_chunk_size: int
) -> Generator[str, None, None]:
    """Split one document into line-aligned chunks.

    Lines (as recognised by ``str.splitlines``) are accumulated, each
    re-terminated with ``'\\n'``, until the accumulated text is at least
    ``desired_chunk_size`` characters long; that text is then emitted.
    Any lines left over at the end are emitted as a final, shorter chunk.

    No emitted chunk is longer than ``max_chunk_size``. Accumulated text
    that exceeds the cap (for example a single very long line) is split
    into several consecutive chunks instead of being truncated, so no
    document text is dropped.

    Args:
        doc: The document text to split.
        desired_chunk_size: Minimum accumulated length that triggers a
            chunk to be emitted.
        max_chunk_size: Hard upper bound on the length of every chunk.
            Must be at least 1.

    Yields:
        The chunks, in document order.

    Raises:
        ValueError: If ``max_chunk_size`` is smaller than 1. Because this
            is a generator, the error surfaces when iteration starts.
    """
    if max_chunk_size < 1:
        raise ValueError(
            f'max_chunk_size must be at least 1, got {max_chunk_size}'
        )

    # Collect pieces in a list and join once per chunk; repeated string
    # concatenation can be quadratic for large chunks.
    pending: List[str] = []
    pending_len = 0
    for line in doc.splitlines():
        piece = line + '\n'
        pending.append(piece)
        pending_len += len(piece)
        if pending_len >= desired_chunk_size:
            yield from _split_to_max(''.join(pending), max_chunk_size)
            pending = []
            pending_len = 0

    # Flush whatever did not reach the desired size, still honouring the cap.
    if pending:
        yield from _split_to_max(''.join(pending), max_chunk_size)


def chunk_documents(
    docs: List[str],
    desired_chunk_size: int = 500,
    max_chunk_size: int = 3000
) -> List[str]:
    """Chunk every document in *docs* and return all chunks in one flat list.

    Each document is chunked independently with ``chunk_document``; chunks
    never span two documents. The result keeps the order of the documents
    and, within each document, the order of its chunks.

    Args:
        docs: The documents to split.
        desired_chunk_size: Forwarded to ``chunk_document``.
        max_chunk_size: Forwarded to ``chunk_document``.

    Returns:
        A flat list containing the chunks of all documents.

    Raises:
        ValueError: If ``max_chunk_size`` is smaller than 1 and *docs* is
            not empty.
    """
    chunks: List[str] = []
    for doc in docs:
        chunks.extend(chunk_document(
            doc=doc,
            desired_chunk_size=desired_chunk_size,
            max_chunk_size=max_chunk_size
        ))
    return chunks