File size: 373 Bytes
4ac5387
479a9a1
 
 
4ac5387
479a9a1
 
 
1
2
3
4
5
6
7
8
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

def read_documents(directory):
    """Load PDF content from a single PDF file or from a directory of PDFs.

    ``PyPDFLoader`` is constructed with one path at a time, so a directory
    (which the parameter name invites) is expanded here into the ``.pdf``
    files directly inside it. Any value that is not an existing directory
    is handed to ``PyPDFLoader`` unchanged, which keeps the previous
    single-file behaviour intact.

    Args:
        directory: Path to a PDF file, or to a directory containing PDF
            files. Sub-directories are not searched.

    Returns:
        For a non-directory argument, whatever ``PyPDFLoader(...).load()``
        returns. For a directory, a list holding the loaded items of every
        PDF in it, concatenated in sorted filename order (empty when the
        directory contains no PDFs).
    """
    # Local import keeps this block self-contained without touching the
    # module's import section.
    import os

    if not os.path.isdir(directory):
        # Original code path: let the loader deal with the value as given.
        return PyPDFLoader(directory).load()

    documents = []
    # Sorted so the output order is deterministic across platforms.
    for entry in sorted(os.listdir(directory)):
        pdf_path = os.path.join(directory, entry)
        if entry.lower().endswith(".pdf") and os.path.isfile(pdf_path):
            documents.extend(PyPDFLoader(pdf_path).load())
    return documents

def chunk_data(docs, chunk_size=800, chunk_overlap=40):
    """Split loaded documents into smaller chunks.

    Builds a ``RecursiveCharacterTextSplitter`` configured with the given
    size and overlap, then runs its ``split_documents`` over ``docs``.

    Args:
        docs: Documents to split, passed straight to ``split_documents``
            (presumably the output of ``read_documents`` — confirm against
            callers).
        chunk_size: Forwarded to the splitter as ``chunk_size``.
            Defaults to 800.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.
            Defaults to 40.

    Returns:
        The result of ``split_documents(docs)``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(docs)