Spaces:
Sleeping
Sleeping
File size: 373 Bytes
4ac5387 479a9a1 4ac5387 479a9a1 |
1 2 3 4 5 6 7 8 |
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
def read_documents(directory):
    """Load PDF content through ``PyPDFLoader``.

    Args:
        directory: Path to a single PDF file, or to a directory that holds
            PDF files. Any value that is not an existing directory is passed
            to ``PyPDFLoader`` unchanged, exactly as before.

    Returns:
        For a single path, whatever ``PyPDFLoader(path).load()`` returns.
        For a directory, one list that concatenates the ``load()`` results
        of every ``.pdf`` file found directly inside it (sub-directories are
        not searched), in sorted path order. A directory without PDF files
        yields an empty list.
    """
    import os  # local import: keeps this block self-contained

    # Anything that is not a directory keeps the original single-call path.
    if not os.path.isdir(directory):
        return PyPDFLoader(directory).load()

    # PyPDFLoader handles one file per instance, so walk the directory here.
    # Sorting makes the document order independent of filesystem listing order.
    pdf_paths = sorted(
        os.path.join(directory, name)
        for name in os.listdir(directory)
        if name.lower().endswith(".pdf")
    )

    documents = []
    for pdf_path in pdf_paths:
        # Skip directories that merely happen to be named "*.pdf".
        if os.path.isfile(pdf_path):
            documents.extend(PyPDFLoader(pdf_path).load())
    return documents
def chunk_data(docs, chunk_size=800, chunk_overlap=40):
    """Split ``docs`` into chunks with a ``RecursiveCharacterTextSplitter``.

    Args:
        docs: Documents to split; passed straight to ``split_documents``.
        chunk_size: Forwarded to the splitter's ``chunk_size`` argument.
        chunk_overlap: Forwarded to the splitter's ``chunk_overlap`` argument.

    Returns:
        The result of the splitter's ``split_documents(docs)`` call.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(docs)