Spaces:
Sleeping
Sleeping
from langchain_community.document_loaders import PyMuPDFLoader | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
def load_and_split_pdf(filepath, *, chunk_size: int = 1000, chunk_overlap: int = 200) -> list:
    """Load a PDF and split its contents into overlapping text chunks.

    The file is read with ``PyMuPDFLoader`` and the resulting documents are
    passed through a ``RecursiveCharacterTextSplitter``.

    Args:
        filepath: Location of the PDF, forwarded unchanged to
            ``PyMuPDFLoader``.
        chunk_size: Value forwarded as the splitter's ``chunk_size``.
            Defaults to 1000, the value previously hard-coded here.
        chunk_overlap: Value forwarded as the splitter's ``chunk_overlap``.
            Defaults to 200, the value previously hard-coded here.

    Returns:
        The result of ``split_documents`` applied to the loaded documents.

    Any exception raised by the loader or the splitter propagates to the
    caller unchanged.
    """
    documents = PyMuPDFLoader(filepath).load()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(documents)