JohnsonMLEngineer committed on
Commit
4ac5387
·
verified ·
1 Parent(s): 9b05a53

Update document_processor.py

Browse files
Files changed (1) hide show
  1. document_processor.py +2 -3
document_processor.py CHANGED
@@ -1,9 +1,8 @@
1
- from langchain_community.document_loaders import PyPDFDirectoryLoader
2
  from langchain.text_splitter import RecursiveCharacterTextSplitter
3
 
4
- directory = PyPDFDirectoryLoader("documents/")
5
  def read_documents(directory):
6
- return PyPDFDirectoryLoader(directory).load()
7
 
8
  def chunk_data(docs, chunk_size=800, chunk_overlap=40):
9
  return RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap).split_documents(docs)
 
1
+ from langchain_community.document_loaders import PyPDFLoader
2
  from langchain.text_splitter import RecursiveCharacterTextSplitter
3
 
 
4
  def read_documents(directory):
5
+ return PyPDFLoader(directory).load()
6
 
7
  def chunk_data(docs, chunk_size=800, chunk_overlap=40):
8
  return RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap).split_documents(docs)