Spaces:
Sleeping
Sleeping
Update document_processor.py
Browse files- document_processor.py +2 -3
document_processor.py
CHANGED
@@ -1,9 +1,8 @@
|
|
1 |
-
from langchain_community.document_loaders import PyPDFDirectoryLoader
|
2 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
3 |
|
4 |
-
directory = PyPDFDirectoryLoader("documents/")
|
5 |
def read_documents(directory):
|
6 |
-
return
|
7 |
|
8 |
def chunk_data(docs, chunk_size=800, chunk_overlap=40):
    """Split ``docs`` into smaller chunks.

    Args:
        docs: Documents to split; handed unchanged to the splitter's
            ``split_documents`` method.
        chunk_size: Forwarded as the splitter's ``chunk_size`` setting.
        chunk_overlap: Forwarded as the splitter's ``chunk_overlap`` setting.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_documents`` returns
        for ``docs``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(docs)
|
|
|
1 |
+
from langchain_community.document_loaders import PyPDFLoader
|
2 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
3 |
|
|
|
4 |
def read_documents(directory):
    """Load PDF content from ``directory``.

    Args:
        directory: Path to a folder of PDF files, or the path of a single
            PDF file.

    Returns:
        The result of the selected loader's ``load()`` call.
    """
    import os

    # PyPDFLoader only accepts a single file, so a folder path (which the
    # parameter name suggests callers pass) has to go through the directory
    # loader instead.
    if os.path.isdir(directory):
        from langchain_community.document_loaders import PyPDFDirectoryLoader

        return PyPDFDirectoryLoader(directory).load()

    # Anything that is not a folder keeps the existing single-file behavior.
    return PyPDFLoader(directory).load()
|
6 |
|
7 |
def chunk_data(docs, chunk_size=800, chunk_overlap=40):
    """Split ``docs`` into smaller chunks.

    Args:
        docs: Documents to split; handed unchanged to the splitter's
            ``split_documents`` method.
        chunk_size: Forwarded as the splitter's ``chunk_size`` setting.
        chunk_overlap: Forwarded as the splitter's ``chunk_overlap`` setting.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_documents`` returns
        for ``docs``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(docs)
|