Spaces:
Sleeping
Sleeping
Update PDF_Reader.py
Browse files - PDF_Reader.py +2 -1
PDF_Reader.py
CHANGED
@@ -2,6 +2,7 @@ from langchain_experimental.text_splitter import SemanticChunker
|
|
2 |
from langchain_chroma import Chroma
|
3 |
from langchain_community.document_loaders import PyPDFLoader
|
4 |
from langchain.embeddings import HuggingFaceEmbeddings
|
|
|
5 |
embedding_modelPath = "sentence-transformers/all-MiniLM-l6-v2"
|
6 |
embeddings = HuggingFaceEmbeddings(model_name=embedding_modelPath,model_kwargs = {'device':'cpu'},encode_kwargs = {'normalize_embeddings': False})
|
7 |
|
@@ -20,7 +21,7 @@ def replace_t_with_space(list_of_documents):
|
|
20 |
doc.page_content = doc.page_content.replace('\t', ' ') # Replace tabs with spaces
|
21 |
return list_of_documents
|
22 |
|
23 |
-
def read_pdf(
|
24 |
loader = PyPDFLoader(pdf_path)
|
25 |
docs = loader.load()
|
26 |
print("Total Documents :",len(docs))
|
|
|
2 |
from langchain_chroma import Chroma
|
3 |
from langchain_community.document_loaders import PyPDFLoader
|
4 |
from langchain.embeddings import HuggingFaceEmbeddings
|
5 |
+
|
6 |
embedding_modelPath = "sentence-transformers/all-MiniLM-l6-v2"
|
7 |
embeddings = HuggingFaceEmbeddings(model_name=embedding_modelPath,model_kwargs = {'device':'cpu'},encode_kwargs = {'normalize_embeddings': False})
|
8 |
|
|
|
21 |
doc.page_content = doc.page_content.replace('\t', ' ') # Replace tabs with spaces
|
22 |
return list_of_documents
|
23 |
|
24 |
+
def read_pdf(pdf_path):
|
25 |
loader = PyPDFLoader(pdf_path)
|
26 |
docs = loader.load()
|
27 |
print("Total Documents :",len(docs))
|