Samarth991 committed on
Commit
d45c8e8
·
verified ·
1 Parent(s): 1d73ddf

Update PDF_Reader.py

Browse files
Files changed (1) hide show
  1. PDF_Reader.py +2 -1
PDF_Reader.py CHANGED
@@ -2,6 +2,7 @@ from langchain_experimental.text_splitter import SemanticChunker
2
  from langchain_chroma import Chroma
3
  from langchain_community.document_loaders import PyPDFLoader
4
  from langchain.embeddings import HuggingFaceEmbeddings
 
5
  embedding_modelPath = "sentence-transformers/all-MiniLM-l6-v2"
6
  embeddings = HuggingFaceEmbeddings(model_name=embedding_modelPath,model_kwargs = {'device':'cpu'},encode_kwargs = {'normalize_embeddings': False})
7
 
@@ -20,7 +21,7 @@ def replace_t_with_space(list_of_documents):
20
  doc.page_content = doc.page_content.replace('\t', ' ') # Replace tabs with spaces
21
  return list_of_documents
22
 
23
- def read_pdf(uploaded_file):
24
  loader = PyPDFLoader(pdf_path)
25
  docs = loader.load()
26
  print("Total Documents :",len(docs))
 
2
  from langchain_chroma import Chroma
3
  from langchain_community.document_loaders import PyPDFLoader
4
  from langchain.embeddings import HuggingFaceEmbeddings
5
+
6
  embedding_modelPath = "sentence-transformers/all-MiniLM-l6-v2"
7
  embeddings = HuggingFaceEmbeddings(model_name=embedding_modelPath,model_kwargs = {'device':'cpu'},encode_kwargs = {'normalize_embeddings': False})
8
 
 
21
  doc.page_content = doc.page_content.replace('\t', ' ') # Replace tabs with spaces
22
  return list_of_documents
23
 
24
+ def read_pdf(pdf_path):
25
  loader = PyPDFLoader(pdf_path)
26
  docs = loader.load()
27
  print("Total Documents :",len(docs))