Samarth991 committed on
Commit
a0b6dfc
·
1 Parent(s): 6a1b5d7
Files changed (1) hide show
  1. PDF_Reader.py +3 -1
PDF_Reader.py CHANGED
@@ -5,6 +5,7 @@ from langchain_chroma import Chroma
5
  from langchain_community.document_loaders import PyPDFLoader
6
  from langchain.embeddings import HuggingFaceEmbeddings
7
  from PyPDF2 import PdfReader
 
8
 
9
  embedding_modelPath = "sentence-transformers/all-MiniLM-l6-v2"
10
  embeddings = HuggingFaceEmbeddings(model_name=embedding_modelPath,model_kwargs = {'device':'cpu'},encode_kwargs = {'normalize_embeddings': False})
@@ -32,6 +33,7 @@ def read_pdf_text(pdf_path):
32
 
33
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
34
  text_chunks = text_splitter.split_text(text)
 
35
  return text_chunks
36
 
37
  def read_pdf(pdf_path):
@@ -50,7 +52,7 @@ def PDF_4_QA(file_path):
50
  #docs = read_pdf(file_path)
51
  #cleaned_docs = Chunks(docs)
52
  cleaned_docs = read_pdf_text(file_path)
53
- vectordb = Chroma.from_documents(
54
  documents=cleaned_docs,
55
  embedding=embeddings,
56
  persist_directory="Chroma/docs"
 
5
  from langchain_community.document_loaders import PyPDFLoader
6
  from langchain.embeddings import HuggingFaceEmbeddings
7
  from PyPDF2 import PdfReader
8
+ from langchain.docstore.document import Document
9
 
10
  embedding_modelPath = "sentence-transformers/all-MiniLM-l6-v2"
11
  embeddings = HuggingFaceEmbeddings(model_name=embedding_modelPath,model_kwargs = {'device':'cpu'},encode_kwargs = {'normalize_embeddings': False})
 
33
 
34
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
35
  text_chunks = text_splitter.split_text(text)
36
+ text_chunks =
37
  return text_chunks
38
 
39
  def read_pdf(pdf_path):
 
52
  #docs = read_pdf(file_path)
53
  #cleaned_docs = Chunks(docs)
54
  cleaned_docs = read_pdf_text(file_path)
55
+ vectordb = Chroma.from_texts(
56
  documents=cleaned_docs,
57
  embedding=embeddings,
58
  persist_directory="Chroma/docs"