Spaces:

Samarth991
/

RAG-PDF_With_LLAMA-3B

Sleeping

Samarth991 committed on Dec 25, 2024

Commit

a0b6dfc

1 Parent(s): 6a1b5d7

updated

Files changed (1) hide show

PDF_Reader.py CHANGED Viewed

@@ -5,6 +5,7 @@ from langchain_chroma import Chroma
 from langchain_community.document_loaders import PyPDFLoader
 from langchain.embeddings import HuggingFaceEmbeddings
 from PyPDF2 import PdfReader
 embedding_modelPath = "sentence-transformers/all-MiniLM-l6-v2"
 embeddings = HuggingFaceEmbeddings(model_name=embedding_modelPath,model_kwargs = {'device':'cpu'},encode_kwargs = {'normalize_embeddings': False})
@@ -32,6 +33,7 @@ def read_pdf_text(pdf_path):
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
     text_chunks = text_splitter.split_text(text)
     return text_chunks
 def read_pdf(pdf_path):
@@ -50,7 +52,7 @@ def PDF_4_QA(file_path):
     #docs = read_pdf(file_path)
     #cleaned_docs = Chunks(docs)
     cleaned_docs = read_pdf_text(file_path)
-    vectordb = Chroma.from_documents(
         documents=cleaned_docs,
         embedding=embeddings,
         persist_directory="Chroma/docs"

 from langchain_community.document_loaders import PyPDFLoader
 from langchain.embeddings import HuggingFaceEmbeddings
 from PyPDF2 import PdfReader
+from langchain.docstore.document import Document
 embedding_modelPath = "sentence-transformers/all-MiniLM-l6-v2"
 embeddings = HuggingFaceEmbeddings(model_name=embedding_modelPath,model_kwargs = {'device':'cpu'},encode_kwargs = {'normalize_embeddings': False})
     text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
     text_chunks = text_splitter.split_text(text)
+    text_chunks =
     return text_chunks
 def read_pdf(pdf_path):
     #docs = read_pdf(file_path)
     #cleaned_docs = Chunks(docs)
     cleaned_docs = read_pdf_text(file_path)
+    vectordb = Chroma.from_texts(
         documents=cleaned_docs,
         embedding=embeddings,
         persist_directory="Chroma/docs"