Chris4K committed on
Commit
49b8a0a
·
1 Parent(s): ca3c913

Update vector_store_retriever.py

Browse files
Files changed (1) hide show
  1. vector_store_retriever.py +0 -20
vector_store_retriever.py CHANGED
@@ -96,26 +96,6 @@ load_model("meta-llama/Llama-2-70b-chat-hf")
96
  #####
97
  #########
98
 
99
- from langchain.document_loaders import PyPDFDirectoryLoader
100
- from langchain.document_loaders.utils import RecursiveCharacterTextSplitter
101
- from langchain.vectorstores import Chroma
102
-
103
- def load_and_process_pdfs(directory_path: str, chunk_size: int = 500, chunk_overlap: int = 200, collection_name: str = "my-collection"):
104
- # Load PDF files from the specified directory
105
- loader = PyPDFDirectoryLoader(directory_path)
106
- documents = loader.load()
107
-
108
- # Split the text into chunks
109
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
110
- texts = text_splitter.split_documents(documents)
111
-
112
- # Create a Chroma vector store from the processed texts
113
- db = Chroma.from_documents(texts, hf, collection_name=collection_name)
114
-
115
- return db # You can return the Chroma vector store if needed
116
-
117
- # Call the function with the desired directory path and parameters
118
- load_and_process_pdfs("new_papers/")
119
 
120
  ###
121
  ###
 
96
  #####
97
  #########
98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
  ###
101
  ###