Spaces:
Sleeping
Sleeping
Update functions.py
Browse files- functions.py +5 -5
functions.py
CHANGED
@@ -204,15 +204,15 @@ def create_retriever_from_chroma(vectorstore_path="./docs/chroma/", search_type=
|
|
204 |
docs = extract_sentences_from_web(links=urls)
|
205 |
|
206 |
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
is_separator_regex = True
|
211 |
)
|
212 |
-
|
213 |
|
214 |
|
215 |
-
|
216 |
documents=split_docs, embedding=embeddings, persist_directory=vectorstore_path
|
217 |
)
|
218 |
|
|
|
204 |
docs = extract_sentences_from_web(links=urls)
|
205 |
|
206 |
|
207 |
+
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
|
208 |
+
chunk_size=chunk_size, chunk_overlap=chunk_overlap,
|
209 |
+
separators=["\n\n \n\n","\n\n\n", "\n\n", r"In \[[0-9]+\]", r"\n+", r"\s+"],
|
210 |
is_separator_regex = True
|
211 |
)
|
212 |
+
split_docs = text_splitter.split_documents(docs)
|
213 |
|
214 |
|
215 |
+
vectorstore = Chroma.from_documents(
|
216 |
documents=split_docs, embedding=embeddings, persist_directory=vectorstore_path
|
217 |
)
|
218 |
|