Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -282,9 +282,10 @@ def split_into_chunks(texts, references, chunk_size, chunk_overlap):
|
|
282 |
return chunks
|
283 |
|
284 |
# Setup Vectorstore
|
285 |
-
def setup_vectorstore(chunks, model_name
|
286 |
print("Start setup_vectorstore_function")
|
287 |
embedding_model = HuggingFaceEmbeddings(model_name=model_name)
|
|
|
288 |
vectorstore = Chroma.from_documents(chunks, embedding=embedding_model, persist_directory=persist_directory)
|
289 |
vectorstore.persist()
|
290 |
print("test1", vectorstore._persist_directory)
|
@@ -292,6 +293,41 @@ def setup_vectorstore(chunks, model_name, persist_directory):
|
|
292 |
return vectorstore
|
293 |
|
294 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
295 |
# Setup LLM
|
296 |
def setup_llm(model_name, temperature, api_key):
|
297 |
llm = ChatGroq(model=model_name, temperature=temperature, api_key=api_key)
|
@@ -382,7 +418,7 @@ def initialize():
|
|
382 |
#print(f"Total number of code_chunks: {len(code_chunks)}")
|
383 |
print(f"Total number of doc_chunks: {len(doc_chunks)}")
|
384 |
|
385 |
-
docstore = setup_vectorstore(doc_chunks, EMBEDDING_MODEL_NAME
|
386 |
#codestore = setup_vectorstore(code_chunks, EMBEDDING_MODEL_NAME, PERSIST_CODE_DIRECTORY)
|
387 |
|
388 |
#llm = setup_llm(LLM_MODEL_NAME, LLM_TEMPERATURE, GROQ_API_KEY)
|
|
|
282 |
return chunks
|
283 |
|
284 |
# Setup Vectorstore
|
285 |
+
#def setup_vectorstore(chunks, model_name):
|
286 |
print("Start setup_vectorstore_function")
|
287 |
embedding_model = HuggingFaceEmbeddings(model_name=model_name)
|
288 |
+
persist_directory =
|
289 |
vectorstore = Chroma.from_documents(chunks, embedding=embedding_model, persist_directory=persist_directory)
|
290 |
vectorstore.persist()
|
291 |
print("test1", vectorstore._persist_directory)
|
|
|
293 |
return vectorstore
|
294 |
|
295 |
|
296 |
+
def setup_vectorstore(chunks, model_name):
    """Build a Chroma vectorstore from ``chunks`` and upload its files.

    The store is persisted into a temporary directory. Every file found
    under that directory is then uploaded through the module-level ``api``
    object to the ``HF_SPACE_NAME`` repository (``repo_type="space"``),
    using the file's path relative to the temporary directory as its path
    inside the repository.

    Args:
        chunks: Documents handed to ``Chroma.from_documents``.
        model_name: Model name handed to ``HuggingFaceEmbeddings``.

    Returns:
        The ``Chroma`` vectorstore created from ``chunks``. Callers assign
        the result (``docstore = setup_vectorstore(...)``), so the store is
        returned instead of implicitly returning ``None``.

    NOTE(review): the temporary directory is removed as soon as the
    ``with`` block exits, so the directory the returned store was persisted
    to no longer exists on disk. Whether the returned object can still be
    queried afterwards depends on the Chroma backend in use -- confirm, or
    reload the store from the uploaded files before querying.
    """
    print("Start setup_vectorstore_function")

    # Create a temporary directory to use as the persist_directory.
    with tempfile.TemporaryDirectory() as temp_dir:
        print(f"Using temporary directory: {temp_dir}")

        # Initialize the embedding model.
        embedding_model = HuggingFaceEmbeddings(model_name=model_name)

        # Set up the vectorstore with the temporary directory.
        vectorstore = Chroma.from_documents(
            chunks,
            embedding=embedding_model,
            persist_directory=temp_dir,
        )
        vectorstore.persist()

        # Debug output: where the store was persisted and what it exposes.
        print("Persist directory:", vectorstore._persist_directory)
        print("Available methods in vectorstore:", dir(vectorstore))

        # Upload the persisted files while the temporary directory still
        # exists; it is deleted when this ``with`` block is left.
        for root, _, files in os.walk(temp_dir):
            for file_name in files:
                file_path = os.path.join(root, file_name)
                # Keep the layout below temp_dir as the layout in the repo.
                target_path_in_repo = os.path.relpath(file_path, temp_dir)
                print(f"Uploading file: {file_path} -> {target_path_in_repo}")
                api.upload_file(
                    path_or_fileobj=file_path,
                    path_in_repo=target_path_in_repo,
                    repo_id=HF_SPACE_NAME,
                    repo_type="space"
                )
                print(f"Uploaded {file_path} to {target_path_in_repo}")

        print("All files uploaded successfully!")

    return vectorstore
|
329 |
+
|
330 |
+
|
331 |
# Setup LLM
|
332 |
def setup_llm(model_name, temperature, api_key):
|
333 |
llm = ChatGroq(model=model_name, temperature=temperature, api_key=api_key)
|
|
|
418 |
#print(f"Total number of code_chunks: {len(code_chunks)}")
|
419 |
print(f"Total number of doc_chunks: {len(doc_chunks)}")
|
420 |
|
421 |
+
docstore = setup_vectorstore(doc_chunks, EMBEDDING_MODEL_NAME)
|
422 |
#codestore = setup_vectorstore(code_chunks, EMBEDDING_MODEL_NAME, PERSIST_CODE_DIRECTORY)
|
423 |
|
424 |
#llm = setup_llm(LLM_MODEL_NAME, LLM_TEMPERATURE, GROQ_API_KEY)
|