bupa1018 committed on
Commit
df02851
·
1 Parent(s): abdd442

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -2
app.py CHANGED
@@ -282,9 +282,10 @@ def split_into_chunks(texts, references, chunk_size, chunk_overlap):
282
  return chunks
283
 
284
  # Setup Vectorstore
285
- def setup_vectorstore(chunks, model_name, persist_directory):
286
  print("Start setup_vectorstore_function")
287
  embedding_model = HuggingFaceEmbeddings(model_name=model_name)
 
288
  vectorstore = Chroma.from_documents(chunks, embedding=embedding_model, persist_directory=persist_directory)
289
  vectorstore.persist()
290
  print("test1", vectorstore._persist_directory)
@@ -292,6 +293,41 @@ def setup_vectorstore(chunks, model_name, persist_directory):
292
  return vectorstore
293
 
294
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
  # Setup LLM
296
  def setup_llm(model_name, temperature, api_key):
297
  llm = ChatGroq(model=model_name, temperature=temperature, api_key=api_key)
@@ -382,7 +418,7 @@ def initialize():
382
  #print(f"Total number of code_chunks: {len(code_chunks)}")
383
  print(f"Total number of doc_chunks: {len(doc_chunks)}")
384
 
385
- docstore = setup_vectorstore(doc_chunks, EMBEDDING_MODEL_NAME, "./data" )
386
  #codestore = setup_vectorstore(code_chunks, EMBEDDING_MODEL_NAME, PERSIST_CODE_DIRECTORY)
387
 
388
  #llm = setup_llm(LLM_MODEL_NAME, LLM_TEMPERATURE, GROQ_API_KEY)
 
282
  return chunks
283
 
284
  # Setup Vectorstore
285
+ #def setup_vectorstore(chunks, model_name):
286
  print("Start setup_vectorstore_function")
287
  embedding_model = HuggingFaceEmbeddings(model_name=model_name)
288
+ persist_directory =
289
  vectorstore = Chroma.from_documents(chunks, embedding=embedding_model, persist_directory=persist_directory)
290
  vectorstore.persist()
291
  print("test1", vectorstore._persist_directory)
 
293
  return vectorstore
294
 
295
 
296
+ def setup_vectorstore(chunks, model_name):
297
+ print("Start setup_vectorstore_function")
298
+
299
+ # Create a temporary directory to use as the persist_directory
300
+ with tempfile.TemporaryDirectory() as temp_dir:
301
+ print(f"Using temporary directory: {temp_dir}")
302
+
303
+ # Initialize the embedding model
304
+ embedding_model = HuggingFaceEmbeddings(model_name=model_name)
305
+
306
+ # Set up the vectorstore with the temporary directory
307
+ vectorstore = Chroma.from_documents(chunks, embedding=embedding_model, persist_directory=temp_dir)
308
+ vectorstore.persist()
309
+
310
+ # Optionally, display the persist directory for debugging
311
+ print("Persist directory:", vectorstore._persist_directory)
312
+ print("Available methods in vectorstore:", dir(vectorstore))
313
+
314
+ # At this point, you can use your API upload method to upload the persisted vectorstore files
315
+ for root, _, files in os.walk(temp_dir):
316
+ for file_name in files:
317
+ file_path = os.path.join(root, file_name)
318
+ target_path_in_repo = os.path.relpath(file_path, temp_dir)
319
+ print(f"Uploading file: {file_path} -> {target_path_in_repo}")
320
+ api.upload_file(
321
+ path_or_fileobj=file_path,
322
+ path_in_repo=target_path_in_repo,
323
+ repo_id=HF_SPACE_NAME,
324
+ repo_type="space"
325
+ )
326
+ print(f"Uploaded {file_path} to {target_path_in_repo}")
327
+
328
+ print("All files uploaded successfully!")
329
+
330
+
331
  # Setup LLM
332
  def setup_llm(model_name, temperature, api_key):
333
  llm = ChatGroq(model=model_name, temperature=temperature, api_key=api_key)
 
418
  #print(f"Total number of code_chunks: {len(code_chunks)}")
419
  print(f"Total number of doc_chunks: {len(doc_chunks)}")
420
 
421
+ docstore = setup_vectorstore(doc_chunks, EMBEDDING_MODEL_NAME)
422
  #codestore = setup_vectorstore(code_chunks, EMBEDDING_MODEL_NAME, PERSIST_CODE_DIRECTORY)
423
 
424
  #llm = setup_llm(LLM_MODEL_NAME, LLM_TEMPERATURE, GROQ_API_KEY)