Spaces:

bupa1018
/

KadiAPY_Coding_Assistant

Sleeping

App Files Files Community

bupa1018 commited on Mar 7

Commit

df02851

1 Parent(s): abdd442

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -2

app.py CHANGED Viewed

@@ -282,9 +282,10 @@ def split_into_chunks(texts, references, chunk_size, chunk_overlap):
     return chunks
 # Setup Vectorstore
-def setup_vectorstore(chunks, model_name, persist_directory):
     print("Start setup_vectorstore_function")
     embedding_model = HuggingFaceEmbeddings(model_name=model_name)
     vectorstore = Chroma.from_documents(chunks, embedding=embedding_model, persist_directory=persist_directory)
     vectorstore.persist()
     print("test1", vectorstore._persist_directory)
@@ -292,6 +293,41 @@ def setup_vectorstore(chunks, model_name, persist_directory):
     return vectorstore
 # Setup LLM
 def setup_llm(model_name, temperature, api_key):
     llm = ChatGroq(model=model_name, temperature=temperature, api_key=api_key)
@@ -382,7 +418,7 @@ def initialize():
     #print(f"Total number of code_chunks: {len(code_chunks)}")
     print(f"Total number of doc_chunks: {len(doc_chunks)}")
-    docstore = setup_vectorstore(doc_chunks, EMBEDDING_MODEL_NAME, "./data" )
     #codestore = setup_vectorstore(code_chunks, EMBEDDING_MODEL_NAME, PERSIST_CODE_DIRECTORY)
     #llm = setup_llm(LLM_MODEL_NAME, LLM_TEMPERATURE, GROQ_API_KEY)

     return chunks
 # Setup Vectorstore
+#def setup_vectorstore(chunks, model_name):
     print("Start setup_vectorstore_function")
     embedding_model = HuggingFaceEmbeddings(model_name=model_name)
+    persist_directory =
     vectorstore = Chroma.from_documents(chunks, embedding=embedding_model, persist_directory=persist_directory)
     vectorstore.persist()
     print("test1", vectorstore._persist_directory)
     return vectorstore
+def setup_vectorstore(chunks, model_name):
+    print("Start setup_vectorstore_function")
+    # Create a temporary directory to use as the persist_directory
+    with tempfile.TemporaryDirectory() as temp_dir:
+        print(f"Using temporary directory: {temp_dir}")
+        # Initialize the embedding model
+        embedding_model = HuggingFaceEmbeddings(model_name=model_name)
+        # Set up the vectorstore with the temporary directory
+        vectorstore = Chroma.from_documents(chunks, embedding=embedding_model, persist_directory=temp_dir)
+        vectorstore.persist()
+        # Optionally, display the persist directory for debugging
+        print("Persist directory:", vectorstore._persist_directory)
+        print("Available methods in vectorstore:", dir(vectorstore))
+        # At this point, you can use your API upload method to upload the persisted vectorstore files
+        for root, _, files in os.walk(temp_dir):
+            for file_name in files:
+                file_path = os.path.join(root, file_name)
+                target_path_in_repo = os.path.relpath(file_path, temp_dir)
+                print(f"Uploading file: {file_path} -> {target_path_in_repo}")
+                api.upload_file(
+                    path_or_fileobj=file_path,
+                    path_in_repo=target_path_in_repo,
+                    repo_id=HF_SPACE_NAME,
+                    repo_type="space"
+                )
+                print(f"Uploaded {file_path} to {target_path_in_repo}")
+        print("All files uploaded successfully!")
 # Setup LLM
 def setup_llm(model_name, temperature, api_key):
     llm = ChatGroq(model=model_name, temperature=temperature, api_key=api_key)
     #print(f"Total number of code_chunks: {len(code_chunks)}")
     print(f"Total number of doc_chunks: {len(doc_chunks)}")
+    docstore = setup_vectorstore(doc_chunks, EMBEDDING_MODEL_NAME)
     #codestore = setup_vectorstore(code_chunks, EMBEDDING_MODEL_NAME, PERSIST_CODE_DIRECTORY)
     #llm = setup_llm(LLM_MODEL_NAME, LLM_TEMPERATURE, GROQ_API_KEY)