Spaces:

bupa1018
/

KadiAPY_Coding_Assistant

Sleeping

bupa1018 committed on Mar 22

Commit

125fa0c

1 Parent(s): 25830df

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -10,6 +10,8 @@ from process_repo import extract_repo_files
 from chunking import chunk_pythoncode_and_add_metadata, chunk_text_and_add_metadata
 from vectorstore import setup_vectorstore
 from llm import get_groq_llm
 from kadi_apy_bot import KadiAPYBot
 from repo_versions import store_message_from_json
@@ -51,21 +53,21 @@ def initialize():
-    download_gitlab_repo_to_hfspace(GITLAB_API_URL, GITLAB_PROJECT_ID, GITLAB_PROJECT_VERSION, DATA_DIR, hf_api, HF_SPACE_NAME)
-    code_texts, code_references = extract_repo_files(DATA_DIR, ['kadi_apy'], [])
-    doc_texts, doc_references = extract_repo_files(DATA_DIR, ['docs'], [])
-    print("Length of code_texts: ", len(code_texts))
-    print("Length of doc_files: ", len(doc_texts))
-    code_chunks = chunk_pythoncode_and_add_metadata(code_texts, code_references)
-    doc_chunks = chunk_text_and_add_metadata(doc_texts, doc_references, CHUNK_SIZE, CHUNK_OVERLAP)
-    print(f"Total number of code_chunks: {len(code_chunks)}")
-    print(f"Total number of doc_chunks: {len(doc_chunks)}")
-    vectorstore = setup_vectorstore(doc_chunks + code_chunks, EMBEDDING_MODEL_NAME, VECTORSTORE_DIRECTORY)
     llm = get_groq_llm(LLM_MODEL_NAME, LLM_MODEL_TEMPERATURE, GROQ_API_KEY)
     kadiAPY_bot = KadiAPYBot(llm, vectorstore)

 from chunking import chunk_pythoncode_and_add_metadata, chunk_text_and_add_metadata
 from vectorstore import setup_vectorstore
 from llm import get_groq_llm
+from vectorstore import get_chroma_vectorstore
+from embeddings import get_SFR_Code_embedding_model
 from kadi_apy_bot import KadiAPYBot
 from repo_versions import store_message_from_json
+    # download_gitlab_repo_to_hfspace(GITLAB_API_URL, GITLAB_PROJECT_ID, GITLAB_PROJECT_VERSION, DATA_DIR, hf_api, HF_SPACE_NAME)
+    # code_texts, code_references = extract_repo_files(DATA_DIR, ['kadi_apy'], [])
+    # doc_texts, doc_references = extract_repo_files(DATA_DIR, ['docs'], [])
+    # print("Length of code_texts: ", len(code_texts))
+    # print("Length of doc_files: ", len(doc_texts))
+    # code_chunks = chunk_pythoncode_and_add_metadata(code_texts, code_references)
+    # doc_chunks = chunk_text_and_add_metadata(doc_texts, doc_references, CHUNK_SIZE, CHUNK_OVERLAP)
+    # print(f"Total number of code_chunks: {len(code_chunks)}")
+    # print(f"Total number of doc_chunks: {len(doc_chunks)}")
+    vectorstore = get_chroma_vectorstore(get_SFR_Code_embedding_model(), "data/vectorstore")
     llm = get_groq_llm(LLM_MODEL_NAME, LLM_MODEL_TEMPERATURE, GROQ_API_KEY)
     kadiAPY_bot = KadiAPYBot(llm, vectorstore)