bupa1018 committed on
Commit
125fa0c
·
1 Parent(s): 25830df

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -10
app.py CHANGED
@@ -10,6 +10,8 @@ from process_repo import extract_repo_files
10
  from chunking import chunk_pythoncode_and_add_metadata, chunk_text_and_add_metadata
11
  from vectorstore import setup_vectorstore
12
  from llm import get_groq_llm
 
 
13
  from kadi_apy_bot import KadiAPYBot
14
  from repo_versions import store_message_from_json
15
 
@@ -51,21 +53,21 @@ def initialize():
51
 
52
 
53
 
54
- download_gitlab_repo_to_hfspace(GITLAB_API_URL, GITLAB_PROJECT_ID, GITLAB_PROJECT_VERSION, DATA_DIR, hf_api, HF_SPACE_NAME)
55
 
56
- code_texts, code_references = extract_repo_files(DATA_DIR, ['kadi_apy'], [])
57
- doc_texts, doc_references = extract_repo_files(DATA_DIR, ['docs'], [])
58
 
59
- print("Length of code_texts: ", len(code_texts))
60
- print("Length of doc_files: ", len(doc_texts))
61
 
62
- code_chunks = chunk_pythoncode_and_add_metadata(code_texts, code_references)
63
- doc_chunks = chunk_text_and_add_metadata(doc_texts, doc_references, CHUNK_SIZE, CHUNK_OVERLAP)
64
 
65
- print(f"Total number of code_chunks: {len(code_chunks)}")
66
- print(f"Total number of doc_chunks: {len(doc_chunks)}")
67
 
68
- vectorstore = setup_vectorstore(doc_chunks + code_chunks, EMBEDDING_MODEL_NAME, VECTORSTORE_DIRECTORY)
69
  llm = get_groq_llm(LLM_MODEL_NAME, LLM_MODEL_TEMPERATURE, GROQ_API_KEY)
70
 
71
  kadiAPY_bot = KadiAPYBot(llm, vectorstore)
 
10
  from chunking import chunk_pythoncode_and_add_metadata, chunk_text_and_add_metadata
11
  from vectorstore import setup_vectorstore
12
  from llm import get_groq_llm
13
+ from vectorstore import get_chroma_vectorstore
14
+ from embeddings import get_SFR_Code_embedding_model
15
  from kadi_apy_bot import KadiAPYBot
16
  from repo_versions import store_message_from_json
17
 
 
53
 
54
 
55
 
56
+ # download_gitlab_repo_to_hfspace(GITLAB_API_URL, GITLAB_PROJECT_ID, GITLAB_PROJECT_VERSION, DATA_DIR, hf_api, HF_SPACE_NAME)
57
 
58
+ # code_texts, code_references = extract_repo_files(DATA_DIR, ['kadi_apy'], [])
59
+ # doc_texts, doc_references = extract_repo_files(DATA_DIR, ['docs'], [])
60
 
61
+ # print("Length of code_texts: ", len(code_texts))
62
+ # print("Length of doc_files: ", len(doc_texts))
63
 
64
+ # code_chunks = chunk_pythoncode_and_add_metadata(code_texts, code_references)
65
+ # doc_chunks = chunk_text_and_add_metadata(doc_texts, doc_references, CHUNK_SIZE, CHUNK_OVERLAP)
66
 
67
+ # print(f"Total number of code_chunks: {len(code_chunks)}")
68
+ # print(f"Total number of doc_chunks: {len(doc_chunks)}")
69
 
70
+ vectorstore = get_chroma_vectorstore(get_SFR_Code_embedding_model(), "data/vectorstore")
71
  llm = get_groq_llm(LLM_MODEL_NAME, LLM_MODEL_TEMPERATURE, GROQ_API_KEY)
72
 
73
  kadiAPY_bot = KadiAPYBot(llm, vectorstore)