bupa1018 committed on
Commit
9e24330
·
1 Parent(s): d0a3932

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -17
app.py CHANGED
@@ -403,34 +403,29 @@ def initialize():
403
  global vector_store, chunks, llm
404
 
405
  download_gitlab_project_by_version()
406
- #download_gitlab_repo()
407
- code_partial_paths = ['kadi_apy/lib/']
408
- code_file_paths = []
409
- doc_partial_paths = []
410
- doc_partial_paths = ['docs/source/setup/']
411
- doc_file_paths = ['docs/source/usage/lib.rst']
412
-
413
-
414
 
415
 
416
- kadiAPY_code_texts, kadiAPY_code_references = process_directory(DATA_DIR, code_partial_paths, code_file_paths)
417
- print("LEEEEEEEEEEEENGTH of code_texts: ", len(kadiAPY_code_texts))
418
 
419
 
420
- kadiAPY_doc_texts, kadiAPY_doc_references = process_directory(DATA_DIR, doc_partial_paths, doc_file_paths)
421
- print("LEEEEEEEEEEEENGTH of doc_files: ", len(kadiAPY_doc_texts))
422
 
423
- kadiAPY_code_chunks = split_python_code_into_chunks(kadiAPY_code_texts, kadiAPY_code_references)
424
- kadiAPY_doc_chunks = split_into_chunks(kadiAPY_doc_texts, kadiAPY_doc_references, CHUNK_SIZE, CHUNK_OVERLAP)
425
 
426
- print(f"Total number of code_chunks: {len(kadiAPY_code_chunks)}")
427
- print(f"Total number of doc_chunks: {len(kadiAPY_doc_chunks)}")
428
 
429
  #docstore = embed_documents_into_vectorstore(kadiAPY_code_chunks, EMBEDDING_MODEL_NAME, PERSIST_DOC_DIRECTORY)
430
  #codestore = embed_documents_into_vectorstore(kadiAPY_doc_chunks, EMBEDDING_MODEL_NAME, PERSIST_CODE_DIRECTORY)
431
 
432
  vector_store = embed_documents_into_vectorstore(
433
- chunks= kadiAPY_doc_chunks + kadiAPY_code_chunks,
434
  model_name= EMBEDDING_MODEL_NAME,
435
  persist_directory= PERSIST_DOC_DIRECTORY
436
  )
 
403
  global vector_store, chunks, llm
404
 
405
  download_gitlab_project_by_version()
406
+
407
+ code_file_paths = ['kadi_apy']
408
+ doc_file_path = ['docs/source/']
 
 
 
 
 
409
 
410
 
411
+ code_texts, code_references = process_directory(DATA_DIR, code_partial_paths, code_file_paths)
412
+ print("LEEEEEEEEEEEENGTH of code_texts: ", len(code_texts))
413
 
414
 
415
+ doc_texts, kadiAPY_doc_references = process_directory(DATA_DIR, doc_partial_paths, doc_file_paths)
416
+ print("LEEEEEEEEEEEENGTH of doc_files: ", len(doc_texts))
417
 
418
+ code_chunks = split_python_code_into_chunks(code_texts, code_references)
419
+ doc_chunks = split_into_chunks(doc_texts, kadiAPY_doc_references, CHUNK_SIZE, CHUNK_OVERLAP)
420
 
421
+ print(f"Total number of code_chunks: {len(code_chunks)}")
422
+ print(f"Total number of doc_chunks: {len(doc_chunks)}")
423
 
424
  #docstore = embed_documents_into_vectorstore(kadiAPY_code_chunks, EMBEDDING_MODEL_NAME, PERSIST_DOC_DIRECTORY)
425
  #codestore = embed_documents_into_vectorstore(kadiAPY_doc_chunks, EMBEDDING_MODEL_NAME, PERSIST_CODE_DIRECTORY)
426
 
427
  vector_store = embed_documents_into_vectorstore(
428
+ chunks= doc_chunks + code_chunks,
429
  model_name= EMBEDDING_MODEL_NAME,
430
  persist_directory= PERSIST_DOC_DIRECTORY
431
  )