bupa1018 committed on
Commit
76330b3
·
1 Parent(s): 015e446

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -9
app.py CHANGED
@@ -118,13 +118,29 @@ def get_file(temp_dir, file_path):
118
  full_path = os.path.join(temp_dir, file_path)
119
  return full_path
120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
 
122
  def process_directory(directory, partial_paths=None, file_paths=None):
123
  all_texts = []
124
  file_references = []
125
 
126
- zip_files = [file for file in os.listdir(directory) if file.endswith('.zip')]
127
 
 
 
128
  if not zip_files:
129
  print("No zip file found in the directory.")
130
  return all_texts, file_references
@@ -379,24 +395,27 @@ def rag_workflow(query):
379
  def initialize():
380
  global docstore, codestore, chunks, llm
381
  #download_gitlab_repo()
382
- #code_partial_paths = ['kadi_apy/lib/']
383
- #code_file_path = []
384
- doc_partial_paths = []
385
  #doc_partial_paths = ['docs/source/setup/']
386
- doc_file_paths = ['docs/source/usage/lib.rst']
 
 
387
 
 
388
 
389
  #code_files, code_file_references = process_directory(DATA_DIR, code_partial_paths, code_file_path)
390
 
391
- doc_files, doc_file_references = process_directory(DATA_DIR, doc_partial_paths, doc_file_paths)
392
 
393
  #code_chunks = split_pythoncode_into_chunks(code_files, code_file_references, 1500, 0)
394
- doc_chunks = split_into_chunks(doc_files, doc_file_references, CHUNK_SIZE, CHUNK_OVERLAP)
395
 
396
  #print(f"Total number of code_chunks: {len(code_chunks)}")
397
- print(f"Total number of doc_chunks: {len(doc_chunks)}")
398
 
399
- docstore = setup_vectorstore(doc_chunks, EMBEDDING_MODEL_NAME, PERSIST_DOC_DIRECTORY)
400
  #codestore = setup_vectorstore(code_chunks, EMBEDDING_MODEL_NAME, PERSIST_CODE_DIRECTORY)
401
 
402
  #llm = setup_llm(LLM_MODEL_NAME, LLM_TEMPERATURE, GROQ_API_KEY)
 
118
  full_path = os.path.join(temp_dir, file_path)
119
  return full_path
120
 
121
+
122
+
123
+ def process_directory(directory):
124
+ code_partial_paths = ['kadi_apy/lib/resources/']
125
+
126
+ zip_file_path = os.listdir(directory) if file.endswith('.zip')
127
+ print(zip_file_path)
128
+ # with tempfile.TemporaryDirectory() as tmpdirname:
129
+ # Unzip the file into the temporary directory
130
+ # with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
131
+ # zip_ref.extractall(tmpdirname)
132
+
133
+
134
+ # unzipped_root = os.listdir(tmpdirname)
135
 
136
  def process_directory(directory, partial_paths=None, file_paths=None):
137
  all_texts = []
138
  file_references = []
139
 
140
+ zip_file = next((file for file in os.listdir(directory) if file.endswith('.zip')), None)
141
 
142
+
143
+
144
  if not zip_files:
145
  print("No zip file found in the directory.")
146
  return all_texts, file_references
 
395
  def initialize():
396
  global docstore, codestore, chunks, llm
397
  #download_gitlab_repo()
398
+ code_partial_paths = ['kadi_apy/lib/resources/']
399
+ code_file_path = []
400
+ #doc_partial_paths = []
401
  #doc_partial_paths = ['docs/source/setup/']
402
+ #doc_file_paths = ['docs/source/usage/lib.rst']
403
+
404
+
405
 
406
+ code_files = process_directory(DATA_DIR)
407
 
408
  #code_files, code_file_references = process_directory(DATA_DIR, code_partial_paths, code_file_path)
409
 
410
+ #doc_files, doc_file_references = process_directory(DATA_DIR, doc_partial_paths, doc_file_paths)
411
 
412
  #code_chunks = split_pythoncode_into_chunks(code_files, code_file_references, 1500, 0)
413
+ #doc_chunks = split_into_chunks(doc_files, doc_file_references, CHUNK_SIZE, CHUNK_OVERLAP)
414
 
415
  #print(f"Total number of code_chunks: {len(code_chunks)}")
416
+ #print(f"Total number of doc_chunks: {len(doc_chunks)}")
417
 
418
+ #docstore = setup_vectorstore(doc_chunks, EMBEDDING_MODEL_NAME, PERSIST_DOC_DIRECTORY)
419
  #codestore = setup_vectorstore(code_chunks, EMBEDDING_MODEL_NAME, PERSIST_CODE_DIRECTORY)
420
 
421
  #llm = setup_llm(LLM_MODEL_NAME, LLM_TEMPERATURE, GROQ_API_KEY)