bupa1018 committed on
Commit
906d13d
·
1 Parent(s): c6e3c5d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -10
app.py CHANGED
@@ -99,11 +99,11 @@ def download_gitlab_repo():
99
  print("Upload complete")
100
 
101
 
102
- def get_all_files_in_folder(temp_dir, partial_path):
103
 
104
  all_files = []
105
  print("inner method of get all files in folder")
106
- target_dir = os.path.join(temp_dir, partial_path)
107
  print(target_dir)
108
 
109
  for root, dirs, files in os.walk(target_dir):
@@ -119,9 +119,8 @@ def get_file(temp_dir, file_path):
119
  return full_path
120
 
121
 
122
-
123
- def process_directory(directory):
124
- code_partial_paths = ['kadi_apy/lib/resources/']
125
 
126
 
127
  zip_filename = next((file for file in os.listdir(directory) if file.endswith('.zip')), None) # zip_filename: kadi-apy-master-2a244f1af1483b48f8f9c0d99ce2744a0950c834.zip
@@ -142,6 +141,37 @@ def process_directory(directory):
142
 
143
  tmpsubdirpath= os.path.join(tmpdirname, unzipped_root[0]) # /tmp/tmpux1v52wy/kadi-apy-master-2a244f1af1483b48f8f9c0d99ce2744a0950c834
144
  print("tempsubdirpath: ", tmpsubdirpath)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
  # with tempfile.TemporaryDirectory() as tmpdirname:
147
  # Unzip the file into the temporary directory
@@ -150,7 +180,8 @@ def process_directory(directory):
150
 
151
 
152
  # unzipped_root = os.listdir(tmpdirname)
153
-
 
154
  def process_directory5(directory, partial_paths=None, file_paths=None):
155
  all_texts = []
156
  file_references = []
@@ -211,7 +242,7 @@ def process_directory5(directory, partial_paths=None, file_paths=None):
211
  text = f"SVG file content from {file_path}"
212
  elif file_ext in ['.png', '.ico']:
213
  text = f"Image metadata from {file_path}"
214
- else:
215
  continue
216
 
217
  all_texts.append(text)
@@ -413,15 +444,16 @@ def rag_workflow(query):
413
  def initialize():
414
  global docstore, codestore, chunks, llm
415
  #download_gitlab_repo()
416
- code_partial_paths = ['kadi_apy/lib/resources/']
417
  code_file_path = []
418
  #doc_partial_paths = []
419
  #doc_partial_paths = ['docs/source/setup/']
420
  #doc_file_paths = ['docs/source/usage/lib.rst']
421
 
422
 
423
-
424
- process_directory(DATA_DIR)
 
425
 
426
  #code_files, code_file_references = process_directory5(DATA_DIR, code_partial_paths, code_file_path)
427
 
 
99
  print("Upload complete")
100
 
101
 
102
+ def get_all_files_in_folder(temp_dir, folder_path):
103
 
104
  all_files = []
105
  print("inner method of get all files in folder")
106
+ target_dir = os.path.join(temp_dir, folder_path)
107
  print(target_dir)
108
 
109
  for root, dirs, files in os.walk(target_dir):
 
119
  return full_path
120
 
121
 
122
+ #getFilesFromRepo
123
+ def process_directory(directory, folder_paths, file_paths):
 
124
 
125
 
126
  zip_filename = next((file for file in os.listdir(directory) if file.endswith('.zip')), None) # zip_filename: kadi-apy-master-2a244f1af1483b48f8f9c0d99ce2744a0950c834.zip
 
141
 
142
  tmpsubdirpath= os.path.join(tmpdirname, unzipped_root[0]) # /tmp/tmpux1v52wy/kadi-apy-master-2a244f1af1483b48f8f9c0d99ce2744a0950c834
143
  print("tempsubdirpath: ", tmpsubdirpath)
144
+
145
+ if folder_paths:
146
+ for folder_path in folder_paths:
147
+ files += get_all_files_in_folder(tmpsubdirpath, folder_paths)
148
+ if file_paths:
149
+ files += [get_file(tmpsubdirpath, file_path) for file_path in file_paths]
150
+
151
+
152
+ print(f"Total number of files: {len(files)}")
153
+ for file_path in files:
154
+ #print(f"Paths of files: {files}")
155
+ file_ext = os.path.splitext(file_path)[1]
156
+
157
+ if os.path.getsize(file_path) == 0:
158
+ print(f"Skipping an empty file: {file_path}")
159
+ continue
160
+
161
+ with open(file_path, 'rb') as f:
162
+ if file_ext in ['.rst', '.md', '.txt', '.html', '.json', '.yaml', '.py']:
163
+ text = f.read().decode('utf-8')
164
+ elif file_ext in ['.svg']:
165
+ text = f"SVG file content from {file_path}"
166
+ elif file_ext in ['.png', '.ico']:
167
+ text = f"Image metadata from {file_path}"
168
+ else
169
+ continue
170
+
171
+ all_texts.append(text)
172
+ file_references.append(file_path)
173
+
174
+ return all_texts, file_references
175
 
176
  # with tempfile.TemporaryDirectory() as tmpdirname:
177
  # Unzip the file into the temporary directory
 
180
 
181
 
182
  # unzipped_root = os.listdir(tmpdirname)
183
+
184
+
185
  def process_directory5(directory, partial_paths=None, file_paths=None):
186
  all_texts = []
187
  file_references = []
 
242
  text = f"SVG file content from {file_path}"
243
  elif file_ext in ['.png', '.ico']:
244
  text = f"Image metadata from {file_path}"
245
+ else
246
  continue
247
 
248
  all_texts.append(text)
 
444
  def initialize():
445
  global docstore, codestore, chunks, llm
446
  #download_gitlab_repo()
447
+ #code_partial_paths = ['kadi_apy/lib/resources/']
448
  code_file_path = []
449
  #doc_partial_paths = []
450
  #doc_partial_paths = ['docs/source/setup/']
451
  #doc_file_paths = ['docs/source/usage/lib.rst']
452
 
453
 
454
+ code_partial_paths = ['kadi_apy/lib/resources/']
455
+
456
+ process_directory(DATA_DIR,code_partial_paths, code_file_path)
457
 
458
  #code_files, code_file_references = process_directory5(DATA_DIR, code_partial_paths, code_file_path)
459