KadiAPY_Coding_Assistant / process_repo.py
bupa1018's picture
Update process_repo.py
776b230
raw
history blame
2.64 kB
import os
import tempfile
import zipfile
def extract_repo_files(directory, folder_paths, file_paths):
    """Collect the text of ``.rst`` and ``.py`` files from a zipped repository.

    The first ``.zip`` archive reported by ``os.listdir(directory)`` is
    extracted into a temporary directory. The first entry of the extracted
    tree is treated as the repository root, and ``folder_paths`` and
    ``file_paths`` are resolved relative to that root.

    Args:
        directory: Directory that contains the ``.zip`` archive.
        folder_paths: Folders (relative to the repository root) whose files
            are gathered recursively. May be empty or ``None``.
        file_paths: Individual files (relative to the repository root).
            May be empty or ``None``.

    Returns:
        A tuple ``(all_texts, file_references)``: the UTF-8 decoded contents
        of every non-empty ``.rst``/``.py`` file that was selected, and the
        matching paths relative to the repository root, in the same order.

    Raises:
        FileNotFoundError: If ``directory`` contains no ``.zip`` archive.
        ValueError: If the archive extracts to nothing.
    """
    zip_filename = next(
        (name for name in os.listdir(directory) if name.endswith('.zip')),
        None,
    )
    if zip_filename is None:
        # Fail with a clear message instead of the TypeError that
        # os.path.join(directory, None) would raise.
        raise FileNotFoundError(f"No .zip archive found in directory: {directory}")

    zip_file_path = os.path.join(directory, zip_filename)
    print(zip_file_path)

    all_texts = []
    file_references = []

    with tempfile.TemporaryDirectory() as tmpdirname:
        # Unzip the file into the temporary directory; the archive handle is
        # not needed once extraction is done, so close it right away.
        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
            zip_ref.extractall(tmpdirname)

        print("tmpdirname: ", tmpdirname)
        unzipped_root = os.listdir(tmpdirname)
        print("unzipped_root ", unzipped_root)
        if not unzipped_root:
            raise ValueError(f"Archive contains no entries: {zip_file_path}")

        # The first extracted entry is taken as the repository root.
        tmpsubdirpath = os.path.join(tmpdirname, unzipped_root[0])
        print("tempsubdirpath: ", tmpsubdirpath)

        files = []
        for folder_path in folder_paths or ():
            files += _get_all_files_in_folder(tmpsubdirpath, folder_path)
        files += [_get_file(tmpsubdirpath, file_path) for file_path in file_paths or ()]

        print(f"Total number of files: {len(files)}")

        for file_path in files:
            if os.path.getsize(file_path) == 0:
                print(f"Skipping an empty file: {file_path}")
                continue

            # Only documentation and source files are collected; skip the
            # rest before opening them.
            if os.path.splitext(file_path)[1] not in ('.rst', '.py'):
                continue

            # Read as bytes and decode explicitly so line endings are kept
            # exactly as stored in the archive.
            with open(file_path, 'rb') as f:
                text = f.read().decode('utf-8')

            all_texts.append(text)
            print("Filepaths brother:", file_path)
            relative_path = os.path.relpath(file_path, tmpsubdirpath)
            print("Relative Filepaths brother:", relative_path)
            file_references.append(relative_path)

    return all_texts, file_references
def _get_all_files_in_folder(temp_dir, folder_path):
    """Return the full paths of every file found under ``temp_dir/folder_path``.

    The folder is walked recursively with ``os.walk``; each visited directory
    and each file name is printed as it is encountered. A folder that does
    not exist yields an empty list.
    """
    collected = []
    walk_root = os.path.join(temp_dir, folder_path)

    for current_dir, _subdirs, filenames in os.walk(walk_root):
        print(f"Files in current directory ({current_dir}): {filenames}")
        for filename in filenames:
            print(f"Processing file: {filename}")
            collected.append(os.path.join(current_dir, filename))

    return collected
def _get_file(temp_dir, file_path):
    """Return ``file_path`` joined onto ``temp_dir`` via ``os.path.join``."""
    return os.path.join(temp_dir, file_path)