Spaces:
Sleeping
Sleeping
import os | |
import tempfile | |
import zipfile | |
def extract_repo_files(directory, folder_paths, file_paths):
    """Collect the text of ``.rst`` and ``.py`` files from a zipped repository.

    The first ``.zip`` entry yielded by ``os.listdir(directory)`` is unpacked
    into a temporary directory. The first entry of the unpacked tree is used
    as the repository root, and the requested folders/files are resolved
    relative to that root.

    Args:
        directory: Directory that contains the ``.zip`` archive.
        folder_paths: Folders (relative to the repository root) whose files
            are gathered via ``_get_all_files_in_folder``. May be empty or
            ``None``.
        file_paths: Individual files (relative to the repository root),
            resolved via ``_get_file``. May be empty or ``None``.

    Returns:
        A ``(all_texts, file_references)`` tuple of parallel lists: the
        UTF-8 decoded content of every non-empty ``.rst``/``.py`` file, and
        that file's path relative to the repository root.

    Raises:
        FileNotFoundError: If ``directory`` contains no ``.zip`` file.
        ValueError: If the archive unpacks to an empty directory.
    """
    all_texts = []
    file_references = []

    zip_filename = next(
        (file for file in os.listdir(directory) if file.endswith('.zip')),
        None,
    )
    if zip_filename is None:
        # Without this guard os.path.join(directory, None) fails with an
        # opaque TypeError that hides the real problem.
        raise FileNotFoundError(f"No .zip file found in directory: {directory}")

    zip_file_path = os.path.join(directory, zip_filename)
    print(zip_file_path)

    with tempfile.TemporaryDirectory() as tmpdirname:
        # Unzip the file into the temporary directory; the archive handle is
        # not needed once extraction is done, so it is closed right away.
        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
            zip_ref.extractall(tmpdirname)

        files = []
        print("tmpdirname: ", tmpdirname)
        unzipped_root = os.listdir(tmpdirname)
        print("unzipped_root ", unzipped_root)
        if not unzipped_root:
            raise ValueError(f"Archive is empty: {zip_file_path}")

        # The first unpacked entry is taken to be the repository root.
        tmpsubdirpath = os.path.join(tmpdirname, unzipped_root[0])
        print("tempsubdirpath: ", tmpsubdirpath)

        if folder_paths:
            for folder_path in folder_paths:
                files += _get_all_files_in_folder(tmpsubdirpath, folder_path)
        if file_paths:
            files += [_get_file(tmpsubdirpath, file_path) for file_path in file_paths]

        print(f"Total number of files: {len(files)}")

        # Everything below must run while the temporary directory still exists.
        for file_path in files:
            if os.path.getsize(file_path) == 0:
                print(f"Skipping an empty file: {file_path}")
                continue

            # Only reStructuredText and Python sources are collected; skip
            # everything else without opening it.
            file_ext = os.path.splitext(file_path)[1]
            if file_ext not in ('.rst', '.py'):
                continue

            # Read as bytes and decode explicitly so line endings are kept
            # exactly as stored in the archive.
            with open(file_path, 'rb') as f:
                text = f.read().decode('utf-8')

            all_texts.append(text)
            print("Filepaths brother:", file_path)
            relative_path = os.path.relpath(file_path, tmpsubdirpath)
            print("Relative Filepaths brother:", relative_path)
            file_references.append(relative_path)

    return all_texts, file_references
def _get_all_files_in_folder(temp_dir, folder_path):
    """Return the full path of every file found beneath a folder.

    Args:
        temp_dir: Base directory that ``folder_path`` is joined onto.
        folder_path: Folder, relative to ``temp_dir``, to walk with
            ``os.walk``.

    Returns:
        A list of paths, each built by joining the walked directory with a
        file name found in it. The list is empty when the walk yields nothing.
    """
    collected = []
    start_dir = os.path.join(temp_dir, folder_path)
    for current_dir, _subdirs, names in os.walk(start_dir):
        print(f"Files in current directory ({current_dir}): {names}")
        for name in names:
            print(f"Processing file: {name}")
        collected.extend(os.path.join(current_dir, name) for name in names)
    return collected
def _get_file(temp_dir, file_path):
    """Return ``file_path`` joined onto ``temp_dir`` with ``os.path.join``."""
    return os.path.join(temp_dir, file_path)