Spaces:
Running
Running
def get_hf_files(repo, name, path=""):
    """Download every file of a Hugging Face Space into a local directory.

    The file listing comes from ``HfApi.list_repo_files`` for the Space
    ``{repo}/{name}``. Each listed file is fetched from the Space's
    ``raw/main`` URL and written below a local directory called ``name``,
    mirroring the repository's directory layout.

    Args:
        repo: Owner part of the Space id (the segment before the slash).
        name: Space name; also used as the local base directory.
        path: Unused. Retained so existing callers that pass it keep working.

    Returns:
        A list of ``{"path": <repo-relative path>}`` dicts, one per file
        that was successfully written, in listing order. Files whose
        download returned an HTTP error status are skipped and reported
        on stdout instead of being saved.
    """
    api = HfApi()
    file_list = api.list_repo_files(repo_id=f'{repo}/{name}', repo_type="space")
    print(f"Files in {repo}/{name}: {file_list}")

    # Create the base directory up front so it exists even for an empty Space.
    os.makedirs(name, exist_ok=True)

    processed_files = []
    for file_path in file_list:
        local_path = os.path.join(name, file_path)

        # The listing is flat (each entry is a full repo-relative path), so
        # creating the parent directory here handles any nesting depth.
        # Recursing per directory is unnecessary and previously re-listed the
        # same repo forever for files nested two or more levels deep.
        os.makedirs(os.path.dirname(local_path), exist_ok=True)

        print(f"Downloading: {file_path}")
        # NOTE(review): for LFS-tracked files, raw/main presumably serves the
        # pointer file rather than the payload - confirm whether resolve/main
        # is what callers actually want.
        r = requests.get(
            f'https://huggingface.co/spaces/{repo}/{name}/raw/main/{file_path}',
            timeout=30,
        )
        if not r.ok:
            # Do not save an error page under the file's name or report it
            # as processed; keep going so one bad file does not abort the run.
            print(f"Skipping {file_path}: HTTP {r.status_code}")
            continue

        with open(local_path, 'wb') as file:
            file.write(r.content)
        processed_files.append({"path": file_path})

    print(f"Processed files: {processed_files}")
    return processed_files