import os

import requests
from huggingface_hub import HfApi


def get_hf_files(repo, name, path=""):
    """Download every file from the Hugging Face Space `{repo}/{name}` into a local directory named `name`."""
    api = HfApi()
    file_list = api.list_repo_files(repo_id=f"{repo}/{name}", repo_type="space")
    print(f"Files in {repo}/{name}: {file_list}")

    processed_files = []

    # Create the base directory if it doesn't exist
    os.makedirs(name, exist_ok=True)

    for file_path in file_list:
        # Construct the full local path; file_path may include nested
        # directories, so create any missing subdirectories first
        local_path = os.path.join(name, file_path)
        os.makedirs(os.path.dirname(local_path), exist_ok=True)

        # Download the raw file content from the Space
        print(f"Downloading: {file_path}")
        r = requests.get(
            f"https://huggingface.co/spaces/{repo}/{name}/raw/main/{file_path}"
        )
        r.raise_for_status()
        with open(local_path, "wb") as file:
            file.write(r.content)

        processed_files.append({"path": file_path})

    print(f"Processed files: {processed_files}")
    return processed_files
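
# A minimal usage sketch (assumed values: "some-user" and "some-space" are
# placeholder Space owner / name, not taken from the original):
if __name__ == "__main__":
    downloaded = get_hf_files("some-user", "some-space")
    print(f"Downloaded {len(downloaded)} file(s) into ./some-space")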