import re
import string
from typing import Optional

import requests

# Raw table-of-contents file of the English transformers documentation.
_TOCTREE_URL = (
    "https://raw.githubusercontent.com/huggingface/"
    "transformers/main/docs/source/en/_toctree.yml"
)

# Captures the whitespace-delimited token that follows every "local:" key.
_LOCAL_ENTRY_RE = re.compile(r'local:\s*(\S+)')


def get_content_list(timeout: float = 10.0) -> str:
    """Download the documentation table of contents and return its text.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block forever on a stalled connection.

    Returns:
        The body of the ``_toctree.yml`` file as text.

    Raises:
        ValueError: If the server answers with a status code other than 200.
        requests.RequestException: If the request itself fails (connection
            error, timeout, ...).
    """
    response = requests.get(_TOCTREE_URL, timeout=timeout)
    if response.status_code != 200:
        raise ValueError(
            "Failed to retrieve content list from the URL.", _TOCTREE_URL
        )
    return response.text


def retrieve_content_list(content: str) -> dict[Optional[str], list[str]]:
    """Group the ``local:`` entries found in *content* by top-level directory.

    Every value following a ``local:`` key gets ``.md`` appended. Entries
    containing a ``/`` are grouped under the text before the first ``/``;
    all other entries are grouped under the ``None`` key.

    Args:
        content: Text to scan for ``local:`` entries.

    Returns:
        A dict mapping each prefix (or ``None``) to its file paths in order
        of appearance. The ``None`` key is always present, even when empty.
    """
    categories: dict[Optional[str], list[str]] = {None: []}
    for entry in _LOCAL_ENTRY_RE.findall(content):
        filepath = f"{entry}.md"
        prefix, separator, _ = filepath.partition("/")
        # An empty separator means there is no "/" in the path at all.
        key = prefix if separator else None
        categories.setdefault(key, []).append(filepath)
    return categories