Spaces:
Sleeping
Sleeping
import re
import string
from typing import Optional

import requests
def get_content_list() -> str:
    """Download the table of contents of the Transformers English docs.

    Fetches ``docs/source/en/_toctree.yml`` from the ``main`` branch of the
    ``huggingface/transformers`` repository via raw.githubusercontent.com.

    Returns:
        The body of the HTTP response, as text.

    Raises:
        ValueError: If the server answers with any status code other than
            200. The exception args are the message and the requested URL.
        requests.exceptions.RequestException: If the request itself fails,
            e.g. on a connection error or when the timeout expires.
    """
    url = (
        "https://raw.githubusercontent.com/huggingface/"
        "transformers/main/docs/source/en/_toctree.yml"
    )
    # requests applies no timeout unless one is given, so a stalled
    # connection would otherwise block the caller indefinitely.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        raise ValueError("Failed to retrieve content list from the URL.", url)
    return response.text
def retrieve_content_list(content: str) -> dict[Optional[str], list[str]]:
    """Group the page paths listed in a toctree by their leading directory.

    Every ``local: <path>`` entry found in *content* gets ``.md`` appended
    and is filed under the part of the path that precedes the first ``/``.
    Paths without a ``/`` are collected under the ``None`` key, which is
    always present in the result (possibly with an empty list).

    Args:
        content: Text to scan for ``local:`` entries.

    Returns:
        A dict mapping each directory prefix (or ``None``) to its ``.md``
        paths, in the order they appear in *content*.
    """
    categories = {None: []}
    for page in re.findall(r'local:\s*(\S+)', content):
        filepath = page + ".md"
        # partition() returns an empty separator when there is no "/",
        # which marks the path as top-level (the None bucket).
        prefix, slash, _ = filepath.partition('/')
        key = prefix if slash else None
        categories.setdefault(key, []).append(filepath)
    return categories