File size: 969 Bytes
2f541f4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import requests
import string
import re


def get_content_list(timeout: float = 10.0) -> str:
    """Download the English docs ``_toctree.yml`` of huggingface/transformers.

    Args:
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument.

    Returns:
        The response body as text.

    Raises:
        ValueError: If the response status code is not 200. The exception
            args are the message and the requested URL.

    Any exception raised by ``requests.get`` itself (connection failure,
    timeout) is not caught here and propagates to the caller.
    """
    url = (
        "https://raw.githubusercontent.com/huggingface/"
        "transformers/main/docs/source/en/_toctree.yml"
    )
    # An explicit timeout keeps a stalled connection from blocking forever;
    # requests applies no timeout unless one is given.
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise ValueError("Failed to retrieve content list from the URL.", url)
    return response.text

def retrieve_content_list(content: str) -> "dict[str | None, list[str]]":
    """Group the ``local:`` entries found in *content* by top-level directory.

    Every non-whitespace token following ``local:`` is treated as a path and
    gets ``.md`` appended. Paths containing ``/`` are grouped under the text
    before their first ``/``; paths without ``/`` are collected under the
    ``None`` key.

    Args:
        content: Text to scan for ``local: <path>`` entries.

    Returns:
        A dict mapping each prefix (or ``None``) to the list of ``.md`` paths
        in the order they appear in *content*. The ``None`` key is always
        present, even when its list is empty.
    """
    # The annotation is quoted so it is never evaluated at runtime.
    categories = {None: []}

    for name in re.findall(r'local:\s*(\S+)', content):
        filepath = name + ".md"
        prefix, slash, _ = filepath.partition('/')
        # An empty separator means there was no '/', i.e. a top-level page.
        key = prefix if slash else None
        categories.setdefault(key, []).append(filepath)
    return categories