File size: 1,830 Bytes
ba14112
f83a6c7
ba14112
 
 
f83a6c7
ba14112
f83a6c7
ba14112
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f83a6c7
ba14112
 
 
10099e5
ba14112
 
 
 
10099e5
ba14112
 
 
 
 
a8175e6
ba14112
10099e5
ba14112
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
def get_hf_files(repo, name, path=""):
    """Download every file of a Hugging Face Space into a local directory.

    Lists the files of the Space ``{repo}/{name}`` and saves each one under
    a local directory called ``name``, mirroring the repository's directory
    layout (nested directories are created as needed).

    Args:
        repo: Owner part of the Space id (the segment before the slash).
        name: Space name; also used as the local base directory.
        path: Unused. Retained so existing callers that pass it keep working.

    Returns:
        A list of ``{"path": <repo-relative path>}`` dicts, one for each
        file that was successfully written to disk.
    """
    api = HfApi()
    file_list = api.list_repo_files(repo_id=f'{repo}/{name}', repo_type="space")
    print(f"Files in {repo}/{name}: {file_list}")
    processed_files = []

    # Create the base directory up front so it exists even for an empty repo.
    os.makedirs(name, exist_ok=True)

    for file_path in file_list:
        local_path = os.path.join(name, file_path)
        # Creates every intermediate directory in one go, however deeply the
        # file is nested. This replaces the earlier recursive call, which
        # re-listed the whole repo and never terminated for files nested two
        # or more levels deep.
        os.makedirs(os.path.dirname(local_path), exist_ok=True)

        print(f"Downloading: {file_path}")
        # NOTE(review): the raw endpoint presumably serves the Git LFS pointer
        # rather than the payload for LFS-tracked files - confirm whether
        # "resolve/main" is wanted here instead.
        r = requests.get(f'https://huggingface.co/spaces/{repo}/{name}/raw/main/{file_path}')
        if not r.ok:
            # Do not persist an HTTP error page as if it were the file content.
            print(f"Failed to download {file_path}: HTTP {r.status_code}")
            continue

        with open(local_path, 'wb') as file:
            file.write(r.content)

        processed_files.append({"path": file_path})

    print(f"Processed files: {processed_files}")
    return processed_files