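"""Download a Hugging Face dataset, compress selected subfolders into ZIP
archives, and serve the results through a small Gradio interface."""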
from huggingface_hub import snapshot_download
import os
import json
import shutil
import gradio as gr
import time

def clear_cache():
    """Hugging Faceキャッシュを削除する関数。"""
    cache_dir = os.path.expanduser("~/.cache/huggingface")
    if os.path.exists(cache_dir):
        print(f"Clearing cache at: {cache_dir}")
        shutil.rmtree(cache_dir)
    else:
        print("No cache found to clear.")

def process_and_zip_folders(huggingface_dataset_url, output_dir):
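    """Download the dataset behind `huggingface_dataset_url` and zip each
    known subfolder that contains a model_info.json, returning the ZIP paths."""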
    # Extract the repository ID from the URL, e.g.
    # "https://huggingface.co/datasets/soiz1/rvc-models" -> "soiz1/rvc-models"
    if huggingface_dataset_url.startswith("https://huggingface.co/"):
        repo_id = huggingface_dataset_url.replace("https://huggingface.co/", "").rstrip("/")
        if repo_id.startswith("datasets/"):
            repo_id = repo_id[len("datasets/"):]
    else:
        repo_id = huggingface_dataset_url

    # Download the dataset (with retry logic)
    retry_attempts = 3
    dataset_path = None
    for attempt in range(retry_attempts):
        try:
            if attempt > 0:
                print(f"Retrying... (Attempt {attempt + 1})")
                clear_cache()  # clear the cache so the retry starts fresh
            # max_workers sets the number of parallel download threads
            dataset_path = snapshot_download(repo_id, repo_type="dataset", force_download=True, max_workers=64)
            break  # exit the loop on success
        except OSError as e:
            print(f"Download failed on attempt {attempt + 1}/{retry_attempts}: {e}")
            if attempt == retry_attempts - 1:
                raise  # re-raise if the final attempt also failed
            time.sleep(5)  # wait briefly before retrying

    if dataset_path is None:
        raise OSError("Failed to download dataset after multiple attempts.")

    # Folders to process
    folders = [
        "hololive-jp", "hololive-id", "hololive-en", "holostars",
        "hololive-cn", "nijisanji", "nijisanji-en", "vshojo",
        "phaseconnect", "indies", "other"
    ]

    zip_files = []

    for folder in folders:
        folder_path = os.path.join(dataset_path, folder)
        if not os.path.exists(folder_path):
            continue

        # Look for the model metadata file
        json_path = os.path.join(folder_path, "model_info.json")
        if not os.path.exists(json_path):
            continue

        # Load the model metadata (parsed here but not otherwise used)
        with open(json_path, "r", encoding="utf-8") as f:
            model_info = json.load(f)

        # Compress the folder into a ZIP archive
        zip_name = f"{folder}.zip"
        zip_path = os.path.join(output_dir, zip_name)
        shutil.make_archive(base_name=os.path.splitext(zip_path)[0], format="zip", root_dir=folder_path)
        zip_files.append(zip_path)

    return zip_files

def gradio_interface():
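    """Build and launch the Gradio UI for the folder-to-ZIP workflow."""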
    def start_process(huggingface_url, output_directory):
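        """Create the output directory and run the download-and-zip pipeline."""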
        # Create the output directory if it does not exist
        os.makedirs(output_directory, exist_ok=True)
        zip_files = process_and_zip_folders(huggingface_url, output_directory)
        return zip_files

    interface = gr.Interface(
        fn=start_process,
        inputs=[
            gr.Textbox(label="Hugging Face Dataset URL", placeholder="https://huggingface.co/datasets/soiz1/rvc-models"),
            gr.Textbox(label="Output Directory", placeholder="/path/to/output")
        ],
        outputs=gr.File(label="Generated ZIP Files"),
        title="Folder to ZIP Generator",
        description="Downloads the specified dataset folders and compresses them into ZIP archives."
    )

    interface.launch()

if __name__ == "__main__":
    gradio_interface()