"""Download a Hugging Face dataset snapshot and zip selected folders via a Gradio UI."""

import json
import os
import shutil
import time

import gradio as gr
from huggingface_hub import snapshot_download

_HF_URL_PREFIX = "https://huggingface.co/"
_DATASETS_PREFIX = "datasets/"

# Top-level folders of the dataset snapshot that are candidates for zipping.
_TARGET_FOLDERS = (
    "hololive-jp",
    "hololive-id",
    "hololive-en",
    "holostars",
    "hololive-cn",
    "nijisanji",
    "nijisanji-en",
    "vshojo",
    "phaseconnect",
    "indies",
    "other",
)


def clear_cache():
    """Delete the Hugging Face cache directory (``~/.cache/huggingface``).

    The whole directory tree is removed, not only downloaded snapshots.
    NOTE(review): with default settings huggingface_hub also keeps the saved
    login token under this directory, so calling this likely logs the user
    out -- confirm that this is intended.
    """
    cache_dir = os.path.expanduser("~/.cache/huggingface")
    if os.path.exists(cache_dir):
        print(f"Clearing cache at: {cache_dir}")
        shutil.rmtree(cache_dir)
    else:
        print("No cache found to clear.")


def _extract_repo_id(huggingface_dataset_url):
    """Turn a huggingface.co dataset URL (or a bare repo ID) into a repo ID."""
    repo_id = huggingface_dataset_url.strip()
    if repo_id.startswith(_HF_URL_PREFIX):
        # Slice off only the leading prefix instead of str.replace(), which
        # would also remove any later occurrence of the same text.
        repo_id = repo_id[len(_HF_URL_PREFIX):].rstrip("/")
        if repo_id.startswith(_DATASETS_PREFIX):
            repo_id = repo_id[len(_DATASETS_PREFIX):]
    return repo_id


def _download_dataset(repo_id, retry_attempts=3, retry_delay=5):
    """Download the dataset snapshot, retrying on OSError; return its local path."""
    for attempt in range(retry_attempts):
        try:
            if attempt > 0:
                print(f"Retrying... (Attempt {attempt + 1})")
                clear_cache()  # start the retry from a clean cache
            # `max_workers` is the parallelism keyword of snapshot_download;
            # the previous `num_proc` keyword is not accepted and raised
            # TypeError before any download started. The deprecated
            # `resume_download` flag is omitted: it contradicts
            # force_download=True.
            return snapshot_download(
                repo_id,
                repo_type="dataset",
                force_download=True,
                max_workers=64,
            )
        except OSError as e:
            print(f"Download failed on attempt {attempt + 1}/{retry_attempts}: {e}")
            if attempt == retry_attempts - 1:
                raise  # out of attempts: surface the last error to the caller
            time.sleep(retry_delay)  # brief pause before the next attempt
    # Only reachable when retry_attempts is not positive.
    raise EnvironmentError("Failed to download dataset after multiple attempts.")


def process_and_zip_folders(huggingface_dataset_url, output_dir):
    """Download a dataset and zip each known folder that has a model_info.json.

    Args:
        huggingface_dataset_url: Either a ``https://huggingface.co/...`` URL
            (an optional ``datasets/`` path prefix is stripped) or a bare
            repository ID.
        output_dir: Directory that receives one ``<folder>.zip`` per
            processed folder.

    Returns:
        A list with the path of every ZIP archive that was created, in the
        order of the internal folder list.

    Raises:
        OSError: If the download still fails on the final attempt.
        json.JSONDecodeError: If a folder's ``model_info.json`` is not valid
            JSON.
    """
    repo_id = _extract_repo_id(huggingface_dataset_url)
    dataset_path = _download_dataset(repo_id)

    zip_files = []
    for folder in _TARGET_FOLDERS:
        folder_path = os.path.join(dataset_path, folder)
        json_path = os.path.join(folder_path, "model_info.json")
        # Folders missing from the snapshot, or without a model_info.json,
        # are skipped silently.
        if not os.path.exists(json_path):
            continue

        # Parse the metadata purely as validation: a malformed file aborts
        # the run instead of being archived. The parsed content is not used.
        with open(json_path, "r", encoding="utf-8") as f:
            json.load(f)

        # Build the archive base name directly. Deriving it with
        # str.replace('.zip', '') on the full path would also mangle an
        # output directory whose own name contains ".zip".
        base_name = os.path.join(output_dir, folder)
        shutil.make_archive(base_name=base_name, format="zip", root_dir=folder_path)
        zip_files.append(f"{base_name}.zip")

    return zip_files


def gradio_interface():
    """Build and launch the Gradio UI around ``process_and_zip_folders``."""

    def start_process(huggingface_url, output_directory):
        """Validate the form inputs, ensure the output directory exists, run the job."""
        huggingface_url = (huggingface_url or "").strip()
        output_directory = (output_directory or "").strip()
        if not huggingface_url:
            raise gr.Error("Please enter a Hugging Face dataset URL or repository ID.")
        if not output_directory:
            # os.makedirs("") would fail with an opaque FileNotFoundError.
            raise gr.Error("Please enter an output directory.")

        os.makedirs(output_directory, exist_ok=True)
        return process_and_zip_folders(huggingface_url, output_directory)

    interface = gr.Interface(
        fn=start_process,
        inputs=[
            gr.Textbox(
                label="Hugging Face Dataset URL",
                placeholder="https://huggingface.co/datasets/soiz1/rvc-models",
            ),
            gr.Textbox(label="Output Directory", placeholder="/path/to/output"),
        ],
        outputs=gr.File(label="Generated ZIP Files"),
        title="Folder to ZIP Generator",
        description="指定されたフォルダを取得してZIPに圧縮します。",
    )

    interface.launch()


if __name__ == "__main__":
    gradio_interface()