|
from huggingface_hub import snapshot_download |
|
import os |
|
import json |
|
import shutil |
|
import gradio as gr |
|
import time |
|
|
|
def clear_cache():
    """Delete the local Hugging Face cache directory, if one exists.

    Used between download retries so that a corrupted partial download
    cannot poison the next attempt.
    """
    cache_dir = os.path.expanduser("~/.cache/huggingface")
    if not os.path.exists(cache_dir):
        print("No cache found to clear.")
        return
    print(f"Clearing cache at: {cache_dir}")
    shutil.rmtree(cache_dir)
|
|
|
def process_and_zip_folders(huggingface_dataset_url, output_dir):
    """Download a Hugging Face dataset and zip its known model subfolders.

    Parameters
    ----------
    huggingface_dataset_url : str
        Either a full URL ("https://huggingface.co/datasets/<owner>/<repo>")
        or a bare repo id ("<owner>/<repo>").
    output_dir : str
        Existing directory where the generated ZIP files are written.

    Returns
    -------
    list[str]
        Paths of the ZIP archives that were created.

    Raises
    ------
    OSError
        If the download keeps failing after all retry attempts.
    """
    # Normalize a full URL down to a bare "<owner>/<repo>" repo id.
    if huggingface_dataset_url.startswith("https://huggingface.co/"):
        repo_id = huggingface_dataset_url.replace("https://huggingface.co/", "").rstrip("/")
        if repo_id.startswith("datasets/"):
            repo_id = repo_id[len("datasets/"):]
    else:
        repo_id = huggingface_dataset_url

    retry_attempts = 3
    dataset_path = None
    for attempt in range(retry_attempts):
        try:
            if attempt > 0:
                print(f"Retrying... (Attempt {attempt + 1})")
                # Wipe the cache so a corrupted partial download
                # cannot poison the retry.
                clear_cache()
            # BUG FIX: snapshot_download() has no "num_proc" parameter
            # (passing it raises TypeError); the concurrency knob is
            # "max_workers".  "resume_download" is deprecated and has no
            # effect alongside force_download=True, so it was dropped.
            dataset_path = snapshot_download(
                repo_id,
                repo_type="dataset",
                force_download=True,
                max_workers=64,
            )
            break
        except OSError as e:
            print(f"Download failed on attempt {attempt + 1}/{retry_attempts}: {e}")
            if attempt == retry_attempts - 1:
                raise
            time.sleep(5)

    if dataset_path is None:
        raise EnvironmentError("Failed to download dataset after multiple attempts.")

    # Only these known group folders are considered for zipping; anything
    # else in the snapshot is ignored.
    folders = [
        "hololive-jp", "hololive-id", "hololive-en", "holostars",
        "hololive-cn", "nijisanji", "nijisanji-en", "vshojo",
        "phaseconnect", "indies", "other"
    ]

    zip_files = []

    for folder in folders:
        folder_path = os.path.join(dataset_path, folder)
        if not os.path.exists(folder_path):
            continue

        # A folder without model_info.json is not a model folder; skip it.
        json_path = os.path.join(folder_path, "model_info.json")
        if not os.path.exists(json_path):
            continue

        # Parse model_info.json purely as a sanity check: an unreadable or
        # malformed file raises here instead of producing a broken archive.
        # (The parsed content itself is not otherwise used.)
        with open(json_path, "r", encoding="utf-8") as f:
            json.load(f)

        zip_name = f"{folder}.zip"
        zip_path = os.path.join(output_dir, zip_name)
        # splitext is safer than str.replace('.zip', ''), which would also
        # mangle a ".zip" occurring earlier in the path.
        shutil.make_archive(base_name=os.path.splitext(zip_path)[0], format="zip", root_dir=folder_path)
        zip_files.append(zip_path)

    return zip_files
|
|
|
def gradio_interface():
    """Build and launch the Gradio UI for the dataset-to-ZIP workflow."""

    def start_process(huggingface_url, output_directory):
        # Make sure the target directory exists before writing archives.
        if not os.path.exists(output_directory):
            os.makedirs(output_directory, exist_ok=True)
        return process_and_zip_folders(huggingface_url, output_directory)

    url_input = gr.Textbox(
        label="Hugging Face Dataset URL",
        placeholder="https://huggingface.co/datasets/soiz1/rvc-models",
    )
    dir_input = gr.Textbox(label="Output Directory", placeholder="/path/to/output")
    zip_output = gr.File(label="Generated ZIP Files")

    app = gr.Interface(
        fn=start_process,
        inputs=[url_input, dir_input],
        outputs=zip_output,
        title="Folder to ZIP Generator",
        description="指定されたフォルダを取得してZIPに圧縮します。",
    )
    app.launch()
|
|
|
# Launch the Gradio app only when executed as a script (not on import).
if __name__ == "__main__":

    gradio_interface()
|
|