|
from huggingface_hub import snapshot_download |
|
import os |
|
import json |
|
import shutil |
|
import gradio as gr |
|
import time |
|
|
|
def _repo_id_from_url(huggingface_dataset_url):
    """Turn a Hugging Face dataset URL (or a bare repo id) into a repo id."""
    hub_prefix = "https://huggingface.co/"
    datasets_prefix = "datasets/"

    # Values typed or pasted into a text box often carry stray whitespace.
    location = huggingface_dataset_url.strip()
    if not location.startswith(hub_prefix):
        # Not a hub URL: treat the value as a repo id already.
        return location

    # Drop only the leading prefix, then any trailing slash.
    repo_id = location[len(hub_prefix):].rstrip("/")
    if repo_id.startswith(datasets_prefix):
        repo_id = repo_id[len(datasets_prefix):]
    return repo_id


def _download_dataset_snapshot(repo_id, retry_attempts=3, retry_delay=5):
    """Download the dataset snapshot, retrying when an OSError is raised."""
    for attempt in range(1, retry_attempts + 1):
        try:
            return snapshot_download(repo_id, repo_type="dataset", force_download=True)
        except OSError as e:
            print(f"Download failed on attempt {attempt}/{retry_attempts}: {e}")
            if attempt == retry_attempts:
                # Out of attempts: let the caller see the original error.
                raise
            time.sleep(retry_delay)

    # Only reachable when retry_attempts < 1, i.e. no download was attempted.
    raise EnvironmentError("Failed to download dataset after multiple attempts.")


def process_and_zip_folders(huggingface_dataset_url, output_dir):
    """Download a Hugging Face dataset and zip a fixed set of its folders.

    The dataset snapshot is downloaded (up to three attempts, five seconds
    apart). Each known folder that exists in the snapshot and contains a
    ``model_info.json`` file is then packed into ``<output_dir>/<folder>.zip``.

    Args:
        huggingface_dataset_url: Either a ``https://huggingface.co/...`` URL
            (with or without the ``datasets/`` path segment) or a repo id.
        output_dir: Directory that receives the generated ZIP files.

    Returns:
        A list with the path of every ZIP file that was written, in the
        order of the fixed folder list. Folders that are missing, or that
        lack ``model_info.json``, are skipped.

    Raises:
        OSError: If the download still fails on the last attempt.
        json.JSONDecodeError: If a folder's ``model_info.json`` is not
            valid JSON.
    """
    repo_id = _repo_id_from_url(huggingface_dataset_url)
    dataset_path = _download_dataset_snapshot(repo_id)

    folders = (
        "hololive-jp", "hololive-id", "hololive-en", "holostars",
        "hololive-cn", "nijisanji", "nijisanji-en", "vshojo",
        "phaseconnect", "indies", "other",
    )

    zip_files = []
    for folder in folders:
        folder_path = os.path.join(dataset_path, folder)
        if not os.path.isdir(folder_path):
            continue

        json_path = os.path.join(folder_path, "model_info.json")
        if not os.path.isfile(json_path):
            continue

        # The parsed content is not used any further; loading it means a
        # malformed model_info.json raises here instead of being archived.
        with open(json_path, "r", encoding="utf-8") as f:
            json.load(f)

        # Build the archive base name directly. Deriving it by deleting
        # ".zip" from the full path would also mangle an output_dir whose
        # own name happens to contain ".zip".
        archive_base = os.path.join(output_dir, folder)
        shutil.make_archive(base_name=archive_base, format="zip", root_dir=folder_path)
        zip_files.append(f"{archive_base}.zip")

    return zip_files
|
|
|
def gradio_interface():
    """Build the Gradio form for the folder-to-ZIP workflow and launch it.

    The form takes a dataset URL and an output directory as text inputs and
    shows the generated ZIP files in a file output component.
    """

    def start_process(huggingface_url, output_directory):
        # Thin adapter: forward the two form values to the worker function.
        return process_and_zip_folders(huggingface_url, output_directory)

    url_box = gr.Textbox(
        label="Hugging Face Dataset URL",
        placeholder="https://huggingface.co/datasets/soiz1/rvc-models",
    )
    directory_box = gr.Textbox(
        label="Output Directory",
        placeholder="/path/to/output",
    )
    zip_output = gr.File(label="Generated ZIP Files")

    # The description is Japanese for "Fetches the specified folders and
    # compresses them into ZIP files."
    gr.Interface(
        fn=start_process,
        inputs=[url_box, directory_box],
        outputs=zip_output,
        title="Folder to ZIP Generator",
        description="指定されたフォルダを取得してZIPに圧縮します。",
    ).launch()
|
|
|
# Start the web UI only when this file is executed as a script, so that
# importing the module does not launch anything.
if __name__ == "__main__":
    gradio_interface()
|
|