# dl / app.py
# soiz1's picture
# Update app.py
# 52295e1 verified
from huggingface_hub import snapshot_download
import os
import json
import shutil
import gradio as gr
import time
def clear_cache():
    """Delete the local Hugging Face home/cache directory, if it exists.

    The directory is resolved with the same precedence huggingface_hub
    documents for its data directory:

    1. ``$HF_HOME`` when set,
    2. ``$XDG_CACHE_HOME/huggingface`` when ``XDG_CACHE_HOME`` is set,
    3. ``~/.cache/huggingface`` otherwise.

    Prints which directory is being cleared, or a notice when there is
    nothing to clear. Returns ``None``.
    """
    # Empty environment values are treated as unset so we never resolve to "".
    hf_home = os.environ.get("HF_HOME")
    if not hf_home:
        cache_root = os.environ.get("XDG_CACHE_HOME") or os.path.join("~", ".cache")
        hf_home = os.path.join(cache_root, "huggingface")
    cache_dir = os.path.expandvars(os.path.expanduser(hf_home))

    if os.path.exists(cache_dir):
        print(f"Clearing cache at: {cache_dir}")
        shutil.rmtree(cache_dir)
    else:
        print("No cache found to clear.")
# Top-level dataset folders that are eligible for archiving.
_TARGET_FOLDERS = (
    "hololive-jp", "hololive-id", "hololive-en", "holostars",
    "hololive-cn", "nijisanji", "nijisanji-en", "vshojo",
    "phaseconnect", "indies", "other",
)


def _extract_repo_id(url_or_id):
    """Return the bare dataset repo ID for a Hub URL or an already-bare ID."""
    hub_prefix = "https://huggingface.co/"
    candidate = url_or_id.strip()
    if not candidate.startswith(hub_prefix):
        return candidate
    path = candidate[len(hub_prefix):].strip("/")
    if path.startswith("datasets/"):
        path = path[len("datasets/"):]
    # URLs copied from the browser often carry extra segments such as
    # "/tree/main"; a repo ID is at most "<namespace>/<name>".
    return "/".join(path.split("/")[:2])


def _download_dataset(repo_id, retry_attempts=3, retry_delay=5):
    """Download a dataset snapshot, retrying on OSError; return its local path."""
    for attempt in range(retry_attempts):
        try:
            if attempt > 0:
                print(f"Retrying... (Attempt {attempt + 1})")
                clear_cache()  # start the retry from a clean cache
            # `max_workers` is the concurrency knob of snapshot_download; the
            # previous `num_proc` keyword is not accepted and raised TypeError.
            # The deprecated `resume_download` flag is no longer passed.
            return snapshot_download(
                repo_id,
                repo_type="dataset",
                force_download=True,
                max_workers=64,
            )
        except OSError as e:
            print(f"Download failed on attempt {attempt + 1}/{retry_attempts}: {e}")
            if attempt == retry_attempts - 1:
                raise  # out of attempts: surface the last error to the caller
            time.sleep(retry_delay)  # brief pause before the next attempt
    # Only reachable when retry_attempts <= 0.
    raise EnvironmentError("Failed to download dataset after multiple attempts.")


def process_and_zip_folders(huggingface_dataset_url, output_dir):
    """Download a Hub dataset and zip each known folder that has a model_info.json.

    Args:
        huggingface_dataset_url: Either a ``https://huggingface.co/...`` URL
            (with or without the ``datasets/`` segment) or a bare repo ID.
        output_dir: Directory that receives one ``<folder>.zip`` per
            archived folder.

    Returns:
        A list with the path of every ZIP file written, in the order of the
        built-in folder list. Folders that are missing from the snapshot, or
        that have no ``model_info.json``, are skipped.

    Raises:
        OSError: If every download attempt fails (the last error is re-raised).
        json.JSONDecodeError: If a folder's ``model_info.json`` is not valid JSON.
    """
    repo_id = _extract_repo_id(huggingface_dataset_url)
    dataset_path = _download_dataset(repo_id)

    zip_files = []
    for folder in _TARGET_FOLDERS:
        folder_path = os.path.join(dataset_path, folder)
        json_path = os.path.join(folder_path, "model_info.json")
        # A missing folder implies a missing JSON file, so one check covers both.
        if not os.path.exists(json_path):
            continue

        # The parsed content is not used; loading it only rejects folders
        # whose metadata file is malformed.
        with open(json_path, "r", encoding="utf-8") as f:
            json.load(f)

        # make_archive appends ".zip" itself, so hand it the extension-less
        # base instead of stripping ".zip" from a full path (which would also
        # mangle an output_dir that happens to contain ".zip").
        archive_base = os.path.join(output_dir, folder)
        shutil.make_archive(base_name=archive_base, format="zip", root_dir=folder_path)
        zip_files.append(f"{archive_base}.zip")
    return zip_files
def gradio_interface():
    """Build the Gradio UI for the dataset-to-ZIP workflow and launch it.

    The form takes a Hugging Face dataset URL and an output directory, runs
    ``process_and_zip_folders`` on submit, and offers the resulting ZIP files
    for download.
    """

    def start_process(huggingface_url, output_directory):
        """Ensure the output directory exists, then return the generated ZIP paths."""
        import tempfile  # local import: only needed for the blank-directory fallback

        # The textbox has no default value, so a blank submission is the
        # common case; os.makedirs("") would raise FileNotFoundError.
        output_directory = (output_directory or "").strip()
        if not output_directory:
            output_directory = tempfile.mkdtemp(prefix="dataset_zips_")
        os.makedirs(output_directory, exist_ok=True)
        return process_and_zip_folders(huggingface_url, output_directory)

    interface = gr.Interface(
        fn=start_process,
        inputs=[
            gr.Textbox(
                label="Hugging Face Dataset URL",
                placeholder="https://huggingface.co/datasets/soiz1/rvc-models",
            ),
            gr.Textbox(label="Output Directory", placeholder="/path/to/output"),
        ],
        outputs=gr.File(label="Generated ZIP Files"),
        title="Folder to ZIP Generator",
        description="指定されたフォルダを取得してZIPに圧縮します。",
    )
    interface.launch()
# Script entry point: start the Gradio app only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    gradio_interface()