Upload main.py
Browse files
main.py
CHANGED
@@ -296,11 +296,6 @@ class CivitAICrawler:
|
|
296 |
logger.error(f"Failed to create dummy file for {file_name}: {e}")
|
297 |
continue
|
298 |
|
299 |
-
# ===== 旧コード: 直接アップロードしていた箇所を削除して、rclone暗号化&アップロードに変更する場合は呼び出さない =====
|
300 |
-
# 旧来は self.upload_file(...) していたが、このタイミングでアップロードしたくない場合は消すかコメントアウト
|
301 |
-
# self.upload_file(local_file_path, path_in_repo=...)
|
302 |
-
# os.remove(local_file_path)
|
303 |
-
|
304 |
def download_images(self, model_versions: list, folder: str):
|
305 |
"""モデルの画像をダウンロードし、指定されたフォルダに保存する。"""
|
306 |
images_folder = os.path.join(folder, "images")
|
@@ -353,9 +348,6 @@ class CivitAICrawler:
|
|
353 |
with open(os.path.join(folder, "model_info.json"), "w") as file:
|
354 |
json.dump(model_info, file, indent=2)
|
355 |
|
356 |
-
# =============================================================================
|
357 |
-
# 以下はダウンロードやモデル情報処理の部分(元コードと同等)
|
358 |
-
# =============================================================================
|
359 |
@staticmethod
|
360 |
def increment_repo_name(repo_id: str) -> str:
|
361 |
match = re.search(r'(\d+)$', repo_id)
|
@@ -365,7 +357,6 @@ class CivitAICrawler:
|
|
365 |
else:
|
366 |
return f"{repo_id}1"
|
367 |
|
368 |
-
|
369 |
# =============================================================================
|
370 |
# ★ 暗号化しないアップロード(ログや model_list.log 用)
|
371 |
# =============================================================================
|
@@ -451,20 +442,42 @@ class CivitAICrawler:
|
|
451 |
folder_path: str,
|
452 |
repo_id: Optional[str] = None,
|
453 |
path_in_repo: Optional[str] = None
|
454 |
-
):
|
455 |
"""
|
456 |
フォルダを rclone で暗号化し、暗号化されたフォルダ構造ごとアップロード。
|
|
|
457 |
"""
|
458 |
if repo_id is None:
|
459 |
repo_id = self.repo_ids['current']
|
460 |
base_path = path_in_repo or ""
|
461 |
|
|
|
462 |
self.encrypt_with_rclone(folder_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
463 |
self.upload_encrypted_files(repo_id=repo_id, base_path_in_repo=base_path)
|
464 |
|
|
|
465 |
if os.path.isdir(self.config.ENCRYPTED_DIR):
|
466 |
shutil.rmtree(self.config.ENCRYPTED_DIR, ignore_errors=True)
|
467 |
|
|
|
|
|
|
|
468 |
def read_model_list(self):
|
469 |
"""モデルリストを読み込む。"""
|
470 |
model_list = {}
|
@@ -513,7 +526,8 @@ class CivitAICrawler:
|
|
513 |
logger.warning(f"No 'Model' type file found for model ID {model_id}. Using first file's name.")
|
514 |
|
515 |
os.makedirs(folder, exist_ok=True)
|
516 |
-
|
|
|
517 |
model_list = self.read_model_list()
|
518 |
|
519 |
if model_hf_url in model_list:
|
@@ -529,12 +543,16 @@ class CivitAICrawler:
|
|
529 |
self.save_html_content(model_url, folder)
|
530 |
self.save_model_info(model_info, folder)
|
531 |
|
532 |
-
#
|
533 |
-
|
|
|
534 |
|
535 |
# モデルリスト更新
|
|
|
536 |
modelpage_name = model_info.get("name", "Unnamed Model")
|
537 |
-
|
|
|
|
|
538 |
with open(self.config.LIST_FILE, "a", encoding="utf-8") as f:
|
539 |
f.write(f"{modelpage_name}: {model_hf_url}\n")
|
540 |
|
@@ -551,11 +569,11 @@ class CivitAICrawler:
|
|
551 |
try:
|
552 |
login(token=self.config.HUGGINGFACE_API_KEY, add_to_git_credential=True)
|
553 |
|
554 |
-
# model_list.log
|
555 |
model_list_path = hf_hub_download(repo_id=self.repo_ids['model_list'], filename=self.config.LIST_FILE)
|
556 |
shutil.copyfile(model_list_path, f"./{self.config.LIST_FILE}")
|
557 |
|
558 |
-
#
|
559 |
local_file_path = hf_hub_download(repo_id=self.repo_ids["log"], filename=self.config.LOG_FILE)
|
560 |
shutil.copyfile(local_file_path, f"./{self.config.LOG_FILE}")
|
561 |
|
@@ -588,13 +606,13 @@ class CivitAICrawler:
|
|
588 |
else:
|
589 |
await asyncio.sleep(2)
|
590 |
else:
|
591 |
-
#
|
592 |
with open(self.config.LOG_FILE, "w", encoding="utf-8") as f:
|
593 |
f.write(json.dumps(latest_model_ids) + "\n")
|
594 |
f.write(f"{self.repo_ids['current']}\n")
|
595 |
logger.info(f"Updated log file: {self.config.LOG_FILE}")
|
596 |
|
597 |
-
#
|
598 |
self.upload_file_raw(
|
599 |
file_path=self.config.LOG_FILE,
|
600 |
repo_id=self.repo_ids["log"],
|
@@ -615,7 +633,7 @@ class CivitAICrawler:
|
|
615 |
f.write(f"{self.repo_ids['current']}\n")
|
616 |
logger.info(f"Updated log file with new model ID: {model_id}")
|
617 |
|
618 |
-
#
|
619 |
self.upload_file_raw(
|
620 |
file_path=self.config.LOG_FILE,
|
621 |
repo_id=self.repo_ids["log"],
|
|
|
296 |
logger.error(f"Failed to create dummy file for {file_name}: {e}")
|
297 |
continue
|
298 |
|
|
|
|
|
|
|
|
|
|
|
299 |
def download_images(self, model_versions: list, folder: str):
|
300 |
"""モデルの画像をダウンロードし、指定されたフォルダに保存する。"""
|
301 |
images_folder = os.path.join(folder, "images")
|
|
|
348 |
with open(os.path.join(folder, "model_info.json"), "w") as file:
|
349 |
json.dump(model_info, file, indent=2)
|
350 |
|
|
|
|
|
|
|
351 |
@staticmethod
|
352 |
def increment_repo_name(repo_id: str) -> str:
|
353 |
match = re.search(r'(\d+)$', repo_id)
|
|
|
357 |
else:
|
358 |
return f"{repo_id}1"
|
359 |
|
|
|
360 |
# =============================================================================
|
361 |
# ★ 暗号化しないアップロード(ログや model_list.log 用)
|
362 |
# =============================================================================
|
|
|
442 |
folder_path: str,
|
443 |
repo_id: Optional[str] = None,
|
444 |
path_in_repo: Optional[str] = None
|
445 |
+
) -> str:
|
446 |
"""
|
447 |
フォルダを rclone で暗号化し、暗号化されたフォルダ構造ごとアップロード。
|
448 |
+
終了後に「実際に Hugging Face 上で使われる暗号化後のトップレベルフォルダ名」を返す。
|
449 |
"""
|
450 |
if repo_id is None:
|
451 |
repo_id = self.repo_ids['current']
|
452 |
base_path = path_in_repo or ""
|
453 |
|
454 |
+
# 1) rcloneにコピーして暗号化
|
455 |
self.encrypt_with_rclone(folder_path)
|
456 |
+
|
457 |
+
# 2) 暗号後のトップディレクトリ名を取得
|
458 |
+
# 例: /home/user/app/encrypted/<暗号フォルダ名>
|
459 |
+
# 基本的にトップレベルディレクトリは1つ想定
|
460 |
+
top_levels = [
|
461 |
+
d for d in os.listdir(self.config.ENCRYPTED_DIR)
|
462 |
+
if os.path.isdir(os.path.join(self.config.ENCRYPTED_DIR, d))
|
463 |
+
]
|
464 |
+
if not top_levels:
|
465 |
+
raise RuntimeError("No top-level folder found after rclone encryption.")
|
466 |
+
if len(top_levels) > 1:
|
467 |
+
logger.warning(f"Multiple top-level folders found after encryption? {top_levels}. Using the first one.")
|
468 |
+
|
469 |
+
encrypted_top_name = top_levels[0]
|
470 |
+
|
471 |
+
# 3) アップロード
|
472 |
self.upload_encrypted_files(repo_id=repo_id, base_path_in_repo=base_path)
|
473 |
|
474 |
+
# 4) 後始末
|
475 |
if os.path.isdir(self.config.ENCRYPTED_DIR):
|
476 |
shutil.rmtree(self.config.ENCRYPTED_DIR, ignore_errors=True)
|
477 |
|
478 |
+
# 5) 実際にHFに作られた「暗号化後のトップフォルダ名」を返す
|
479 |
+
return encrypted_top_name
|
480 |
+
|
481 |
def read_model_list(self):
|
482 |
"""モデルリストを読み込む。"""
|
483 |
model_list = {}
|
|
|
526 |
logger.warning(f"No 'Model' type file found for model ID {model_id}. Using first file's name.")
|
527 |
|
528 |
os.makedirs(folder, exist_ok=True)
|
529 |
+
|
530 |
+
# すでに同フォルダURLがあるかどうか
|
531 |
model_list = self.read_model_list()
|
532 |
|
533 |
if model_hf_url in model_list:
|
|
|
543 |
self.save_html_content(model_url, folder)
|
544 |
self.save_model_info(model_info, folder)
|
545 |
|
546 |
+
# ========== rclone で暗号化フォルダをアップロード ==========
|
547 |
+
# ここで「実際にHF上に作成された暗号化後のトップフォルダ名」を取得
|
548 |
+
encrypted_top_name = self.upload_folder_encrypted(folder)
|
549 |
|
550 |
# モデルリスト更新
|
551 |
+
# ここで実際の暗号化フォルダ名を使ったURLを書き込む
|
552 |
modelpage_name = model_info.get("name", "Unnamed Model")
|
553 |
+
# 例: https://huggingface.co/REPO_ID/tree/main/<暗号化後トップフォルダ>
|
554 |
+
model_hf_url = f"https://huggingface.co/{self.repo_ids['current']}/tree/main/{encrypted_top_name}"
|
555 |
+
|
556 |
with open(self.config.LIST_FILE, "a", encoding="utf-8") as f:
|
557 |
f.write(f"{modelpage_name}: {model_hf_url}\n")
|
558 |
|
|
|
569 |
try:
|
570 |
login(token=self.config.HUGGINGFACE_API_KEY, add_to_git_credential=True)
|
571 |
|
572 |
+
# model_list.logのダウンロード(暗号化せず上書き)
|
573 |
model_list_path = hf_hub_download(repo_id=self.repo_ids['model_list'], filename=self.config.LIST_FILE)
|
574 |
shutil.copyfile(model_list_path, f"./{self.config.LIST_FILE}")
|
575 |
|
576 |
+
# ログファイルのダウンロード(暗号化せず上書き)
|
577 |
local_file_path = hf_hub_download(repo_id=self.repo_ids["log"], filename=self.config.LOG_FILE)
|
578 |
shutil.copyfile(local_file_path, f"./{self.config.LOG_FILE}")
|
579 |
|
|
|
606 |
else:
|
607 |
await asyncio.sleep(2)
|
608 |
else:
|
609 |
+
# 新モデルなし → ログ更新して終了
|
610 |
with open(self.config.LOG_FILE, "w", encoding="utf-8") as f:
|
611 |
f.write(json.dumps(latest_model_ids) + "\n")
|
612 |
f.write(f"{self.repo_ids['current']}\n")
|
613 |
logger.info(f"Updated log file: {self.config.LOG_FILE}")
|
614 |
|
615 |
+
# ログファイルをリポジトリにアップロード(暗号化しない)
|
616 |
self.upload_file_raw(
|
617 |
file_path=self.config.LOG_FILE,
|
618 |
repo_id=self.repo_ids["log"],
|
|
|
633 |
f.write(f"{self.repo_ids['current']}\n")
|
634 |
logger.info(f"Updated log file with new model ID: {model_id}")
|
635 |
|
636 |
+
# ログとモデルリストのアップロード(暗号化しない)
|
637 |
self.upload_file_raw(
|
638 |
file_path=self.config.LOG_FILE,
|
639 |
repo_id=self.repo_ids["log"],
|