Upload main.py
Browse files
main.py
CHANGED
@@ -11,12 +11,16 @@ import time
|
|
11 |
from typing import Optional
|
12 |
|
13 |
import requests
|
|
|
|
|
14 |
from fastapi import FastAPI
|
15 |
-
from huggingface_hub import HfApi, hf_hub_download, login
|
16 |
|
|
|
17 |
logging.basicConfig(level=logging.INFO)
|
18 |
logger = logging.getLogger(__name__)
|
19 |
|
|
|
20 |
class Config:
|
21 |
"""設定用のクラス"""
|
22 |
HUGGINGFACE_API_KEY = os.environ["HUGGINGFACE_API_KEY"]
|
@@ -36,6 +40,7 @@ class Config:
|
|
36 |
"hash": "https://civitai.com/api/v1/model-versions/by-hash/"
|
37 |
}
|
38 |
JST = datetime.timezone(datetime.timedelta(hours=9))
|
|
|
39 |
HEADERS = {
|
40 |
'Authorization': f'Bearer {CIVITAI_API_TOKEN}',
|
41 |
'User-Agent': 'civitai-crawler/1.0',
|
@@ -49,7 +54,7 @@ class Config:
|
|
49 |
|
50 |
|
51 |
class CivitAICrawler:
|
52 |
-
"""CivitAIからモデルをダウンロードし、Hugging Face
|
53 |
|
54 |
def __init__(self, config: Config):
|
55 |
self.config = config
|
@@ -64,12 +69,14 @@ class CivitAICrawler:
|
|
64 |
self.setup_routes()
|
65 |
|
66 |
def setup_routes(self):
|
|
|
67 |
@self.app.get("/")
|
68 |
def read_root():
|
69 |
now = str(datetime.datetime.now(self.jst))
|
70 |
description = f"""
|
71 |
CivitAIを定期的に周回し新規モデルを {self.repo_ids['current']} にバックアップするSpaceです。
|
72 |
-
|
|
|
73 |
Status: {now} + currently running :D
|
74 |
"""
|
75 |
return description
|
@@ -79,7 +86,7 @@ class CivitAICrawler:
|
|
79 |
asyncio.create_task(self.crawl())
|
80 |
|
81 |
# =============================================================================
|
82 |
-
# rclone
|
83 |
# =============================================================================
|
84 |
def setup_rclone_conf(self):
|
85 |
"""環境変数 RCLONE_CONF_BASE64 から rclone.conf を生成し、RCLONE_CONFIG 環境変数を設定"""
|
@@ -97,12 +104,8 @@ class CivitAICrawler:
|
|
97 |
|
98 |
def encrypt_with_rclone(self, local_path: str):
|
99 |
"""
|
100 |
-
指定ファイル or ディレクトリを cryptLocal
|
101 |
-
|
102 |
-
cryptLocal:{os.path.basename(local_path)} の形でコピーする。
|
103 |
-
|
104 |
-
※ rclone の crypt設定が filename_encryption = standard 等なら、
|
105 |
-
フォルダ名やファイル名も丸ごと暗号化される
|
106 |
"""
|
107 |
if not os.path.exists(local_path):
|
108 |
raise FileNotFoundError(f"[ERROR] Local path not found: {local_path}")
|
@@ -111,13 +114,11 @@ class CivitAICrawler:
|
|
111 |
if os.path.isdir(self.config.ENCRYPTED_DIR):
|
112 |
shutil.rmtree(self.config.ENCRYPTED_DIR, ignore_errors=True)
|
113 |
|
114 |
-
#
|
115 |
-
# 暗号化されたディレクトリを作成できるようにする。
|
116 |
top_level_name = os.path.basename(local_path.rstrip("/"))
|
117 |
if not top_level_name:
|
118 |
-
top_level_name = "
|
119 |
|
120 |
-
# rclone copy local_path -> cryptLocal:top_level_name
|
121 |
cmd = ["rclone", "copy", local_path, f"cryptLocal:{top_level_name}", "-v"]
|
122 |
logger.info(f"[INFO] Running: {' '.join(cmd)}")
|
123 |
subprocess.run(cmd, check=True)
|
@@ -130,24 +131,16 @@ class CivitAICrawler:
|
|
130 |
|
131 |
def upload_encrypted_files(self, repo_id: str, base_path_in_repo: str = ""):
|
132 |
"""
|
133 |
-
self.config.ENCRYPTED_DIR
|
134 |
-
|
135 |
-
|
136 |
-
rclone で filename_encryption=standard が有効な場合:
|
137 |
-
- 元フォルダ名/ファイル名は完全に暗号化され、HF上では判読不能な名称になる。
|
138 |
-
|
139 |
-
base_path_in_repo が空("")の場合は、
|
140 |
-
`/home/user/app/encrypted` の構造が HF リポジトリの直下に展開される。
|
141 |
"""
|
142 |
max_retries = 5
|
143 |
-
|
144 |
for root, dirs, files in os.walk(self.config.ENCRYPTED_DIR):
|
145 |
for fn in files:
|
146 |
encrypted_file_path = os.path.join(root, fn)
|
147 |
if not os.path.isfile(encrypted_file_path):
|
148 |
continue
|
149 |
|
150 |
-
# self.config.ENCRYPTED_DIR からの相対パス(暗号化後のフォルダ名・ファイル名)
|
151 |
relative_path = os.path.relpath(encrypted_file_path, self.config.ENCRYPTED_DIR)
|
152 |
upload_path_in_repo = os.path.join(base_path_in_repo, relative_path)
|
153 |
|
@@ -372,28 +365,103 @@ class CivitAICrawler:
|
|
372 |
else:
|
373 |
return f"{repo_id}1"
|
374 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
375 |
# =============================================================================
|
376 |
-
#
|
377 |
# =============================================================================
|
378 |
-
def
|
|
|
|
|
|
|
|
|
|
|
379 |
"""
|
380 |
-
|
381 |
-
rcloneでフォルダを作って暗号化するため、フォルダ構造としてアップされる。
|
382 |
"""
|
383 |
-
|
384 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
385 |
if os.path.isdir(self.config.ENCRYPTED_DIR):
|
386 |
shutil.rmtree(self.config.ENCRYPTED_DIR, ignore_errors=True)
|
387 |
|
388 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
389 |
"""
|
390 |
-
|
391 |
-
rclone copy folder_path => cryptLocal:folder_pathのbasename
|
392 |
-
すると {ENCRYPTED_DIR}/{暗号化後のトップレベルフォルダ} が生成されるため、
|
393 |
-
それを再帰的にHugging Faceへアップロードする。
|
394 |
"""
|
|
|
|
|
|
|
|
|
395 |
self.encrypt_with_rclone(folder_path)
|
396 |
-
self.upload_encrypted_files(repo_id=
|
|
|
397 |
if os.path.isdir(self.config.ENCRYPTED_DIR):
|
398 |
shutil.rmtree(self.config.ENCRYPTED_DIR, ignore_errors=True)
|
399 |
|
@@ -462,7 +530,7 @@ class CivitAICrawler:
|
|
462 |
self.save_model_info(model_info, folder)
|
463 |
|
464 |
# ====== rclone でフォルダ暗号化 → HFへアップロード ======
|
465 |
-
self.
|
466 |
|
467 |
# モデルリスト更新
|
468 |
modelpage_name = model_info.get("name", "Unnamed Model")
|
@@ -527,12 +595,12 @@ class CivitAICrawler:
|
|
527 |
logger.info(f"Updated log file: {self.config.LOG_FILE}")
|
528 |
|
529 |
# ログファイルをリポジトリにアップロード
|
530 |
-
self.
|
531 |
file_path=self.config.LOG_FILE,
|
532 |
repo_id=self.repo_ids["log"],
|
533 |
path_in_repo=self.config.LOG_FILE
|
534 |
)
|
535 |
-
logger.info("Uploaded log file to repository.")
|
536 |
|
537 |
logger.info("No new models found.")
|
538 |
await asyncio.sleep(60)
|
@@ -547,13 +615,13 @@ class CivitAICrawler:
|
|
547 |
f.write(f"{self.repo_ids['current']}\n")
|
548 |
logger.info(f"Updated log file with new model ID: {model_id}")
|
549 |
|
550 |
-
# ログとモデルリストのアップロード
|
551 |
-
self.
|
552 |
file_path=self.config.LOG_FILE,
|
553 |
repo_id=self.repo_ids["log"],
|
554 |
path_in_repo=self.config.LOG_FILE
|
555 |
)
|
556 |
-
self.
|
557 |
file_path=self.config.LIST_FILE,
|
558 |
repo_id=self.repo_ids["model_list"],
|
559 |
path_in_repo=self.config.LIST_FILE
|
|
|
11 |
from typing import Optional
|
12 |
|
13 |
import requests
|
14 |
+
from bs4 import BeautifulSoup
|
15 |
+
from fake_useragent import UserAgent
|
16 |
from fastapi import FastAPI
|
17 |
+
from huggingface_hub import HfApi, create_repo, hf_hub_download, login
|
18 |
|
19 |
+
# ロギングの設定
|
20 |
logging.basicConfig(level=logging.INFO)
|
21 |
logger = logging.getLogger(__name__)
|
22 |
|
23 |
+
|
24 |
class Config:
|
25 |
"""設定用のクラス"""
|
26 |
HUGGINGFACE_API_KEY = os.environ["HUGGINGFACE_API_KEY"]
|
|
|
40 |
"hash": "https://civitai.com/api/v1/model-versions/by-hash/"
|
41 |
}
|
42 |
JST = datetime.timezone(datetime.timedelta(hours=9))
|
43 |
+
UA = UserAgent()
|
44 |
HEADERS = {
|
45 |
'Authorization': f'Bearer {CIVITAI_API_TOKEN}',
|
46 |
'User-Agent': 'civitai-crawler/1.0',
|
|
|
54 |
|
55 |
|
56 |
class CivitAICrawler:
|
57 |
+
"""CivitAIからモデルをダウンロードし、Hugging Faceにアップロードするクラス"""
|
58 |
|
59 |
def __init__(self, config: Config):
|
60 |
self.config = config
|
|
|
69 |
self.setup_routes()
|
70 |
|
71 |
def setup_routes(self):
|
72 |
+
"""FastAPIのルーティングを設定する。"""
|
73 |
@self.app.get("/")
|
74 |
def read_root():
|
75 |
now = str(datetime.datetime.now(self.jst))
|
76 |
description = f"""
|
77 |
CivitAIを定期的に周回し新規モデルを {self.repo_ids['current']} にバックアップするSpaceです。
|
78 |
+
model_list.log や civitai_backup.log は暗号化しないでアップロードします。
|
79 |
+
モデルのフォルダやファイルは暗号化してアップロードします。
|
80 |
Status: {now} + currently running :D
|
81 |
"""
|
82 |
return description
|
|
|
86 |
asyncio.create_task(self.crawl())
|
87 |
|
88 |
# =============================================================================
|
89 |
+
# rclone の設定・暗号化アップロード処理
|
90 |
# =============================================================================
|
91 |
def setup_rclone_conf(self):
|
92 |
"""環境変数 RCLONE_CONF_BASE64 から rclone.conf を生成し、RCLONE_CONFIG 環境変数を設定"""
|
|
|
104 |
|
105 |
def encrypt_with_rclone(self, local_path: str):
|
106 |
"""
|
107 |
+
指定ファイル or ディレクトリを cryptLocal: にコピー。
|
108 |
+
フォルダ構造やファイル名を rclone の filename_encryption 設定に応じて暗号化する。
|
|
|
|
|
|
|
|
|
109 |
"""
|
110 |
if not os.path.exists(local_path):
|
111 |
raise FileNotFoundError(f"[ERROR] Local path not found: {local_path}")
|
|
|
114 |
if os.path.isdir(self.config.ENCRYPTED_DIR):
|
115 |
shutil.rmtree(self.config.ENCRYPTED_DIR, ignore_errors=True)
|
116 |
|
117 |
+
# コピー先: cryptLocal:{basename(local_path)}
|
|
|
118 |
top_level_name = os.path.basename(local_path.rstrip("/"))
|
119 |
if not top_level_name:
|
120 |
+
top_level_name = "unnamed"
|
121 |
|
|
|
122 |
cmd = ["rclone", "copy", local_path, f"cryptLocal:{top_level_name}", "-v"]
|
123 |
logger.info(f"[INFO] Running: {' '.join(cmd)}")
|
124 |
subprocess.run(cmd, check=True)
|
|
|
131 |
|
132 |
def upload_encrypted_files(self, repo_id: str, base_path_in_repo: str = ""):
|
133 |
"""
|
134 |
+
self.config.ENCRYPTED_DIR 以下に生成された暗号化後のファイル・フォルダ構造を再帰的に
|
135 |
+
Hugging Face にアップロードする。フォルダ名・ファイル名は rclone により暗号化済み。
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
"""
|
137 |
max_retries = 5
|
|
|
138 |
for root, dirs, files in os.walk(self.config.ENCRYPTED_DIR):
|
139 |
for fn in files:
|
140 |
encrypted_file_path = os.path.join(root, fn)
|
141 |
if not os.path.isfile(encrypted_file_path):
|
142 |
continue
|
143 |
|
|
|
144 |
relative_path = os.path.relpath(encrypted_file_path, self.config.ENCRYPTED_DIR)
|
145 |
upload_path_in_repo = os.path.join(base_path_in_repo, relative_path)
|
146 |
|
|
|
365 |
else:
|
366 |
return f"{repo_id}1"
|
367 |
|
368 |
+
|
369 |
+
# =============================================================================
|
370 |
+
# ★ 暗号化しないアップロード(ログや model_list.log 用)
|
371 |
+
# =============================================================================
|
372 |
+
def upload_file_raw(
    self,
    file_path: str,
    repo_id: Optional[str] = None,
    path_in_repo: Optional[str] = None
):
    """Upload one file to Hugging Face as-is, without rclone encryption.

    Used for plain-text artifacts such as civitai_backup.log and
    model_list.log.

    Args:
        file_path: Local path of the file to upload.
        repo_id: Destination repository. Defaults to
            ``self.repo_ids['current']`` when None.
        path_in_repo: Destination path inside the repository. Defaults to
            the basename of ``file_path`` when None.

    Raises:
        Exception: Re-raises the last upload error once ``max_retries``
            ordinary failures have accumulated.
    """
    target_repo = self.repo_ids['current'] if repo_id is None else repo_id
    remote_path = os.path.basename(file_path) if path_in_repo is None else path_in_repo

    max_retries = 5
    failures = 0
    while failures < max_retries:
        try:
            self.api.upload_file(
                path_or_fileobj=file_path,
                repo_id=target_repo,
                path_in_repo=remote_path,
            )
        except Exception as exc:
            failures += 1
            reason = str(exc)

            if "over the limit of 100000 files" in reason:
                # Repository is full: roll over to the next repository name.
                # NOTE(review): the replacement is always derived from
                # repo_ids['current'], even when the caller passed a
                # different repo_id -- confirm this is intended.
                logger.warning("Repository file limit exceeded, creating a new repository.")
                self.repo_ids['current'] = self.increment_repo_name(self.repo_ids['current'])
                self.api.create_repo(repo_id=self.repo_ids['current'], private=True)
                # A new destination gets a fresh retry budget.
                failures = 0
                target_repo = self.repo_ids['current']
                continue

            if "you can retry this action in about 1 hour" in reason:
                logger.warning("Encountered 'retry in 1 hour' error. Waiting 1 hour before retrying...")
                # Blocks the calling thread for one hour.
                time.sleep(3600)
                # Rate-limit waits do not consume the retry budget.
                failures -= 1
                continue

            if failures >= max_retries:
                logger.error(f"Failed to upload raw file after {max_retries} attempts: {file_path}")
                raise
            logger.warning(f"Failed to upload raw file {file_path}, retry {failures}/{max_retries}...")
        else:
            logger.info(f"[OK] Uploaded {file_path} => {target_repo}/{remote_path}")
            return
|
419 |
+
|
420 |
# =============================================================================
|
421 |
+
# ★ 暗号化してアップロード (単ファイル)
|
422 |
# =============================================================================
|
423 |
+
def upload_file_encrypted(
    self,
    file_path: str,
    repo_id: Optional[str] = None,
    path_in_repo: Optional[str] = None
):
    """Encrypt a single file with rclone and upload the result to Hugging Face.

    The file is passed to ``self.encrypt_with_rclone`` and whatever ends up
    under ``self.config.ENCRYPTED_DIR`` is uploaded through
    ``self.upload_encrypted_files``.

    Args:
        file_path: Local path of the file to encrypt and upload.
        repo_id: Destination repository. Defaults to
            ``self.repo_ids['current']`` when None.
        path_in_repo: Base path inside the repository, forwarded as
            ``base_path_in_repo``. None or an empty value becomes "".

    Raises:
        Exception: Any error raised by the encryption or upload step
            propagates to the caller after cleanup has run.
    """
    if repo_id is None:
        repo_id = self.repo_ids['current']
    base_path = path_in_repo or ""  # base path on the Hugging Face side

    try:
        # 1) Encrypt with rclone.
        self.encrypt_with_rclone(file_path)
        # 2) Upload the encrypted output.
        self.upload_encrypted_files(repo_id=repo_id, base_path_in_repo=base_path)
    finally:
        # 3) Always remove the encrypted staging directory, including on
        #    failure, so stale encrypted output does not linger on disk.
        if os.path.isdir(self.config.ENCRYPTED_DIR):
            shutil.rmtree(self.config.ENCRYPTED_DIR, ignore_errors=True)
|
445 |
|
446 |
+
# =============================================================================
|
447 |
+
# ★ 暗号化してアップロード (フォルダ)
|
448 |
+
# =============================================================================
|
449 |
+
def upload_folder_encrypted(
    self,
    folder_path: str,
    repo_id: Optional[str] = None,
    path_in_repo: Optional[str] = None
):
    """Encrypt a folder with rclone and upload the encrypted tree to Hugging Face.

    The folder is passed to ``self.encrypt_with_rclone`` and whatever ends
    up under ``self.config.ENCRYPTED_DIR`` is uploaded through
    ``self.upload_encrypted_files``.

    Args:
        folder_path: Local path of the folder to encrypt and upload.
        repo_id: Destination repository. Defaults to
            ``self.repo_ids['current']`` when None.
        path_in_repo: Base path inside the repository, forwarded as
            ``base_path_in_repo``. None or an empty value becomes "".

    Raises:
        Exception: Any error raised by the encryption or upload step
            propagates to the caller after cleanup has run.
    """
    if repo_id is None:
        repo_id = self.repo_ids['current']
    base_path = path_in_repo or ""

    try:
        self.encrypt_with_rclone(folder_path)
        self.upload_encrypted_files(repo_id=repo_id, base_path_in_repo=base_path)
    finally:
        # Always remove the encrypted staging directory, including on
        # failure, so stale encrypted output does not linger on disk.
        if os.path.isdir(self.config.ENCRYPTED_DIR):
            shutil.rmtree(self.config.ENCRYPTED_DIR, ignore_errors=True)
|
467 |
|
|
|
530 |
self.save_model_info(model_info, folder)
|
531 |
|
532 |
# ====== rclone でフォルダ暗号化 → HFへアップロード ======
|
533 |
+
self.upload_folder_encrypted(folder)
|
534 |
|
535 |
# モデルリスト更新
|
536 |
modelpage_name = model_info.get("name", "Unnamed Model")
|
|
|
595 |
logger.info(f"Updated log file: {self.config.LOG_FILE}")
|
596 |
|
597 |
# ログファイルをリポジトリにアップロード
|
598 |
+
self.upload_file_raw(
|
599 |
file_path=self.config.LOG_FILE,
|
600 |
repo_id=self.repo_ids["log"],
|
601 |
path_in_repo=self.config.LOG_FILE
|
602 |
)
|
603 |
+
logger.info("Uploaded log file to repository (unencrypted).")
|
604 |
|
605 |
logger.info("No new models found.")
|
606 |
await asyncio.sleep(60)
|
|
|
615 |
f.write(f"{self.repo_ids['current']}\n")
|
616 |
logger.info(f"Updated log file with new model ID: {model_id}")
|
617 |
|
618 |
+
# ログとモデルリストのアップロード
|
619 |
+
self.upload_file_raw(
|
620 |
file_path=self.config.LOG_FILE,
|
621 |
repo_id=self.repo_ids["log"],
|
622 |
path_in_repo=self.config.LOG_FILE
|
623 |
)
|
624 |
+
self.upload_file_raw(
|
625 |
file_path=self.config.LIST_FILE,
|
626 |
repo_id=self.repo_ids["model_list"],
|
627 |
path_in_repo=self.config.LIST_FILE
|