Upload main.py
Browse files
main.py
CHANGED
@@ -11,16 +11,13 @@ import time
|
|
11 |
from typing import Optional
|
12 |
|
13 |
import requests
|
14 |
-
from bs4 import BeautifulSoup
|
15 |
-
from fake_useragent import UserAgent
|
16 |
from fastapi import FastAPI
|
17 |
-
from huggingface_hub import HfApi,
|
18 |
|
19 |
# ロギングの設定
|
20 |
logging.basicConfig(level=logging.INFO)
|
21 |
logger = logging.getLogger(__name__)
|
22 |
|
23 |
-
|
24 |
class Config:
|
25 |
"""設定用のクラス"""
|
26 |
HUGGINGFACE_API_KEY = os.environ["HUGGINGFACE_API_KEY"]
|
@@ -40,57 +37,21 @@ class Config:
|
|
40 |
"hash": "https://civitai.com/api/v1/model-versions/by-hash/"
|
41 |
}
|
42 |
JST = datetime.timezone(datetime.timedelta(hours=9))
|
43 |
-
UA = UserAgent()
|
44 |
HEADERS = {
|
45 |
'Authorization': f'Bearer {CIVITAI_API_TOKEN}',
|
46 |
-
'User-Agent':
|
47 |
"Content-Type": "application/json"
|
48 |
}
|
49 |
|
50 |
# ===== rclone 用の追加設定 =====
|
51 |
-
# (環境変数 RCLONE_CONF_BASE64 に rclone.conf をbase64エンコードした文字列を設定しておく想定)
|
52 |
RCLONE_CONF_BASE64 = os.environ.get("RCLONE_CONF_BASE64", "")
|
53 |
# 暗号化されたファイルが出力されるローカルディレクトリ(cryptLocal: の実体)
|
54 |
ENCRYPTED_DIR = "/home/user/app/encrypted"
|
55 |
|
56 |
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
def __init__(self, config: Config):
|
61 |
-
self.config = config
|
62 |
-
self.api = HfApi()
|
63 |
-
self.app = FastAPI()
|
64 |
-
self.repo_ids = self.config.REPO_IDS.copy()
|
65 |
-
self.jst = self.config.JST
|
66 |
-
|
67 |
-
# rclone のセットアップ
|
68 |
-
self.setup_rclone_conf()
|
69 |
-
|
70 |
-
self.setup_routes()
|
71 |
-
|
72 |
-
def setup_routes(self):
|
73 |
-
"""FastAPIのルーティングを設定する。"""
|
74 |
-
|
75 |
-
@self.app.get("/")
|
76 |
-
def read_root():
|
77 |
-
now = str(datetime.datetime.now(self.jst))
|
78 |
-
description = f"""
|
79 |
-
CivitAIを定期的に周回し新規モデルを {self.repo_ids['current']} にバックアップするspaceです。
|
80 |
-
モデルページ名とバックアップURLの紐づきはhttps://huggingface.co/{self.repo_ids['model_list']}/blob/main/model_list.logからどうぞ
|
81 |
-
たまに覗いてもらえると動き続けると思います。
|
82 |
-
再起動が必要になっている場合はRestartボタンを押してもらえると助かります。
|
83 |
-
Status: {now} + currently running :D
|
84 |
-
"""
|
85 |
-
return description
|
86 |
-
|
87 |
-
@self.app.on_event("startup")
|
88 |
-
async def startup_event():
|
89 |
-
asyncio.create_task(self.crawl())
|
90 |
-
|
91 |
-
# =============================
|
92 |
-
# rclone 周りのヘルパー関数
|
93 |
-
# =============================
|
94 |
def setup_rclone_conf(self):
|
95 |
"""環境変数 RCLONE_CONF_BASE64 から rclone.conf を生成し、RCLONE_CONFIG 環境変数を設定"""
|
96 |
if not self.config.RCLONE_CONF_BASE64:
|
@@ -105,52 +66,49 @@ class CivitAICrawler:
|
|
105 |
os.environ["RCLONE_CONFIG"] = conf_path
|
106 |
logger.info(f"[INFO] rclone.conf created at: {conf_path}")
|
107 |
|
108 |
-
def encrypt_with_rclone(self, local_path: str
|
109 |
"""
|
110 |
-
指定ファイル or
|
111 |
-
|
112 |
"""
|
113 |
if not os.path.exists(local_path):
|
114 |
raise FileNotFoundError(f"[ERROR] Local path not found: {local_path}")
|
115 |
|
116 |
-
#
|
117 |
if os.path.isdir(self.config.ENCRYPTED_DIR):
|
118 |
shutil.rmtree(self.config.ENCRYPTED_DIR, ignore_errors=True)
|
119 |
|
120 |
-
# rclone コマンドの実行
|
121 |
cmd = ["rclone", "copy", local_path, "cryptLocal:", "-v"]
|
122 |
logger.info(f"[INFO] Running: {' '.join(cmd)}")
|
123 |
subprocess.run(cmd, check=True)
|
124 |
-
logger.info(f"[OK] rclone copy
|
125 |
|
126 |
if not os.path.isdir(self.config.ENCRYPTED_DIR):
|
127 |
raise FileNotFoundError(
|
128 |
f"[ERROR] {self.config.ENCRYPTED_DIR} not found. Check your rclone config."
|
129 |
)
|
130 |
|
131 |
-
def upload_encrypted_files(self, repo_id: str,
|
132 |
"""
|
133 |
-
self.config.ENCRYPTED_DIR
|
134 |
-
|
135 |
"""
|
136 |
-
if not path_in_repo:
|
137 |
-
path_in_repo = ""
|
138 |
-
|
139 |
max_retries = 5
|
140 |
|
141 |
-
#
|
142 |
for root, dirs, files in os.walk(self.config.ENCRYPTED_DIR):
|
143 |
for fn in files:
|
144 |
encrypted_file_path = os.path.join(root, fn)
|
145 |
if not os.path.isfile(encrypted_file_path):
|
146 |
continue
|
147 |
|
148 |
-
#
|
149 |
relative_path = os.path.relpath(encrypted_file_path, self.config.ENCRYPTED_DIR)
|
150 |
-
# Hugging Face
|
151 |
-
|
|
|
|
|
152 |
|
153 |
-
# HFへのアップロードを試行 (over the limitなどの例外をリトライする)
|
154 |
attempt = 0
|
155 |
while attempt < max_retries:
|
156 |
try:
|
@@ -159,16 +117,16 @@ class CivitAICrawler:
|
|
159 |
repo_id=repo_id,
|
160 |
path_in_repo=upload_path_in_repo
|
161 |
)
|
162 |
-
logger.info(f"[OK] Uploaded
|
163 |
-
break
|
164 |
except Exception as e:
|
165 |
attempt += 1
|
166 |
error_message = str(e)
|
167 |
if "over the limit of 100000 files" in error_message:
|
168 |
-
logger.warning("Repository file limit exceeded
|
169 |
self.repo_ids['current'] = self.increment_repo_name(self.repo_ids['current'])
|
170 |
self.api.create_repo(repo_id=self.repo_ids['current'], private=True)
|
171 |
-
#
|
172 |
attempt = 0
|
173 |
repo_id = self.repo_ids['current']
|
174 |
continue
|
@@ -178,9 +136,13 @@ class CivitAICrawler:
|
|
178 |
attempt -= 1 # この場合はリトライ回数をカウントしない
|
179 |
else:
|
180 |
if attempt < max_retries:
|
181 |
-
logger.warning(
|
|
|
|
|
182 |
else:
|
183 |
-
logger.error(
|
|
|
|
|
184 |
raise
|
185 |
|
186 |
# =============================
|
@@ -367,44 +329,30 @@ class CivitAICrawler:
|
|
367 |
new_repo_id = f"{repo_id}1"
|
368 |
return new_repo_id
|
369 |
|
370 |
-
#
|
371 |
-
#
|
372 |
-
#
|
373 |
-
def upload_file(self, file_path: str
|
374 |
"""
|
375 |
1) rcloneで file_path を暗号化
|
376 |
-
2)
|
|
|
377 |
"""
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
|
383 |
-
# 1) rclone copy (ファイル暗号化)
|
384 |
-
self.encrypt_with_rclone(file_path, is_file=True)
|
385 |
-
|
386 |
-
# 2) 暗号ファイルをアップロード
|
387 |
-
self.upload_encrypted_files(repo_id=repo_id, path_in_repo=path_in_repo)
|
388 |
-
|
389 |
-
# 3) 暗号ディレクトリの掃除
|
390 |
if os.path.isdir(self.config.ENCRYPTED_DIR):
|
391 |
shutil.rmtree(self.config.ENCRYPTED_DIR, ignore_errors=True)
|
392 |
|
393 |
-
def upload_folder(self, folder_path: str
|
394 |
"""
|
395 |
-
1) rclone
|
396 |
-
2)
|
397 |
"""
|
398 |
-
|
399 |
-
|
400 |
-
|
401 |
-
# 1) rclone copy (フォルダ暗号化)
|
402 |
-
self.encrypt_with_rclone(folder_path, is_file=False)
|
403 |
-
|
404 |
-
# 2) 暗号フォルダをアップロード
|
405 |
-
self.upload_encrypted_files(repo_id=self.repo_ids['current'], path_in_repo=path_in_repo)
|
406 |
-
|
407 |
-
# 3) 掃除
|
408 |
if os.path.isdir(self.config.ENCRYPTED_DIR):
|
409 |
shutil.rmtree(self.config.ENCRYPTED_DIR, ignore_errors=True)
|
410 |
|
|
|
11 |
from typing import Optional
|
12 |
|
13 |
import requests
|
|
|
|
|
14 |
from fastapi import FastAPI
|
15 |
+
from huggingface_hub import HfApi, hf_hub_download, login
|
16 |
|
17 |
# ロギングの設定
|
18 |
logging.basicConfig(level=logging.INFO)
|
19 |
logger = logging.getLogger(__name__)
|
20 |
|
|
|
21 |
class Config:
|
22 |
"""設定用のクラス"""
|
23 |
HUGGINGFACE_API_KEY = os.environ["HUGGINGFACE_API_KEY"]
|
|
|
37 |
"hash": "https://civitai.com/api/v1/model-versions/by-hash/"
|
38 |
}
|
39 |
JST = datetime.timezone(datetime.timedelta(hours=9))
|
|
|
40 |
HEADERS = {
|
41 |
'Authorization': f'Bearer {CIVITAI_API_TOKEN}',
|
42 |
+
'User-Agent': 'civitai-crawler/1.0',
|
43 |
"Content-Type": "application/json"
|
44 |
}
|
45 |
|
46 |
# ===== rclone 用の追加設定 =====
|
|
|
47 |
RCLONE_CONF_BASE64 = os.environ.get("RCLONE_CONF_BASE64", "")
|
48 |
# 暗号化されたファイルが出力されるローカルディレクトリ(cryptLocal: の実体)
|
49 |
ENCRYPTED_DIR = "/home/user/app/encrypted"
|
50 |
|
51 |
|
52 |
+
# =============================================================================
|
53 |
+
# rclone まわり
|
54 |
+
# =============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
def setup_rclone_conf(self):
|
56 |
"""環境変数 RCLONE_CONF_BASE64 から rclone.conf を生成し、RCLONE_CONFIG 環境変数を設定"""
|
57 |
if not self.config.RCLONE_CONF_BASE64:
|
|
|
66 |
os.environ["RCLONE_CONFIG"] = conf_path
|
67 |
logger.info(f"[INFO] rclone.conf created at: {conf_path}")
|
68 |
|
69 |
+
def encrypt_with_rclone(self, local_path: str):
    """Encrypt a file or directory by copying it to the ``cryptLocal:`` rclone remote.

    Runs ``rclone copy <local_path> cryptLocal: -v`` and then verifies that
    ``self.config.ENCRYPTED_DIR`` exists. Any pre-existing encrypted
    directory is removed first so that only this run's output is present.

    NOTE(review): this assumes the ``cryptLocal:`` remote is configured to
    write into ``self.config.ENCRYPTED_DIR`` -- confirm against rclone.conf.

    Args:
        local_path: Path to an existing file or directory to encrypt.

    Raises:
        FileNotFoundError: If ``local_path`` does not exist, or if
            ``self.config.ENCRYPTED_DIR`` is missing after rclone finishes.
        subprocess.CalledProcessError: If rclone exits with a non-zero status
            (``check=True``).
    """
    if not os.path.exists(local_path):
        raise FileNotFoundError(f"[ERROR] Local path not found: {local_path}")

    encrypted_dir = self.config.ENCRYPTED_DIR

    # Clean up the encryption target beforehand so stale output from an
    # earlier run is not mixed into this one.
    if os.path.isdir(encrypted_dir):
        shutil.rmtree(encrypted_dir, ignore_errors=True)

    cmd = ["rclone", "copy", local_path, "cryptLocal:", "-v"]
    # Lazy %-formatting: the message is only built if the record is emitted.
    logger.info("[INFO] Running: %s", " ".join(cmd))
    subprocess.run(cmd, check=True)
    logger.info("[OK] rclone copy => cryptLocal:")

    # rclone succeeded but produced nothing where we expect it: most likely
    # the remote points somewhere else.
    if not os.path.isdir(encrypted_dir):
        raise FileNotFoundError(
            f"[ERROR] {encrypted_dir} not found. Check your rclone config."
        )
|
90 |
|
91 |
+
def upload_encrypted_files(self, repo_id: str, base_path_in_repo: str = ""):
|
92 |
"""
|
93 |
+
self.config.ENCRYPTED_DIR にある暗号化後のファイル/フォルダ構造をそっくりそのまま
|
94 |
+
Hugging Faceにアップロードする。フォルダ名・ファイル名はrcloneの設定によって暗号化されている。
|
95 |
"""
|
|
|
|
|
|
|
96 |
max_retries = 5
|
97 |
|
98 |
+
# 再帰的に暗号化フォルダを探索
|
99 |
for root, dirs, files in os.walk(self.config.ENCRYPTED_DIR):
|
100 |
for fn in files:
|
101 |
encrypted_file_path = os.path.join(root, fn)
|
102 |
if not os.path.isfile(encrypted_file_path):
|
103 |
continue
|
104 |
|
105 |
+
# self.config.ENCRYPTED_DIR からの相対パスを算出
|
106 |
relative_path = os.path.relpath(encrypted_file_path, self.config.ENCRYPTED_DIR)
|
107 |
+
# Hugging Face上にアップロードする際のディレクトリパス
|
108 |
+
# ここでは base_path_in_repo + relative_path にしているが、
|
109 |
+
# base_path_in_repo が空("")なら、rclone で暗号化されたディレクトリ名をそのまま使う
|
110 |
+
upload_path_in_repo = os.path.join(base_path_in_repo, relative_path)
|
111 |
|
|
|
112 |
attempt = 0
|
113 |
while attempt < max_retries:
|
114 |
try:
|
|
|
117 |
repo_id=repo_id,
|
118 |
path_in_repo=upload_path_in_repo
|
119 |
)
|
120 |
+
logger.info(f"[OK] Uploaded => {repo_id}/{upload_path_in_repo}")
|
121 |
+
break
|
122 |
except Exception as e:
|
123 |
attempt += 1
|
124 |
error_message = str(e)
|
125 |
if "over the limit of 100000 files" in error_message:
|
126 |
+
logger.warning("Repository file limit exceeded. Creating a new repository...")
|
127 |
self.repo_ids['current'] = self.increment_repo_name(self.repo_ids['current'])
|
128 |
self.api.create_repo(repo_id=self.repo_ids['current'], private=True)
|
129 |
+
# リポジトリが変わったので attempt をリセット
|
130 |
attempt = 0
|
131 |
repo_id = self.repo_ids['current']
|
132 |
continue
|
|
|
136 |
attempt -= 1 # この場合はリトライ回数をカウントしない
|
137 |
else:
|
138 |
if attempt < max_retries:
|
139 |
+
logger.warning(
|
140 |
+
f"Failed to upload {encrypted_file_path}, retry {attempt}/{max_retries}..."
|
141 |
+
)
|
142 |
else:
|
143 |
+
logger.error(
|
144 |
+
f"Failed to upload after {max_retries} attempts: {encrypted_file_path}"
|
145 |
+
)
|
146 |
raise
|
147 |
|
148 |
# =============================
|
|
|
329 |
new_repo_id = f"{repo_id}1"
|
330 |
return new_repo_id
|
331 |
|
332 |
+
# =============================================================================
|
333 |
+
# 既存のファイル/フォルダアップロードを「フォルダ名も暗号化」するよう変更
|
334 |
+
# =============================================================================
|
335 |
+
def upload_file(self, file_path: str):
    """Encrypt a single file with rclone and upload the result to the current repo.

    Steps:
      1) ``encrypt_with_rclone(file_path)`` produces the encrypted output
         under ``self.config.ENCRYPTED_DIR``.
      2) ``upload_encrypted_files`` uploads that directory tree as-is to
         ``self.repo_ids['current']`` with an empty base path, so the names
         stored in the repository are the ones rclone generated.
      3) The encrypted directory is removed.

    Args:
        file_path: Local path of the file to encrypt and upload.

    Raises:
        Whatever ``encrypt_with_rclone`` or ``upload_encrypted_files`` raise;
        the exception propagates after cleanup.
    """
    try:
        # Encrypt with rclone.
        self.encrypt_with_rclone(file_path)
        # Upload the whole encrypted directory.
        self.upload_encrypted_files(repo_id=self.repo_ids['current'], base_path_in_repo="")
    finally:
        # Always clean up, even if encryption or upload failed, so encrypted
        # output does not linger on disk.
        if os.path.isdir(self.config.ENCRYPTED_DIR):
            shutil.rmtree(self.config.ENCRYPTED_DIR, ignore_errors=True)
|
348 |
|
349 |
+
def upload_folder(self, folder_path: str):
    """Encrypt a whole folder with rclone and upload the result to the current repo.

    Steps:
      1) ``encrypt_with_rclone(folder_path)`` produces the encrypted
         directory structure under ``self.config.ENCRYPTED_DIR``.
      2) ``upload_encrypted_files`` uploads that structure as-is to
         ``self.repo_ids['current']`` with an empty base path.
      3) The encrypted directory is removed.

    Args:
        folder_path: Local path of the directory to encrypt and upload.

    Raises:
        Whatever ``encrypt_with_rclone`` or ``upload_encrypted_files`` raise;
        the exception propagates after cleanup.
    """
    try:
        self.encrypt_with_rclone(folder_path)
        self.upload_encrypted_files(repo_id=self.repo_ids['current'], base_path_in_repo="")
    finally:
        # Always clean up, even if encryption or upload failed, so encrypted
        # output does not linger on disk.
        if os.path.isdir(self.config.ENCRYPTED_DIR):
            shutil.rmtree(self.config.ENCRYPTED_DIR, ignore_errors=True)
|
358 |
|