ttttdiva committed on
Commit
7d6cf26
·
verified ·
1 Parent(s): 43ae23e

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +95 -625
main.py CHANGED
@@ -1,627 +1,97 @@
1
- import asyncio
2
- import datetime
3
- import json
4
- import logging
5
- import os
6
- import re
7
- import shutil
8
- import subprocess
9
- import time
10
- import uuid
11
- from typing import Optional
12
-
13
- import requests
14
- from bs4 import BeautifulSoup
15
- from fake_useragent import UserAgent
16
- from fastapi import FastAPI
17
- from huggingface_hub import HfApi, hf_hub_download, login
18
-
19
# Create this module's logger, then configure root logging at INFO level.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
22
-
23
-
24
class Config:
    """Static settings: credentials, local file names, repo ids and CivitAI endpoints."""

    # Both credentials are read with os.environ[...], so a missing variable
    # raises KeyError as soon as this class body is executed.
    HUGGINGFACE_API_KEY = os.environ["HUGGINGFACE_API_KEY"]
    CIVITAI_API_TOKEN = os.environ["CIVITAI_API_TOKEN"]

    # Local file names, also used as file names inside the Hugging Face repos.
    LOG_FILE = "civitai_backup.log"
    LIST_FILE = "model_list.log"

    # "current" starts empty; the crawl loop fills it from the log file.
    REPO_IDS = dict(
        log="ttttdiva/CivitAI_log_test",
        model_list="ttttdiva/CivitAI_model_info_test",
        current="",
    )

    URLS = dict(
        latest="https://civitai.com/api/v1/models?sort=Newest",
        modelPage="https://civitai.com/models/",
        modelId="https://civitai.com/api/v1/models/",
        modelVersionId="https://civitai.com/api/v1/model-versions/",
        hash="https://civitai.com/api/v1/model-versions/by-hash/",
    )

    # Fixed UTC+9 offset used for the status timestamp.
    JST = datetime.timezone(datetime.timedelta(hours=9))

    # UA.random is evaluated once here, so every request shares one User-Agent.
    UA = UserAgent()
    HEADERS = {
        'Authorization': 'Bearer ' + CIVITAI_API_TOKEN,
        'User-Agent': UA.random,
        "Content-Type": "application/json",
    }
49
-
50
-
51
- class CivitAICrawler:
52
- """CivitAIからモデルをダウンロードし、Hugging Faceにアップロードするクラス"""
53
-
54
def __init__(self, config: Config):
    """Write the rclone config from the environment, then set up clients and routes.

    If ``RCLONE_CONF_BASE64`` is set, it is base64-decoded into
    ``<cwd>/.rclone_config/rclone.conf`` and ``RCLONE_CONFIG`` is pointed at
    that file. Otherwise only a warning is logged.
    """
    import base64

    encoded_conf = os.environ.get("RCLONE_CONF_BASE64")
    if not encoded_conf:
        logger.warning("[WARN] RCLONE_CONF_BASE64 not found; rclone may fail.")
    else:
        # Keep the config in a dedicated directory under the working directory.
        conf_dir = os.path.join(os.getcwd(), ".rclone_config")
        os.makedirs(conf_dir, exist_ok=True)
        conf_path = os.path.join(conf_dir, "rclone.conf")

        with open(conf_path, "wb") as conf_file:
            conf_file.write(base64.b64decode(encoded_conf))

        # Point rclone at the file that was just written.
        os.environ["RCLONE_CONFIG"] = conf_path
        logger.info(f"[INFO] Created rclone.conf at {conf_path}")

    self.config = config
    self.api = HfApi()
    self.app = FastAPI()
    # Copy so that updating "current" at runtime does not mutate Config.REPO_IDS.
    self.repo_ids = self.config.REPO_IDS.copy()
    self.jst = self.config.JST
    self.setup_routes()
79
-
80
def setup_routes(self):
    """Register the status endpoint and the startup hook on ``self.app``."""

    def read_root():
        # Status text shown at "/": backup repo, model list link, current time.
        timestamp = str(datetime.datetime.now(self.jst))
        return f"""
        CivitAIを定期的に周回し新規モデルを {self.repo_ids['current']} にバックアップするspaceです。
        モデル一覧は https://huggingface.co/{self.repo_ids['model_list']}/blob/main/model_list.log を参照してください。
        Status: {timestamp} + currently running :D
        """

    async def startup_event():
        # Start the crawl loop as a background task when the app starts.
        asyncio.create_task(self.crawl())

    # Same registrations as the decorator form, applied explicitly.
    self.app.get("/")(read_root)
    self.app.on_event("startup")(startup_event)
95
-
96
- @staticmethod
97
def get_filename_from_cd(content_disposition: Optional[str], default_name: str) -> str:
    """Return the file name from a Content-Disposition header value.

    Args:
        content_disposition: Raw header value, or None when the header is absent.
        default_name: Name returned when no usable ``filename`` parameter exists.

    Returns:
        The bare file name (directory components removed), or ``default_name``.
    """
    if not content_disposition:
        return default_name

    for part in content_disposition.split(';'):
        key, sep, value = part.strip().partition('=')
        # partition() splits on the first "=", so names containing "=" survive.
        if not sep or key.strip().lower() != "filename":
            continue
        candidate = value.strip().strip('"')
        # The header comes from a remote server: keep only the last path
        # component so a value like "../../x" cannot escape the target folder.
        candidate = os.path.basename(candidate.replace("\\", "/"))
        if candidate:
            return candidate
    return default_name
105
-
106
def download_file(self, url: str, destination_folder: str, default_name: str) -> Optional[str]:
    """Stream ``url`` into ``destination_folder`` and return the saved path.

    The file name comes from the response's Content-Disposition header, falling
    back to ``default_name``.

    Args:
        url: Download URL; requested with ``self.config.HEADERS``.
        destination_folder: Existing directory to write into.
        default_name: File name used when the server supplies none.

    Returns:
        Path of the written file, or None if the request or the write failed.
    """
    file_path = None
    try:
        # The context manager releases the streamed connection on every path;
        # the timeout keeps a stalled server from hanging the crawler forever.
        with requests.get(url, headers=self.config.HEADERS, stream=True, timeout=60) as response:
            response.raise_for_status()
            filename = self.get_filename_from_cd(
                response.headers.get('content-disposition'), default_name
            )
            file_path = os.path.join(destination_folder, filename)
            with open(file_path, 'wb') as file:
                for chunk in response.iter_content(chunk_size=8192):
                    file.write(chunk)
    except (requests.RequestException, OSError) as e:
        logger.error(f"Failed to download file from {url}: {e}")
        # Do not leave a truncated file behind for later steps to upload.
        if file_path and os.path.exists(file_path):
            try:
                os.remove(file_path)
            except OSError as cleanup_error:
                logger.warning(f"Could not remove partial file {file_path}: {cleanup_error}")
        return None

    logger.info(f"Downloaded: {file_path}")
    return file_path
124
-
125
def get_model_info(self, model_id: str) -> dict:
    """Fetch a model's metadata from the CivitAI ``modelId`` endpoint.

    Args:
        model_id: Model identifier appended to ``self.config.URLS["modelId"]``.

    Returns:
        The decoded JSON body, or an empty dict if the request fails or the
        body is not valid JSON.
    """
    try:
        response = requests.get(
            self.config.URLS["modelId"] + str(model_id),
            headers=self.config.HEADERS,
            timeout=60,  # fail instead of hanging on an unresponsive API
        )
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers JSON decoding errors on requests versions where
        # the decode error is not a RequestException.
        logger.error(f"Failed to retrieve model info for ID {model_id}: {e}")
        return {}
134
-
135
def download_images(self, model_versions: list, folder: str):
    """Download every image listed in ``model_versions`` into ``<folder>/images``.

    Failures are logged per image and never abort the remaining downloads.

    Args:
        model_versions: Version dicts; each may carry an ``"images"`` list of
            dicts with a ``"url"`` key.
        folder: Model folder; the ``images`` subfolder is created if missing.
    """
    images_folder = os.path.join(folder, "images")
    os.makedirs(images_folder, exist_ok=True)

    image_urls = []
    for version in model_versions:
        for img in version.get("images", []):
            url = img.get("url")
            if url:  # skip entries without a URL instead of raising KeyError
                image_urls.append(url)

    for image_url in image_urls:
        local_path = os.path.join(images_folder, os.path.basename(image_url))
        try:
            # Context manager closes the streamed connection; the timeout
            # prevents one stalled image from blocking the whole model.
            with requests.get(image_url, stream=True, timeout=60) as resp:
                resp.raise_for_status()
                with open(local_path, 'wb') as imgf:
                    for chunk in resp.iter_content(chunk_size=8192):
                        imgf.write(chunk)
            logger.info(f"Downloaded image: {local_path}")
        except (requests.RequestException, OSError) as e:
            logger.error(f"Failed to download image {image_url}: {e}")
155
-
156
def save_html_content(self, model_page_url: str, folder: str):
    """Save the HTML of ``model_page_url`` as ``<folder>/page.html``.

    Any failure (request, HTTP status, or file write) is logged and swallowed
    so the caller can continue with the rest of the backup.
    """
    try:
        # Timeout added so an unresponsive page cannot hang the crawler.
        resp = requests.get(model_page_url, timeout=60)
        resp.raise_for_status()
        html_path = os.path.join(folder, "page.html")
        with open(html_path, 'w', encoding='utf-8') as f:
            f.write(resp.text)
        logger.info(f"Saved HTML: {html_path}")
    except Exception as e:
        logger.error(f"Error saving HTML content from {model_page_url}: {e}")
167
-
168
def save_model_info_json(self, model_info: dict, folder: str):
    """Dump ``model_info`` to ``<folder>/model_info.json``; errors are logged, not raised."""
    target = os.path.join(folder, "model_info.json")
    try:
        with open(target, 'w', encoding='utf-8') as out:
            json.dump(model_info, out, indent=2)
    except Exception as e:
        logger.error(f"Failed to save model info JSON: {e}")
    else:
        logger.info(f"Saved model_info.json: {target}")
177
- # =========================================================================
178
- # ここが重要:
179
- # - 最新バージョンはまとめて folder_name にダウンロード (一度に暗号化アップロード)
180
- # - 古いバージョンは1つずつダウンロード→暗号化→アップロード→削除 でストレージを節約
181
- # =========================================================================
182
def download_and_process_versions(self, model_versions: list, folder: str):
    """Download the newest version into ``folder`` and back up older versions one file at a time.

    Files of ``model_versions[0]`` are downloaded into ``folder`` and left there
    for the caller. Each file of every later entry is downloaded into
    ``<folder>/old_versions``, moved alone into a ``temp_single`` folder, passed
    to ``self.encrypt_and_upload_folder`` and then removed, so only one
    old-version file is on disk at a time.

    Args:
        model_versions: Version dicts, newest first; each may carry a ``"files"``
            list of dicts with ``"name"`` and ``"downloadUrl"``.
        folder: Existing working folder for this model.
    """
    max_login_retries = 5

    def fetch(download_url: str, dest_folder: str, file_name: str, dummy_label: str) -> Optional[str]:
        """Download one file, retrying while a stray 'login' file appears; return its path or None."""
        login_hits = 0
        local_path = None
        while login_hits < max_login_retries:
            local_path = self.download_file(download_url, dest_folder, file_name)
            if not (local_path and "login" in os.listdir(dest_folder)):
                break
            # A file literally named "login" is treated as a failed attempt.
            login_hits += 1
            os.remove(os.path.join(dest_folder, "login"))
            logger.warning(f"'login' file found, retrying {file_name} ({login_hits}/5)")

        if login_hits < max_login_retries:
            return local_path

        # All attempts hit the login file: leave a marker so the failure is
        # visible in the uploaded folder.
        dummy_file_path = os.path.join(dest_folder, f"{file_name}.download_failed")
        try:
            with open(dummy_file_path, "w") as f:
                f.write("Download failed after 5 attempts.")
            logger.error(f"Failed to download {file_name}. {dummy_label}: {dummy_file_path}")
        except Exception as e:
            logger.error(f"Failed to create dummy file for {file_name}: {e}")
        return None

    if not model_versions:
        logger.warning("No model versions to download.")
        return

    # 1) Newest version (index 0): download into `folder` and keep it there.
    latest_version = model_versions[0]
    logger.info(f"Processing latest version: {latest_version.get('name','(NoName)')}")
    for file_info in latest_version.get("files", []):
        fetch(file_info["downloadUrl"], folder, file_info["name"], "Dummy file created")

    if len(model_versions) <= 1:
        return

    # 2) Older versions: download -> encrypt/upload -> delete, one file at a time.
    old_versions_folder = os.path.join(folder, "old_versions")
    os.makedirs(old_versions_folder, exist_ok=True)

    for version in model_versions[1:]:
        logger.info(f"Processing older version: {version.get('name','(NoName)')}")
        for file_info in version.get("files", []):
            file_name = file_info["name"]
            local_path = fetch(file_info["downloadUrl"], old_versions_folder, file_name, "Dummy file")
            if not local_path:
                # Nothing was downloaded (already logged); do not try to move None.
                continue

            # Isolate the file in its own folder so only it is encrypted and uploaded.
            single_file_folder = os.path.join(old_versions_folder, "temp_single")
            os.makedirs(single_file_folder, exist_ok=True)
            try:
                shutil.move(local_path, os.path.join(single_file_folder, file_name))
                self.encrypt_and_upload_folder(single_file_folder)
            except Exception as e:
                logger.error(f"Failed to encrypt/upload old version file: {e}")
            finally:
                # Ensure the temp folder is gone even when the upload step did not remove it.
                if os.path.exists(single_file_folder):
                    shutil.rmtree(single_file_folder)
                    logger.info(f"Removed temp_single folder {single_file_folder}")
261
-
262
def encrypt_and_upload_folder(self, local_folder: str) -> Optional[str]:
    """Copy ``local_folder`` through the ``cryptLocal:`` rclone remote and upload the result.

    Steps:
      1. Empty ``<cwd>/encrypted`` of leftovers from earlier runs.
      2. ``rclone mkdir cryptLocal:<label>`` so the directory always exists.
      3. ``rclone copy local_folder cryptLocal:<label> --create-empty-src-dirs``.
      4. Upload ``<cwd>/encrypted/<label>`` via ``self.upload_folder``.
      5. Delete the plaintext folder and the encrypted folder.

    This version expects the encrypted output at ``encrypted/<label>`` and
    returns None if that directory does not exist after the copy.

    Args:
        local_folder: Plaintext folder to back up; removed once the copy succeeded.

    Returns:
        The generated ``enc_xxxxxxxx`` label, or None if any step failed,
        including the upload (previously an upload failure still returned the
        label, so callers recorded a URL that was never created).
    """
    if not os.path.exists(local_folder):
        logger.error(f"encrypt_and_upload_folder: folder not found: {local_folder}")
        return None

    encrypted_base_dir = os.path.join(os.getcwd(), "encrypted")
    os.makedirs(encrypted_base_dir, exist_ok=True)

    # Remove old encrypted output so disk usage does not accumulate.
    for item in os.listdir(encrypted_base_dir):
        item_path = os.path.join(encrypted_base_dir, item)
        try:
            if os.path.isfile(item_path) or os.path.islink(item_path):
                os.remove(item_path)
            else:
                shutil.rmtree(item_path)
            logger.info(f"[CLEANUP] Removed old encrypted item: {item_path}")
        except Exception as e:
            logger.warning(f"[CLEANUP] Failed to remove {item_path}: {e}")

    # Label: "enc_" + first 8 characters of a random UUID.
    subfolder_label = "enc_" + str(uuid.uuid4())[:8]

    # Create the remote directory first so it exists even for an empty source.
    try:
        subprocess.run(
            ["rclone", "mkdir", f"cryptLocal:{subfolder_label}"],
            check=True
        )
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing or non-executable rclone binary.
        logger.error(f"rclone mkdir failed: {e}")
        return None

    try:
        subprocess.run(
            [
                "rclone",
                "copy",
                local_folder,
                f"cryptLocal:{subfolder_label}",
                "--create-empty-src-dirs"
            ],
            check=True
        )
    except (subprocess.CalledProcessError, OSError) as e:
        logger.error(f"rclone copy failed: {e}")
        return None

    enc_folder_path = os.path.join(encrypted_base_dir, subfolder_label)
    if not os.path.isdir(enc_folder_path):
        logger.error(f"[ERROR] {enc_folder_path} is not a directory, something is still off.")
        return None

    uploaded = True
    try:
        self.upload_folder(enc_folder_path, path_in_repo=subfolder_label)
        logger.info(f"Uploaded encrypted folder: {enc_folder_path}")
    except Exception as e:
        uploaded = False
        logger.error(f"Failed to upload encrypted folder {enc_folder_path}: {e}")

    # Local copies are removed whether or not the upload worked, to free disk space.
    try:
        shutil.rmtree(local_folder)
        shutil.rmtree(enc_folder_path)
        logger.info(f"Removed local folder: {local_folder} and {enc_folder_path}")
    except Exception as e:
        logger.error(f"Failed to remove local folders: {e}")

    return subfolder_label if uploaded else None
341
-
342
def upload_file(self, file_path: str, repo_id: Optional[str] = None, path_in_repo: Optional[str] = None):
    """Upload one file to a Hugging Face repo, with retries.

    This single definition replaces two identical copies of the method.

    Error handling per failed attempt:
      * "over the limit of 100000 files": only when the upload targets the
        current backup repo, a new repo is created (name from
        ``self.increment_repo_name``), the upload is redirected to it and the
        attempt counter is reset. Previously the retry kept using the old repo
        id, so it could never succeed.
      * "you can retry this action in about 1 hour": sleep one hour; the
        attempt is not counted.
      * anything else: retry up to 5 attempts, then re-raise.

    Args:
        file_path: Local file to upload.
        repo_id: Target repo; defaults to ``self.repo_ids['current']``.
        path_in_repo: Destination path; defaults to the file's base name.

    Raises:
        Exception: the last upload error once 5 attempts have failed.
    """
    # Roll-over only makes sense for the backup repo, not for an explicit
    # log or list repo passed by the caller.
    targets_current = repo_id is None or repo_id == self.repo_ids['current']
    if repo_id is None:
        repo_id = self.repo_ids['current']
    if path_in_repo is None:
        path_in_repo = os.path.basename(file_path)

    max_retries = 5
    attempt = 0
    while attempt < max_retries:
        try:
            self.api.upload_file(
                path_or_fileobj=file_path,
                repo_id=repo_id,
                path_in_repo=path_in_repo
            )
            logger.info(f"Uploaded file: {file_path} to {repo_id} at {path_in_repo}")
            return
        except Exception as e:
            attempt += 1
            error_message = str(e)
            if targets_current and "over the limit of 100000 files" in error_message:
                logger.warning("File limit exceeded, creating a new repo.")
                self.repo_ids['current'] = self.increment_repo_name(self.repo_ids['current'])
                # exist_ok avoids failing if the next repo was already created.
                self.api.create_repo(repo_id=self.repo_ids['current'], private=True, exist_ok=True)
                repo_id = self.repo_ids['current']  # retry against the new repo
                attempt = 0
            elif "you can retry this action in about 1 hour" in error_message:
                logger.warning("Rate limit hit. Waiting 1 hour...")
                time.sleep(3600)
                attempt -= 1  # a rate-limit wait does not count as a failed attempt
            elif attempt < max_retries:
                logger.warning(f"Failed to upload {file_path}, retry {attempt}/{max_retries}")
            else:
                logger.error(f"Failed after {max_retries} attempts: {e}")
                raise
415
-
416
def upload_folder(self, folder_path: str, path_in_repo: Optional[str] = None):
    """Upload a whole folder to the current Hugging Face repo, with retries.

    ``path_in_repo`` defaults to the folder's base name. A file-limit error
    creates the next repo and restarts the attempt count; a rate-limit error
    sleeps one hour without counting; any other error is retried until 5
    attempts have failed, after which it is re-raised.
    """
    target_path = os.path.basename(folder_path) if path_in_repo is None else path_in_repo

    retry_limit = 5
    failures = 0
    while failures < retry_limit:
        try:
            # The current repo is read on every attempt, so a roll-over takes effect.
            self.api.upload_folder(
                folder_path=folder_path,
                repo_id=self.repo_ids['current'],
                path_in_repo=target_path,
            )
        except Exception as e:
            failures += 1
            reason = str(e)
            if "over the limit of 100000 files" in reason:
                logger.warning("File limit exceeded, creating a new repo.")
                next_repo = self.increment_repo_name(self.repo_ids['current'])
                self.repo_ids['current'] = next_repo
                self.api.create_repo(repo_id=self.repo_ids['current'], private=True)
                failures = 0
            elif "you can retry this action in about 1 hour" in reason:
                logger.warning("Rate limit hit. Waiting 1 hour...")
                time.sleep(3600)
                failures -= 1
            elif failures < retry_limit:
                logger.warning(f"Failed to upload folder {folder_path}, retry {failures}/{retry_limit}")
            else:
                logger.error(f"Failed after {retry_limit} attempts: {e}")
                raise
        else:
            logger.info(f"Uploaded folder: {folder_path} to {self.repo_ids['current']} at {target_path}")
            return
453
-
454
- @staticmethod
455
def increment_repo_name(repo_id: str) -> str:
    """Return ``repo_id`` with its trailing number increased by one, or with ``1`` appended if it has none."""
    trailing = re.search(r'(\d+)$', repo_id)
    if trailing is None:
        return f"{repo_id}1"
    bumped = str(int(trailing.group(1)) + 1)
    # Splice the new number over the matched digits, keeping what surrounds them.
    return repo_id[:trailing.start()] + bumped + repo_id[trailing.end():]
464
-
465
def read_model_list(self) -> dict:
    """Read the local model list file into a ``{hf_url: model_page_name}`` dict.

    Each non-empty line is split at its last ``": "`` into a model label and a
    URL. Splitting from the right matters because a label can itself contain
    ``": "``, which a left split would cut too early. Lines without the
    separator are ignored.

    Returns:
        Mapping from Hugging Face URL to the label before it. Entries read
        before an error are kept; the error is logged.
    """
    model_list = {}
    try:
        with open(self.config.LIST_FILE, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                parts = line.rsplit(": ", 1)
                if len(parts) == 2:
                    modelpage_name, model_hf_url = parts
                    model_list[model_hf_url] = modelpage_name
    except Exception as e:
        logger.error(f"Failed to read model list: {e}")
    return model_list
480
-
481
def get_repo_info(self, repo_id):
    """Return the file paths (``rfilename`` of each sibling) in ``repo_id``, or ``[]`` on any error."""
    try:
        info = self.api.repo_info(repo_id=repo_id, files_metadata=True)
        return [entry.rfilename for entry in info.siblings]
    except Exception as e:
        logger.error(f"Failed to get repo info for {repo_id}: {e}")
        return []
490
-
491
def process_model(self, model_url: str):
    """Download one model, encrypt and upload its folder, and record it in the list file.

    The model id is the last path segment of ``model_url``. Model files, images,
    the model page HTML and ``model_info.json`` are collected in a fresh local
    folder, which is handed to ``self.encrypt_and_upload_folder``. One line
    ``"<name> (ID:<id>): <hf url>"`` is appended to the list file; the URL ends
    in ``[ENCRYPT_FAILED]`` when no label was returned.

    All exceptions are logged and swallowed. The working folder is always
    removed afterwards; previously it stayed on disk whenever encryption failed
    or a step raised.

    Args:
        model_url: URL whose last path segment is the CivitAI model id.
    """
    folder_name = None
    try:
        model_id = model_url.rstrip("/").split("/")[-1]

        model_info = self.get_model_info(model_id)
        if not model_info or "modelVersions" not in model_info:
            logger.error(f"No valid model info for ID {model_id}. Skipping.")
            return

        versions = model_info["modelVersions"]
        if not versions:
            logger.warning(f"No modelVersions found for ID {model_id}.")
            return

        # Folder name: sanitized model name plus a short random suffix so two
        # models with the same name cannot collide.
        folder_name = model_info.get("name", "UnknownModel")
        folder_name = re.sub(r'[\\/*?:"<>|]', '_', folder_name)
        folder_name += "_" + str(uuid.uuid4())[:8]
        os.makedirs(folder_name, exist_ok=True)

        # Newest-version files, images, HTML and metadata all land in
        # folder_name; older versions are uploaded and deleted inside
        # download_and_process_versions.
        self.download_and_process_versions(versions, folder_name)
        self.download_images(versions, folder_name)
        self.save_html_content(f"{self.config.URLS['modelPage']}{model_id}", folder_name)
        self.save_model_info_json(model_info, folder_name)

        enc_subfolder = self.encrypt_and_upload_folder(folder_name)
        if enc_subfolder is None:
            enc_subfolder = "[ENCRYPT_FAILED]"

        hf_enc_url = f"https://huggingface.co/{self.repo_ids['current']}/tree/main/{enc_subfolder}"
        with open(self.config.LIST_FILE, "a", encoding="utf-8") as f:
            f.write(f"{model_info.get('name', 'UnnamedModel')} (ID:{model_id}): {hf_enc_url}\n")

    except Exception as e:
        logger.error(f"Error in process_model ({model_url}): {e}")
    finally:
        # On success the upload step has already deleted the folder; this only
        # catches the failure paths.
        if folder_name and os.path.isdir(folder_name):
            shutil.rmtree(folder_name, ignore_errors=True)
537
-
538
async def crawl(self):
    """Poll CivitAI forever and back up one new model per iteration.

    Each iteration logs in to Hugging Face, refreshes the local model list and
    log files from their repos, fetches the newest models and then either
    processes one unseen model and uploads the updated log and list files, or
    rewrites and uploads the log and waits 60 seconds. Any error in an
    iteration is logged and followed by a 300 second pause.

    The iteration body is blocking (HTTP requests, rclone, hub uploads,
    including a one-hour ``time.sleep`` inside the upload retries), so it runs
    in the default executor. Previously it ran directly on the event loop,
    which stalled the FastAPI app for the duration of every iteration. On
    shutdown an iteration already running in the worker thread is not
    interrupted.
    """

    def run_once() -> bool:
        """Run one blocking crawl iteration; return True if a new model was handled."""
        login(token=self.config.HUGGINGFACE_API_KEY, add_to_git_credential=True)

        # Refresh model_list.log from its repo.
        model_list_path = hf_hub_download(
            repo_id=self.repo_ids['model_list'],
            filename=self.config.LIST_FILE
        )
        shutil.copyfile(model_list_path, f"./{self.config.LIST_FILE}")

        # Refresh the log file from its repo.
        local_file_path = hf_hub_download(
            repo_id=self.repo_ids["log"],
            filename=self.config.LOG_FILE
        )
        shutil.copyfile(local_file_path, f"./{self.config.LOG_FILE}")

        # Log layout: line 1 = JSON list of known model ids, line 2 = current repo id.
        with open(self.config.LOG_FILE, "r", encoding="utf-8") as file:
            lines = file.read().splitlines()
            old_models = json.loads(lines[0]) if len(lines) > 0 else []
            self.repo_ids["current"] = lines[1] if len(lines) > 1 else ""

        # Ask CivitAI for the newest models.
        r = requests.get(self.config.URLS["latest"], headers=self.config.HEADERS)
        r.raise_for_status()
        latest_models = r.json().get("items", [])
        latest_model_ids = [m["id"] for m in latest_models if "id" in m]

        new_models = list(set(latest_model_ids) - set(old_models))
        if not new_models:
            # Nothing new: rewrite the log with the latest ids and upload it.
            with open(self.config.LOG_FILE, "w", encoding="utf-8") as f:
                f.write(json.dumps(latest_model_ids) + "\n")
                f.write(f"{self.repo_ids['current']}\n")
            logger.info(f"No new models. Updated log: {self.config.LOG_FILE}")
            self.upload_file(
                file_path=self.config.LOG_FILE,
                repo_id=self.repo_ids["log"],
                path_in_repo=self.config.LOG_FILE
            )
            logger.info("Uploaded log file.")
            return False

        logger.info(f"New model IDs found: {new_models}")
        model_id = new_models[0]  # one model per iteration

        for attempt in range(1, 6):
            try:
                self.process_model(self.config.URLS["modelId"] + str(model_id))
                break
            except Exception as e:
                logger.error(f"Failed to process model {model_id} (attempt {attempt}/5): {e}")
                if attempt == 5:
                    logger.error(f"Skipping model {model_id} after 5 failures.")
                else:
                    time.sleep(2)  # runs in the worker thread, not on the event loop

        # Record the model as handled and persist the log.
        old_models.append(model_id)
        with open(self.config.LOG_FILE, "w", encoding="utf-8") as f:
            f.write(json.dumps(old_models) + "\n")
            f.write(f"{self.repo_ids['current']}\n")
        logger.info(f"Updated log with new model ID: {model_id}")

        # Upload both the log file and the model list.
        self.upload_file(
            file_path=self.config.LOG_FILE,
            repo_id=self.repo_ids["log"],
            path_in_repo=self.config.LOG_FILE
        )
        self.upload_file(
            file_path=self.config.LIST_FILE,
            repo_id=self.repo_ids["model_list"],
            path_in_repo=self.config.LIST_FILE
        )
        return True

    loop = asyncio.get_running_loop()
    while True:
        try:
            handled_new_model = await loop.run_in_executor(None, run_once)
            if not handled_new_model:
                await asyncio.sleep(60)
        except Exception as e:
            logger.error(f"Error in crawl loop: {e}")
            await asyncio.sleep(300)
622
-
623
-
624
# FastAPI application: build the crawler once at import time and expose its
# app object under the module-level name `app`.
config = Config()
crawler = CivitAICrawler(config)
app = crawler.app
 
1
def encrypt_and_upload_folder(self, local_folder: str) -> Optional[str]:
    """Copy ``local_folder`` through the ``cryptLocal:`` rclone remote and upload the result.

    Steps:
      1. Empty ``<cwd>/encrypted`` of leftovers from earlier runs.
      2. Snapshot the directory listing, then ``rclone mkdir cryptLocal:<label>``.
      3. ``rclone copy local_folder cryptLocal:<label> --create-empty-src-dirs``.
      4. Find the directory that newly appeared under ``encrypted`` (its on-disk
         name may differ from the label) and upload it via
         ``self.upload_folder`` under ``path_in_repo=<label>``.
      5. Delete the plaintext folder and the encrypted folder.

    The listing snapshot is taken before rclone runs. Previously it was taken
    after the copy, so the before/after difference was always empty and the
    function always returned None.

    Args:
        local_folder: Plaintext folder to back up; removed once the copy succeeded.

    Returns:
        The logical ``enc_xxxxxxxx`` label, or None if any step failed,
        including the upload.
    """
    if not os.path.exists(local_folder):
        logger.error(f"encrypt_and_upload_folder: folder not found: {local_folder}")
        return None

    encrypted_base_dir = os.path.join(os.getcwd(), "encrypted")
    os.makedirs(encrypted_base_dir, exist_ok=True)

    # 1) Remove old encrypted files and folders.
    for item in os.listdir(encrypted_base_dir):
        item_path = os.path.join(encrypted_base_dir, item)
        try:
            if os.path.isfile(item_path) or os.path.islink(item_path):
                os.remove(item_path)
            else:
                shutil.rmtree(item_path)
            logger.info(f"[CLEANUP] Removed old encrypted item: {item_path}")
        except Exception as e:
            logger.warning(f"[CLEANUP] Failed to remove {item_path}: {e}")

    # Snapshot now (cleanup may have left entries behind) so the directory
    # rclone creates can be identified by difference afterwards.
    before_dirs = set(os.listdir(encrypted_base_dir))

    # 2) Create the remote directory first so it exists even for an empty source.
    subfolder_label = "enc_" + str(uuid.uuid4())[:8]
    try:
        subprocess.run(
            ["rclone", "mkdir", f"cryptLocal:{subfolder_label}"],
            check=True
        )
        logger.info(f"[OK] rclone mkdir cryptLocal:{subfolder_label}")
    except (subprocess.CalledProcessError, OSError) as e:
        # OSError covers a missing or non-executable rclone binary.
        logger.error(f"rclone mkdir failed: {e}")
        return None

    # 3) Copy the plaintext folder through the crypt remote, keeping empty dirs.
    try:
        subprocess.run(
            [
                "rclone", "copy",
                local_folder,
                f"cryptLocal:{subfolder_label}",
                "--create-empty-src-dirs"
            ],
            check=True
        )
        logger.info(f"[OK] rclone copy {local_folder} => cryptLocal:{subfolder_label}")
    except (subprocess.CalledProcessError, OSError) as e:
        logger.error(f"rclone copy failed: {e}")
        return None

    # Identify the directory that rclone created under ./encrypted.
    new_entries = set(os.listdir(encrypted_base_dir)) - before_dirs
    if not new_entries:
        logger.error("[ERROR] No new directory appeared in ./encrypted after rclone copy.")
        return None
    if len(new_entries) > 1:
        logger.warning(f"[WARN] Multiple new directories found: {new_entries}, picking the first one.")
    enc_folder_name = sorted(new_entries)[0]  # deterministic choice
    enc_folder_path = os.path.join(encrypted_base_dir, enc_folder_name)

    if not os.path.isdir(enc_folder_path):
        logger.error(f"[ERROR] {enc_folder_path} is not a directory.")
        return None

    # 4) Upload the encrypted folder under the logical label.
    uploaded = True
    try:
        self.upload_folder(enc_folder_path, path_in_repo=subfolder_label)
        logger.info(f"Uploaded encrypted folder: {enc_folder_path}")
    except Exception as e:
        uploaded = False
        logger.error(f"Failed to upload encrypted folder {enc_folder_path}: {e}")

    # 5) Local copies are removed whether or not the upload worked, to free disk space.
    try:
        shutil.rmtree(local_folder)
        shutil.rmtree(enc_folder_path)
        logger.info(f"Removed local folder: {local_folder} and {enc_folder_path}")
    except Exception as e:
        logger.error(f"Failed to remove local folders: {e}")

    # 6) Return the logical folder name only if the upload really happened.
    return subfolder_label if uploaded else None