ttttdiva commited on
Commit
bbb27d2
·
verified ·
1 Parent(s): 3ca4467

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +22 -0
  2. main.py +529 -0
  3. requirements.txt +9 -0
Dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.9

WORKDIR /code

COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Runtime system tools: rclone (encrypted folder copies), aria2 (downloads),
# zip (archiving). Use apt-get (stable scripting CLI, unlike `apt` which
# warns in non-interactive use), skip recommended packages, and drop the
# package lists afterwards to keep the image small.
RUN apt-get update && apt-get install -y --no-install-recommends \
    rclone \
    aria2 \
    zip \
    && rm -rf /var/lib/apt/lists/*

# Run as an unprivileged user (uid 1000, as expected by HF Spaces).
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

WORKDIR $HOME/app

COPY --chown=user . $HOME/app

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,529 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import datetime
3
+ import json
4
+ import logging
5
+ import os
6
+ import re
7
+ import shutil
8
+ import subprocess
9
+ import time
10
+ import uuid
11
+ from typing import Optional
12
+
13
+ import requests
14
+ from bs4 import BeautifulSoup
15
+ from fake_useragent import UserAgent
16
+ from fastapi import FastAPI
17
+ from huggingface_hub import HfApi, hf_hub_download, login
18
+
19
# Logging setup: INFO-level root config plus the conventional per-module logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
22
+
23
+
24
class Config:
    """Static configuration for the crawler.

    NOTE(review): the two tokens below are read with os.environ[...] and
    therefore raise KeyError at import time when unset — confirm that is
    the intended fail-fast behavior.
    """
    HUGGINGFACE_API_KEY = os.environ["HUGGINGFACE_API_KEY"]  # HF write token
    CIVITAI_API_TOKEN = os.environ["CIVITAI_API_TOKEN"]      # CivitAI API token
    LOG_FILE = "civitai_backup.log"   # crawl state: known model IDs + current repo
    LIST_FILE = "model_list.log"      # human-readable model list
    REPO_IDS = {
        "log": "ttttdiva/CivitAI_log_test",
        "model_list": "ttttdiva/CivitAI_model_info_test",
        "current": ""  # filled in at runtime from the log file
    }
    URLS = {
        "latest": "https://civitai.com/api/v1/models?sort=Newest",
        "modelPage": "https://civitai.com/models/",
        "modelId": "https://civitai.com/api/v1/models/",
        "modelVersionId": "https://civitai.com/api/v1/model-versions/",
        "hash": "https://civitai.com/api/v1/model-versions/by-hash/"
    }
    JST = datetime.timezone(datetime.timedelta(hours=9))  # Japan Standard Time
    UA = UserAgent()
    HEADERS = {
        'Authorization': f'Bearer {CIVITAI_API_TOKEN}',
        # NOTE(review): UA.random is sampled once at class creation, so every
        # request shares one User-Agent — confirm if per-request rotation was intended.
        'User-Agent': UA.random,
        "Content-Type": "application/json"
    }
49
+
50
+
51
class CivitAICrawler:
    """Downloads models from CivitAI and uploads them to Hugging Face."""

    def __init__(self, config: Config):
        import base64

        # If RCLONE_CONF_BASE64 is set, materialize an rclone.conf so the
        # "cryptLocal:" remote used by encrypt_and_upload_folder works.
        rclone_conf_base64 = os.environ.get("RCLONE_CONF_BASE64")
        if rclone_conf_base64:
            # Create a .rclone_config directory under the current directory.
            config_dir = os.path.join(os.getcwd(), ".rclone_config")
            os.makedirs(config_dir, exist_ok=True)

            conf_path = os.path.join(config_dir, "rclone.conf")
            with open(conf_path, "wb") as f:
                f.write(base64.b64decode(rclone_conf_base64))

            # Point rclone at this config file.
            os.environ["RCLONE_CONFIG"] = conf_path
            logger.info(f"[INFO] Created rclone.conf at {conf_path}")
        else:
            logger.warning("[WARN] RCLONE_CONF_BASE64 not found; rclone may fail.")
        # ==========================

        self.config = config
        self.api = HfApi()
        self.app = FastAPI()
        self.repo_ids = self.config.REPO_IDS.copy()  # mutable copy: "current" changes at runtime
        self.jst = self.config.JST
        self.setup_routes()
80
+
81
    def setup_routes(self):
        """Register the FastAPI routes and the background crawl task."""
        @self.app.get("/")
        def read_root():
            # Simple liveness/status page (user-facing text intentionally kept as-is).
            now = str(datetime.datetime.now(self.jst))
            description = f"""
            CivitAIを定期的に周回し新規モデルを {self.repo_ids['current']} にバックアップするspaceです。
            モデル一覧は https://huggingface.co/{self.repo_ids['model_list']}/blob/main/model_list.log を参照してください。
            Status: {now} + currently running :D
            """
            return description

        # NOTE(review): @app.on_event("startup") is deprecated in newer FastAPI
        # versions in favor of lifespan handlers — confirm the pinned version.
        @self.app.on_event("startup")
        async def startup_event():
            # Kick off the endless crawl loop without blocking app startup.
            asyncio.create_task(self.crawl())
96
+
97
+ @staticmethod
98
+ def get_filename_from_cd(content_disposition: Optional[str], default_name: str) -> str:
99
+ """Content-Dispositionヘッダーからファイル名を取得する。"""
100
+ if content_disposition:
101
+ parts = content_disposition.split(';')
102
+ for part in parts:
103
+ if "filename=" in part:
104
+ return part.split("=")[1].strip().strip('"')
105
+ return default_name
106
+
107
+ def download_file(self, url: str, destination_folder: str, default_name: str) -> Optional[str]:
108
+ """指定されたURLからファイルをダウンロードし、指定されたフォルダに保存する。"""
109
+ try:
110
+ response = requests.get(url, headers=self.config.HEADERS, stream=True)
111
+ response.raise_for_status()
112
+ except requests.RequestException as e:
113
+ logger.error(f"Failed to download file from {url}: {e}")
114
+ return None
115
+
116
+ filename = self.get_filename_from_cd(response.headers.get('content-disposition'), default_name)
117
+ file_path = os.path.join(destination_folder, filename)
118
+
119
+ # ダウンロードとファイル保存処理
120
+ with open(file_path, 'wb') as file:
121
+ for chunk in response.iter_content(chunk_size=8192):
122
+ file.write(chunk)
123
+ logger.info(f"Downloaded: {file_path}")
124
+ return file_path
125
+
126
+ def get_model_info(self, model_id: str) -> dict:
127
+ """モデルの情報を取得する。"""
128
+ try:
129
+ response = requests.get(self.config.URLS["modelId"] + str(model_id), headers=self.config.HEADERS)
130
+ response.raise_for_status()
131
+ return response.json()
132
+ except requests.RequestException as e:
133
+ logger.error(f"Failed to retrieve model info for ID {model_id}: {e}")
134
+ return {}
135
+
136
+ def download_model_files(self, model_versions: list, folder: str):
137
+ """最新のモデルバージョンと古いバージョンのファイルをまとめてダウンロード."""
138
+ for version in model_versions:
139
+ files_info = version.get("files", [])
140
+ for file_info in files_info:
141
+ download_url = file_info["downloadUrl"]
142
+ file_name = file_info["name"]
143
+ login_detected_count = 0
144
+
145
+ while login_detected_count < 5:
146
+ local_path = self.download_file(download_url, folder, file_name)
147
+ if local_path and "login" in os.listdir(folder):
148
+ # 万が一、ダウンロード先に "login" という謎ファイルが出た場合の再試行処理
149
+ login_detected_count += 1
150
+ os.remove(os.path.join(folder, "login"))
151
+ logger.warning(f"Detected 'login' file, retrying download: {file_name} ({login_detected_count}/5)")
152
+ else:
153
+ break
154
+
155
+ if login_detected_count >= 5:
156
+ # ダウンロード失敗を示すダミーファイルを作成
157
+ dummy_file_path = os.path.join(folder, f"{file_name}.download_failed")
158
+ try:
159
+ with open(dummy_file_path, "w") as f:
160
+ f.write("Download failed after 5 attempts.")
161
+ logger.error(f"Failed to download {file_name}. Created dummy file: {dummy_file_path}")
162
+ except Exception as e:
163
+ logger.error(f"Failed to create dummy file for {file_name}: {e}")
164
+
165
+ def download_images(self, model_versions: list, folder: str):
166
+ """画像を images フォルダにまとめてダウンロードする."""
167
+ images_folder = os.path.join(folder, "images")
168
+ os.makedirs(images_folder, exist_ok=True)
169
+
170
+ images = []
171
+ for version in model_versions:
172
+ for img in version.get("images", []):
173
+ image_url = img["url"]
174
+ images.append(image_url)
175
+
176
+ for image_url in images:
177
+ image_name = os.path.basename(image_url) # ファイル名部分
178
+ local_path = os.path.join(images_folder, image_name)
179
+ try:
180
+ resp = requests.get(image_url, stream=True)
181
+ resp.raise_for_status()
182
+ with open(local_path, 'wb') as imgf:
183
+ for chunk in resp.iter_content(chunk_size=8192):
184
+ imgf.write(chunk)
185
+ logger.info(f"Downloaded image: {local_path}")
186
+ except requests.RequestException as e:
187
+ logger.error(f"Failed to download image {image_url}: {e}")
188
+
189
+ def save_html_content(self, model_page_url: str, folder: str):
190
+ """モデルページのHTMLをフォルダ内に保存する."""
191
+ try:
192
+ resp = requests.get(model_page_url)
193
+ resp.raise_for_status()
194
+ html_path = os.path.join(folder, "page.html")
195
+ with open(html_path, 'w', encoding='utf-8') as f:
196
+ f.write(resp.text)
197
+ logger.info(f"Saved HTML: {html_path}")
198
+ except Exception as e:
199
+ logger.error(f"Error saving HTML content from {model_page_url}: {e}")
200
+
201
+ def save_model_info_json(self, model_info: dict, folder: str):
202
+ """モデル情報をJSONファイルとして保存."""
203
+ info_path = os.path.join(folder, "model_info.json")
204
+ try:
205
+ with open(info_path, 'w', encoding='utf-8') as f:
206
+ json.dump(model_info, f, indent=2)
207
+ logger.info(f"Saved model_info.json: {info_path}")
208
+ except Exception as e:
209
+ logger.error(f"Failed to save model info JSON: {e}")
210
+
211
    def encrypt_and_upload_folder(self, local_folder: str):
        """
        1. Encrypt the whole folder (names included) with rclone.
        2. Upload the encrypted folder to Hugging Face.
        3. Delete both local copies.
        """
        if not os.path.exists(local_folder):
            logger.error(f"encrypt_and_upload_folder: folder not found: {local_folder}")
            return

        # Base path under which rclone materializes the encrypted output.
        encrypted_base_dir = "/app/encrypted"
        os.makedirs(encrypted_base_dir, exist_ok=True)

        # Snapshot /app/encrypted before running rclone so the newly created
        # (encrypted-name) folder can be detected afterwards.
        before_set = set(os.listdir(encrypted_base_dir))

        # Copy the folder through rclone's crypt remote (encrypts both file
        # and folder names). "cryptLocal:" is expected to be defined in the
        # rclone config roughly as:
        #   [cryptLocal]
        #   type = crypt
        #   remote = /app/encrypted
        #   filename_encryption = standard
        #   password = ****
        try:
            subprocess.run(
                ["rclone", "copy", local_folder, "cryptLocal:"],
                check=True
            )
        except subprocess.CalledProcessError as e:
            logger.error(f"rclone copy failed: {e}")
            return

        # State of /app/encrypted after the rclone run.
        after_set = set(os.listdir(encrypted_base_dir))
        # The set difference identifies the freshly created encrypted folder.
        new_folders = after_set - before_set
        if not new_folders:
            logger.error("No new encrypted folder found. Something went wrong.")
            return

        # Normally exactly one; if several appear, just take the first.
        # NOTE(review): set ordering is arbitrary, so with more than one new
        # folder this choice is nondeterministic — confirm acceptable.
        enc_folder_name = list(new_folders)[0]
        enc_folder_path = os.path.join(encrypted_base_dir, enc_folder_name)

        # Upload the encrypted folder as-is to Hugging Face
        # => the folder name shows up encrypted on the HF side as well.
        try:
            # path_in_repo reuses the same encrypted name.
            self.upload_folder(enc_folder_path, path_in_repo=enc_folder_name)
            logger.info(f"Uploaded encrypted folder to HF: {enc_folder_path}")
        except Exception as e:
            logger.error(f"Failed to upload encrypted folder {enc_folder_path}: {e}")

        # Remove local copies (plaintext folder & encrypted folder).
        try:
            shutil.rmtree(local_folder)
            shutil.rmtree(enc_folder_path)
            logger.info(f"Removed local folder: {local_folder} and encrypted folder: {enc_folder_path}")
        except Exception as e:
            logger.error(f"Failed to remove local folders: {e}")
273
+
274
    def upload_file(self, file_path: str, repo_id: Optional[str] = None, path_in_repo: Optional[str] = None):
        """
        Upload a single file to a Hugging Face repo with retry handling.

        (Folder upload is the main path; this is used for individual files
        such as the log files.) repo_id defaults to the current backup repo,
        path_in_repo to the file's basename.
        """
        if repo_id is None:
            repo_id = self.repo_ids['current']
        if path_in_repo is None:
            path_in_repo = os.path.basename(file_path)

        max_retries = 5
        attempt = 0
        while attempt < max_retries:
            try:
                self.api.upload_file(
                    path_or_fileobj=file_path,
                    repo_id=repo_id,
                    path_in_repo=path_in_repo
                )
                logger.info(f"Uploaded file: {file_path} to {repo_id} at {path_in_repo}")
                return
            except Exception as e:
                attempt += 1
                error_message = str(e)
                if "over the limit of 100000 files" in error_message:
                    # Repo is full: roll over to a new numbered repo and
                    # restart the retry budget.
                    # NOTE(review): resetting attempt to 0 means this branch
                    # can loop indefinitely if repo creation keeps failing.
                    logger.warning("File limit exceeded, creating a new repo.")
                    self.repo_ids['current'] = self.increment_repo_name(self.repo_ids['current'])
                    self.api.create_repo(repo_id=self.repo_ids['current'], private=True)
                    attempt = 0
                    continue
                elif "you can retry this action in about 1 hour" in error_message:
                    # Rate limited: sleep an hour and don't count this attempt.
                    logger.warning("Rate limit hit. Waiting 1 hour...")
                    time.sleep(3600)
                    attempt -= 1
                else:
                    if attempt < max_retries:
                        logger.warning(f"Failed to upload {file_path}, retry {attempt}/{max_retries}")
                    else:
                        logger.error(f"Failed after {max_retries} attempts: {e}")
                        raise
314
+
315
    def upload_folder(self, folder_path: str, path_in_repo: Optional[str] = None):
        """
        Upload a whole folder to the current Hugging Face repo in one call,
        with the same retry/rollover handling as single-file uploads.
        path_in_repo defaults to the folder's basename.
        """
        if path_in_repo is None:
            path_in_repo = os.path.basename(folder_path)

        max_retries = 5
        attempt = 0
        while attempt < max_retries:
            try:
                self.api.upload_folder(
                    folder_path=folder_path,
                    repo_id=self.repo_ids['current'],
                    path_in_repo=path_in_repo
                )
                logger.info(f"Uploaded folder: {folder_path} to {self.repo_ids['current']} at {path_in_repo}")
                return
            except Exception as e:
                attempt += 1
                error_message = str(e)
                if "over the limit of 100000 files" in error_message:
                    # Repo is full: roll over to a new numbered repo and
                    # restart the retry budget.
                    # NOTE(review): attempt = 0 can loop forever if repo
                    # creation itself keeps failing.
                    logger.warning("File limit exceeded, creating a new repo.")
                    self.repo_ids['current'] = self.increment_repo_name(self.repo_ids['current'])
                    self.api.create_repo(repo_id=self.repo_ids['current'], private=True)
                    attempt = 0
                    continue
                elif "you can retry this action in about 1 hour" in error_message:
                    # Rate limited: sleep an hour and don't count this attempt.
                    logger.warning("Rate limit hit. Waiting 1 hour...")
                    time.sleep(3600)
                    attempt -= 1
                else:
                    if attempt < max_retries:
                        logger.warning(f"Failed to upload folder {folder_path}, retry {attempt}/{max_retries}")
                    else:
                        logger.error(f"Failed after {max_retries} attempts: {e}")
                        raise
352
+
353
+ @staticmethod
354
+ def increment_repo_name(repo_id: str) -> str:
355
+ """リポジトリ名の末尾の数字をインクリメントする。"""
356
+ match = re.search(r'(\d+)$', repo_id)
357
+ if match:
358
+ number = int(match.group(1)) + 1
359
+ new_repo_id = re.sub(r'\d+$', str(number), repo_id)
360
+ else:
361
+ new_repo_id = f"{repo_id}1"
362
+ return new_repo_id
363
+
364
+ def read_model_list(self) -> dict:
365
+ """モデルリストを読み込む。"""
366
+ model_list = {}
367
+ try:
368
+ with open(self.config.LIST_FILE, "r", encoding="utf-8") as f:
369
+ for line in f:
370
+ line = line.strip()
371
+ if line:
372
+ parts = line.split(": ", 1)
373
+ if len(parts) == 2:
374
+ modelpage_name, model_hf_url = parts
375
+ model_list[model_hf_url] = modelpage_name
376
+ except Exception as e:
377
+ logger.error(f"Failed to read model list: {e}")
378
+ return model_list
379
+
380
+ def get_repo_info(self, repo_id):
381
+ """リポジトリの情報を取得する。"""
382
+ try:
383
+ repo_info = self.api.repo_info(repo_id=repo_id, files_metadata=True)
384
+ file_paths = [sibling.rfilename for sibling in repo_info.siblings]
385
+ return file_paths
386
+ except Exception as e:
387
+ logger.error(f"Failed to get repo info for {repo_id}: {e}")
388
+ return []
389
+
390
    def process_model(self, model_url: str):
        """Download one model, then encrypt and upload the whole folder."""
        try:
            # Extract the model id from the URL.
            model_id = model_url.rstrip("/").split("/")[-1]

            # Fetch the model metadata.
            model_info = self.get_model_info(model_id)
            if not model_info or "modelVersions" not in model_info:
                logger.error(f"No valid model info for ID {model_id}. Skipping.")
                return

            # All published versions of the model.
            versions = model_info["modelVersions"]
            if not versions:
                logger.warning(f"No modelVersions found for ID {model_id}.")
                return

            # Build a working folder named after the model, with a short UUID
            # suffix to avoid collisions between same-named models.
            folder_name = model_info.get("name", "UnknownModel")
            folder_name = re.sub(r'[\\/*?:"<>|]', '_', folder_name)  # strip characters invalid on common OSes
            folder_name += "_" + str(uuid.uuid4())[:8]
            os.makedirs(folder_name, exist_ok=True)

            # Download the model files.
            self.download_model_files(versions, folder_name)

            # Download preview images into images/.
            self.download_images(versions, folder_name)

            # Save the model page HTML.
            model_page_url = f"{self.config.URLS['modelPage']}{model_id}"
            self.save_html_content(model_page_url, folder_name)

            # Save model_info.json.
            self.save_model_info_json(model_info, folder_name)

            # Encrypt the whole folder and upload it.
            self.encrypt_and_upload_folder(folder_name)

            # Append to model_list.log. The encrypted folder's real URL cannot
            # be known here, so only the model name/ID is recorded next to a
            # placeholder link.
            hf_url_placeholder = f"https://huggingface.co/{self.repo_ids['current']}/tree/main/[ENCRYPTED_FOLDER]"
            with open(self.config.LIST_FILE, "a", encoding="utf-8") as f:
                f.write(f"{model_info.get('name', 'UnnamedModel')} (ID:{model_id}): {hf_url_placeholder}\n")

        except Exception as e:
            logger.error(f"Error in process_model ({model_url}): {e}")
439
+
440
    async def crawl(self):
        """Endless background loop: poll CivitAI for new models and back them up."""
        while True:
            try:
                login(token=self.config.HUGGINGFACE_API_KEY, add_to_git_credential=True)

                # Refresh model_list.log from its HF repo.
                model_list_path = hf_hub_download(
                    repo_id=self.repo_ids['model_list'],
                    filename=self.config.LIST_FILE
                )
                shutil.copyfile(model_list_path, f"./{self.config.LIST_FILE}")

                # Refresh the crawl-state log file.
                local_file_path = hf_hub_download(
                    repo_id=self.repo_ids["log"],
                    filename=self.config.LOG_FILE
                )
                shutil.copyfile(local_file_path, f"./{self.config.LOG_FILE}")

                # Read the log: line 1 is a JSON list of known model IDs,
                # line 2 is the current backup repo id.
                with open(self.config.LOG_FILE, "r", encoding="utf-8") as file:
                    lines = file.read().splitlines()
                    old_models = json.loads(lines[0]) if len(lines) > 0 else []
                    self.repo_ids["current"] = lines[1] if len(lines) > 1 else ""

                # Check for newly published models.
                r = requests.get(self.config.URLS["latest"], headers=self.config.HEADERS)
                r.raise_for_status()
                latest_models = r.json().get("items", [])
                latest_model_ids = [m["id"] for m in latest_models if "id" in m]

                new_models = list(set(latest_model_ids) - set(old_models))
                if new_models:
                    logger.info(f"New model IDs found: {new_models}")
                    # Only one model is processed per loop iteration.
                    model_id = new_models[0]

                    for attempt in range(1, 6):
                        try:
                            self.process_model(self.config.URLS["modelId"] + str(model_id))
                            break
                        except Exception as e:
                            logger.error(f"Failed to process model {model_id} (attempt {attempt}/5): {e}")
                            if attempt == 5:
                                logger.error(f"Skipping model {model_id} after 5 failures.")
                            else:
                                await asyncio.sleep(2)

                    # Record the processed model and rewrite the state log.
                    old_models.append(model_id)
                    with open(self.config.LOG_FILE, "w", encoding="utf-8") as f:
                        f.write(json.dumps(old_models) + "\n")
                        f.write(f"{self.repo_ids['current']}\n")
                    logger.info(f"Updated log with new model ID: {model_id}")

                    # Upload the state log & model_list.log.
                    self.upload_file(
                        file_path=self.config.LOG_FILE,
                        repo_id=self.repo_ids["log"],
                        path_in_repo=self.config.LOG_FILE
                    )
                    self.upload_file(
                        file_path=self.config.LIST_FILE,
                        repo_id=self.repo_ids["model_list"],
                        path_in_repo=self.config.LIST_FILE
                    )
                else:
                    # Nothing new: refresh the log, upload it, and wait.
                    with open(self.config.LOG_FILE, "w", encoding="utf-8") as f:
                        f.write(json.dumps(latest_model_ids) + "\n")
                        f.write(f"{self.repo_ids['current']}\n")
                    logger.info(f"No new models. Updated log: {self.config.LOG_FILE}")
                    self.upload_file(
                        file_path=self.config.LOG_FILE,
                        repo_id=self.repo_ids["log"],
                        path_in_repo=self.config.LOG_FILE
                    )
                    logger.info("Uploaded log file.")
                    await asyncio.sleep(60)
                    continue

            except Exception as e:
                # Any unexpected failure: log it and back off before retrying.
                logger.error(f"Error in crawl loop: {e}")
                await asyncio.sleep(300)
524
+
525
+
526
# FastAPI application: the module-level `app` is what uvicorn serves
# (see the Dockerfile CMD "main:app").
config = Config()
crawler = CivitAICrawler(config)
app = crawler.app
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
fastapi==0.74.*
# NOTE(review): sentencepiece/torch/transformers are not imported anywhere in
# main.py — confirm they are actually needed before removing.
sentencepiece==0.1.*
torch==1.11.*
transformers==4.*
uvicorn[standard]==0.17.*
requests==2.27.*
beautifulsoup4
huggingface_hub
fake-useragent