ttttdiva committed on
Commit
f30744e
·
verified ·
1 Parent(s): 6daeaff

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +150 -177
main.py CHANGED
@@ -46,7 +46,7 @@ class Config:
46
  "Content-Type": "application/json"
47
  }
48
 
49
- # ===== rclone 用の追加設定 =====
50
  RCLONE_CONF_BASE64 = os.environ.get("RCLONE_CONF_BASE64", "")
51
  ENCRYPTED_DIR = "/home/user/app/encrypted"
52
 
@@ -61,9 +61,8 @@ class CivitAICrawler:
61
  self.repo_ids = self.config.REPO_IDS.copy()
62
  self.jst = self.config.JST
63
 
64
- # rcloneのセットアップ
65
  self.setup_rclone_conf()
66
-
67
  self.setup_routes()
68
 
69
  def setup_routes(self):
@@ -83,7 +82,7 @@ class CivitAICrawler:
83
  asyncio.create_task(self.crawl())
84
 
85
  # ============================================================================
86
- # rclone設定 & 暗号化アップロード関連
87
  # ============================================================================
88
  def setup_rclone_conf(self):
89
  if not self.config.RCLONE_CONF_BASE64:
@@ -97,10 +96,10 @@ class CivitAICrawler:
97
  logger.info(f"[INFO] rclone.conf created at: {conf_path}")
98
 
99
  def encrypt_with_rclone(self, local_path: str):
100
- """単一ファイル or ディレクトリを cryptLocal: にコピーし、暗号化する"""
101
  if not os.path.exists(local_path):
102
  raise FileNotFoundError(f"[ERROR] Local path not found: {local_path}")
103
- # 事前に暗号先ディレクトリをクリーンアップ
104
  if os.path.isdir(self.config.ENCRYPTED_DIR):
105
  shutil.rmtree(self.config.ENCRYPTED_DIR, ignore_errors=True)
106
 
@@ -116,13 +115,14 @@ class CivitAICrawler:
116
  )
117
 
118
  def upload_encrypted_files(self, repo_id: str, base_path_in_repo: str = ""):
119
- """self.config.ENCRYPTED_DIR 配下の暗号化済ファイルを再帰的にアップロード"""
120
  max_retries = 5
121
  for root, dirs, files in os.walk(self.config.ENCRYPTED_DIR):
122
  for fn in files:
123
  encrypted_file_path = os.path.join(root, fn)
124
  if not os.path.isfile(encrypted_file_path):
125
  continue
 
126
  relative_path = os.path.relpath(encrypted_file_path, self.config.ENCRYPTED_DIR)
127
  upload_path_in_repo = os.path.join(base_path_in_repo, relative_path)
128
 
@@ -139,7 +139,7 @@ class CivitAICrawler:
139
  except Exception as e:
140
  attempt += 1
141
  error_message = str(e)
142
- # 429 Rate-limit (31 minutes)
143
  if "rate-limited" in error_message and "minutes" in error_message:
144
  import re
145
  match = re.search(r"in (\d+) minutes?", error_message)
@@ -149,13 +149,13 @@ class CivitAICrawler:
149
  time.sleep(minutes * 60)
150
  attempt -= 1
151
  continue
152
- # 1時間待機パターン
153
  if "you can retry this action in about 1 hour" in error_message:
154
  logger.warning("Encountered 'retry in 1 hour' error. Waiting 1 hour...")
155
  time.sleep(3600)
156
  attempt -= 1
157
  continue
158
- # 100kファイル上限
159
  if "over the limit of 100000 files" in error_message:
160
  logger.warning("Repository file limit exceeded. Creating a new repository...")
161
  self.repo_ids['current'] = self.increment_repo_name(self.repo_ids['current'])
@@ -169,50 +169,52 @@ class CivitAICrawler:
169
  f"Failed to upload {encrypted_file_path}, retry {attempt}/{max_retries}..."
170
  )
171
  else:
172
- logger.error(
173
- f"Failed to upload after {max_retries} attempts: {encrypted_file_path}"
174
- )
175
  raise
176
 
177
- @staticmethod
178
- def increment_repo_name(repo_id: str) -> str:
179
- match = re.search(r'(\d+)$', repo_id)
180
- if match:
181
- number = int(match.group(1)) + 1
182
- return re.sub(r'\d+$', str(number), repo_id)
183
- else:
184
- return f"{repo_id}1"
 
 
 
185
 
186
  # ============================================================================
187
- # 単ファイル暗号化アップロード → ローカル削除
188
  # ============================================================================
189
- def upload_file_encrypted_one_by_one(
190
- self,
191
- local_path: str,
192
- repo_id: Optional[str] = None,
193
- path_in_repo: str = ""
194
- ):
195
  """
196
- 単一ファイル (or フォルダ) を暗号化してアップロードしたあと、ローカルファイルを削除する。
197
  """
198
  if not repo_id:
199
  repo_id = self.repo_ids['current']
200
 
201
- self.encrypt_with_rclone(local_path)
202
- self.upload_encrypted_files(repo_id=repo_id, base_path_in_repo=path_in_repo)
203
 
204
- # 暗号化用のENCRYPTED_DIRを消す
205
  if os.path.isdir(self.config.ENCRYPTED_DIR):
206
  shutil.rmtree(self.config.ENCRYPTED_DIR, ignore_errors=True)
 
 
 
207
 
208
- # 元のローカルファイルも削除
209
- if os.path.isfile(local_path):
210
- os.remove(local_path)
211
- elif os.path.isdir(local_path):
212
- shutil.rmtree(local_path, ignore_errors=True)
 
 
 
213
 
214
  # ============================================================================
215
- # 生ファイルアップロード (ログなど)
216
  # ============================================================================
217
  def upload_file_raw(self, file_path: str, repo_id: Optional[str] = None, path_in_repo: Optional[str] = None):
218
  if repo_id is None:
@@ -242,7 +244,7 @@ class CivitAICrawler:
242
  repo_id = self.repo_ids['current']
243
  continue
244
  elif "you can retry this action in about 1 hour" in error_message:
245
- logger.warning("Encountered 'retry in 1 hour' error. Waiting 1 hour...")
246
  time.sleep(3600)
247
  attempt -= 1
248
  else:
@@ -253,7 +255,7 @@ class CivitAICrawler:
253
  raise
254
 
255
  # ============================================================================
256
- # ダウンロード関連
257
  # ============================================================================
258
  @staticmethod
259
  def get_filename_from_cd(content_disposition: Optional[str], default_name: str) -> str:
@@ -280,47 +282,67 @@ class CivitAICrawler:
280
  file.write(chunk)
281
 
282
  logger.info(f"Download completed: {file_path}")
283
- return file_path # ★ ダウンロードしたファイルのパスを返すように
284
 
285
  # ============================================================================
286
- # (★修正)1ファイルずつDL→暗号化→アップロード→削除
287
  # ============================================================================
288
- def process_latest_files_one_by_one(self, version_data: dict, model_folder: str, encrypted_folder_name: str):
289
- """
290
- 最新バージョンのファイルを1つずつダウンロード→暗号化アップロード→ローカル削除
291
- path_in_repo は "{encrypted_folder_name}/" をベースに。
292
- """
293
- files = version_data.get("files", [])
294
- for file_info in files:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
  download_url = file_info["downloadUrl"]
296
  file_name = file_info["name"]
 
 
 
 
 
297
 
298
- # ダウンロード
299
- local_path = self.download_file(download_url, model_folder, file_name)
300
- if not local_path or not os.path.exists(local_path):
301
- logger.warning(f"Skip because file not found locally: {local_path}")
302
- continue
303
-
304
- # 暗号化アップロード
305
- # 例: "myModelName/filename"
306
- in_repo_path = os.path.join(encrypted_folder_name, file_name)
307
- self.upload_file_encrypted_one_by_one(local_path, repo_id=self.repo_ids['current'], path_in_repo=in_repo_path)
308
 
309
- def process_images_one_by_one(self, version_list: list, model_folder: str, encrypted_folder_name: str):
310
- """
311
- 画像をすべて1つずつDL→暗号化アップロード→削除
312
- path_in_repo は "{encrypted_folder_name}/images/"
313
- """
314
  images = []
315
- for version in version_list:
316
- for img_info in version.get("images", []):
317
- images.append(img_info["url"])
318
 
319
  for image_url in images:
320
- image_name = image_url.split("/")[-1] + ".png"
321
- local_path = os.path.join(model_folder, image_name)
322
-
323
- # ダウンロード
324
  try:
325
  resp = requests.get(image_url, stream=True)
326
  resp.raise_for_status()
@@ -330,46 +352,7 @@ class CivitAICrawler:
330
  logger.info(f"Downloaded image: {local_path}")
331
  except Exception as e:
332
  logger.error(f"Error downloading image {image_url}: {e}")
333
- continue
334
 
335
- # アップロード
336
- in_repo_path = os.path.join(encrypted_folder_name, "images", image_name)
337
- self.upload_file_encrypted_one_by_one(local_path, self.repo_ids['current'], in_repo_path)
338
-
339
- def process_old_versions_one_by_one(self, version_list: list, model_folder: str, encrypted_folder_name: str):
340
- """
341
- 古いバージョン (index=1以降) のファイルを 1つずつダウンロード→暗号化アップロード→削除
342
- path_in_repo は "{encrypted_folder_name}/old_versions/{versionID_orName}/filename"
343
- """
344
- if len(version_list) <= 1:
345
- return
346
-
347
- for old_version in version_list[1:]:
348
- # どんな名前でフォルダを区別するか(バージョンIDやバージョン名など)
349
- version_id_or_name = str(old_version.get("id", "old_ver"))
350
- files = old_version.get("files", [])
351
- for file_info in files:
352
- download_url = file_info["downloadUrl"]
353
- file_name = file_info["name"]
354
-
355
- # ダウンロード
356
- local_path = self.download_file(download_url, model_folder, file_name)
357
- if not local_path or not os.path.exists(local_path):
358
- logger.warning(f"Skip because file not found locally: {local_path}")
359
- continue
360
-
361
- # 暗号化アップロード
362
- in_repo_path = os.path.join(
363
- encrypted_folder_name,
364
- "old_versions",
365
- version_id_or_name,
366
- file_name
367
- )
368
- self.upload_file_encrypted_one_by_one(local_path, self.repo_ids['current'], in_repo_path)
369
-
370
- # ============================================================================
371
- # HTML & model_info.json は軽量なので一括DL→アップロードでもOK
372
- # ============================================================================
373
  def save_html_content(self, url: str, folder: str):
374
  try:
375
  response = requests.get(url)
@@ -377,20 +360,13 @@ class CivitAICrawler:
377
  html_path = os.path.join(folder, os.path.basename(folder) + ".html")
378
  with open(html_path, 'w', encoding='utf-8') as file:
379
  file.write(response.text)
380
- return html_path
381
  except Exception as e:
382
  logger.error(f"Error saving HTML content for URL {url}: {e}")
383
- return None
384
 
385
- def save_model_info(self, model_info: dict, folder: str):
386
- json_path = os.path.join(folder, "model_info.json")
387
- try:
388
- with open(json_path, "w", encoding="utf-8") as file:
389
- json.dump(model_info, file, indent=2)
390
- return json_path
391
- except Exception as e:
392
- logger.error(f"Error saving model_info.json: {e}")
393
- return None
394
 
395
  # ============================================================================
396
  # model_list.log
@@ -411,22 +387,23 @@ class CivitAICrawler:
411
  logger.error(f"Failed to read model list: {e}")
412
  return model_list
413
 
 
 
 
414
  def get_model_info(self, model_id: str) -> dict:
415
- """
416
- model_id(例: '1110807')に対応するモデル情報を
417
- CivitAIのAPIから取得し、jsonを返す
418
- """
419
  try:
420
  url = self.config.URLS["modelId"] + str(model_id)
421
- response = requests.get(url, headers=self.config.HEADERS)
422
- response.raise_for_status()
423
- return response.json()
424
  except requests.RequestException as e:
425
  logger.error(f"Failed to retrieve model info for ID {model_id}: {e}")
426
- return {} # or return None
427
-
 
 
 
428
  def process_model(self, model_url: str):
429
- """ 指定されたモデルURLを処理 (1つずつファイルをDL→アップロード→削除) """
430
  try:
431
  model_id = model_url.rstrip("/").split("/")[-1]
432
  model_info = self.get_model_info(model_id)
@@ -434,103 +411,99 @@ class CivitAICrawler:
434
  logger.error(f"No model_info returned for {model_id}")
435
  return
436
 
437
- latest_version = model_info.get("modelVersions", [])[0]
438
- model_file = next(
439
- (file for file in latest_version.get("files", []) if file.get("type") == "Model"),
440
- None
441
- )
 
 
442
  if model_file:
443
- latest_filename = model_file["name"]
444
  folder = os.path.splitext(latest_filename)[0]
445
  else:
446
- # ファイルtype=Modelが無い場合、とりあえず最初のファイル名でフォルダ名を作る
447
  first_file = latest_version["files"][0]
448
- latest_filename = first_file["name"]
449
  folder = os.path.splitext(latest_filename)[0]
450
- logger.warning(f"No 'Model' type file found for {model_id}. Using first file's name.")
451
 
452
- # ローカルに一時フォルダを作成
453
  os.makedirs(folder, exist_ok=True)
454
 
455
- # 同名判定 (model_list.log を読み込み、modelpage_name が既にあればスキップ)
456
  model_list = self.read_model_list()
457
  modelpage_name = model_info.get("name", f"Model_{model_id}")
458
  if modelpage_name in model_list.values():
459
  logger.info(f"Model '{modelpage_name}' already in model_list. Skipping.")
460
- # return # 必要に応じてリターン
461
 
462
- # HTMLやmodel_info は軽いので一括保存→まとめて単ファイル暗号化アップロード
463
- html_path = self.save_html_content(self.config.URLS["modelPage"] + str(model_id), folder)
464
- json_path = self.save_model_info(model_info, folder)
465
 
466
- # 暗号化アップロード(HTML, JSON など)
467
- # HF 上では "{folder}/model_info.json" としておく例
468
- if html_path and os.path.exists(html_path):
469
- in_repo_path = os.path.join(folder, os.path.basename(html_path))
470
- self.upload_file_encrypted_one_by_one(html_path, self.repo_ids['current'], in_repo_path)
471
 
472
- if json_path and os.path.exists(json_path):
473
- in_repo_path = os.path.join(folder, "model_info.json")
474
- self.upload_file_encrypted_one_by_one(json_path, self.repo_ids['current'], in_repo_path)
475
 
476
- # 最新バージョンを1ファイルずつアップロード
477
- self.process_latest_files_one_by_one(latest_version, folder, folder)
478
 
479
- # 画像を1ファイルずつアップロード
480
- self.process_images_one_by_one(model_info["modelVersions"], folder, folder)
481
 
482
- # 古いバージョンを1ファイルずつアップロード
483
- self.process_old_versions_one_by_one(model_info["modelVersions"], folder, folder)
 
484
 
485
- # ここで folder はほぼ空だが、一応削除
486
  if os.path.exists(folder):
487
  shutil.rmtree(folder)
488
 
489
- # 最後に model_list.log に追記 (「modelpage_name: HFのURL構造」)
490
- # 今回はフォルダごと暗号化ではなくファイルごとなので、ひとまず "folder" をルート名に使っておく
491
- # Hugging Face 上でのトップフォルダ URL:
492
- # https://huggingface.co/REPO_ID/tree/main/folder
493
- model_hf_url = f"https://huggingface.co/{self.repo_ids['current']}/tree/main/{folder}"
494
  with open(self.config.LIST_FILE, "a", encoding="utf-8") as f:
495
  f.write(f"{modelpage_name}: {model_hf_url}\n")
496
 
497
  except Exception as e:
498
  logger.error(f"Unexpected error processing model ({model_url}): {e}")
499
 
500
-
 
 
501
  async def crawl(self):
502
- """新着モデルをチェックし、1つずつ処理するループ"""
503
  while True:
504
  try:
505
  login(token=self.config.HUGGINGFACE_API_KEY, add_to_git_credential=True)
506
 
507
- # 最新のmodel_list.log & civitai_backup.log をダウンロード
508
  model_list_path = hf_hub_download(repo_id=self.repo_ids['model_list'], filename=self.config.LIST_FILE)
509
  shutil.copyfile(model_list_path, f"./{self.config.LIST_FILE}")
510
 
511
  local_file_path = hf_hub_download(repo_id=self.repo_ids["log"], filename=self.config.LOG_FILE)
512
  shutil.copyfile(local_file_path, f"./{self.config.LOG_FILE}")
513
 
514
- # civitai_backup.log を読み取り
515
  with open(self.config.LOG_FILE, "r", encoding="utf-8") as file:
516
  lines = file.read().splitlines()
517
  old_models = json.loads(lines[0]) if len(lines) > 0 else []
518
  self.repo_ids["current"] = lines[1] if len(lines) > 1 else ""
519
 
520
- # 新着モデルを確認
521
  response = requests.get(self.config.URLS["latest"], headers=self.config.HEADERS)
522
  response.raise_for_status()
523
  latest_models = response.json().get("items", [])
524
- latest_model_ids = [item.get("id") for item in latest_models if "id" in item]
525
 
526
- # 増分チェック
527
  new_models = list(set(latest_model_ids) - set(old_models))
528
 
529
  if new_models:
530
  logger.info(f"New models found: {new_models}")
531
  model_id = new_models[0]
532
 
533
- # 試行5回
534
  for attempt in range(1, 6):
535
  try:
536
  self.process_model(f"{self.config.URLS['modelId']}{model_id}")
@@ -542,7 +515,7 @@ class CivitAICrawler:
542
  else:
543
  await asyncio.sleep(2)
544
  else:
545
- # 新モデルなし → backup.log を更新 & アップロード
546
  with open(self.config.LOG_FILE, "w", encoding="utf-8") as f:
547
  f.write(json.dumps(latest_model_ids) + "\n")
548
  f.write(f"{self.repo_ids['current']}\n")
@@ -555,14 +528,14 @@ class CivitAICrawler:
555
  await asyncio.sleep(60)
556
  continue
557
 
558
- # 成功したモデルをold_modelsに追加 → backup.log更新
559
  old_models.append(model_id)
560
  with open(self.config.LOG_FILE, "w", encoding="utf-8") as f:
561
  f.write(json.dumps(old_models) + "\n")
562
  f.write(f"{self.repo_ids['current']}\n")
563
  logger.info(f"Updated log file with new model ID: {model_id}")
564
 
565
- # ログ & model_list.log をアップ
566
  self.upload_file_raw(self.config.LOG_FILE, self.repo_ids["log"], self.config.LOG_FILE)
567
  self.upload_file_raw(self.config.LIST_FILE, self.repo_ids["model_list"], self.config.LIST_FILE)
568
 
 
46
  "Content-Type": "application/json"
47
  }
48
 
49
+ # rclone 用の追加設定
50
  RCLONE_CONF_BASE64 = os.environ.get("RCLONE_CONF_BASE64", "")
51
  ENCRYPTED_DIR = "/home/user/app/encrypted"
52
 
 
61
  self.repo_ids = self.config.REPO_IDS.copy()
62
  self.jst = self.config.JST
63
 
64
+ # rclone 設定の読み込み
65
  self.setup_rclone_conf()
 
66
  self.setup_routes()
67
 
68
  def setup_routes(self):
 
82
  asyncio.create_task(self.crawl())
83
 
84
  # ============================================================================
85
+ # rclone 設定 & 暗号化アップロード処理
86
  # ============================================================================
87
  def setup_rclone_conf(self):
88
  if not self.config.RCLONE_CONF_BASE64:
 
96
  logger.info(f"[INFO] rclone.conf created at: {conf_path}")
97
 
98
  def encrypt_with_rclone(self, local_path: str):
99
+ """フォルダ or ファイルを cryptLocal: にコピーし、フォルダ名・ファイル名を暗号化"""
100
  if not os.path.exists(local_path):
101
  raise FileNotFoundError(f"[ERROR] Local path not found: {local_path}")
102
+ # 事前に暗号先ディレクトリを掃除
103
  if os.path.isdir(self.config.ENCRYPTED_DIR):
104
  shutil.rmtree(self.config.ENCRYPTED_DIR, ignore_errors=True)
105
 
 
115
  )
116
 
117
  def upload_encrypted_files(self, repo_id: str, base_path_in_repo: str = ""):
118
+ """self.config.ENCRYPTED_DIR 以下の暗号化済ファイルを再帰的にアップロード"""
119
  max_retries = 5
120
  for root, dirs, files in os.walk(self.config.ENCRYPTED_DIR):
121
  for fn in files:
122
  encrypted_file_path = os.path.join(root, fn)
123
  if not os.path.isfile(encrypted_file_path):
124
  continue
125
+
126
  relative_path = os.path.relpath(encrypted_file_path, self.config.ENCRYPTED_DIR)
127
  upload_path_in_repo = os.path.join(base_path_in_repo, relative_path)
128
 
 
139
  except Exception as e:
140
  attempt += 1
141
  error_message = str(e)
142
+ # 429 Rate-limit with "in XX minutes"
143
  if "rate-limited" in error_message and "minutes" in error_message:
144
  import re
145
  match = re.search(r"in (\d+) minutes?", error_message)
 
149
  time.sleep(minutes * 60)
150
  attempt -= 1
151
  continue
152
+ # 1時間待機
153
  if "you can retry this action in about 1 hour" in error_message:
154
  logger.warning("Encountered 'retry in 1 hour' error. Waiting 1 hour...")
155
  time.sleep(3600)
156
  attempt -= 1
157
  continue
158
+ # ファイル上限
159
  if "over the limit of 100000 files" in error_message:
160
  logger.warning("Repository file limit exceeded. Creating a new repository...")
161
  self.repo_ids['current'] = self.increment_repo_name(self.repo_ids['current'])
 
169
  f"Failed to upload {encrypted_file_path}, retry {attempt}/{max_retries}..."
170
  )
171
  else:
172
+ logger.error(f"Failed to upload after {max_retries} attempts: {encrypted_file_path}")
 
 
173
  raise
174
 
175
+ def upload_folder_encrypted(self, folder_path: str, repo_id: Optional[str] = None, path_in_repo: str = ""):
176
+ """フォルダを丸ごと暗号化してアップロード (=フォルダ名も暗号化)"""
177
+ if not repo_id:
178
+ repo_id = self.repo_ids['current']
179
+
180
+ self.encrypt_with_rclone(folder_path)
181
+ self.upload_encrypted_files(repo_id, base_path_in_repo=path_in_repo)
182
+
183
+ # 暗号化フォルダを削除
184
+ if os.path.isdir(self.config.ENCRYPTED_DIR):
185
+ shutil.rmtree(self.config.ENCRYPTED_DIR, ignore_errors=True)
186
 
187
  # ============================================================================
188
+ # 単一ファイルを暗号化アップロードしてローカル削除 (old_versions用)
189
  # ============================================================================
190
+ def upload_file_encrypted_one_by_one(self, file_path: str, repo_id: Optional[str] = None, path_in_repo: str = ""):
 
 
 
 
 
191
  """
192
+ 単一ファイルを暗号化アップロードし、アップロード後にローカルファイルを削除。
193
  """
194
  if not repo_id:
195
  repo_id = self.repo_ids['current']
196
 
197
+ self.encrypt_with_rclone(file_path)
198
+ self.upload_encrypted_files(repo_id, base_path_in_repo=path_in_repo)
199
 
200
+ # 暗号化ディレクトリを削除
201
  if os.path.isdir(self.config.ENCRYPTED_DIR):
202
  shutil.rmtree(self.config.ENCRYPTED_DIR, ignore_errors=True)
203
+ # ローカルの実ファイル削除
204
+ if os.path.exists(file_path):
205
+ os.remove(file_path)
206
 
207
+ @staticmethod
208
+ def increment_repo_name(repo_id: str) -> str:
209
+ match = re.search(r'(\d+)$', repo_id)
210
+ if match:
211
+ number = int(match.group(1)) + 1
212
+ return re.sub(r'\d+$', str(number), repo_id)
213
+ else:
214
+ return f"{repo_id}1"
215
 
216
  # ============================================================================
217
+ # ログや model_list.log は生アップロード
218
  # ============================================================================
219
  def upload_file_raw(self, file_path: str, repo_id: Optional[str] = None, path_in_repo: Optional[str] = None):
220
  if repo_id is None:
 
244
  repo_id = self.repo_ids['current']
245
  continue
246
  elif "you can retry this action in about 1 hour" in error_message:
247
+ logger.warning("Encountered 'retry in 1 hour' error. Waiting 1 hour before retrying...")
248
  time.sleep(3600)
249
  attempt -= 1
250
  else:
 
255
  raise
256
 
257
  # ============================================================================
258
+ # ダウンロード処理
259
  # ============================================================================
260
  @staticmethod
261
  def get_filename_from_cd(content_disposition: Optional[str], default_name: str) -> str:
 
282
  file.write(chunk)
283
 
284
  logger.info(f"Download completed: {file_path}")
285
+ return file_path
286
 
287
  # ============================================================================
288
+ # 古いバージョンのみ1ファイルずつアップロード
289
  # ============================================================================
290
+ def download_old_versions_one_by_one(self, version_list: list, folder: str):
291
+ """version_list[1:] を対象に、モデルファイルを 1ファイルDL→upload→削除 を繰り返す"""
292
+ if len(version_list) <= 1:
293
+ return
294
+
295
+ old_versions_folder = os.path.join(folder, "old_versions")
296
+ os.makedirs(old_versions_folder, exist_ok=True)
297
+
298
+ for version in version_list[1:]:
299
+ for file_info in version.get("files", []):
300
+ download_url = file_info["downloadUrl"]
301
+ file_name = file_info["name"]
302
+
303
+ local_path = self.download_file(download_url, old_versions_folder, file_name)
304
+ if not local_path or not os.path.exists(local_path):
305
+ logger.error(f"Failed to download or file not found: {file_name}")
306
+ continue
307
+
308
+ # 1つアップロードして削除
309
+ # path_in_repo を空文字にすればフォルダ名も暗号化される(トップレベル)
310
+ # もしサブフォルダにまとめたいなら "old_versions" とか指定する
311
+ self.upload_file_encrypted_one_by_one(local_path, path_in_repo="")
312
+
313
+ # old_versions フォルダ内は空になったはずなので削除
314
+ if os.path.exists(old_versions_folder):
315
+ shutil.rmtree(old_versions_folder, ignore_errors=True)
316
+
317
+ # ============================================================================
318
+ # 従来どおり「最新バージョンのファイル一式 + images」フォルダを一括DL→アップロード
319
+ # ============================================================================
320
+ def download_model(self, model_versions: list, folder: str):
321
+ """最新バージョンを一括ダウンロード (フォルダにまとめる)"""
322
+ latest_version = model_versions[0]
323
+ latest_files = latest_version["files"]
324
+
325
+ for file_info in latest_files:
326
  download_url = file_info["downloadUrl"]
327
  file_name = file_info["name"]
328
+ local_path = self.download_file(download_url, folder, file_name)
329
+ if local_path and os.path.exists(local_path):
330
+ logger.info(f"Downloaded {file_name}")
331
+ else:
332
+ logger.warning(f"Could not download {file_name}")
333
 
334
+ def download_images(self, model_versions: list, folder: str):
335
+ images_folder = os.path.join(folder, "images")
336
+ os.makedirs(images_folder, exist_ok=True)
 
 
 
 
 
 
 
337
 
 
 
 
 
 
338
  images = []
339
+ for version in model_versions:
340
+ for img in version.get("images", []):
341
+ images.append(img["url"])
342
 
343
  for image_url in images:
344
+ image_name = os.path.basename(image_url) + ".png"
345
+ local_path = os.path.join(images_folder, image_name)
 
 
346
  try:
347
  resp = requests.get(image_url, stream=True)
348
  resp.raise_for_status()
 
352
  logger.info(f"Downloaded image: {local_path}")
353
  except Exception as e:
354
  logger.error(f"Error downloading image {image_url}: {e}")
 
355
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356
  def save_html_content(self, url: str, folder: str):
357
  try:
358
  response = requests.get(url)
 
360
  html_path = os.path.join(folder, os.path.basename(folder) + ".html")
361
  with open(html_path, 'w', encoding='utf-8') as file:
362
  file.write(response.text)
 
363
  except Exception as e:
364
  logger.error(f"Error saving HTML content for URL {url}: {e}")
 
365
 
366
+ @staticmethod
367
+ def save_model_info(model_info: dict, folder: str):
368
+ with open(os.path.join(folder, "model_info.json"), "w", encoding="utf-8") as file:
369
+ json.dump(model_info, file, indent=2)
 
 
 
 
 
370
 
371
  # ============================================================================
372
  # model_list.log
 
387
  logger.error(f"Failed to read model list: {e}")
388
  return model_list
389
 
390
+ # ============================================================================
391
+ # model 情報取得
392
+ # ============================================================================
393
  def get_model_info(self, model_id: str) -> dict:
 
 
 
 
394
  try:
395
  url = self.config.URLS["modelId"] + str(model_id)
396
+ resp = requests.get(url, headers=self.config.HEADERS)
397
+ resp.raise_for_status()
398
+ return resp.json()
399
  except requests.RequestException as e:
400
  logger.error(f"Failed to retrieve model info for ID {model_id}: {e}")
401
+ return {}
402
+
403
+ # ============================================================================
404
+ # メイン処理: 最新ファイル + images はフォルダごとアップロード。old_versions は1ファイルずつ。
405
+ # ============================================================================
406
  def process_model(self, model_url: str):
 
407
  try:
408
  model_id = model_url.rstrip("/").split("/")[-1]
409
  model_info = self.get_model_info(model_id)
 
411
  logger.error(f"No model_info returned for {model_id}")
412
  return
413
 
414
+ model_versions = model_info.get("modelVersions", [])
415
+ if not model_versions:
416
+ logger.error(f"No modelVersions in model info {model_id}")
417
+ return
418
+
419
+ latest_version = model_versions[0]
420
+ model_file = next((file for file in latest_version["files"] if file.get('type') == 'Model'), None)
421
  if model_file:
422
+ latest_filename = model_file['name']
423
  folder = os.path.splitext(latest_filename)[0]
424
  else:
 
425
  first_file = latest_version["files"][0]
426
+ latest_filename = first_file['name']
427
  folder = os.path.splitext(latest_filename)[0]
428
+ logger.warning(f"No 'Model' type file found for model ID {model_id}. Using first file's name.")
429
 
 
430
  os.makedirs(folder, exist_ok=True)
431
 
432
+ # すでにアップ済みかどうか model_list.log でチェック (モデル名ベース)
433
  model_list = self.read_model_list()
434
  modelpage_name = model_info.get("name", f"Model_{model_id}")
435
  if modelpage_name in model_list.values():
436
  logger.info(f"Model '{modelpage_name}' already in model_list. Skipping.")
437
+ # 必要ならreturn
438
 
439
+ # 最新バージョン (まとめてダウンロード)
440
+ self.download_model(model_versions, folder)
 
441
 
442
+ # 画像 (imagesフォルダまるごとダウンロード)
443
+ self.download_images(model_versions, folder)
 
 
 
444
 
445
+ # HTML & model_info.json
446
+ self.save_html_content(self.config.URLS["modelPage"] + str(model_id), folder)
447
+ self.save_model_info(model_info, folder)
448
 
449
+ # 古いバージョンのみ「1つずつアップロード&削除」
450
+ self.download_old_versions_one_by_one(model_versions, folder)
451
 
452
+ # ↑で old_versions は空になった → あとはフォルダに残っているのは
453
+ # 最新バージョンファイル・imagesフォルダ・model_info.json・HTML など
454
 
455
+ # "folder" 自体を暗号化アップロード (= images フォルダごとアップロード)
456
+ # path_in_repo を "" にすればフォルダ名も暗号化される
457
+ self.upload_folder_encrypted(folder, path_in_repo="")
458
 
459
+ # ローカルフォルダ削除
460
  if os.path.exists(folder):
461
  shutil.rmtree(folder)
462
 
463
+ # model_list.log に追記
464
+ # HF上では folder名 も暗号化されるが、ここでは元の "modelpage_name"
465
+ # HFへのトップフォルダ参照URLを書く
466
+ model_hf_url = f"https://huggingface.co/{self.repo_ids['current']}/tree/main"
 
467
  with open(self.config.LIST_FILE, "a", encoding="utf-8") as f:
468
  f.write(f"{modelpage_name}: {model_hf_url}\n")
469
 
470
  except Exception as e:
471
  logger.error(f"Unexpected error processing model ({model_url}): {e}")
472
 
473
+ # ============================================================================
474
+ # crawl
475
+ # ============================================================================
476
  async def crawl(self):
 
477
  while True:
478
  try:
479
  login(token=self.config.HUGGINGFACE_API_KEY, add_to_git_credential=True)
480
 
481
+ # 最新の model_list.log & civitai_backup.log をダウンロード
482
  model_list_path = hf_hub_download(repo_id=self.repo_ids['model_list'], filename=self.config.LIST_FILE)
483
  shutil.copyfile(model_list_path, f"./{self.config.LIST_FILE}")
484
 
485
  local_file_path = hf_hub_download(repo_id=self.repo_ids["log"], filename=self.config.LOG_FILE)
486
  shutil.copyfile(local_file_path, f"./{self.config.LOG_FILE}")
487
 
488
+ # ログ読み込み
489
  with open(self.config.LOG_FILE, "r", encoding="utf-8") as file:
490
  lines = file.read().splitlines()
491
  old_models = json.loads(lines[0]) if len(lines) > 0 else []
492
  self.repo_ids["current"] = lines[1] if len(lines) > 1 else ""
493
 
494
+ # 新着モデル確認
495
  response = requests.get(self.config.URLS["latest"], headers=self.config.HEADERS)
496
  response.raise_for_status()
497
  latest_models = response.json().get("items", [])
498
+ latest_model_ids = [m["id"] for m in latest_models if "id" in m]
499
 
500
+ # 差集合
501
  new_models = list(set(latest_model_ids) - set(old_models))
502
 
503
  if new_models:
504
  logger.info(f"New models found: {new_models}")
505
  model_id = new_models[0]
506
 
 
507
  for attempt in range(1, 6):
508
  try:
509
  self.process_model(f"{self.config.URLS['modelId']}{model_id}")
 
515
  else:
516
  await asyncio.sleep(2)
517
  else:
518
+ # 新モデルなし
519
  with open(self.config.LOG_FILE, "w", encoding="utf-8") as f:
520
  f.write(json.dumps(latest_model_ids) + "\n")
521
  f.write(f"{self.repo_ids['current']}\n")
 
528
  await asyncio.sleep(60)
529
  continue
530
 
531
+ # 1件アップロードに成功したら old_models に追加
532
  old_models.append(model_id)
533
  with open(self.config.LOG_FILE, "w", encoding="utf-8") as f:
534
  f.write(json.dumps(old_models) + "\n")
535
  f.write(f"{self.repo_ids['current']}\n")
536
  logger.info(f"Updated log file with new model ID: {model_id}")
537
 
538
+ # ログと model_list.log をアップ
539
  self.upload_file_raw(self.config.LOG_FILE, self.repo_ids["log"], self.config.LOG_FILE)
540
  self.upload_file_raw(self.config.LIST_FILE, self.repo_ids["model_list"], self.config.LIST_FILE)
541