ttttdiva committed on
Commit
28aa8ba
·
verified ·
1 Parent(s): eb03ec4

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +115 -76
main.py CHANGED
@@ -233,9 +233,15 @@ class CivitAICrawler:
233
  except requests.RequestException as e:
234
  logger.error(f"Failed to retrieve model info for ID {model_id}: {e}")
235
 
236
- def download_model(self, model_versions: list, folder: str, existing_old_version_files: list = []):
 
 
 
 
 
237
  latest_version = model_versions[0]
238
  latest_files = latest_version["files"]
 
239
  for file_info in latest_files:
240
  download_url = file_info["downloadUrl"]
241
  file_name = file_info["name"]
@@ -267,45 +273,70 @@ class CivitAICrawler:
267
  except Exception as e:
268
  logger.error(f"Failed to create dummy file for {file_name}: {e}")
269
 
270
- # 古いバージョンのダウンロード
271
- if len(model_versions) > 1:
272
- old_versions_folder = os.path.join(folder, "old_versions")
273
- os.makedirs(old_versions_folder, exist_ok=True)
274
- for version in model_versions[1:]:
275
- for file_info in version["files"]:
276
- file_name = file_info["name"]
277
- if file_name in existing_old_version_files:
278
- logger.info(f"Skipping download of existing old version file: {file_name}")
279
- continue
280
- download_url = file_info["downloadUrl"]
281
- local_file_path = os.path.join(old_versions_folder, file_name)
282
- login_detected_count = 0
283
 
284
- while login_detected_count < 5:
285
- try:
286
- self.download_file(download_url, old_versions_folder, file_name)
287
- except Exception as e:
288
- logger.error(f"Exception occurred while downloading {file_name}: {e}")
289
- login_detected_count += 1
290
- continue
291
 
292
- if "login" in os.listdir(old_versions_folder):
293
- login_detected_count += 1
294
- logger.warning(f"'login' file found while downloading {file_name}. Will try again. ({login_detected_count}/5)")
295
- os.remove(os.path.join(old_versions_folder, "login"))
296
- else:
297
- logger.info(f"Successfully downloaded {file_name}")
298
- break
299
 
300
- if login_detected_count >= 5:
301
- dummy_file_name = f"{file_name}.download_failed"
302
- dummy_file_path = os.path.join(old_versions_folder, dummy_file_name)
303
- try:
304
- with open(dummy_file_path, "w") as f:
305
- f.write("Download failed after 5 attempts.")
306
- logger.error(f"Failed to download {file_name}. Created dummy file {dummy_file_name}. URL: {download_url}")
307
- except Exception as e:
308
- logger.error(f"Failed to create dummy file for {file_name}: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
 
310
  def download_images(self, model_versions: list, folder: str):
311
  images_folder = os.path.join(folder, "images")
@@ -475,12 +506,24 @@ class CivitAICrawler:
475
  return {}
476
 
477
  def process_model(self, model_url: str):
478
- """指定されたモデルURLを処理する関数。"""
 
 
 
479
  try:
480
  model_id = model_url.rstrip("/").split("/")[-1]
481
  model_info = self.get_model_info(model_id)
482
-
483
- latest_version = model_info.get("modelVersions", [])[0]
 
 
 
 
 
 
 
 
 
484
  model_file = next(
485
  (file for file in latest_version["files"] if file.get('type') == 'Model'),
486
  None
@@ -493,50 +536,46 @@ class CivitAICrawler:
493
  latest_filename = first_file['name']
494
  folder = os.path.splitext(latest_filename)[0]
495
  logger.warning(f"No 'Model' type file found for model ID {model_id}. Using first file's name.")
496
-
497
  os.makedirs(folder, exist_ok=True)
498
-
499
- # model_list を読み込み
500
- model_list = self.read_model_list()
501
-
502
- # もし既に「同名(モデルページ名)がアップされている」かどうか確認したい場合の例:
503
- # ※ 今回は modelpage_name(= model_info["name"]) をキーにするか、
504
- # あるいは model_id (str) をキーにするか、運用に合わせて設定してください。
505
- # 例として modelpage_name をキーとしてチェックする流れ:
506
- modelpage_name = model_info.get("name", "Unnamed Model")
507
-
508
- if modelpage_name in model_list.values():
509
- # 既に同モデルページ名がアップロード済み → ここでスキップや上書きなどの処理を決定
510
- logger.info(f"Model '{modelpage_name}' is already listed in model_list. Skipping re-upload.")
511
- # もし「強制再アップロード」したくないなら return で処理終了:
512
- # return
513
- # あるいは「強制アップするがバージョンだけ追加」などいろいろ処理が可能
514
- # ここではあえて続行するが、必要に応じて書き換えてください。
515
-
516
- # ダウンロードや画像保存
517
- existing_old_version_files = []
518
- self.download_model(model_info["modelVersions"], folder, existing_old_version_files)
519
- self.download_images(model_info["modelVersions"], folder)
520
  self.save_html_content(model_url, folder)
521
  self.save_model_info(model_info, folder)
522
-
523
- # ========== rclone で暗号化フォルダをアップロード ==========
524
  encrypted_top_name = self.upload_folder_encrypted(folder)
525
-
526
- # 今回アップロードしたモデルの URL
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
527
  model_hf_url = f"https://huggingface.co/{self.repo_ids['current']}/tree/main/{encrypted_top_name}"
528
-
529
- # model_list.log に追記 → "modelpage_name: model_hf_url" 形式
530
  with open(self.config.LIST_FILE, "a", encoding="utf-8") as f:
531
  f.write(f"{modelpage_name}: {model_hf_url}\n")
532
-
533
- # ローカルフォルダ削除
534
- if os.path.exists(folder):
535
- shutil.rmtree(folder)
536
-
537
- except Exception as e:
538
- logger.error(f"Unexpected error processing model ({model_url}): {e}")
539
 
 
 
540
 
541
  async def crawl(self):
542
  """モデルを定期的にチェックし、更新を行う。"""
 
233
  except requests.RequestException as e:
234
  logger.error(f"Failed to retrieve model info for ID {model_id}: {e}")
235
 
236
+ def download_model(self, model_versions: list, folder: str):
237
+ """
238
+ - 最新バージョンのファイルを「folder」へまとめてダウンロード
239
+ - 古いバージョンはまだダウンロードしない (実は後から個別にダウンロード)
240
+ (※今回は「まとめて old_versions フォルダに入れずに、1ファイルずつ別のメソッドで対処する」流れに)
241
+ """
242
  latest_version = model_versions[0]
243
  latest_files = latest_version["files"]
244
+
245
  for file_info in latest_files:
246
  download_url = file_info["downloadUrl"]
247
  file_name = file_info["name"]
 
273
  except Exception as e:
274
  logger.error(f"Failed to create dummy file for {file_name}: {e}")
275
 
276
def download_old_version_file_and_upload(self, file_info, parent_folder: str, encrypted_top_name: "str | None" = None):
    """
    Download one old-version file, encrypt and upload it, then delete the local copy.

    The file is downloaded into ``<parent_folder>/old_versions``. Up to 5
    attempts are made; an attempt counts as failed when ``download_file``
    raises, or when an entry literally named "login" appears in the download
    folder afterwards (that entry is removed before retrying). After 5 failed
    attempts a ``<file_name>.download_failed`` marker file is written and the
    method returns without uploading anything.

    Args:
        file_info: Mapping that provides the keys "name" and "downloadUrl".
        parent_folder: Local directory under which "old_versions" is created.
        encrypted_top_name: Value passed as ``base_path_in_repo`` to
            ``upload_encrypted_files``. Optional: when omitted,
            ``parent_folder`` is used instead. The call in ``process_model``
            passes only ``(file_info, encrypted_top_name)``; without this
            default that call raises ``TypeError``.
            NOTE(review): confirm that reusing the encrypted folder name as
            the local parent directory is the intended behaviour.

    Returns:
        None. Download and upload failures are logged, not raised.
    """
    max_attempts = 5

    file_name = file_info["name"]
    download_url = file_info["downloadUrl"]

    # Two-argument call: the single name serves as both the local parent
    # directory and the upload destination.
    if encrypted_top_name is None:
        encrypted_top_name = parent_folder

    old_versions_folder = os.path.join(parent_folder, "old_versions")
    os.makedirs(old_versions_folder, exist_ok=True)
    local_path = os.path.join(old_versions_folder, file_name)

    failed_attempts = 0
    while failed_attempts < max_attempts:
        try:
            self.download_file(download_url, old_versions_folder, file_name)
        except Exception as e:
            logger.error(f"Exception while downloading old file {file_name}: {e}")
            failed_attempts += 1
            continue

        # A stray "login" entry is treated as a failed download: remove it and retry.
        if "login" in os.listdir(old_versions_folder):
            failed_attempts += 1
            logger.warning(f"'login' file found while downloading {file_name}. Retry {failed_attempts}/{max_attempts}.")
            os.remove(os.path.join(old_versions_folder, "login"))
        else:
            logger.info(f"Successfully downloaded old version file: {file_name}")
            break

    if failed_attempts >= max_attempts:
        # All attempts failed: leave a marker file and skip the upload.
        dummy_file_name = f"{file_name}.download_failed"
        dummy_file_path = os.path.join(old_versions_folder, dummy_file_name)
        try:
            with open(dummy_file_path, "w", encoding="utf-8") as f:
                f.write(f"Download failed after {max_attempts} attempts.")
            logger.error(f"Failed to download {file_name} -> created dummy: {dummy_file_name}")
        except Exception as e:
            logger.error(f"Failed to create dummy file for old version {file_name}: {e}")
        return

    try:
        # Encrypt this single file, then upload the encrypted output.
        self.encrypt_with_rclone(local_path)
        self.upload_encrypted_files(
            repo_id=self.repo_ids["current"],
            base_path_in_repo=encrypted_top_name,
        )
        logger.info(f"Uploaded old version file: {file_name} into {encrypted_top_name}")
    except Exception as e:
        logger.error(f"Error uploading old version file {file_name}: {e}")
    finally:
        # Always remove the plaintext copy, even if encryption or upload
        # failed or the process is being interrupted.
        if os.path.exists(local_path):
            os.remove(local_path)
            logger.info(f"Removed local old version file: {local_path}")
340
 
341
  def download_images(self, model_versions: list, folder: str):
342
  images_folder = os.path.join(folder, "images")
 
506
  return {}
507
 
508
  def process_model(self, model_url: str):
509
+ """
510
+ - 最新バージョンをフォルダに一括DL → フォルダごと暗号化アップロード
511
+ - 古いバージョンはファイル単位で即アップロード
512
+ """
513
  try:
514
  model_id = model_url.rstrip("/").split("/")[-1]
515
  model_info = self.get_model_info(model_id)
516
+
517
+ model_versions = model_info.get("modelVersions", [])
518
+ if not model_versions:
519
+ logger.warning(f"No versions found for model ID {model_id}")
520
+ return
521
+
522
+ # ==================================================================
523
+ # 1) 「古いコード」と同じロジックでフォルダ名を決定
524
+ # (最新バージョンのうち 'type' = 'Model' のファイルがあればそれ、なければ最初のファイル名)
525
+ # ==================================================================
526
+ latest_version = model_versions[0]
527
  model_file = next(
528
  (file for file in latest_version["files"] if file.get('type') == 'Model'),
529
  None
 
536
  latest_filename = first_file['name']
537
  folder = os.path.splitext(latest_filename)[0]
538
  logger.warning(f"No 'Model' type file found for model ID {model_id}. Using first file's name.")
539
+
540
  os.makedirs(folder, exist_ok=True)
541
+
542
+ # ==================================================================
543
+ # 2) 最新バージョンをまとめてダウンロード → フォルダごと暗号化アップロード
544
+ # ==================================================================
545
+ # 最新バージョンのファイルをまとめてダウンロード
546
+ self.download_model(model_versions, folder)
547
+
548
+ # 画像ダウンロード & HTML保存 & info.json保存
549
+ self.download_images(model_versions, folder)
 
 
 
 
 
 
 
 
 
 
 
 
 
550
  self.save_html_content(model_url, folder)
551
  self.save_model_info(model_info, folder)
552
+
553
+ # フォルダごと暗号化 Hugging Face へアップ
554
  encrypted_top_name = self.upload_folder_encrypted(folder)
555
+ logger.info(f"[MAIN] Uploaded latest version folder => {encrypted_top_name}")
556
+
557
+ # ローカルフォルダ削除
558
+ shutil.rmtree(folder, ignore_errors=True)
559
+
560
+ # ==================================================================
561
+ # 3) 古いバージョンのファイルを1つずつDL → 同じ暗号化フォルダに追加
562
+ # ==================================================================
563
+ if len(model_versions) > 1:
564
+ for version in model_versions[1:]:
565
+ for file_info in version["files"]:
566
+ self.download_old_version_file_and_upload(file_info, encrypted_top_name)
567
+
568
+ # ==================================================================
569
+ # 4) model_list.log への登録やその他の処理
570
+ # ==================================================================
571
+ modelpage_name = model_info.get("name", f"Model_{model_id}")
572
  model_hf_url = f"https://huggingface.co/{self.repo_ids['current']}/tree/main/{encrypted_top_name}"
573
+
 
574
  with open(self.config.LIST_FILE, "a", encoding="utf-8") as f:
575
  f.write(f"{modelpage_name}: {model_hf_url}\n")
 
 
 
 
 
 
 
576
 
577
+ except Exception as e:
578
+ logger.error(f"Unexpected error in process_model ({model_url}): {e}")
579
 
580
  async def crawl(self):
581
  """モデルを定期的にチェックし、更新を行う。"""