ttttdiva committed on
Commit
eb03ec4
·
verified ·
1 Parent(s): 4fd5707

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +88 -110
main.py CHANGED
@@ -233,15 +233,9 @@ class CivitAICrawler:
233
  except requests.RequestException as e:
234
  logger.error(f"Failed to retrieve model info for ID {model_id}: {e}")
235
 
236
- def download_model(self, model_versions: list, folder: str):
237
- """
238
- - 最新バージョンのファイルを「folder」へまとめてダウンロード
239
- - 古いバージョンはまだダウンロードしない (実は後から個別にダウンロード)
240
- (※今回は「まとめて old_versions フォルダに入れずに、1ファイルずつ別のメソッドで対処する」流れに)
241
- """
242
  latest_version = model_versions[0]
243
  latest_files = latest_version["files"]
244
-
245
  for file_info in latest_files:
246
  download_url = file_info["downloadUrl"]
247
  file_name = file_info["name"]
@@ -273,67 +267,45 @@ class CivitAICrawler:
273
  except Exception as e:
274
  logger.error(f"Failed to create dummy file for {file_name}: {e}")
275
 
276
def download_old_version_file_and_upload(self, file_info, encrypted_top_name: str):
    """Download one old-version file, upload it right away, then delete it locally.

    The file is downloaded into a temporary folder, which is then encrypted
    and uploaded under ``encrypted_top_name``. The temporary folder is always
    removed before returning, including when an unexpected error propagates.

    Args:
        file_info: One element of a version's ``files`` list; its ``name``
            and ``downloadUrl`` keys are read.
        encrypted_top_name: Passed as ``base_path_in_repo`` to
            ``upload_encrypted_files`` so the file lands in the existing
            top-level folder.

    Raises:
        KeyError: If ``file_info`` lacks ``name`` or ``downloadUrl``.
    """
    file_name = file_info["name"]
    download_url = file_info["downloadUrl"]
    max_attempts = 5

    temp_folder = "temp_old_file"
    os.makedirs(temp_folder, exist_ok=True)
    try:
        for attempt in range(1, max_attempts + 1):
            try:
                self.download_file(download_url, temp_folder, file_name)
            except Exception as e:
                logger.error(f"Exception while downloading old file {file_name}: {e}")
                continue

            if "login" not in os.listdir(temp_folder):
                logger.info(f"Successfully downloaded old version file: {file_name}")
                break

            # A file named "login" is treated as a failed download and retried.
            logger.warning(
                f"'login' file found while downloading {file_name}. Retry {attempt}/{max_attempts}."
            )
            os.remove(os.path.join(temp_folder, "login"))
        else:
            # Every attempt failed: nothing to upload. No marker file is
            # written because the folder is deleted immediately afterwards.
            logger.error(
                f"Failed to download {file_name} after {max_attempts} attempts; skipping upload."
            )
            return

        try:
            # The whole temp folder is handed to the encryption step, which is
            # why it must never contain leftovers from an earlier call.
            self.encrypt_with_rclone(temp_folder)
            self.upload_encrypted_files(
                repo_id=self.repo_ids["current"],
                base_path_in_repo=encrypted_top_name,
            )
            logger.info(f"Uploaded old version file: {file_name} into {encrypted_top_name}")
        except Exception as e:
            logger.error(f"Error uploading old version file {file_name}: {e}")
    finally:
        shutil.rmtree(temp_folder, ignore_errors=True)
 
 
 
 
 
 
 
337
 
338
  def download_images(self, model_versions: list, folder: str):
339
  images_folder = os.path.join(folder, "images")
@@ -503,62 +475,68 @@ class CivitAICrawler:
503
  return {}
504
 
505
  def process_model(self, model_url: str):
506
- """
507
- - 最新バージョンをフォルダに一括DL → フォルダごと暗号化アップロード
508
- - 古いバージョンはファイル単位で即アップロード
509
- """
510
  try:
511
  model_id = model_url.rstrip("/").split("/")[-1]
512
  model_info = self.get_model_info(model_id)
513
-
514
- # =====================================
515
- # 1) フォルダ作成 & 最新バージョンDL
516
- # =====================================
517
- model_versions = model_info.get("modelVersions", [])
518
- if not model_versions:
519
- logger.warning(f"No versions found for model ID {model_id}")
520
- return
521
-
522
- # 例: 最新バージョン用フォルダ名を適当に
523
- folder = f"model_{model_id}_latest"
 
 
 
 
524
  os.makedirs(folder, exist_ok=True)
525
-
526
- # download_model() で最新バージョンだけダウンロード
527
- self.download_model(model_versions, folder)
528
-
529
- # 画像ダウンロード & HTML保存 & info.json保存(従来通り)
530
- self.download_images(model_versions, folder)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
531
  self.save_html_content(model_url, folder)
532
  self.save_model_info(model_info, folder)
533
-
534
- # =====================================
535
- # 2) 最新バージョンのフォルダを丸ごと暗号化 & アップロード
536
- # → Hugging Face 側には"ランダムフォルダ名"が作られる
537
- # =====================================
538
  encrypted_top_name = self.upload_folder_encrypted(folder)
539
- logger.info(f"[MAIN] Uploaded latest version folder => {encrypted_top_name}")
540
-
541
- # ローカル削除
542
- shutil.rmtree(folder, ignore_errors=True)
543
-
544
- # =====================================
545
- # 3) 古いバージョンのファイルを1つずつDL → 同じフォルダに追加
546
- # =====================================
547
- if len(model_versions) > 1:
548
- for version in model_versions[1:]:
549
- for file_info in version["files"]:
550
- self.download_old_version_file_and_upload(file_info, encrypted_top_name)
551
-
552
- # =====================================
553
- # 4) model_list.log への登録やその他の処理
554
- # =====================================
555
- modelpage_name = model_info.get("name", f"Model_{model_id}")
556
  model_hf_url = f"https://huggingface.co/{self.repo_ids['current']}/tree/main/{encrypted_top_name}"
 
 
557
  with open(self.config.LIST_FILE, "a", encoding="utf-8") as f:
558
  f.write(f"{modelpage_name}: {model_hf_url}\n")
559
-
 
 
 
 
560
  except Exception as e:
561
- logger.error(f"Unexpected error in process_model ({model_url}): {e}")
 
562
 
563
  async def crawl(self):
564
  """モデルを定期的にチェックし、更新を行う。"""
 
233
  except requests.RequestException as e:
234
  logger.error(f"Failed to retrieve model info for ID {model_id}: {e}")
235
 
236
+ def download_model(self, model_versions: list, folder: str, existing_old_version_files: list = []):
 
 
 
 
 
237
  latest_version = model_versions[0]
238
  latest_files = latest_version["files"]
 
239
  for file_info in latest_files:
240
  download_url = file_info["downloadUrl"]
241
  file_name = file_info["name"]
 
267
  except Exception as e:
268
  logger.error(f"Failed to create dummy file for {file_name}: {e}")
269
 
270
+ # 古いバージョンのダウンロード
271
+ if len(model_versions) > 1:
272
+ old_versions_folder = os.path.join(folder, "old_versions")
273
+ os.makedirs(old_versions_folder, exist_ok=True)
274
+ for version in model_versions[1:]:
275
+ for file_info in version["files"]:
276
+ file_name = file_info["name"]
277
+ if file_name in existing_old_version_files:
278
+ logger.info(f"Skipping download of existing old version file: {file_name}")
279
+ continue
280
+ download_url = file_info["downloadUrl"]
281
+ local_file_path = os.path.join(old_versions_folder, file_name)
282
+ login_detected_count = 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
 
284
+ while login_detected_count < 5:
285
+ try:
286
+ self.download_file(download_url, old_versions_folder, file_name)
287
+ except Exception as e:
288
+ logger.error(f"Exception occurred while downloading {file_name}: {e}")
289
+ login_detected_count += 1
290
+ continue
 
 
 
 
291
 
292
+ if "login" in os.listdir(old_versions_folder):
293
+ login_detected_count += 1
294
+ logger.warning(f"'login' file found while downloading {file_name}. Will try again. ({login_detected_count}/5)")
295
+ os.remove(os.path.join(old_versions_folder, "login"))
296
+ else:
297
+ logger.info(f"Successfully downloaded {file_name}")
298
+ break
299
 
300
+ if login_detected_count >= 5:
301
+ dummy_file_name = f"{file_name}.download_failed"
302
+ dummy_file_path = os.path.join(old_versions_folder, dummy_file_name)
303
+ try:
304
+ with open(dummy_file_path, "w") as f:
305
+ f.write("Download failed after 5 attempts.")
306
+ logger.error(f"Failed to download {file_name}. Created dummy file {dummy_file_name}. URL: {download_url}")
307
+ except Exception as e:
308
+ logger.error(f"Failed to create dummy file for {file_name}: {e}")
309
 
310
  def download_images(self, model_versions: list, folder: str):
311
  images_folder = os.path.join(folder, "images")
 
475
  return {}
476
 
477
  def process_model(self, model_url: str):
478
+ """指定されたモデルURLを処理する関数。"""
 
 
 
479
  try:
480
  model_id = model_url.rstrip("/").split("/")[-1]
481
  model_info = self.get_model_info(model_id)
482
+
483
+ latest_version = model_info.get("modelVersions", [])[0]
484
+ model_file = next(
485
+ (file for file in latest_version["files"] if file.get('type') == 'Model'),
486
+ None
487
+ )
488
+ if model_file:
489
+ latest_filename = model_file['name']
490
+ folder = os.path.splitext(latest_filename)[0]
491
+ else:
492
+ first_file = latest_version["files"][0]
493
+ latest_filename = first_file['name']
494
+ folder = os.path.splitext(latest_filename)[0]
495
+ logger.warning(f"No 'Model' type file found for model ID {model_id}. Using first file's name.")
496
+
497
  os.makedirs(folder, exist_ok=True)
498
+
499
+ # model_list を読み込み
500
+ model_list = self.read_model_list()
501
+
502
+ # もし既に「同名(モデルページ名)がアップされている」かどうか確認したい場合の例:
503
+ # ※ 今回は modelpage_name(= model_info["name"]) をキーにするか、
504
+ # あるいは model_id (str) をキーにするか、運用に合わせて設定してください。
505
+ # 例として modelpage_name をキーとしてチェックする流れ:
506
+ modelpage_name = model_info.get("name", "Unnamed Model")
507
+
508
+ if modelpage_name in model_list.values():
509
+ # 既に同モデルページ名がアップロード済み → ここでスキップや上書きなどの処理を決定
510
+ logger.info(f"Model '{modelpage_name}' is already listed in model_list. Skipping re-upload.")
511
+ # もし「強制再アップロード」したくないなら return で処理終了:
512
+ # return
513
+ # あるいは「強制アップするがバージョンだけ追加」などいろいろ処理が可能
514
+ # ここではあえて続行するが、必要に応じて書き換えてください。
515
+
516
+ # ダウンロードや画像保存
517
+ existing_old_version_files = []
518
+ self.download_model(model_info["modelVersions"], folder, existing_old_version_files)
519
+ self.download_images(model_info["modelVersions"], folder)
520
  self.save_html_content(model_url, folder)
521
  self.save_model_info(model_info, folder)
522
+
523
+ # ========== rclone で暗号化フォルダをアップロード ==========
 
 
 
524
  encrypted_top_name = self.upload_folder_encrypted(folder)
525
+
526
+ # 今回アップロードしたモデルの URL
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
527
  model_hf_url = f"https://huggingface.co/{self.repo_ids['current']}/tree/main/{encrypted_top_name}"
528
+
529
+ # model_list.log に追記 → "modelpage_name: model_hf_url" 形式
530
  with open(self.config.LIST_FILE, "a", encoding="utf-8") as f:
531
  f.write(f"{modelpage_name}: {model_hf_url}\n")
532
+
533
+ # ローカルフォルダ削除
534
+ if os.path.exists(folder):
535
+ shutil.rmtree(folder)
536
+
537
  except Exception as e:
538
+ logger.error(f"Unexpected error processing model ({model_url}): {e}")
539
+
540
 
541
  async def crawl(self):
542
  """モデルを定期的にチェックし、更新を行う。"""