Update main.py
Browse files
main.py
CHANGED
@@ -233,9 +233,15 @@ class CivitAICrawler:
|
|
233 |
except requests.RequestException as e:
|
234 |
logger.error(f"Failed to retrieve model info for ID {model_id}: {e}")
|
235 |
|
236 |
-
def download_model(self, model_versions: list, folder: str
|
|
|
|
|
|
|
|
|
|
|
237 |
latest_version = model_versions[0]
|
238 |
latest_files = latest_version["files"]
|
|
|
239 |
for file_info in latest_files:
|
240 |
download_url = file_info["downloadUrl"]
|
241 |
file_name = file_info["name"]
|
@@ -267,45 +273,70 @@ class CivitAICrawler:
|
|
267 |
except Exception as e:
|
268 |
logger.error(f"Failed to create dummy file for {file_name}: {e}")
|
269 |
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
if file_name in existing_old_version_files:
|
278 |
-
logger.info(f"Skipping download of existing old version file: {file_name}")
|
279 |
-
continue
|
280 |
-
download_url = file_info["downloadUrl"]
|
281 |
-
local_file_path = os.path.join(old_versions_folder, file_name)
|
282 |
-
login_detected_count = 0
|
283 |
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
except Exception as e:
|
288 |
-
logger.error(f"Exception occurred while downloading {file_name}: {e}")
|
289 |
-
login_detected_count += 1
|
290 |
-
continue
|
291 |
|
292 |
-
|
293 |
-
login_detected_count += 1
|
294 |
-
logger.warning(f"'login' file found while downloading {file_name}. Will try again. ({login_detected_count}/5)")
|
295 |
-
os.remove(os.path.join(old_versions_folder, "login"))
|
296 |
-
else:
|
297 |
-
logger.info(f"Successfully downloaded {file_name}")
|
298 |
-
break
|
299 |
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
309 |
|
310 |
def download_images(self, model_versions: list, folder: str):
|
311 |
images_folder = os.path.join(folder, "images")
|
@@ -475,12 +506,24 @@ class CivitAICrawler:
|
|
475 |
return {}
|
476 |
|
477 |
def process_model(self, model_url: str):
|
478 |
-
"""
|
|
|
|
|
|
|
479 |
try:
|
480 |
model_id = model_url.rstrip("/").split("/")[-1]
|
481 |
model_info = self.get_model_info(model_id)
|
482 |
-
|
483 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
484 |
model_file = next(
|
485 |
(file for file in latest_version["files"] if file.get('type') == 'Model'),
|
486 |
None
|
@@ -493,50 +536,46 @@ class CivitAICrawler:
|
|
493 |
latest_filename = first_file['name']
|
494 |
folder = os.path.splitext(latest_filename)[0]
|
495 |
logger.warning(f"No 'Model' type file found for model ID {model_id}. Using first file's name.")
|
496 |
-
|
497 |
os.makedirs(folder, exist_ok=True)
|
498 |
-
|
499 |
-
#
|
500 |
-
|
501 |
-
|
502 |
-
#
|
503 |
-
|
504 |
-
|
505 |
-
#
|
506 |
-
|
507 |
-
|
508 |
-
if modelpage_name in model_list.values():
|
509 |
-
# 既に同モデルページ名がアップロード済み → ここでスキップや上書きなどの処理を決定
|
510 |
-
logger.info(f"Model '{modelpage_name}' is already listed in model_list. Skipping re-upload.")
|
511 |
-
# もし「強制再アップロード」したくないなら return で処理終了:
|
512 |
-
# return
|
513 |
-
# あるいは「強制アップするがバージョンだけ追加」などいろいろ処理が可能
|
514 |
-
# ここではあえて続行するが、必要に応じて書き換えてください。
|
515 |
-
|
516 |
-
# ダウンロードや画像保存
|
517 |
-
existing_old_version_files = []
|
518 |
-
self.download_model(model_info["modelVersions"], folder, existing_old_version_files)
|
519 |
-
self.download_images(model_info["modelVersions"], folder)
|
520 |
self.save_html_content(model_url, folder)
|
521 |
self.save_model_info(model_info, folder)
|
522 |
-
|
523 |
-
#
|
524 |
encrypted_top_name = self.upload_folder_encrypted(folder)
|
525 |
-
|
526 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
527 |
model_hf_url = f"https://huggingface.co/{self.repo_ids['current']}/tree/main/{encrypted_top_name}"
|
528 |
-
|
529 |
-
# model_list.log に追記 → "modelpage_name: model_hf_url" 形式
|
530 |
with open(self.config.LIST_FILE, "a", encoding="utf-8") as f:
|
531 |
f.write(f"{modelpage_name}: {model_hf_url}\n")
|
532 |
-
|
533 |
-
# ローカルフォルダ削除
|
534 |
-
if os.path.exists(folder):
|
535 |
-
shutil.rmtree(folder)
|
536 |
-
|
537 |
-
except Exception as e:
|
538 |
-
logger.error(f"Unexpected error processing model ({model_url}): {e}")
|
539 |
|
|
|
|
|
540 |
|
541 |
async def crawl(self):
|
542 |
"""モデルを定期的にチェックし、更新を行う。"""
|
|
|
233 |
except requests.RequestException as e:
|
234 |
logger.error(f"Failed to retrieve model info for ID {model_id}: {e}")
|
235 |
|
236 |
+
def download_model(self, model_versions: list, folder: str):
|
237 |
+
"""
|
238 |
+
- 最新バージョンのファイルを「folder」へまとめてダウンロード
|
239 |
+
- 古いバージョンはまだダウンロードしない (実は後から個別にダウンロード)
|
240 |
+
(※今回は「まとめて old_versions フォルダに入れずに、1ファイルずつ別のメソッドで対処する」流れに)
|
241 |
+
"""
|
242 |
latest_version = model_versions[0]
|
243 |
latest_files = latest_version["files"]
|
244 |
+
|
245 |
for file_info in latest_files:
|
246 |
download_url = file_info["downloadUrl"]
|
247 |
file_name = file_info["name"]
|
|
|
273 |
except Exception as e:
|
274 |
logger.error(f"Failed to create dummy file for {file_name}: {e}")
|
275 |
|
276 |
+
def download_old_version_file_and_upload(self, file_info, parent_folder: str, encrypted_top_name: str):
    """
    Download one old-version file, encrypt and upload it, then delete the local copy.

    The file is staged in ``<parent_folder>/old_versions`` (the folder name
    "old_versions" is used as-is).

    Args:
        file_info: Mapping that provides the ``"name"`` and ``"downloadUrl"``
            keys of the file to fetch.
        parent_folder: Local directory under which ``old_versions`` is created.
        encrypted_top_name: Passed to ``upload_encrypted_files`` as
            ``base_path_in_repo``.

    The download is attempted up to five times. An attempt counts as failed
    when ``download_file`` raises or when a file literally named ``login``
    shows up in the staging directory afterwards. After five failures a
    ``<file_name>.download_failed`` marker file is written and the method
    returns without uploading anything.

    NOTE(review): the call visible in ``process_model`` passes only two
    positional arguments (``file_info, encrypted_top_name``), which does not
    match this three-parameter signature -- confirm and fix the caller.
    """
    file_name = file_info["name"]
    download_url = file_info["downloadUrl"]

    # Make sure the old_versions staging directory exists.
    staging_dir = os.path.join(parent_folder, "old_versions")
    os.makedirs(staging_dir, exist_ok=True)

    plaintext_path = os.path.join(staging_dir, file_name)

    for attempt in range(1, 6):
        try:
            # Download into the old_versions directory.
            self.download_file(download_url, staging_dir, file_name)
        except Exception as exc:
            logger.error(f"Exception while downloading old file {file_name}: {exc}")
            continue

        if "login" not in os.listdir(staging_dir):
            logger.info(f"Successfully downloaded old version file: {file_name}")
            break

        # A file named "login" appeared: remove it and try again.
        logger.warning(f"'login' file found while downloading {file_name}. Retry {attempt}/5.")
        os.remove(os.path.join(staging_dir, "login"))
    else:
        # All five attempts failed: leave a marker file and give up.
        marker_name = f"{file_name}.download_failed"
        marker_path = os.path.join(staging_dir, marker_name)
        try:
            with open(marker_path, "w") as marker:
                marker.write("Download failed after 5 attempts.")
            logger.error(f"Failed to download {file_name} -> created dummy: {marker_name}")
        except Exception as exc:
            logger.error(f"Failed to create dummy file for old version {file_name}: {exc}")
        return

    # Download succeeded: encrypt and upload this single file, then clean up.
    try:
        # 1) Encrypt the individual file.
        self.encrypt_with_rclone(plaintext_path)

        # 2) Upload the encrypted output.
        self.upload_encrypted_files(
            repo_id=self.repo_ids["current"],
            base_path_in_repo=encrypted_top_name,
        )
        logger.info(f"Uploaded old version file: {file_name} into {encrypted_top_name}")
    except Exception as exc:
        # Best effort: a failed upload is logged, and the plaintext is still removed below.
        logger.error(f"Error uploading old version file {file_name}: {exc}")

    # 3) Remove the plaintext file once the upload step has finished.
    if os.path.exists(plaintext_path):
        os.remove(plaintext_path)
        logger.info(f"Removed local old version file: {plaintext_path}")
|
340 |
|
341 |
def download_images(self, model_versions: list, folder: str):
|
342 |
images_folder = os.path.join(folder, "images")
|
|
|
506 |
return {}
|
507 |
|
508 |
def process_model(self, model_url: str):
|
509 |
+
"""
|
510 |
+
- 最新バージョンをフォルダに一括DL → フォルダごと暗号化アップロード
|
511 |
+
- 古いバージョンはファイル単位で即アップロード
|
512 |
+
"""
|
513 |
try:
|
514 |
model_id = model_url.rstrip("/").split("/")[-1]
|
515 |
model_info = self.get_model_info(model_id)
|
516 |
+
|
517 |
+
model_versions = model_info.get("modelVersions", [])
|
518 |
+
if not model_versions:
|
519 |
+
logger.warning(f"No versions found for model ID {model_id}")
|
520 |
+
return
|
521 |
+
|
522 |
+
# ==================================================================
|
523 |
+
# 1) 「古いコード」と同じロジックでフォルダ名を決定
|
524 |
+
# (最新バージョンのうち 'type' = 'Model' のファイルがあればそれ、なければ最初のファイル名)
|
525 |
+
# ==================================================================
|
526 |
+
latest_version = model_versions[0]
|
527 |
model_file = next(
|
528 |
(file for file in latest_version["files"] if file.get('type') == 'Model'),
|
529 |
None
|
|
|
536 |
latest_filename = first_file['name']
|
537 |
folder = os.path.splitext(latest_filename)[0]
|
538 |
logger.warning(f"No 'Model' type file found for model ID {model_id}. Using first file's name.")
|
539 |
+
|
540 |
os.makedirs(folder, exist_ok=True)
|
541 |
+
|
542 |
+
# ==================================================================
|
543 |
+
# 2) 最新バージョンをまとめてダウンロード → フォルダごと暗号化アップロード
|
544 |
+
# ==================================================================
|
545 |
+
# 最新バージョンのファイルをまとめてダウンロード
|
546 |
+
self.download_model(model_versions, folder)
|
547 |
+
|
548 |
+
# 画像ダウンロード & HTML保存 & info.json保存
|
549 |
+
self.download_images(model_versions, folder)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
550 |
self.save_html_content(model_url, folder)
|
551 |
self.save_model_info(model_info, folder)
|
552 |
+
|
553 |
+
# フォルダごと暗号化 → Hugging Face へアップ
|
554 |
encrypted_top_name = self.upload_folder_encrypted(folder)
|
555 |
+
logger.info(f"[MAIN] Uploaded latest version folder => {encrypted_top_name}")
|
556 |
+
|
557 |
+
# ローカルフォルダ削除
|
558 |
+
shutil.rmtree(folder, ignore_errors=True)
|
559 |
+
|
560 |
+
# ==================================================================
|
561 |
+
# 3) 古いバージョンのファイルを1つずつDL → 同じ暗号化フォルダに追加
|
562 |
+
# ==================================================================
|
563 |
+
if len(model_versions) > 1:
|
564 |
+
for version in model_versions[1:]:
|
565 |
+
for file_info in version["files"]:
|
566 |
+
self.download_old_version_file_and_upload(file_info, encrypted_top_name)
|
567 |
+
|
568 |
+
# ==================================================================
|
569 |
+
# 4) model_list.log への登録やその他の処理
|
570 |
+
# ==================================================================
|
571 |
+
modelpage_name = model_info.get("name", f"Model_{model_id}")
|
572 |
model_hf_url = f"https://huggingface.co/{self.repo_ids['current']}/tree/main/{encrypted_top_name}"
|
573 |
+
|
|
|
574 |
with open(self.config.LIST_FILE, "a", encoding="utf-8") as f:
|
575 |
f.write(f"{modelpage_name}: {model_hf_url}\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
576 |
|
577 |
+
except Exception as e:
|
578 |
+
logger.error(f"Unexpected error in process_model ({model_url}): {e}")
|
579 |
|
580 |
async def crawl(self):
|
581 |
"""モデルを定期的にチェックし、更新を行う。"""
|