Update main.py
Browse files
main.py
CHANGED
@@ -233,17 +233,15 @@ class CivitAICrawler:
|
|
233 |
except requests.RequestException as e:
|
234 |
logger.error(f"Failed to retrieve model info for ID {model_id}: {e}")
|
235 |
|
236 |
-
def download_model(self, model_versions: list, folder: str
|
237 |
"""
|
238 |
-
|
239 |
-
-
|
240 |
-
|
241 |
"""
|
242 |
-
# ----------------------
|
243 |
-
# 1) 最新バージョンのダウンロード (従来通り)
|
244 |
-
# ----------------------
|
245 |
latest_version = model_versions[0]
|
246 |
latest_files = latest_version["files"]
|
|
|
247 |
for file_info in latest_files:
|
248 |
download_url = file_info["downloadUrl"]
|
249 |
file_name = file_info["name"]
|
@@ -275,76 +273,67 @@ class CivitAICrawler:
|
|
275 |
except Exception as e:
|
276 |
logger.error(f"Failed to create dummy file for {file_name}: {e}")
|
277 |
|
278 |
-
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
for version in model_versions[1:]:
|
287 |
-
for file_info in version["files"]:
|
288 |
-
file_name = file_info["name"]
|
289 |
-
|
290 |
-
# 既にアップロード済み (existing_old_version_files) の場合はスキップ
|
291 |
-
if file_name in existing_old_version_files:
|
292 |
-
logger.info(f"Skipping download of existing old version file: {file_name}")
|
293 |
-
continue
|
294 |
|
295 |
-
|
296 |
-
|
297 |
-
|
|
|
298 |
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
306 |
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
|
|
|
|
317 |
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
logger.error(f"Failed to create dummy file for {file_name}: {e}")
|
330 |
-
# 失敗時はアップロードせずに次のファイルへ
|
331 |
-
continue
|
332 |
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
|
338 |
-
self.upload_file_encrypted(
|
339 |
-
file_path=local_file_path,
|
340 |
-
path_in_repo=f"{folder}/old_versions/{file_name}"
|
341 |
-
)
|
342 |
-
# アップロードが問題なく完了したので削除
|
343 |
-
os.remove(local_file_path)
|
344 |
-
logger.info(f"Removed local old version file: {local_file_path}")
|
345 |
-
except Exception as e:
|
346 |
-
logger.error(f"Error uploading and removing old version file {local_file_path}: {e}")
|
347 |
-
# ### ここまで修正 ###
|
348 |
|
349 |
def download_images(self, model_versions: list, folder: str):
|
350 |
images_folder = os.path.join(folder, "images")
|
@@ -514,68 +503,62 @@ class CivitAICrawler:
|
|
514 |
return {}
|
515 |
|
516 |
def process_model(self, model_url: str):
|
517 |
-
"""
|
|
|
|
|
|
|
518 |
try:
|
519 |
model_id = model_url.rstrip("/").split("/")[-1]
|
520 |
model_info = self.get_model_info(model_id)
|
521 |
-
|
522 |
-
|
523 |
-
|
524 |
-
|
525 |
-
|
526 |
-
|
527 |
-
|
528 |
-
|
529 |
-
|
530 |
-
|
531 |
-
|
532 |
-
latest_filename = first_file['name']
|
533 |
-
folder = os.path.splitext(latest_filename)[0]
|
534 |
-
logger.warning(f"No 'Model' type file found for model ID {model_id}. Using first file's name.")
|
535 |
-
|
536 |
os.makedirs(folder, exist_ok=True)
|
537 |
-
|
538 |
-
#
|
539 |
-
|
540 |
-
|
541 |
-
#
|
542 |
-
|
543 |
-
# あるいは model_id (str) をキーにするか、運用に合わせて設定してください。
|
544 |
-
# 例として modelpage_name をキーとしてチェックする流れ:
|
545 |
-
modelpage_name = model_info.get("name", "Unnamed Model")
|
546 |
-
|
547 |
-
if modelpage_name in model_list.values():
|
548 |
-
# 既に同モデルページ名がアップロード済み → ここでスキップや上書きなどの処理を決定
|
549 |
-
logger.info(f"Model '{modelpage_name}' is already listed in model_list. Skipping re-upload.")
|
550 |
-
# もし「強制再アップロード」したくないなら return で処理終了:
|
551 |
-
# return
|
552 |
-
# あるいは「強制アップするがバージョンだけ追加」などいろいろ処理が可能
|
553 |
-
# ここではあえて続行するが、必要に応じて書き換えてください。
|
554 |
-
|
555 |
-
# ダウンロードや画像保存
|
556 |
-
existing_old_version_files = []
|
557 |
-
self.download_model(model_info["modelVersions"], folder, existing_old_version_files)
|
558 |
-
self.download_images(model_info["modelVersions"], folder)
|
559 |
self.save_html_content(model_url, folder)
|
560 |
self.save_model_info(model_info, folder)
|
561 |
-
|
562 |
-
#
|
|
|
|
|
|
|
563 |
encrypted_top_name = self.upload_folder_encrypted(folder)
|
564 |
-
|
565 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
566 |
model_hf_url = f"https://huggingface.co/{self.repo_ids['current']}/tree/main/{encrypted_top_name}"
|
567 |
-
|
568 |
-
# model_list.log に追記 → "modelpage_name: model_hf_url" 形式
|
569 |
with open(self.config.LIST_FILE, "a", encoding="utf-8") as f:
|
570 |
f.write(f"{modelpage_name}: {model_hf_url}\n")
|
571 |
-
|
572 |
-
# ローカルフォルダ削除
|
573 |
-
if os.path.exists(folder):
|
574 |
-
shutil.rmtree(folder)
|
575 |
-
|
576 |
-
except Exception as e:
|
577 |
-
logger.error(f"Unexpected error processing model ({model_url}): {e}")
|
578 |
|
|
|
|
|
579 |
|
580 |
async def crawl(self):
|
581 |
"""モデルを定期的にチェックし、更新を行う。"""
|
|
|
233 |
except requests.RequestException as e:
|
234 |
logger.error(f"Failed to retrieve model info for ID {model_id}: {e}")
|
235 |
|
236 |
+
def download_model(self, model_versions: list, folder: str):
|
237 |
"""
|
238 |
+
- 最新バージョンのファイルを「folder」へまとめてダウンロード
|
239 |
+
- 古いバージョンはまだダウンロードしない (実は後から個別にダウンロード)
|
240 |
+
(※今回は「まとめて old_versions フォルダに入れずに、1ファイルずつ別のメソッドで対処する」流れに)
|
241 |
"""
|
|
|
|
|
|
|
242 |
latest_version = model_versions[0]
|
243 |
latest_files = latest_version["files"]
|
244 |
+
|
245 |
for file_info in latest_files:
|
246 |
download_url = file_info["downloadUrl"]
|
247 |
file_name = file_info["name"]
|
|
|
273 |
except Exception as e:
|
274 |
logger.error(f"Failed to create dummy file for {file_name}: {e}")
|
275 |
|
276 |
+
def download_old_version_file_and_upload(self, file_info, encrypted_top_name: str, *, max_attempts: int = 5):
    """Download one old-version file, upload it encrypted, then delete it locally.

    The file is fetched into the scratch folder ``temp_old_file``. That folder
    is handed to ``encrypt_with_rclone`` and the result is uploaded with
    ``base_path_in_repo=encrypted_top_name``. The scratch folder is removed
    before returning, whether the download/upload succeeded or not.

    Download and upload failures are logged, not raised.

    Args:
        file_info: One element of a version's ``files`` list (a dict); must
            contain ``"name"`` and ``"downloadUrl"``.
        encrypted_top_name: Top-level (randomly named) folder that has already
            been uploaded; the file is added underneath it.
        max_attempts: Number of failed download attempts tolerated before the
            file is given up on. Defaults to the previously hard-coded 5.

    Raises:
        KeyError: If ``file_info`` lacks ``"name"`` or ``"downloadUrl"``.
    """
    file_name = file_info["name"]
    download_url = file_info["downloadUrl"]

    temp_folder = "temp_old_file"
    # Start from an empty scratch folder: the whole folder is encrypted and
    # uploaded below, so leftovers from an interrupted earlier run would
    # otherwise be uploaded together with this file.
    shutil.rmtree(temp_folder, ignore_errors=True)
    os.makedirs(temp_folder, exist_ok=True)

    try:
        failed_attempts = 0
        downloaded = False
        while failed_attempts < max_attempts:
            try:
                self.download_file(download_url, temp_folder, file_name)
            except Exception as e:
                logger.error(f"Exception while downloading old file {file_name}: {e}")
                failed_attempts += 1
                continue

            # A file literally named "login" in the folder is treated as a
            # failed attempt (presumably the request was redirected to a
            # login page -- confirm against download_file).
            if "login" in os.listdir(temp_folder):
                failed_attempts += 1
                logger.warning(
                    f"'login' file found while downloading {file_name}. "
                    f"Retry {failed_attempts}/{max_attempts}."
                )
                os.remove(os.path.join(temp_folder, "login"))
            else:
                logger.info(f"Successfully downloaded old version file: {file_name}")
                downloaded = True
                break

        if not downloaded:
            # Nothing usable was downloaded, so nothing is uploaded. (The old
            # code wrote a ".download_failed" marker here and deleted it again
            # right away together with the scratch folder.)
            logger.error(
                f"Failed to download {file_name} after {max_attempts} attempts; skipping upload."
            )
            return

        # Download succeeded: encrypt the scratch folder and add its content
        # under the existing top-level folder in the repository.
        try:
            self.encrypt_with_rclone(temp_folder)
            self.upload_encrypted_files(
                repo_id=self.repo_ids["current"],
                base_path_in_repo=encrypted_top_name,
            )
            logger.info(f"Uploaded old version file: {file_name} into {encrypted_top_name}")
        except Exception as e:
            logger.error(f"Error uploading old version file {file_name}: {e}")
    finally:
        # Always drop the scratch folder, even if something above raised.
        shutil.rmtree(temp_folder, ignore_errors=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
337 |
|
338 |
def download_images(self, model_versions: list, folder: str):
|
339 |
images_folder = os.path.join(folder, "images")
|
|
|
503 |
return {}
|
504 |
|
505 |
def process_model(self, model_url: str):
    """Mirror one model page: latest version as a folder, older files one by one.

    Steps:
      1. Download the latest version, images, HTML and model info into a
         local folder and upload that folder encrypted.
      2. For every older version, download and upload each file individually
         under the same encrypted top-level folder.
      3. Append ``"<model name>: <repository URL>"`` to ``self.config.LIST_FILE``.

    Errors are logged and never propagate to the caller.

    Args:
        model_url: URL of the model page; its last path segment is used as
            the model ID.
    """
    try:
        model_id = model_url.rstrip("/").split("/")[-1]
        model_info = self.get_model_info(model_id)
        if model_info is None:
            # get_model_info logs the request error and falls through without
            # a return value; stop here instead of failing on ``None.get``.
            logger.warning(f"No model info available for model ID {model_id}")
            return

        # =====================================
        # 1) Create the folder and download the latest version
        # =====================================
        model_versions = model_info.get("modelVersions", [])
        if not model_versions:
            logger.warning(f"No versions found for model ID {model_id}")
            return

        folder = f"model_{model_id}_latest"
        os.makedirs(folder, exist_ok=True)
        try:
            # download_model() only fetches the latest version.
            self.download_model(model_versions, folder)

            # Images, HTML snapshot and info.json, as before.
            self.download_images(model_versions, folder)
            self.save_html_content(model_url, folder)
            self.save_model_info(model_info, folder)

            # =====================================
            # 2) Encrypt and upload the whole latest-version folder;
            #    the repository side gets a randomly named folder.
            # =====================================
            encrypted_top_name = self.upload_folder_encrypted(folder)
            logger.info(f"[MAIN] Uploaded latest version folder => {encrypted_top_name}")
        finally:
            # Remove the local copy even when a step above failed, so a
            # failed run does not leave the folder on disk.
            shutil.rmtree(folder, ignore_errors=True)

        # =====================================
        # 3) Older versions: download file by file into the same folder
        # =====================================
        for version in model_versions[1:]:
            for file_info in version.get("files", []):
                try:
                    self.download_old_version_file_and_upload(file_info, encrypted_top_name)
                except Exception as e:
                    # One broken file must not stop the remaining files or
                    # the model_list registration below.
                    logger.error(f"Failed to process old version file for model ID {model_id}: {e}")

        # =====================================
        # 4) Register the model in model_list.log
        # =====================================
        modelpage_name = model_info.get("name", f"Model_{model_id}")
        model_hf_url = f"https://huggingface.co/{self.repo_ids['current']}/tree/main/{encrypted_top_name}"
        with open(self.config.LIST_FILE, "a", encoding="utf-8") as f:
            f.write(f"{modelpage_name}: {model_hf_url}\n")

    except Exception as e:
        logger.error(f"Unexpected error in process_model ({model_url}): {e}")
|
562 |
|
563 |
async def crawl(self):
|
564 |
"""モデルを定期的にチェックし、更新を行う。"""
|