Update main.py
Browse files
main.py
CHANGED
@@ -114,7 +114,6 @@ class CivitAICrawler:
|
|
114 |
if os.path.isdir(self.config.ENCRYPTED_DIR):
|
115 |
shutil.rmtree(self.config.ENCRYPTED_DIR, ignore_errors=True)
|
116 |
|
117 |
-
# コピー先: cryptLocal:{basename(local_path)}
|
118 |
top_level_name = os.path.basename(local_path.rstrip("/"))
|
119 |
if not top_level_name:
|
120 |
top_level_name = "unnamed"
|
@@ -161,7 +160,6 @@ class CivitAICrawler:
|
|
161 |
logger.warning("Repository file limit exceeded. Creating a new repository...")
|
162 |
self.repo_ids['current'] = self.increment_repo_name(self.repo_ids['current'])
|
163 |
self.api.create_repo(repo_id=self.repo_ids['current'], private=True)
|
164 |
-
# リポジトリが変わったので attempt をリセット
|
165 |
attempt = 0
|
166 |
repo_id = self.repo_ids['current']
|
167 |
continue
|
@@ -180,13 +178,8 @@ class CivitAICrawler:
|
|
180 |
)
|
181 |
raise
|
182 |
|
183 |
-
# =============================
|
184 |
-
# ここから既存処理
|
185 |
-
# =============================
|
186 |
-
|
187 |
@staticmethod
|
188 |
def get_filename_from_cd(content_disposition: Optional[str], default_name: str) -> str:
|
189 |
-
"""Content-Dispositionヘッダーからファイル名を取得する。"""
|
190 |
if content_disposition:
|
191 |
parts = content_disposition.split(';')
|
192 |
for part in parts:
|
@@ -195,7 +188,6 @@ class CivitAICrawler:
|
|
195 |
return default_name
|
196 |
|
197 |
def download_file(self, url: str, destination_folder: str, default_name: str):
|
198 |
-
"""指定されたURLからファイルをダウンロードし、指定されたフォルダに保存する。"""
|
199 |
try:
|
200 |
response = requests.get(url, headers=self.config.HEADERS, stream=True)
|
201 |
response.raise_for_status()
|
@@ -212,7 +204,6 @@ class CivitAICrawler:
|
|
212 |
logger.info(f"Download completed: {file_path}")
|
213 |
|
214 |
def get_model_info(self, model_id: str) -> dict:
|
215 |
-
"""モデルの情報を取得する。"""
|
216 |
try:
|
217 |
response = requests.get(self.config.URLS["modelId"] + str(model_id), headers=self.config.HEADERS)
|
218 |
response.raise_for_status()
|
@@ -221,7 +212,6 @@ class CivitAICrawler:
|
|
221 |
logger.error(f"Failed to retrieve model info for ID {model_id}: {e}")
|
222 |
|
223 |
def download_model(self, model_versions: list, folder: str, existing_old_version_files: list = []):
|
224 |
-
"""モデルのバージョンをダウンロードする。"""
|
225 |
latest_version = model_versions[0]
|
226 |
latest_files = latest_version["files"]
|
227 |
for file_info in latest_files:
|
@@ -294,10 +284,8 @@ class CivitAICrawler:
|
|
294 |
logger.error(f"Failed to download {file_name}. Created dummy file {dummy_file_name}. URL: {download_url}")
|
295 |
except Exception as e:
|
296 |
logger.error(f"Failed to create dummy file for {file_name}: {e}")
|
297 |
-
continue
|
298 |
|
299 |
def download_images(self, model_versions: list, folder: str):
|
300 |
-
"""モデルの画像をダウンロードし、指定されたフォルダに保存する。"""
|
301 |
images_folder = os.path.join(folder, "images")
|
302 |
os.makedirs(images_folder, exist_ok=True)
|
303 |
|
@@ -317,7 +305,6 @@ class CivitAICrawler:
|
|
317 |
except requests.RequestException as e:
|
318 |
logger.error(f"Error downloading image {image_url}: {e}")
|
319 |
|
320 |
-
# 画像フォルダをパスワード付きZIP
|
321 |
try:
|
322 |
original_cwd = os.getcwd()
|
323 |
os.chdir(folder)
|
@@ -332,7 +319,6 @@ class CivitAICrawler:
|
|
332 |
shutil.rmtree(images_folder)
|
333 |
|
334 |
def save_html_content(self, url: str, folder: str):
|
335 |
-
"""指定されたURLからHTMLコンテンツを取得し、保存する。"""
|
336 |
try:
|
337 |
response = requests.get(url)
|
338 |
response.raise_for_status()
|
@@ -344,7 +330,6 @@ class CivitAICrawler:
|
|
344 |
|
345 |
@staticmethod
|
346 |
def save_model_info(model_info: dict, folder: str):
|
347 |
-
"""モデル情報(json)の保存"""
|
348 |
with open(os.path.join(folder, "model_info.json"), "w") as file:
|
349 |
json.dump(model_info, file, indent=2)
|
350 |
|
@@ -358,7 +343,7 @@ class CivitAICrawler:
|
|
358 |
return f"{repo_id}1"
|
359 |
|
360 |
# =============================================================================
|
361 |
-
#
|
362 |
# =============================================================================
|
363 |
def upload_file_raw(
|
364 |
self,
|
@@ -366,10 +351,6 @@ class CivitAICrawler:
|
|
366 |
repo_id: Optional[str] = None,
|
367 |
path_in_repo: Optional[str] = None
|
368 |
):
|
369 |
-
"""
|
370 |
-
暗号化せず、そのまま Hugging Face にアップロードするメソッド。
|
371 |
-
civitai_backup.log や model_list.log などはこれを使う。
|
372 |
-
"""
|
373 |
if repo_id is None:
|
374 |
repo_id = self.repo_ids['current']
|
375 |
if path_in_repo is None:
|
@@ -393,7 +374,6 @@ class CivitAICrawler:
|
|
393 |
logger.warning("Repository file limit exceeded, creating a new repository.")
|
394 |
self.repo_ids['current'] = self.increment_repo_name(self.repo_ids['current'])
|
395 |
self.api.create_repo(repo_id=self.repo_ids['current'], private=True)
|
396 |
-
# リポジトリ変更で attempt リセット
|
397 |
attempt = 0
|
398 |
repo_id = self.repo_ids['current']
|
399 |
continue
|
@@ -409,7 +389,7 @@ class CivitAICrawler:
|
|
409 |
raise
|
410 |
|
411 |
# =============================================================================
|
412 |
-
#
|
413 |
# =============================================================================
|
414 |
def upload_file_encrypted(
|
415 |
self,
|
@@ -417,25 +397,18 @@ class CivitAICrawler:
|
|
417 |
repo_id: Optional[str] = None,
|
418 |
path_in_repo: Optional[str] = None
|
419 |
):
|
420 |
-
"""
|
421 |
-
単一ファイルを rclone で暗号化し、そのままHFへアップロード。
|
422 |
-
"""
|
423 |
if repo_id is None:
|
424 |
repo_id = self.repo_ids['current']
|
425 |
-
base_path = path_in_repo or ""
|
426 |
|
427 |
-
# 1) rclone で暗号化
|
428 |
self.encrypt_with_rclone(file_path)
|
429 |
-
|
430 |
-
# 2) アップロード
|
431 |
self.upload_encrypted_files(repo_id=repo_id, base_path_in_repo=base_path)
|
432 |
|
433 |
-
# 3) 後始末
|
434 |
if os.path.isdir(self.config.ENCRYPTED_DIR):
|
435 |
shutil.rmtree(self.config.ENCRYPTED_DIR, ignore_errors=True)
|
436 |
|
437 |
# =============================================================================
|
438 |
-
#
|
439 |
# =============================================================================
|
440 |
def upload_folder_encrypted(
|
441 |
self,
|
@@ -443,20 +416,12 @@ class CivitAICrawler:
|
|
443 |
repo_id: Optional[str] = None,
|
444 |
path_in_repo: Optional[str] = None
|
445 |
) -> str:
|
446 |
-
"""
|
447 |
-
フォルダを rclone で暗号化し、暗号化されたフォルダ構造ごとアップロード。
|
448 |
-
終了後に「実際に Hugging Face 上で使われる暗号化後のトップレベルフォルダ名」を返す。
|
449 |
-
"""
|
450 |
if repo_id is None:
|
451 |
repo_id = self.repo_ids['current']
|
452 |
base_path = path_in_repo or ""
|
453 |
|
454 |
-
# 1) rcloneにコピーして暗号化
|
455 |
self.encrypt_with_rclone(folder_path)
|
456 |
|
457 |
-
# 2) 暗号後のトップディレクトリ名を取得
|
458 |
-
# 例: /home/user/app/encrypted/<暗号フォルダ名>
|
459 |
-
# 基本的にトップレベルディレクトリは1つ想定
|
460 |
top_levels = [
|
461 |
d for d in os.listdir(self.config.ENCRYPTED_DIR)
|
462 |
if os.path.isdir(os.path.join(self.config.ENCRYPTED_DIR, d))
|
@@ -468,49 +433,49 @@ class CivitAICrawler:
|
|
468 |
|
469 |
encrypted_top_name = top_levels[0]
|
470 |
|
471 |
-
# 3) アップロード
|
472 |
self.upload_encrypted_files(repo_id=repo_id, base_path_in_repo=base_path)
|
473 |
|
474 |
-
# 4) 後始末
|
475 |
if os.path.isdir(self.config.ENCRYPTED_DIR):
|
476 |
shutil.rmtree(self.config.ENCRYPTED_DIR, ignore_errors=True)
|
477 |
|
478 |
-
# 5) 実際にHFに作られた「暗号化後のトップフォルダ名」を返す
|
479 |
return encrypted_top_name
|
480 |
|
|
|
|
|
|
|
481 |
def read_model_list(self):
|
482 |
-
"""
|
|
|
|
|
|
|
|
|
483 |
model_list = {}
|
484 |
try:
|
485 |
with open(self.config.LIST_FILE, "r", encoding="utf-8") as f:
|
486 |
for line in f:
|
487 |
line = line.strip()
|
488 |
-
if line:
|
489 |
-
|
490 |
-
|
491 |
-
|
492 |
-
|
|
|
493 |
return model_list
|
494 |
except Exception as e:
|
495 |
logger.error(f"Failed to read model list: {e}")
|
496 |
return {}
|
497 |
|
498 |
-
def get_repo_info(self, repo_id):
|
499 |
-
"""リポジトリの情報を取得する。"""
|
500 |
-
try:
|
501 |
-
repo_info = self.api.repo_info(repo_id=repo_id, files_metadata=True)
|
502 |
-
file_paths = [sibling.rfilename for sibling in repo_info.siblings]
|
503 |
-
return file_paths
|
504 |
-
except Exception as e:
|
505 |
-
logger.error(f"Failed to get repo info for {repo_id}: {e}")
|
506 |
-
return []
|
507 |
-
|
508 |
def process_model(self, model_url: str):
|
509 |
"""指定されたモデルURLを処理する関数。"""
|
510 |
try:
|
|
|
511 |
model_id = model_url.rstrip("/").split("/")[-1]
|
512 |
model_info = self.get_model_info(model_id)
|
|
|
|
|
|
|
513 |
|
|
|
514 |
latest_version = model_info.get("modelVersions", [])[0]
|
515 |
model_file = next(
|
516 |
(file for file in latest_version["files"] if file.get('type') == 'Model'),
|
@@ -520,6 +485,7 @@ class CivitAICrawler:
|
|
520 |
latest_filename = model_file['name']
|
521 |
folder = os.path.splitext(latest_filename)[0]
|
522 |
else:
|
|
|
523 |
first_file = latest_version["files"][0]
|
524 |
latest_filename = first_file['name']
|
525 |
folder = os.path.splitext(latest_filename)[0]
|
@@ -527,27 +493,28 @@ class CivitAICrawler:
|
|
527 |
|
528 |
os.makedirs(folder, exist_ok=True)
|
529 |
|
530 |
-
|
|
|
|
|
|
|
531 |
|
532 |
-
|
533 |
-
|
534 |
-
self.download_model(model_info["modelVersions"], folder, existing_old_version_files)
|
535 |
self.download_images(model_info["modelVersions"], folder)
|
536 |
self.save_html_content(model_url, folder)
|
537 |
self.save_model_info(model_info, folder)
|
538 |
|
539 |
-
#
|
540 |
encrypted_top_name = self.upload_folder_encrypted(folder)
|
541 |
|
542 |
-
#
|
543 |
-
|
544 |
-
|
545 |
-
model_hf_url = f"https://huggingface.co/{self.repo_ids['current']}/tree/main/{encrypted_top_name}"
|
546 |
|
547 |
with open(self.config.LIST_FILE, "a", encoding="utf-8") as f:
|
548 |
-
f.write(f"{
|
549 |
|
550 |
-
#
|
551 |
if os.path.exists(folder):
|
552 |
shutil.rmtree(folder)
|
553 |
|
@@ -560,11 +527,10 @@ class CivitAICrawler:
|
|
560 |
try:
|
561 |
login(token=self.config.HUGGINGFACE_API_KEY, add_to_git_credential=True)
|
562 |
|
563 |
-
# model_list.log
|
564 |
model_list_path = hf_hub_download(repo_id=self.repo_ids['model_list'], filename=self.config.LIST_FILE)
|
565 |
shutil.copyfile(model_list_path, f"./{self.config.LIST_FILE}")
|
566 |
|
567 |
-
# ログファイルのダウンロード(暗号化せず上書き)
|
568 |
local_file_path = hf_hub_download(repo_id=self.repo_ids["log"], filename=self.config.LOG_FILE)
|
569 |
shutil.copyfile(local_file_path, f"./{self.config.LOG_FILE}")
|
570 |
|
@@ -574,18 +540,19 @@ class CivitAICrawler:
|
|
574 |
old_models = json.loads(lines[0]) if len(lines) > 0 else []
|
575 |
self.repo_ids["current"] = lines[1] if len(lines) > 1 else ""
|
576 |
|
577 |
-
#
|
578 |
response = requests.get(self.config.URLS["latest"], headers=self.config.HEADERS)
|
579 |
response.raise_for_status()
|
580 |
latest_models = response.json().get("items", [])
|
581 |
latest_model_ids = [item.get("id") for item in latest_models if "id" in item]
|
582 |
|
583 |
-
#
|
584 |
new_models = list(set(latest_model_ids) - set(old_models))
|
585 |
|
586 |
if new_models:
|
587 |
logger.info(f"New models found: {new_models}")
|
588 |
model_id = new_models[0]
|
|
|
589 |
for attempt in range(1, 6):
|
590 |
try:
|
591 |
self.process_model(f"{self.config.URLS['modelId']}{model_id}")
|
@@ -597,13 +564,12 @@ class CivitAICrawler:
|
|
597 |
else:
|
598 |
await asyncio.sleep(2)
|
599 |
else:
|
600 |
-
# 新モデルなし
|
601 |
with open(self.config.LOG_FILE, "w", encoding="utf-8") as f:
|
602 |
f.write(json.dumps(latest_model_ids) + "\n")
|
603 |
f.write(f"{self.repo_ids['current']}\n")
|
604 |
logger.info(f"Updated log file: {self.config.LOG_FILE}")
|
605 |
|
606 |
-
# ログファイルをリポジトリにアップロード(暗号化しない)
|
607 |
self.upload_file_raw(
|
608 |
file_path=self.config.LOG_FILE,
|
609 |
repo_id=self.repo_ids["log"],
|
@@ -615,16 +581,16 @@ class CivitAICrawler:
|
|
615 |
await asyncio.sleep(60)
|
616 |
continue
|
617 |
|
618 |
-
#
|
619 |
old_models.append(model_id)
|
620 |
|
621 |
-
#
|
622 |
with open(self.config.LOG_FILE, "w", encoding="utf-8") as f:
|
623 |
f.write(json.dumps(old_models) + "\n")
|
624 |
f.write(f"{self.repo_ids['current']}\n")
|
625 |
logger.info(f"Updated log file with new model ID: {model_id}")
|
626 |
|
627 |
-
#
|
628 |
self.upload_file_raw(
|
629 |
file_path=self.config.LOG_FILE,
|
630 |
repo_id=self.repo_ids["log"],
|
@@ -641,7 +607,7 @@ class CivitAICrawler:
|
|
641 |
await asyncio.sleep(300)
|
642 |
|
643 |
|
644 |
-
#
|
645 |
config = Config()
|
646 |
crawler = CivitAICrawler(config)
|
647 |
app = crawler.app
|
|
|
114 |
if os.path.isdir(self.config.ENCRYPTED_DIR):
|
115 |
shutil.rmtree(self.config.ENCRYPTED_DIR, ignore_errors=True)
|
116 |
|
|
|
117 |
top_level_name = os.path.basename(local_path.rstrip("/"))
|
118 |
if not top_level_name:
|
119 |
top_level_name = "unnamed"
|
|
|
160 |
logger.warning("Repository file limit exceeded. Creating a new repository...")
|
161 |
self.repo_ids['current'] = self.increment_repo_name(self.repo_ids['current'])
|
162 |
self.api.create_repo(repo_id=self.repo_ids['current'], private=True)
|
|
|
163 |
attempt = 0
|
164 |
repo_id = self.repo_ids['current']
|
165 |
continue
|
|
|
178 |
)
|
179 |
raise
|
180 |
|
|
|
|
|
|
|
|
|
181 |
@staticmethod
|
182 |
def get_filename_from_cd(content_disposition: Optional[str], default_name: str) -> str:
|
|
|
183 |
if content_disposition:
|
184 |
parts = content_disposition.split(';')
|
185 |
for part in parts:
|
|
|
188 |
return default_name
|
189 |
|
190 |
def download_file(self, url: str, destination_folder: str, default_name: str):
|
|
|
191 |
try:
|
192 |
response = requests.get(url, headers=self.config.HEADERS, stream=True)
|
193 |
response.raise_for_status()
|
|
|
204 |
logger.info(f"Download completed: {file_path}")
|
205 |
|
206 |
def get_model_info(self, model_id: str) -> dict:
|
|
|
207 |
try:
|
208 |
response = requests.get(self.config.URLS["modelId"] + str(model_id), headers=self.config.HEADERS)
|
209 |
response.raise_for_status()
|
|
|
212 |
logger.error(f"Failed to retrieve model info for ID {model_id}: {e}")
|
213 |
|
214 |
def download_model(self, model_versions: list, folder: str, existing_old_version_files: list = []):
|
|
|
215 |
latest_version = model_versions[0]
|
216 |
latest_files = latest_version["files"]
|
217 |
for file_info in latest_files:
|
|
|
284 |
logger.error(f"Failed to download {file_name}. Created dummy file {dummy_file_name}. URL: {download_url}")
|
285 |
except Exception as e:
|
286 |
logger.error(f"Failed to create dummy file for {file_name}: {e}")
|
|
|
287 |
|
288 |
def download_images(self, model_versions: list, folder: str):
|
|
|
289 |
images_folder = os.path.join(folder, "images")
|
290 |
os.makedirs(images_folder, exist_ok=True)
|
291 |
|
|
|
305 |
except requests.RequestException as e:
|
306 |
logger.error(f"Error downloading image {image_url}: {e}")
|
307 |
|
|
|
308 |
try:
|
309 |
original_cwd = os.getcwd()
|
310 |
os.chdir(folder)
|
|
|
319 |
shutil.rmtree(images_folder)
|
320 |
|
321 |
def save_html_content(self, url: str, folder: str):
|
|
|
322 |
try:
|
323 |
response = requests.get(url)
|
324 |
response.raise_for_status()
|
|
|
330 |
|
331 |
@staticmethod
|
332 |
def save_model_info(model_info: dict, folder: str):
|
|
|
333 |
with open(os.path.join(folder, "model_info.json"), "w") as file:
|
334 |
json.dump(model_info, file, indent=2)
|
335 |
|
|
|
343 |
return f"{repo_id}1"
|
344 |
|
345 |
# =============================================================================
|
346 |
+
# 暗号化しないアップロード(ログや model_list.log 用)
|
347 |
# =============================================================================
|
348 |
def upload_file_raw(
|
349 |
self,
|
|
|
351 |
repo_id: Optional[str] = None,
|
352 |
path_in_repo: Optional[str] = None
|
353 |
):
|
|
|
|
|
|
|
|
|
354 |
if repo_id is None:
|
355 |
repo_id = self.repo_ids['current']
|
356 |
if path_in_repo is None:
|
|
|
374 |
logger.warning("Repository file limit exceeded, creating a new repository.")
|
375 |
self.repo_ids['current'] = self.increment_repo_name(self.repo_ids['current'])
|
376 |
self.api.create_repo(repo_id=self.repo_ids['current'], private=True)
|
|
|
377 |
attempt = 0
|
378 |
repo_id = self.repo_ids['current']
|
379 |
continue
|
|
|
389 |
raise
|
390 |
|
391 |
# =============================================================================
|
392 |
+
# 暗号化してアップロード (単ファイル)
|
393 |
# =============================================================================
|
394 |
def upload_file_encrypted(
|
395 |
self,
|
|
|
397 |
repo_id: Optional[str] = None,
|
398 |
path_in_repo: Optional[str] = None
|
399 |
):
|
|
|
|
|
|
|
400 |
if repo_id is None:
|
401 |
repo_id = self.repo_ids['current']
|
402 |
+
base_path = path_in_repo or ""
|
403 |
|
|
|
404 |
self.encrypt_with_rclone(file_path)
|
|
|
|
|
405 |
self.upload_encrypted_files(repo_id=repo_id, base_path_in_repo=base_path)
|
406 |
|
|
|
407 |
if os.path.isdir(self.config.ENCRYPTED_DIR):
|
408 |
shutil.rmtree(self.config.ENCRYPTED_DIR, ignore_errors=True)
|
409 |
|
410 |
# =============================================================================
|
411 |
+
# 暗号化してアップロード (フォルダ)
|
412 |
# =============================================================================
|
413 |
def upload_folder_encrypted(
|
414 |
self,
|
|
|
416 |
repo_id: Optional[str] = None,
|
417 |
path_in_repo: Optional[str] = None
|
418 |
) -> str:
|
|
|
|
|
|
|
|
|
419 |
if repo_id is None:
|
420 |
repo_id = self.repo_ids['current']
|
421 |
base_path = path_in_repo or ""
|
422 |
|
|
|
423 |
self.encrypt_with_rclone(folder_path)
|
424 |
|
|
|
|
|
|
|
425 |
top_levels = [
|
426 |
d for d in os.listdir(self.config.ENCRYPTED_DIR)
|
427 |
if os.path.isdir(os.path.join(self.config.ENCRYPTED_DIR, d))
|
|
|
433 |
|
434 |
encrypted_top_name = top_levels[0]
|
435 |
|
|
|
436 |
self.upload_encrypted_files(repo_id=repo_id, base_path_in_repo=base_path)
|
437 |
|
|
|
438 |
if os.path.isdir(self.config.ENCRYPTED_DIR):
|
439 |
shutil.rmtree(self.config.ENCRYPTED_DIR, ignore_errors=True)
|
440 |
|
|
|
441 |
return encrypted_top_name
|
442 |
|
443 |
+
# =============================================================================
|
444 |
+
# model_list.log の読み書きを「model_id: model_hf_url」で扱うよう変更
|
445 |
+
# =============================================================================
|
446 |
def read_model_list(self):
|
447 |
+
"""
|
448 |
+
model_list.log の各行を
|
449 |
+
"123456: https://huggingface.co/...encrypted_folder_name"
|
450 |
+
の形式で読み込み、 { "123456": "https://huggingface.co/..."} の dict を返す
|
451 |
+
"""
|
452 |
model_list = {}
|
453 |
try:
|
454 |
with open(self.config.LIST_FILE, "r", encoding="utf-8") as f:
|
455 |
for line in f:
|
456 |
line = line.strip()
|
457 |
+
if not line:
|
458 |
+
continue
|
459 |
+
parts = line.split(": ", 1)
|
460 |
+
if len(parts) == 2:
|
461 |
+
stored_id, stored_url = parts
|
462 |
+
model_list[stored_id] = stored_url
|
463 |
return model_list
|
464 |
except Exception as e:
|
465 |
logger.error(f"Failed to read model list: {e}")
|
466 |
return {}
|
467 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
468 |
def process_model(self, model_url: str):
|
469 |
"""指定されたモデルURLを処理する関数。"""
|
470 |
try:
|
471 |
+
# ===== 1) モデルID取得 & モデル情報 =====
|
472 |
model_id = model_url.rstrip("/").split("/")[-1]
|
473 |
model_info = self.get_model_info(model_id)
|
474 |
+
if not model_info:
|
475 |
+
logger.error(f"No model info found for ID {model_id}")
|
476 |
+
return
|
477 |
|
478 |
+
# ===== 2) フォルダ名を決める =====
|
479 |
latest_version = model_info.get("modelVersions", [])[0]
|
480 |
model_file = next(
|
481 |
(file for file in latest_version["files"] if file.get('type') == 'Model'),
|
|
|
485 |
latest_filename = model_file['name']
|
486 |
folder = os.path.splitext(latest_filename)[0]
|
487 |
else:
|
488 |
+
# 'Model'タイプファイルが無い場合
|
489 |
first_file = latest_version["files"][0]
|
490 |
latest_filename = first_file['name']
|
491 |
folder = os.path.splitext(latest_filename)[0]
|
|
|
493 |
|
494 |
os.makedirs(folder, exist_ok=True)
|
495 |
|
496 |
+
# ===== 3) model_list.log を読んで「既に同IDがあるかチェック」=====
|
497 |
+
current_list = self.read_model_list()
|
498 |
+
if str(model_id) in current_list:
|
499 |
+
logger.info(f"Model ID {model_id} is already in model_list.log. (No skip in this example)")
|
500 |
|
501 |
+
# ===== 4) ダウンロード処理 =====
|
502 |
+
self.download_model(model_info["modelVersions"], folder)
|
|
|
503 |
self.download_images(model_info["modelVersions"], folder)
|
504 |
self.save_html_content(model_url, folder)
|
505 |
self.save_model_info(model_info, folder)
|
506 |
|
507 |
+
# ===== 5) rclone で暗号化フォルダをアップロード =====
|
508 |
encrypted_top_name = self.upload_folder_encrypted(folder)
|
509 |
|
510 |
+
# ===== 6) model_list.log に "{model_id}: {URL}" 形式で追記 =====
|
511 |
+
# 暗号化されたトップフォルダ名をURLに含める
|
512 |
+
final_url = f"https://huggingface.co/{self.repo_ids['current']}/tree/main/{encrypted_top_name}"
|
|
|
513 |
|
514 |
with open(self.config.LIST_FILE, "a", encoding="utf-8") as f:
|
515 |
+
f.write(f"{model_id}: {final_url}\n")
|
516 |
|
517 |
+
# ===== 7) ローカルフォルダ掃除 =====
|
518 |
if os.path.exists(folder):
|
519 |
shutil.rmtree(folder)
|
520 |
|
|
|
527 |
try:
|
528 |
login(token=self.config.HUGGINGFACE_API_KEY, add_to_git_credential=True)
|
529 |
|
530 |
+
# model_list.log & civitai_backup.log を取得
|
531 |
model_list_path = hf_hub_download(repo_id=self.repo_ids['model_list'], filename=self.config.LIST_FILE)
|
532 |
shutil.copyfile(model_list_path, f"./{self.config.LIST_FILE}")
|
533 |
|
|
|
534 |
local_file_path = hf_hub_download(repo_id=self.repo_ids["log"], filename=self.config.LOG_FILE)
|
535 |
shutil.copyfile(local_file_path, f"./{self.config.LOG_FILE}")
|
536 |
|
|
|
540 |
old_models = json.loads(lines[0]) if len(lines) > 0 else []
|
541 |
self.repo_ids["current"] = lines[1] if len(lines) > 1 else ""
|
542 |
|
543 |
+
# 新着モデル確認
|
544 |
response = requests.get(self.config.URLS["latest"], headers=self.config.HEADERS)
|
545 |
response.raise_for_status()
|
546 |
latest_models = response.json().get("items", [])
|
547 |
latest_model_ids = [item.get("id") for item in latest_models if "id" in item]
|
548 |
|
549 |
+
# 増分チェック
|
550 |
new_models = list(set(latest_model_ids) - set(old_models))
|
551 |
|
552 |
if new_models:
|
553 |
logger.info(f"New models found: {new_models}")
|
554 |
model_id = new_models[0]
|
555 |
+
|
556 |
for attempt in range(1, 6):
|
557 |
try:
|
558 |
self.process_model(f"{self.config.URLS['modelId']}{model_id}")
|
|
|
564 |
else:
|
565 |
await asyncio.sleep(2)
|
566 |
else:
|
567 |
+
# 新モデルなし
|
568 |
with open(self.config.LOG_FILE, "w", encoding="utf-8") as f:
|
569 |
f.write(json.dumps(latest_model_ids) + "\n")
|
570 |
f.write(f"{self.repo_ids['current']}\n")
|
571 |
logger.info(f"Updated log file: {self.config.LOG_FILE}")
|
572 |
|
|
|
573 |
self.upload_file_raw(
|
574 |
file_path=self.config.LOG_FILE,
|
575 |
repo_id=self.repo_ids["log"],
|
|
|
581 |
await asyncio.sleep(60)
|
582 |
continue
|
583 |
|
584 |
+
# 追加したモデルIDを old_models に追加
|
585 |
old_models.append(model_id)
|
586 |
|
587 |
+
# ログファイル更新
|
588 |
with open(self.config.LOG_FILE, "w", encoding="utf-8") as f:
|
589 |
f.write(json.dumps(old_models) + "\n")
|
590 |
f.write(f"{self.repo_ids['current']}\n")
|
591 |
logger.info(f"Updated log file with new model ID: {model_id}")
|
592 |
|
593 |
+
# ログとmodel_list.logをアップロード
|
594 |
self.upload_file_raw(
|
595 |
file_path=self.config.LOG_FILE,
|
596 |
repo_id=self.repo_ids["log"],
|
|
|
607 |
await asyncio.sleep(300)
|
608 |
|
609 |
|
610 |
+
# 実行
|
611 |
config = Config()
|
612 |
crawler = CivitAICrawler(config)
|
613 |
app = crawler.app
|