ttttdiva committed on
Commit
a63143e
·
verified ·
1 Parent(s): e811dd4

Upload main.py

Browse files
Files changed (1) hide show
  1. main.py +468 -210
main.py CHANGED
@@ -1,4 +1,5 @@
1
  import asyncio
 
2
  import datetime
3
  import json
4
  import logging
@@ -13,20 +14,23 @@ import requests
13
  from bs4 import BeautifulSoup
14
  from fake_useragent import UserAgent
15
  from fastapi import FastAPI
16
- from huggingface_hub import HfApi, hf_hub_download, login
17
 
 
18
  logging.basicConfig(level=logging.INFO)
19
  logger = logging.getLogger(__name__)
20
 
 
21
  class Config:
 
22
  HUGGINGFACE_API_KEY = os.environ["HUGGINGFACE_API_KEY"]
23
  CIVITAI_API_TOKEN = os.environ["CIVITAI_API_TOKEN"]
24
  LOG_FILE = "civitai_backup.log"
25
  LIST_FILE = "model_list.log"
26
  REPO_IDS = {
27
- "log": "ttttdiva/CivitAI_log_test",
28
  "model_list": "ttttdiva/CivitAI_model_info_test",
29
- "current": ""
30
  }
31
  URLS = {
32
  "latest": "https://civitai.com/api/v1/models?sort=Newest",
@@ -43,281 +47,535 @@ class Config:
43
  "Content-Type": "application/json"
44
  }
45
 
 
 
 
 
 
 
 
46
  class CivitAICrawler:
 
 
47
  def __init__(self, config: Config):
48
  self.config = config
49
  self.api = HfApi()
50
  self.app = FastAPI()
51
  self.repo_ids = self.config.REPO_IDS.copy()
52
  self.jst = self.config.JST
 
 
53
  self.setup_rclone_conf()
54
- self.setup_routes()
55
 
56
- def setup_rclone_conf(self):
57
- import base64
58
- rclone_b64 = os.environ.get("RCLONE_CONF_BASE64", "")
59
- if rclone_b64:
60
- conf_dir = ".rclone_config"
61
- os.makedirs(conf_dir, exist_ok=True)
62
- conf_path = os.path.join(conf_dir, "rclone.conf")
63
- with open(conf_path, "wb") as f:
64
- f.write(base64.b64decode(rclone_b64))
65
- os.environ["RCLONE_CONFIG"] = conf_path
66
- logger.info(f"[OK] Created rclone.conf => {conf_path}")
67
- else:
68
- logger.warning("[WARN] RCLONE_CONF_BASE64 is empty. rclone may fail.")
69
 
70
  def setup_routes(self):
 
 
71
  @self.app.get("/")
72
  def read_root():
73
  now = str(datetime.datetime.now(self.jst))
74
- return {
75
- "description": f"CivitAI crawler. Time: {now}",
76
- "repo_current": self.repo_ids["current"]
77
- }
 
 
 
 
78
 
79
  @self.app.on_event("startup")
80
  async def startup_event():
81
  asyncio.create_task(self.crawl())
82
 
83
- def download_file(self, url: str, dest_folder: str, filename: str) -> Optional[str]:
84
- os.makedirs(dest_folder, exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  try:
86
- r = requests.get(url, headers=self.config.HEADERS, stream=True)
87
- r.raise_for_status()
88
  except requests.RequestException as e:
89
- logger.error(f"[ERR] download_file: {e}")
90
- return None
91
 
92
- file_path = os.path.join(dest_folder, filename)
93
- with open(file_path, 'wb') as f:
94
- for chunk in r.iter_content(chunk_size=8192):
95
- f.write(chunk)
96
- logger.info(f"[OK] Downloaded => {file_path}")
97
- return file_path
98
 
99
- def upload_file(self, file_path: str, repo_id: Optional[str]=None, path_in_repo: Optional[str]=None):
100
- if repo_id is None:
101
- repo_id = self.repo_ids["current"]
102
- if path_in_repo is None:
103
- path_in_repo = os.path.basename(file_path)
104
 
 
 
105
  try:
106
- self.api.upload_file(
107
- path_or_fileobj=file_path,
108
- repo_id=repo_id,
109
- path_in_repo=path_in_repo
110
- )
111
- logger.info(f"[OK] Uploaded file => {repo_id}:{path_in_repo}")
112
- except Exception as e:
113
- logger.error(f"[ERR] upload_file: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
 
115
- def upload_folder(self, folder_path: str, path_in_repo: Optional[str] = None):
116
- if path_in_repo is None:
117
- path_in_repo = os.path.basename(folder_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
- try:
120
- self.api.upload_folder(
121
- folder_path=folder_path,
122
- repo_id=self.repo_ids["current"],
123
- path_in_repo=path_in_repo
124
- )
125
- logger.info(f"[OK] uploaded folder => {folder_path} => {self.repo_ids['current']}:{path_in_repo}")
126
- except Exception as e:
127
- logger.error(f"[ERR] upload_folder: {e}")
 
128
 
129
- def encrypt_and_upload_folder(self, local_folder: str) -> Optional[str]:
130
- """local_folder -> cryptLocal: => encrypted/??? => upload_folder => cleanup"""
131
- if not os.path.isdir(local_folder):
132
- logger.error(f"[ERR] {local_folder} is not a directory.")
133
- return None
134
 
135
- encrypted_dir = os.path.join(os.getcwd(), "encrypted")
136
- os.makedirs(encrypted_dir, exist_ok=True)
 
 
137
 
138
- before = set(os.listdir(encrypted_dir))
139
- cmd = ["rclone", "copy", local_folder, "cryptLocal:", "--create-empty-src-dirs"]
140
- logger.info(f"[CMD] {' '.join(cmd)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
  try:
142
- subprocess.run(cmd, check=True)
143
- logger.info("[OK] rclone copy => cryptLocal:")
 
 
144
  except subprocess.CalledProcessError as e:
145
- logger.error(f"[ERR] rclone copy failed: {e}")
146
- return None
147
-
148
- after = set(os.listdir(encrypted_dir))
149
- diff = after - before
150
- if not diff:
151
- logger.error("[ERR] no new directory in ./encrypted after copy")
152
- return None
153
- if len(diff) > 1:
154
- logger.warning(f"[WARN] multiple new dirs => {diff}")
155
- enc_name = diff.pop()
156
- enc_path = os.path.join(encrypted_dir, enc_name)
157
- if not os.path.isdir(enc_path):
158
- logger.error(f"[ERR] {enc_path} is not a directory.")
159
- return None
160
-
161
- # HF upload folder
162
  try:
163
- self.upload_folder(enc_path, path_in_repo=enc_name)
 
 
 
 
164
  except Exception as e:
165
- logger.error(f"[ERR] encrypt_and_upload_folder => upload_folder: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
- # cleanup
168
- shutil.rmtree(local_folder, ignore_errors=True)
169
- shutil.rmtree(enc_path, ignore_errors=True)
170
- logger.info(f"[CLEANUP] removed {local_folder} & {enc_path}")
 
 
 
 
 
 
 
 
171
 
172
- # 成功したら enc_name を返す
173
- return enc_name
174
 
175
- def download_and_process_versions(self, model_versions: list, folder: str):
176
- if not model_versions:
177
- return
178
 
179
- latest = model_versions[0]
180
- for f_info in latest.get("files", []):
181
- url = f_info["downloadUrl"]
182
- fname = f_info["name"]
183
- self.download_file(url, folder, fname)
184
 
185
- if len(model_versions) > 1:
186
- ov_folder = os.path.join(folder, "old_versions")
187
- os.makedirs(ov_folder, exist_ok=True)
188
- for v in model_versions[1:]:
189
- for f_info in v.get("files", []):
190
- url = f_info["downloadUrl"]
191
- fname = f_info["name"]
192
- self.download_file(url, ov_folder, fname)
193
 
194
- def get_model_info(self, model_id: str) -> dict:
 
 
 
 
 
 
 
 
 
 
 
 
195
  try:
196
- url = f"{self.config.URLS['modelId']}{model_id}"
197
- resp = requests.get(url, headers=self.config.HEADERS)
198
- resp.raise_for_status()
199
- return resp.json()
 
 
 
 
 
200
  except Exception as e:
201
- logger.error(f"[ERR] get_model_info({model_id}): {e}")
202
  return {}
203
 
204
- def download_images(self, model_versions: list, folder: str):
205
- """
206
- 各model_versionsから画像URLを集めて、
207
- folder/images 下にダウンロードするメソッド。
208
- """
209
- images_folder = os.path.join(folder, "images")
210
- os.makedirs(images_folder, exist_ok=True)
211
-
212
- for version in model_versions:
213
- for img_info in version.get("images", []):
214
- img_url = img_info["url"]
215
- filename = os.path.basename(img_url)
216
- self.download_file(img_url, images_folder, filename)
217
-
218
- def process_model(self, model_id: str):
219
- info = self.get_model_info(model_id)
220
- if not info or "modelVersions" not in info:
221
- logger.error(f"[ERR] No modelVersions for {model_id}")
222
- return
223
-
224
- versions = info["modelVersions"]
225
- base_dir = "local_models"
226
- os.makedirs(base_dir, exist_ok=True)
227
-
228
- # モデル名
229
- model_name = info.get("name", f"ID_{model_id}")
230
- safe_name = re.sub(r'[\\/*?:"<>|]', '_', model_name) # OSで使えない文字を _
231
- folder_path = os.path.join(base_dir, safe_name)
232
- if os.path.exists(folder_path):
233
- shutil.rmtree(folder_path)
234
- os.makedirs(folder_path, exist_ok=True)
235
- logger.info(f"[OK] Created local folder => {folder_path}")
236
-
237
- # ダウンロード
238
- self.download_and_process_versions(versions, folder_path)
239
- self.download_images(versions, folder_path)
240
-
241
- # === 暗号化&アップロード ===
242
- logger.info(f"[DEBUG] encrypt_and_upload_folder => {folder_path}")
243
- enc_subfolder = self.encrypt_and_upload_folder(folder_path)
244
- if enc_subfolder is None:
245
- # 失敗
246
- enc_subfolder = "[ENCRYPT_FAILED]"
247
- else:
248
- logger.info(f"[OK] Encrypted & uploaded => {enc_subfolder}")
249
 
250
- # model_list.log に追記
251
- # enc_subfolder が "[ENCRYPT_FAILED]" でなければ成功したフォルダ名
252
- hf_url = f"https://huggingface.co/{self.repo_ids['current']}/tree/main/{enc_subfolder}"
253
- model_list_line = f"{model_name} (ID:{model_id}): {hf_url}\n"
254
  try:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  with open(self.config.LIST_FILE, "a", encoding="utf-8") as f:
256
- f.write(model_list_line)
257
- logger.info(f"[OK] Wrote to model_list.log => {model_list_line.strip()}")
258
- except Exception as e:
259
- logger.error(f"[ERR] writing model_list.log => {e}")
260
 
261
- # ★ model_list.log をアップロード
262
- self.upload_file(self.config.LIST_FILE, self.repo_ids["model_list"], self.config.LIST_FILE)
 
 
 
 
263
 
264
  async def crawl(self):
 
265
  while True:
266
  try:
267
  login(token=self.config.HUGGINGFACE_API_KEY, add_to_git_credential=True)
268
 
269
- model_list_path = hf_hub_download(self.repo_ids["model_list"], self.config.LIST_FILE)
 
270
  shutil.copyfile(model_list_path, f"./{self.config.LIST_FILE}")
271
 
272
- log_path = hf_hub_download(self.repo_ids["log"], self.config.LOG_FILE)
273
- shutil.copyfile(log_path, f"./{self.config.LOG_FILE}")
274
-
275
- with open(self.config.LOG_FILE, "r", encoding="utf-8") as f:
276
- lines = f.read().splitlines()
277
- old_models = json.loads(lines[0]) if len(lines)>0 else []
278
- self.repo_ids["current"] = lines[1] if len(lines)>1 else ""
279
-
280
- # get newest
281
- r = requests.get(self.config.URLS["latest"], headers=self.config.HEADERS)
282
- r.raise_for_status()
283
- items = r.json().get("items", [])
284
- new_ids = [it["id"] for it in items if "id" in it]
285
-
286
- diff_ids = list(set(new_ids) - set(old_models))
287
- if diff_ids:
288
- mid = diff_ids[0]
289
- for attempt in range(1,6):
 
 
 
 
 
290
  try:
291
- self.process_model(str(mid))
292
  break
293
  except Exception as e:
294
- logger.error(f"[ERR] process_model {mid} (attempt {attempt}): {e}")
295
- if attempt==5:
296
- logger.error(f"[SKIP] model {mid} after 5 fails")
297
  else:
298
  await asyncio.sleep(2)
299
-
300
- old_models.append(mid)
301
- with open(self.config.LOG_FILE,'w',encoding='utf-8') as f:
302
- f.write(json.dumps(old_models)+"\n")
303
- f.write(self.repo_ids["current"]+"\n")
304
-
305
- # アップロードログ
306
- self.upload_file(self.config.LOG_FILE, self.repo_ids["log"], self.config.LOG_FILE)
307
  else:
308
- with open(self.config.LOG_FILE,'w',encoding='utf-8') as f:
309
- f.write(json.dumps(new_ids)+"\n")
310
- f.write(self.repo_ids["current"]+"\n")
311
- self.upload_file(self.config.LOG_FILE, self.repo_ids["log"], self.config.LOG_FILE)
312
- logger.info("[INFO] no new models => sleep(60)")
 
 
 
 
 
 
 
 
 
 
313
  await asyncio.sleep(60)
314
  continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
315
  except Exception as e:
316
- logger.error(f"[ERR] crawl => {e}")
317
  await asyncio.sleep(300)
318
 
319
 
320
- # FastAPI
321
  config = Config()
322
  crawler = CivitAICrawler(config)
323
  app = crawler.app
 
1
  import asyncio
2
+ import base64
3
  import datetime
4
  import json
5
  import logging
 
14
  from bs4 import BeautifulSoup
15
  from fake_useragent import UserAgent
16
  from fastapi import FastAPI
17
+ from huggingface_hub import HfApi, create_repo, hf_hub_download, login
18
 
19
+ # Logging configuration
20
  logging.basicConfig(level=logging.INFO)
21
  logger = logging.getLogger(__name__)
22
 
23
+
24
  class Config:
25
+ """設定用のクラス"""
26
  HUGGINGFACE_API_KEY = os.environ["HUGGINGFACE_API_KEY"]
27
  CIVITAI_API_TOKEN = os.environ["CIVITAI_API_TOKEN"]
28
  LOG_FILE = "civitai_backup.log"
29
  LIST_FILE = "model_list.log"
30
  REPO_IDS = {
31
+ "log": "ttttdiva/CivitAI_log_test",
32
  "model_list": "ttttdiva/CivitAI_model_info_test",
33
+ "current": ""
34
  }
35
  URLS = {
36
  "latest": "https://civitai.com/api/v1/models?sort=Newest",
 
47
  "Content-Type": "application/json"
48
  }
49
 
50
+ # ===== rclone 用の追加設定 =====
51
+ # (環境変数 RCLONE_CONF_BASE64 に rclone.conf をbase64エンコードした文字列を設定しておく想定)
52
+ RCLONE_CONF_BASE64 = os.environ.get("RCLONE_CONF_BASE64", "")
53
+ # 暗号化されたファイルが出力されるローカルディレクトリ(cryptLocal: の実体)
54
+ ENCRYPTED_DIR = "/home/user/app/encrypted"
55
+
56
+
57
  class CivitAICrawler:
58
+ """CivitAIからモデルをダウンロードし、Hugging Faceにアップロードするクラス(rcloneで暗号化対応版)"""
59
+
60
  def __init__(self, config: Config):
      """Wire up the crawler from ``config``.

      Creates the Hugging Face API client and the FastAPI application, takes
      a copy of ``config.REPO_IDS``, writes the rclone configuration and then
      registers the HTTP routes.
      """
      self.config = config
      self.api = HfApi()
      self.app = FastAPI()
      # Work on a copy so that reassigning entries (e.g. "current") on the
      # instance does not modify the dictionary stored on Config.
      self.repo_ids = self.config.REPO_IDS.copy()
      self.jst = self.config.JST

      # Set up rclone.
      self.setup_rclone_conf()

      self.setup_routes()
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
  def setup_routes(self):
73
+ """FastAPIのルーティングを設定する。"""
74
+
75
  @self.app.get("/")
76
  def read_root():
77
  now = str(datetime.datetime.now(self.jst))
78
+ description = f"""
79
+ CivitAIを定期的に周回し新規モデルを {self.repo_ids['current']} にバックアップするspaceです。
80
+ モデルページ名とバックアップURLの紐づきはhttps://huggingface.co/{self.repo_ids['model_list']}/blob/main/model_list.logからどうぞ
81
+ たまに覗いてもらえると動き続けると思います。
82
+ 再起動が必要になっている場合はRestartボタンを押してもらえると助かります。
83
+ Status: {now} + currently running :D
84
+ """
85
+ return description
86
 
87
  @self.app.on_event("startup")
88
  async def startup_event():
89
  asyncio.create_task(self.crawl())
90
 
91
+ # =============================
92
+ # rclone helper functions
93
+ # =============================
94
def setup_rclone_conf(self):
    """Materialise ``rclone.conf`` from ``config.RCLONE_CONF_BASE64``.

    Decodes the base64 payload into ``.rclone_config/rclone.conf`` and points
    the ``RCLONE_CONFIG`` environment variable at it.  When the payload is
    empty only a warning is logged and nothing is written.

    Raises:
        binascii.Error: if the payload is not valid base64.
        OSError: if the directory or file cannot be written.
    """
    encoded_conf = self.config.RCLONE_CONF_BASE64
    if not encoded_conf:
        logger.warning("[WARN] RCLONE_CONF_BASE64 is empty. rclone may fail.")
        return

    # Use an absolute path so RCLONE_CONFIG stays valid even if the process
    # working directory changes after start-up.
    conf_dir = os.path.abspath(".rclone_config")
    os.makedirs(conf_dir, exist_ok=True)
    conf_path = os.path.join(conf_dir, "rclone.conf")
    with open(conf_path, "wb") as f:
        f.write(base64.b64decode(encoded_conf))
    # The config may hold remote secrets (e.g. crypt passwords); restrict it
    # to the owning user.
    os.chmod(conf_path, 0o600)

    os.environ["RCLONE_CONFIG"] = conf_path
    logger.info(f"[INFO] rclone.conf created at: {conf_path}")
107
+
108
def encrypt_with_rclone(self, local_path: str, is_file: bool = True):
    """Copy ``local_path`` to the ``cryptLocal:`` rclone remote.

    The encrypted output is expected to appear in ``config.ENCRYPTED_DIR``,
    which is emptied beforehand so it only contains the result of this call.
    ``local_path`` may be a file or a directory; ``is_file`` is accepted for
    the callers' benefit but is not consulted here.

    Raises:
        FileNotFoundError: if ``local_path`` does not exist, or if
            ``config.ENCRYPTED_DIR`` is missing after the copy.
        subprocess.CalledProcessError: if ``rclone`` exits non-zero.
    """
    if not os.path.exists(local_path):
        raise FileNotFoundError(f"[ERROR] Local path not found: {local_path}")

    encrypted_dir = self.config.ENCRYPTED_DIR

    # Drop leftovers from a previous run before encrypting.
    if os.path.isdir(encrypted_dir):
        shutil.rmtree(encrypted_dir, ignore_errors=True)

    command = ["rclone", "copy", local_path, "cryptLocal:", "-v"]
    logger.info(f"[INFO] Running: {' '.join(command)}")
    subprocess.run(command, check=True)
    logger.info(f"[OK] rclone copy {local_path} => cryptLocal:")

    if os.path.isdir(encrypted_dir):
        return
    raise FileNotFoundError(
        f"[ERROR] {self.config.ENCRYPTED_DIR} not found. Check your rclone config."
    )
130
+
131
def upload_encrypted_files(self, repo_id: str, path_in_repo: str = None):
    """Upload every file below ``config.ENCRYPTED_DIR`` to Hugging Face.

    The directory is walked recursively; each file is uploaded to
    ``<path_in_repo>/<path relative to ENCRYPTED_DIR>`` (or just the relative
    path when ``path_in_repo`` is empty).

    Error handling per file:

    * "over the limit of 100000 files": the name in ``self.repo_ids['current']``
      is incremented, that repository is created (private) and the upload is
      retried against it with a fresh retry budget.  All following files go
      to the new repository as well.
    * "you can retry this action in about 1 hour": sleeps one hour and
      retries without consuming a retry.
    * anything else: retried up to five times, then re-raised.

    Args:
        repo_id: Repository to upload to initially.
        path_in_repo: Optional directory prefix inside the repository.
    """
    if not path_in_repo:
        path_in_repo = ""

    max_retries = 5
    encrypted_root = self.config.ENCRYPTED_DIR
    # Repository paths always use "/" no matter what the host separator is.
    repo_prefix = path_in_repo.replace(os.sep, "/").rstrip("/")

    for root, _dirs, files in os.walk(encrypted_root):
        for fn in files:
            encrypted_file_path = os.path.join(root, fn)
            if not os.path.isfile(encrypted_file_path):
                continue

            relative_path = os.path.relpath(encrypted_file_path, encrypted_root)
            relative_path = relative_path.replace(os.sep, "/")
            if repo_prefix:
                upload_path_in_repo = f"{repo_prefix}/{relative_path}"
            else:
                upload_path_in_repo = relative_path

            attempt = 0
            while attempt < max_retries:
                try:
                    self.api.upload_file(
                        path_or_fileobj=encrypted_file_path,
                        repo_id=repo_id,
                        path_in_repo=upload_path_in_repo
                    )
                    logger.info(f"[OK] Uploaded {encrypted_file_path} to {repo_id}/{upload_path_in_repo}")
                    break
                except Exception as e:
                    attempt += 1
                    error_message = str(e)
                    if "over the limit of 100000 files" in error_message:
                        logger.warning("Repository file limit exceeded, creating a new repository.")
                        self.repo_ids['current'] = self.increment_repo_name(self.repo_ids['current'])
                        # exist_ok: an earlier, interrupted run may already
                        # have created the follow-up repository.
                        self.api.create_repo(
                            repo_id=self.repo_ids['current'],
                            private=True,
                            exist_ok=True,
                        )
                        # New repository, so start counting retries afresh.
                        attempt = 0
                        repo_id = self.repo_ids['current']
                    elif "you can retry this action in about 1 hour" in error_message:
                        logger.warning("Encountered 'retry in 1 hour' error. Waiting 1 hour before retrying...")
                        # NOTE(review): this blocks the calling thread for an
                        # hour; confirm that is acceptable for the caller.
                        time.sleep(3600)
                        attempt -= 1  # rate-limit waits do not count as retries
                    elif attempt < max_retries:
                        logger.warning(f"Failed to upload file {encrypted_file_path}, retrying... {attempt}/{max_retries}")
                    else:
                        logger.error(f"Failed to upload file after {max_retries} attempts: {encrypted_file_path}")
                        raise
185
+
186
+ # =============================
187
+ # Pre-existing processing starts here
188
+ # =============================
189
+
190
@staticmethod
def get_filename_from_cd(content_disposition: Optional[str], default_name: str) -> str:
    """Extract the file name from a ``Content-Disposition`` header value.

    Args:
        content_disposition: Raw header value, or ``None`` when the response
            carried no such header.
        default_name: Name to fall back to.

    Returns:
        The value of the ``filename`` parameter with surrounding quotes and
        any directory components removed, or ``default_name`` when the header
        is missing or yields no usable name.
    """
    if not content_disposition:
        return default_name

    for part in content_disposition.split(";"):
        key, separator, value = part.partition("=")
        if not separator or key.strip().lower() != "filename":
            continue
        # partition() splits on the first "=" only, so names that contain
        # "=" themselves survive intact.
        candidate = value.strip().strip('"')
        # The header is remote-controlled: keep only the last path component
        # so the name cannot point outside the destination folder.
        candidate = candidate.replace("\\", "/").rsplit("/", 1)[-1]
        if candidate and candidate not in (".", ".."):
            return candidate
    return default_name
199
+
200
def download_file(self, url: str, destination_folder: str, default_name: str, timeout=(10, 120)):
    """Stream ``url`` into ``destination_folder``.

    The file name is taken from the response's ``Content-Disposition`` header
    and falls back to ``default_name``.  Request failures (connection errors,
    HTTP error statuses) are logged and the method returns without writing
    anything; errors raised while streaming or writing propagate.

    Args:
        url: Download URL.
        destination_folder: Directory to write into; created if missing.
        default_name: File name used when the server does not provide one.
        timeout: ``requests`` timeout, ``(connect, read)`` seconds, so a
            stalled server cannot hang the download forever.
    """
    try:
        response = requests.get(
            url, headers=self.config.HEADERS, stream=True, timeout=timeout
        )
    except requests.RequestException as e:
        logger.error(f"Failed to download file from {url}: {e}")
        return

    # With stream=True the connection stays open until it is closed
    # explicitly, so make sure that happens on every exit path.
    with response:
        try:
            response.raise_for_status()
        except requests.RequestException as e:
            logger.error(f"Failed to download file from {url}: {e}")
            return

        filename = self.get_filename_from_cd(
            response.headers.get('content-disposition'), default_name
        )
        os.makedirs(destination_folder, exist_ok=True)
        file_path = os.path.join(destination_folder, filename)

        with open(file_path, 'wb') as file:
            for chunk in response.iter_content(chunk_size=8192):
                file.write(chunk)
    logger.info(f"Download completed: {file_path}")
 
216
 
217
def get_model_info(self, model_id: str) -> dict:
    """Fetch the model description for ``model_id``.

    Requests ``config.URLS["modelId"] + model_id`` with ``config.HEADERS``
    and returns the decoded JSON body.

    Returns:
        The decoded JSON, or an empty dict when the request fails or the
        body is not valid JSON, so callers can always use ``.get`` on the
        result.
    """
    try:
        response = requests.get(
            self.config.URLS["modelId"] + str(model_id),
            headers=self.config.HEADERS,
            timeout=60,
        )
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        logger.error(f"Failed to retrieve model info for ID {model_id}: {e}")
        return {}
225
+
226
def download_model(self, model_versions: list, folder: str, existing_old_version_files: list = None):
    """Download the files of every model version into ``folder``.

    Files of the first entry in ``model_versions`` go directly into
    ``folder``; files of all later entries go into ``folder/old_versions``.
    Each download is attempted up to five times.  An attempt counts as failed
    when ``download_file`` raises or when a file literally named ``login``
    shows up in the target folder (it is removed again).  After five failed
    attempts a ``<name>.download_failed`` marker file is written instead.

    Args:
        model_versions: Version dicts, each with a ``"files"`` list whose
            items provide ``"downloadUrl"`` and ``"name"``.  An empty list is
            a no-op.
        folder: Destination directory for the first version.
        existing_old_version_files: File names of old versions to skip.
    """
    if not model_versions:
        return

    max_attempts = 5
    already_backed_up = set(existing_old_version_files or ())

    def fetch_with_retries(download_url: str, target_folder: str, file_name: str, login_context: str = ""):
        """Download one file, retrying and leaving a marker file on failure."""
        failed_attempts = 0
        while failed_attempts < max_attempts:
            try:
                self.download_file(download_url, target_folder, file_name)
            except Exception as e:
                logger.error(f"Exception occurred while downloading {file_name}: {e}")
                failed_attempts += 1
                continue

            # NOTE(review): download_file logs request failures and returns
            # normally, so such a failure is reported as success below unless
            # a "login" file appears - confirm whether that is intended.
            if "login" in os.listdir(target_folder):
                failed_attempts += 1
                logger.warning(f"'login' file found{login_context}. Will try again. ({failed_attempts}/{max_attempts})")
                os.remove(os.path.join(target_folder, "login"))
            else:
                logger.info(f"Successfully downloaded {file_name}")
                return

        dummy_file_name = f"{file_name}.download_failed"
        dummy_file_path = os.path.join(target_folder, dummy_file_name)
        try:
            with open(dummy_file_path, "w") as f:
                f.write("Download failed after 5 attempts.")
            logger.error(f"Failed to download {file_name}. Created dummy file {dummy_file_name}. URL: {download_url}")
        except Exception as e:
            logger.error(f"Failed to create dummy file for {file_name}: {e}")

    # Latest version.
    for file_info in model_versions[0]["files"]:
        fetch_with_retries(file_info["downloadUrl"], folder, file_info["name"])

    # Older versions.
    if len(model_versions) > 1:
        old_versions_folder = os.path.join(folder, "old_versions")
        os.makedirs(old_versions_folder, exist_ok=True)
        for version in model_versions[1:]:
            for file_info in version["files"]:
                file_name = file_info["name"]
                if file_name in already_backed_up:
                    logger.info(f"Skipping download of existing old version file: {file_name}")
                    continue
                fetch_with_retries(
                    file_info["downloadUrl"],
                    old_versions_folder,
                    file_name,
                    login_context=f" while downloading {file_name}",
                )
 
306
 
307
def download_images(self, model_versions: list, folder: str):
    """Download all preview images and pack them into ``folder/images.zip``.

    Every ``images[*]["url"]`` of every version is saved to
    ``folder/images/<last URL segment>.png``; failed downloads are logged and
    skipped.  The ``images`` directory is then archived with the external
    ``zip`` tool into a password-protected ``images.zip`` and removed.  If
    archiving fails the error is logged and the ``images`` directory is kept,
    so the downloaded files are not lost.
    """
    images_folder = os.path.join(folder, "images")
    os.makedirs(images_folder, exist_ok=True)

    image_urls = [
        img["url"]
        for version in model_versions
        for img in version.get("images", [])
    ]

    for image_url in image_urls:
        image_name = image_url.split("/")[-1]
        try:
            response = requests.get(image_url, timeout=60)
            response.raise_for_status()
            with open(os.path.join(images_folder, f"{image_name}.png"), "wb") as file:
                file.write(response.content)
        except requests.RequestException as e:
            logger.error(f"Error downloading image {image_url}: {e}")

    # Run zip inside `folder` via cwd= rather than os.chdir(): chdir changes
    # the working directory of the whole process.
    # NOTE(review): the archive password is hard-coded and is visible in the
    # process list while zip runs - confirm this is acceptable.
    try:
        subprocess.run(
            ['zip', '-e', '--password=osanpo', 'images.zip', '-r', 'images'],
            check=True,
            cwd=folder,
        )
        logger.info(f"Images compressed and saved to {os.path.join(folder, 'images.zip')}")
    except subprocess.CalledProcessError as e:
        logger.error(f"Error creating zip file: {e}")
        # Keep the raw images: deleting them now would discard the only copy.
        return

    if os.path.exists(images_folder):
        shutil.rmtree(images_folder)
341
+
342
def save_html_content(self, url: str, folder: str):
    """Fetch ``url`` and store the response body as ``<folder name>.html``.

    The file is written inside ``folder`` and named after the folder's last
    path component.  Any failure is logged and swallowed.

    NOTE(review): ``process_model`` passes the same URL that
    ``get_model_info`` decodes as JSON, so the saved "HTML" looks to be the
    API response rather than the model's web page - confirm the intent.
    """
    try:
        response = requests.get(url, timeout=60)
        response.raise_for_status()
        # Use only the last component so a folder such as "a/b" yields
        # "a/b/b.html" rather than the non-existent "a/b/a/b.html".
        page_name = os.path.basename(os.path.normpath(folder))
        html_path = os.path.join(folder, f"{page_name}.html")
        with open(html_path, 'w', encoding='utf-8') as file:
            file.write(response.text)
    except Exception as e:
        logger.error(f"Error saving HTML content for URL {url}: {e}")
352
+
353
@staticmethod
def save_model_info(model_info: dict, folder: str):
    """Write ``model_info`` as 2-space-indented JSON to ``folder/model_info.json``."""
    destination = os.path.join(folder, "model_info.json")
    with open(destination, "w") as out:
        json.dump(model_info, out, indent=2)
358
+
359
@staticmethod
def increment_repo_name(repo_id: str) -> str:
    """Return ``repo_id`` with its trailing number increased by one.

    A name without trailing digits gets ``1`` appended.  The number is
    re-rendered without zero padding (``repo007`` becomes ``repo8``).
    """
    trailing_digits = re.search(r'(\d+)$', repo_id)
    if trailing_digits is None:
        return f"{repo_id}1"

    bumped = int(trailing_digits.group(1)) + 1
    head = repo_id[:trailing_digits.start()]
    tail = repo_id[trailing_digits.end():]
    return f"{head}{bumped}{tail}"
369
 
370
+ # =============================
371
+ # Rewritten to encrypt with rclone before uploading
372
+ # =============================
373
def upload_file(self, file_path: str, repo_id: Optional[str] = None, path_in_repo: Optional[str] = None):
    """Encrypt ``file_path`` with rclone and upload the result.

    Steps: encrypt into ``config.ENCRYPTED_DIR``, upload everything found
    there, then remove that directory - also when one of the first two steps
    raises.

    Args:
        file_path: Local file to encrypt and upload.
        repo_id: Target repository; defaults to ``self.repo_ids['current']``.
        path_in_repo: Passed on to ``upload_encrypted_files``, which uses it
            as a directory prefix for the encrypted file(s).  Defaults to the
            base name of ``file_path``.
    """
    if repo_id is None:
        repo_id = self.repo_ids['current']
    if path_in_repo is None:
        path_in_repo = os.path.basename(file_path)

    try:
        # 1) Encrypt the file via rclone.
        self.encrypt_with_rclone(file_path, is_file=True)

        # 2) Upload the encrypted output.
        self.upload_encrypted_files(repo_id=repo_id, path_in_repo=path_in_repo)
    finally:
        # 3) Always clear the staging directory so a failed run does not
        #    leave encrypted data behind.
        if os.path.isdir(self.config.ENCRYPTED_DIR):
            shutil.rmtree(self.config.ENCRYPTED_DIR, ignore_errors=True)
 
 
392
 
393
def upload_folder(self, folder_path: str, path_in_repo: Optional[str] = None):
    """Encrypt ``folder_path`` with rclone and upload it to the current repo.

    Steps: encrypt into ``config.ENCRYPTED_DIR``, upload everything found
    there to ``self.repo_ids['current']``, then remove that directory - also
    when one of the first two steps raises.

    Args:
        folder_path: Local directory to encrypt and upload.
        path_in_repo: Directory prefix inside the repository; defaults to the
            last component of ``folder_path``.
    """
    if path_in_repo is None:
        # normpath first so a trailing slash does not yield an empty name.
        path_in_repo = os.path.basename(os.path.normpath(folder_path))

    try:
        # 1) Encrypt the folder via rclone.
        self.encrypt_with_rclone(folder_path, is_file=False)

        # 2) Upload the encrypted output.
        self.upload_encrypted_files(repo_id=self.repo_ids['current'], path_in_repo=path_in_repo)
    finally:
        # 3) Always clear the staging directory.
        if os.path.isdir(self.config.ENCRYPTED_DIR):
            shutil.rmtree(self.config.ENCRYPTED_DIR, ignore_errors=True)
410
+
411
def read_model_list(self):
    """Parse ``config.LIST_FILE`` into a ``{backup URL: model page name}`` dict.

    Each non-empty line is expected to look like ``<name>: <url>``.  The line
    is split at the *last* ``": "`` so that model names which themselves
    contain ``": "`` are kept whole.  Lines without the separator are
    ignored.

    Returns:
        The mapping, or an empty dict when the file cannot be read.
    """
    model_list = {}
    try:
        with open(self.config.LIST_FILE, "r", encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                if not line:
                    continue
                parts = line.rsplit(": ", 1)
                if len(parts) == 2:
                    modelpage_name, model_hf_url = parts
                    model_list[model_hf_url] = modelpage_name
        return model_list
    except (OSError, UnicodeError) as e:
        logger.error(f"Failed to read model list: {e}")
        return {}
427
 
428
def get_repo_info(self, repo_id):
    """Return the paths of all files in the Hugging Face repo ``repo_id``.

    Only the file names are needed, so per-file metadata is not requested.

    Returns:
        A list of repository-relative file paths, or an empty list when the
        repository cannot be queried.
    """
    try:
        repo_info = self.api.repo_info(repo_id=repo_id, files_metadata=False)
        # `siblings` may be None when no file listing is returned.
        return [sibling.rfilename for sibling in (repo_info.siblings or [])]
    except Exception as e:
        logger.error(f"Failed to get repo info for {repo_id}: {e}")
        return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
437
 
438
def process_model(self, model_url: str):
    """Back up the model behind ``model_url``.

    The model id is the last path segment of ``model_url``.  The method
    fetches the model info, derives a working folder from the name of the
    latest version's ``Model`` file (or its first file), downloads all
    versions, images, the page content and the info JSON into that folder,
    uploads the folder encrypted, and appends ``<name>: <backup URL>`` to
    ``config.LIST_FILE``.

    All exceptions are logged and swallowed.  The working folder is removed
    in every case once it has been created.
    """
    work_dir = None
    try:
        model_id = model_url.rstrip("/").split("/")[-1]
        model_info = self.get_model_info(model_id) or {}

        model_versions = model_info.get("modelVersions") or []
        latest_files = model_versions[0].get("files") if model_versions else None
        if not latest_files:
            logger.error(f"No downloadable files found for model ID {model_id}; skipping.")
            return

        model_file = next(
            (file for file in latest_files if file.get('type') == 'Model'),
            None
        )
        if not model_file:
            model_file = latest_files[0]
            logger.warning(f"No 'Model' type file found for model ID {model_id}. Using first file's name.")
        latest_filename = model_file['name']

        # The name comes from the remote API: keep only its last path
        # component so the working folder always stays inside the cwd.
        safe_name = latest_filename.replace("\\", "/").rsplit("/", 1)[-1]
        folder = os.path.splitext(safe_name)[0]
        if folder in ("", ".", ".."):
            folder = f"model_{model_id}"

        os.makedirs(folder, exist_ok=True)
        work_dir = folder

        model_hf_url = f"https://huggingface.co/{self.repo_ids['current']}/tree/main/{folder}"
        model_list = self.read_model_list()

        if model_hf_url in model_list:
            # Already backed up once: skip old-version files that are
            # present in the repository.
            # NOTE(review): files are uploaded through rclone crypt, so the
            # repository may only contain encrypted names, in which case
            # this plain-text prefix never matches - confirm.
            repo_files = self.get_repo_info(self.repo_ids['current'])
            old_versions_prefix = f"{folder}/old_versions/"
            existing_old_version_files = [
                os.path.basename(f) for f in repo_files if f.startswith(old_versions_prefix)
            ]
        else:
            existing_old_version_files = []

        self.download_model(model_versions, folder, existing_old_version_files)
        self.download_images(model_versions, folder)
        self.save_html_content(model_url, folder)
        self.save_model_info(model_info, folder)

        # Encrypt the folder with rclone and upload it to Hugging Face.
        self.upload_folder(folder)

        # Update the model list.  The URL is rebuilt because the upload may
        # have switched self.repo_ids['current'] to a new repository.
        modelpage_name = model_info.get("name", "Unnamed Model")
        model_hf_url = f"https://huggingface.co/{self.repo_ids['current']}/tree/main/{folder}"
        with open(self.config.LIST_FILE, "a", encoding="utf-8") as f:
            f.write(f"{modelpage_name}: {model_hf_url}\n")

    except Exception as e:
        logger.error(f"Unexpected error processing model ({model_url}): {e}")
    finally:
        # Remove the local copy on success and on failure alike so that
        # failed runs do not accumulate large downloads on disk.
        if work_dir and os.path.exists(work_dir):
            shutil.rmtree(work_dir, ignore_errors=True)
490
 
491
  async def crawl(self):
492
+ """モデルを定期的にチェックし、更新を行う。"""
493
  while True:
494
  try:
495
  login(token=self.config.HUGGINGFACE_API_KEY, add_to_git_credential=True)
496
 
497
+ # model_list.logのダウンロード
498
+ model_list_path = hf_hub_download(repo_id=self.repo_ids['model_list'], filename=self.config.LIST_FILE)
499
  shutil.copyfile(model_list_path, f"./{self.config.LIST_FILE}")
500
 
501
+ # ログファイルのダウンロード
502
+ local_file_path = hf_hub_download(repo_id=self.repo_ids["log"], filename=self.config.LOG_FILE)
503
+ shutil.copyfile(local_file_path, f"./{self.config.LOG_FILE}")
504
+
505
+ # ログ読み込み
506
+ with open(self.config.LOG_FILE, "r", encoding="utf-8") as file:
507
+ lines = file.read().splitlines()
508
+ old_models = json.loads(lines[0]) if len(lines) > 0 else []
509
+ self.repo_ids["current"] = lines[1] if len(lines) > 1 else ""
510
+
511
+ # 新着モデルの取得
512
+ response = requests.get(self.config.URLS["latest"], headers=self.config.HEADERS)
513
+ response.raise_for_status()
514
+ latest_models = response.json().get("items", [])
515
+ latest_model_ids = [item.get("id") for item in latest_models if "id" in item]
516
+
517
+ # 増分の確認
518
+ new_models = list(set(latest_model_ids) - set(old_models))
519
+
520
+ if new_models:
521
+ logger.info(f"New models found: {new_models}")
522
+ model_id = new_models[0]
523
+ for attempt in range(1, 6):
524
  try:
525
+ self.process_model(f"{self.config.URLS['modelId']}{model_id}")
526
  break
527
  except Exception as e:
528
+ logger.error(f"Failed to process model ID {model_id} (Attempt {attempt}/5): {e}")
529
+ if attempt == 5:
530
+ logger.error(f"Skipping model ID {model_id} after 5 failed attempts.")
531
  else:
532
  await asyncio.sleep(2)
 
 
 
 
 
 
 
 
533
  else:
534
+ # ログファイルを最新のモデルIDで上書き
535
+ with open(self.config.LOG_FILE, "w", encoding="utf-8") as f:
536
+ f.write(json.dumps(latest_model_ids) + "\n")
537
+ f.write(f"{self.repo_ids['current']}\n")
538
+ logger.info(f"Updated log file: {self.config.LOG_FILE}")
539
+
540
+ # ログファイルをリポジトリにアップロード
541
+ self.upload_file(
542
+ file_path=self.config.LOG_FILE,
543
+ repo_id=self.repo_ids["log"],
544
+ path_in_repo=self.config.LOG_FILE
545
+ )
546
+ logger.info("Uploaded log file to repository.")
547
+
548
+ logger.info("No new models found.")
549
  await asyncio.sleep(60)
550
  continue
551
+
552
+ # 古いモデルリストに追加
553
+ old_models.append(model_id)
554
+
555
+ # ログファイルの更新
556
+ with open(self.config.LOG_FILE, "w", encoding="utf-8") as f:
557
+ f.write(json.dumps(old_models) + "\n")
558
+ f.write(f"{self.repo_ids['current']}\n")
559
+ logger.info(f"Updated log file with new model ID: {model_id}")
560
+
561
+ # ログとモデルリストのアップロード (rcloneは使わず直接uploadでもOKならそのまま)
562
+ self.upload_file(
563
+ file_path=self.config.LOG_FILE,
564
+ repo_id=self.repo_ids["log"],
565
+ path_in_repo=self.config.LOG_FILE
566
+ )
567
+ self.upload_file(
568
+ file_path=self.config.LIST_FILE,
569
+ repo_id=self.repo_ids["model_list"],
570
+ path_in_repo=self.config.LIST_FILE
571
+ )
572
+
573
  except Exception as e:
574
+ logger.error(f"Error during crawling: {e}")
575
  await asyncio.sleep(300)
576
 
577
 
578
# Expose the FastAPI application at module level: `app` is the FastAPI
# instance created by the crawler.  Building the crawler here runs its
# constructor (rclone configuration, route registration) on import.
config = Config()
crawler = CivitAICrawler(config)
app = crawler.app