ttttdiva committed on
Commit
e811dd4
·
verified ·
1 Parent(s): 4762059

Upload main.py

Browse files
Files changed (1) hide show
  1. main.py +77 -108
main.py CHANGED
@@ -7,7 +7,6 @@ import re
7
  import shutil
8
  import subprocess
9
  import time
10
- import uuid
11
  from typing import Optional
12
 
13
  import requests
@@ -25,9 +24,9 @@ class Config:
25
  LOG_FILE = "civitai_backup.log"
26
  LIST_FILE = "model_list.log"
27
  REPO_IDS = {
28
- "log": "ttttdiva/CivitAI_log_test", # ←ログ用リポジトリ
29
- "model_list": "ttttdiva/CivitAI_model_info_test", # ←モデル一覧用リポジトリ
30
- "current": "" # ←実際のアップ先
31
  }
32
  URLS = {
33
  "latest": "https://civitai.com/api/v1/models?sort=Newest",
@@ -81,10 +80,7 @@ class CivitAICrawler:
81
  async def startup_event():
82
  asyncio.create_task(self.crawl())
83
 
84
- # ここで「download_file」や「encrypt_and_upload_folder」等のサポート関数を定義
85
-
86
  def download_file(self, url: str, dest_folder: str, filename: str) -> Optional[str]:
87
- """実際にファイルをダウンロードし、dest_folder/filename に保存。"""
88
  os.makedirs(dest_folder, exist_ok=True)
89
  try:
90
  r = requests.get(url, headers=self.config.HEADERS, stream=True)
@@ -101,7 +97,6 @@ class CivitAICrawler:
101
  return file_path
102
 
103
  def upload_file(self, file_path: str, repo_id: Optional[str]=None, path_in_repo: Optional[str]=None):
104
- """単一ファイルアップロード用メソッド"""
105
  if repo_id is None:
106
  repo_id = self.repo_ids["current"]
107
  if path_in_repo is None:
@@ -118,7 +113,6 @@ class CivitAICrawler:
118
  logger.error(f"[ERR] upload_file: {e}")
119
 
120
  def upload_folder(self, folder_path: str, path_in_repo: Optional[str] = None):
121
- """フォルダ丸ごとアップロード"""
122
  if path_in_repo is None:
123
  path_in_repo = os.path.basename(folder_path)
124
 
@@ -133,98 +127,61 @@ class CivitAICrawler:
133
  logger.error(f"[ERR] upload_folder: {e}")
134
 
135
  def encrypt_and_upload_folder(self, local_folder: str) -> Optional[str]:
136
- """
137
- 1) subfolder_label = モデルフォルダ名 (local_folderのベース名)
138
- 2) rclone mkdir cryptLocal:subfolder_label
139
- 3) rclone copy local_folder => cryptLocal:subfolder_label
140
- 4) rclone copy cryptLocal:subfolder_label => encrypted/subfolder_label (差分検知やめる)
141
- 5) upload_folder(encrypted/subfolder_label, path_in_repo=subfolder_label)
142
- 6) cleanup
143
- """
144
  if not os.path.isdir(local_folder):
145
  logger.error(f"[ERR] {local_folder} is not a directory.")
146
  return None
147
 
148
- subfolder_label = os.path.basename(local_folder) # ★ モデル名 or ファイル名を取得
149
- if not subfolder_label:
150
- logger.error(f"[ERR] local_folder has empty basename: {local_folder}")
151
- return None
152
 
153
- # === 1) rclone mkdir
154
- mkdir_cmd = ["rclone", "mkdir", f"cryptLocal:{subfolder_label}"]
155
- logger.info(f"[CMD] {' '.join(mkdir_cmd)}")
156
  try:
157
- subprocess.run(mkdir_cmd, check=True)
158
- logger.info(f"[OK] rclone mkdir cryptLocal:{subfolder_label}")
159
  except subprocess.CalledProcessError as e:
160
- logger.error(f"[ERR] rclone mkdir => {e}")
161
  return None
162
 
163
- # === 2) rclone copy local_folder => cryptLocal:subfolder_label
164
- copy_cmd = ["rclone", "copy", local_folder, f"cryptLocal:{subfolder_label}", "--create-empty-src-dirs"]
165
- logger.info(f"[CMD] {' '.join(copy_cmd)}")
166
- try:
167
- subprocess.run(copy_cmd, check=True)
168
- logger.info(f"[OK] rclone copy => cryptLocal:{subfolder_label}")
169
- except subprocess.CalledProcessError as e:
170
- logger.error(f"[ERR] rclone copy => {e}")
171
  return None
172
-
173
- # === 3) ローカル "encrypted/subfolder_label" に復号ファイル(?) を落とすかどうか
174
- # → HFへのフォルダアップには「ローカルに暗号ファイル群」が必要
175
- # → しかし "directory_name_encryption=true" ならリモート名だけで復号されず、ランダム名かも
176
- # 今回は「物理的に local へ落として upload_folder()」する想定にする
177
- encrypted_base = os.path.join(os.getcwd(), "encrypted")
178
- os.makedirs(encrypted_base, exist_ok=True)
179
-
180
- # remove old if exist
181
- local_enc_path = os.path.join(encrypted_base, subfolder_label)
182
- if os.path.exists(local_enc_path):
183
- shutil.rmtree(local_enc_path)
184
-
185
- # rclone copy cryptLocal:subfolder_label => encrypted/subfolder_label
186
- pull_cmd = ["rclone", "copy", f"cryptLocal:{subfolder_label}", local_enc_path, "--create-empty-src-dirs"]
187
- logger.info(f"[CMD] {' '.join(pull_cmd)}")
188
- try:
189
- subprocess.run(pull_cmd, check=True)
190
- logger.info(f"[OK] rclone copy => {local_enc_path}")
191
- except subprocess.CalledProcessError as e:
192
- logger.error(f"[ERR] rclone copy back => {e}")
193
  return None
194
 
195
- # === 4) upload_folder(local_enc_path, path_in_repo=subfolder_label)
196
  try:
197
- self.upload_folder(local_enc_path, path_in_repo=subfolder_label)
198
  except Exception as e:
199
  logger.error(f"[ERR] encrypt_and_upload_folder => upload_folder: {e}")
200
 
201
- # === 5) cleanup
202
- # remove local_folder & local_enc_path
203
  shutil.rmtree(local_folder, ignore_errors=True)
204
- shutil.rmtree(local_enc_path, ignore_errors=True)
205
- logger.info(f"[CLEANUP] removed {local_folder} & {local_enc_path}")
206
 
207
- return subfolder_label
 
208
 
209
- # ここで「download_and_process_versions」(=古いバージョン含めまとめてダウンロード) 定義
210
- # => 今回は名前を合わせ「download_and_process_versions」に合わせる
211
-
212
  def download_and_process_versions(self, model_versions: list, folder: str):
213
- """
214
- 例: 最新バージョン => folder
215
- 古いバージョン => folder/old_versions
216
- """
217
  if not model_versions:
218
  return
219
 
220
- # 最新
221
  latest = model_versions[0]
222
  for f_info in latest.get("files", []):
223
  url = f_info["downloadUrl"]
224
  fname = f_info["name"]
225
  self.download_file(url, folder, fname)
226
 
227
- # 古い
228
  if len(model_versions) > 1:
229
  ov_folder = os.path.join(folder, "old_versions")
230
  os.makedirs(ov_folder, exist_ok=True)
@@ -235,7 +192,6 @@ class CivitAICrawler:
235
  self.download_file(url, ov_folder, fname)
236
 
237
  def get_model_info(self, model_id: str) -> dict:
238
- """モデルIDからCivitAIの情報を取得"""
239
  try:
240
  url = f"{self.config.URLS['modelId']}{model_id}"
241
  resp = requests.get(url, headers=self.config.HEADERS)
@@ -245,6 +201,20 @@ class CivitAICrawler:
245
  logger.error(f"[ERR] get_model_info({model_id}): {e}")
246
  return {}
247
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  def process_model(self, model_id: str):
249
  info = self.get_model_info(model_id)
250
  if not info or "modelVersions" not in info:
@@ -252,57 +222,57 @@ class CivitAICrawler:
252
  return
253
 
254
  versions = info["modelVersions"]
255
-
256
- base_dir = "local_models"
257
  os.makedirs(base_dir, exist_ok=True)
258
 
259
- # フォルダ名はモデル名にする場合:
260
- # name = info.get("name","UnknownModel")
261
- # folder_name = re.sub(r'[\\/*?:"<>|]', '_', name)
262
- # ただし今はIDが使われているので:
263
- folder_name = str(model_id)
264
-
265
- folder_path = os.path.join(base_dir, folder_name)
266
  if os.path.exists(folder_path):
267
  shutil.rmtree(folder_path)
268
  os.makedirs(folder_path, exist_ok=True)
269
-
270
  logger.info(f"[OK] Created local folder => {folder_path}")
271
 
272
  # ダウンロード
273
- for v in versions:
274
- for f_info in v.get("files", []):
275
- dl_url = f_info["downloadUrl"]
276
- fname = f_info["name"]
277
- self.download_file(dl_url, folder_path, fname)
278
-
279
- # 画像
280
  self.download_images(versions, folder_path)
281
 
282
- # OPTIONAL: save model_info.json or HTML
283
- # self.save_model_info(info, folder_path)
284
-
285
  # === 暗号化&アップロード ===
286
- logger.info(f"[DEBUG] Calling encrypt_and_upload_folder => {folder_path}")
287
- enc_folder = self.encrypt_and_upload_folder(folder_path)
288
- if enc_folder is None:
289
- logger.warning("[WARN] Encryption/Upload returned None.")
 
290
  else:
291
- logger.info(f"[OK] Encrypted & uploaded folder => {enc_folder}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
 
293
  async def crawl(self):
294
  while True:
295
  try:
296
  login(token=self.config.HUGGINGFACE_API_KEY, add_to_git_credential=True)
297
- # model_list, log_file のダウンロード
298
- mlist_path = hf_hub_download(repo_id=self.repo_ids["model_list"], filename=self.config.LIST_FILE)
299
- shutil.copyfile(mlist_path, f"./{self.config.LIST_FILE}")
300
 
301
- lfile_path = hf_hub_download(repo_id=self.repo_ids["log"], filename=self.config.LOG_FILE)
302
- shutil.copyfile(lfile_path, f"./{self.config.LOG_FILE}")
 
 
 
303
 
304
- # read logs
305
- with open(self.config.LOG_FILE, 'r', encoding='utf-8') as f:
306
  lines = f.read().splitlines()
307
  old_models = json.loads(lines[0]) if len(lines)>0 else []
308
  self.repo_ids["current"] = lines[1] if len(lines)>1 else ""
@@ -313,13 +283,12 @@ class CivitAICrawler:
313
  items = r.json().get("items", [])
314
  new_ids = [it["id"] for it in items if "id" in it]
315
 
316
- # diff
317
  diff_ids = list(set(new_ids) - set(old_models))
318
  if diff_ids:
319
  mid = diff_ids[0]
320
  for attempt in range(1,6):
321
  try:
322
- self.process_model(str(mid)) # モデルIDはstrにして渡す
323
  break
324
  except Exception as e:
325
  logger.error(f"[ERR] process_model {mid} (attempt {attempt}): {e}")
@@ -335,7 +304,6 @@ class CivitAICrawler:
335
 
336
  # アップロードログ
337
  self.upload_file(self.config.LOG_FILE, self.repo_ids["log"], self.config.LOG_FILE)
338
- self.upload_file(self.config.LIST_FILE, self.repo_ids["model_list"], self.config.LIST_FILE)
339
  else:
340
  with open(self.config.LOG_FILE,'w',encoding='utf-8') as f:
341
  f.write(json.dumps(new_ids)+"\n")
@@ -348,6 +316,7 @@ class CivitAICrawler:
348
  logger.error(f"[ERR] crawl => {e}")
349
  await asyncio.sleep(300)
350
 
 
351
  # FastAPI
352
  config = Config()
353
  crawler = CivitAICrawler(config)
 
7
  import shutil
8
  import subprocess
9
  import time
 
10
  from typing import Optional
11
 
12
  import requests
 
24
  LOG_FILE = "civitai_backup.log"
25
  LIST_FILE = "model_list.log"
26
  REPO_IDS = {
27
+ "log": "ttttdiva/CivitAI_log_test",
28
+ "model_list": "ttttdiva/CivitAI_model_info_test",
29
+ "current": ""
30
  }
31
  URLS = {
32
  "latest": "https://civitai.com/api/v1/models?sort=Newest",
 
80
  async def startup_event():
81
  asyncio.create_task(self.crawl())
82
 
 
 
83
  def download_file(self, url: str, dest_folder: str, filename: str) -> Optional[str]:
 
84
  os.makedirs(dest_folder, exist_ok=True)
85
  try:
86
  r = requests.get(url, headers=self.config.HEADERS, stream=True)
 
97
  return file_path
98
 
99
  def upload_file(self, file_path: str, repo_id: Optional[str]=None, path_in_repo: Optional[str]=None):
 
100
  if repo_id is None:
101
  repo_id = self.repo_ids["current"]
102
  if path_in_repo is None:
 
113
  logger.error(f"[ERR] upload_file: {e}")
114
 
115
  def upload_folder(self, folder_path: str, path_in_repo: Optional[str] = None):
 
116
  if path_in_repo is None:
117
  path_in_repo = os.path.basename(folder_path)
118
 
 
127
  logger.error(f"[ERR] upload_folder: {e}")
128
 
129
  def encrypt_and_upload_folder(self, local_folder: str) -> Optional[str]:
130
+ """local_folder -> cryptLocal: => encrypted/??? => upload_folder => cleanup"""
 
 
 
 
 
 
 
131
  if not os.path.isdir(local_folder):
132
  logger.error(f"[ERR] {local_folder} is not a directory.")
133
  return None
134
 
135
+ encrypted_dir = os.path.join(os.getcwd(), "encrypted")
136
+ os.makedirs(encrypted_dir, exist_ok=True)
 
 
137
 
138
+ before = set(os.listdir(encrypted_dir))
139
+ cmd = ["rclone", "copy", local_folder, "cryptLocal:", "--create-empty-src-dirs"]
140
+ logger.info(f"[CMD] {' '.join(cmd)}")
141
  try:
142
+ subprocess.run(cmd, check=True)
143
+ logger.info("[OK] rclone copy => cryptLocal:")
144
  except subprocess.CalledProcessError as e:
145
+ logger.error(f"[ERR] rclone copy failed: {e}")
146
  return None
147
 
148
+ after = set(os.listdir(encrypted_dir))
149
+ diff = after - before
150
+ if not diff:
151
+ logger.error("[ERR] no new directory in ./encrypted after copy")
 
 
 
 
152
  return None
153
+ if len(diff) > 1:
154
+ logger.warning(f"[WARN] multiple new dirs => {diff}")
155
+ enc_name = diff.pop()
156
+ enc_path = os.path.join(encrypted_dir, enc_name)
157
+ if not os.path.isdir(enc_path):
158
+ logger.error(f"[ERR] {enc_path} is not a directory.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  return None
160
 
161
+ # HF upload folder
162
  try:
163
+ self.upload_folder(enc_path, path_in_repo=enc_name)
164
  except Exception as e:
165
  logger.error(f"[ERR] encrypt_and_upload_folder => upload_folder: {e}")
166
 
167
+ # cleanup
 
168
  shutil.rmtree(local_folder, ignore_errors=True)
169
+ shutil.rmtree(enc_path, ignore_errors=True)
170
+ logger.info(f"[CLEANUP] removed {local_folder} & {enc_path}")
171
 
172
+ # ★ 成功したら enc_name を返す
173
+ return enc_name
174
 
 
 
 
175
  def download_and_process_versions(self, model_versions: list, folder: str):
 
 
 
 
176
  if not model_versions:
177
  return
178
 
 
179
  latest = model_versions[0]
180
  for f_info in latest.get("files", []):
181
  url = f_info["downloadUrl"]
182
  fname = f_info["name"]
183
  self.download_file(url, folder, fname)
184
 
 
185
  if len(model_versions) > 1:
186
  ov_folder = os.path.join(folder, "old_versions")
187
  os.makedirs(ov_folder, exist_ok=True)
 
192
  self.download_file(url, ov_folder, fname)
193
 
194
  def get_model_info(self, model_id: str) -> dict:
 
195
  try:
196
  url = f"{self.config.URLS['modelId']}{model_id}"
197
  resp = requests.get(url, headers=self.config.HEADERS)
 
201
  logger.error(f"[ERR] get_model_info({model_id}): {e}")
202
  return {}
203
 
204
def download_images(self, model_versions: list, folder: str) -> None:
    """Download every image listed in ``model_versions`` into ``folder/images``.

    Args:
        model_versions: Version dicts; each may carry an ``"images"`` list
            whose entries hold the image address under ``"url"``.
        folder: Directory that receives the ``images`` subfolder (created
            if it does not exist).

    Entries without a usable URL or without a file name in the URL path are
    skipped so that one malformed record cannot abort the whole model.
    """
    from urllib.parse import urlsplit

    images_folder = os.path.join(folder, "images")
    os.makedirs(images_folder, exist_ok=True)

    for version in model_versions:
        for img_info in version.get("images", []):
            img_url = img_info.get("url")
            if not img_url:
                continue
            # Name the file after the URL *path* only, so a query string or
            # fragment never ends up in the local file name.
            filename = os.path.basename(urlsplit(img_url).path)
            if not filename:
                continue
            self.download_file(img_url, images_folder, filename)
217
+
218
  def process_model(self, model_id: str):
219
  info = self.get_model_info(model_id)
220
  if not info or "modelVersions" not in info:
 
222
  return
223
 
224
  versions = info["modelVersions"]
225
+ base_dir = "local_models"
 
226
  os.makedirs(base_dir, exist_ok=True)
227
 
228
+ # モデル名
229
+ model_name = info.get("name", f"ID_{model_id}")
230
+ safe_name = re.sub(r'[\\/*?:"<>|]', '_', model_name) # OSで使えない文字を _
231
+ folder_path = os.path.join(base_dir, safe_name)
 
 
 
232
  if os.path.exists(folder_path):
233
  shutil.rmtree(folder_path)
234
  os.makedirs(folder_path, exist_ok=True)
 
235
  logger.info(f"[OK] Created local folder => {folder_path}")
236
 
237
  # ダウンロード
238
+ self.download_and_process_versions(versions, folder_path)
 
 
 
 
 
 
239
  self.download_images(versions, folder_path)
240
 
 
 
 
241
  # === 暗号化&アップロード ===
242
+ logger.info(f"[DEBUG] encrypt_and_upload_folder => {folder_path}")
243
+ enc_subfolder = self.encrypt_and_upload_folder(folder_path)
244
+ if enc_subfolder is None:
245
+ # 失敗
246
+ enc_subfolder = "[ENCRYPT_FAILED]"
247
  else:
248
+ logger.info(f"[OK] Encrypted & uploaded => {enc_subfolder}")
249
+
250
+ # ★ model_list.log に追記
251
+ # enc_subfolder が "[ENCRYPT_FAILED]" でなければ成功したフォルダ名
252
+ hf_url = f"https://huggingface.co/{self.repo_ids['current']}/tree/main/{enc_subfolder}"
253
+ model_list_line = f"{model_name} (ID:{model_id}): {hf_url}\n"
254
+ try:
255
+ with open(self.config.LIST_FILE, "a", encoding="utf-8") as f:
256
+ f.write(model_list_line)
257
+ logger.info(f"[OK] Wrote to model_list.log => {model_list_line.strip()}")
258
+ except Exception as e:
259
+ logger.error(f"[ERR] writing model_list.log => {e}")
260
+
261
+ # ★ model_list.log をアップロード
262
+ self.upload_file(self.config.LIST_FILE, self.repo_ids["model_list"], self.config.LIST_FILE)
263
 
264
  async def crawl(self):
265
  while True:
266
  try:
267
  login(token=self.config.HUGGINGFACE_API_KEY, add_to_git_credential=True)
 
 
 
268
 
269
+ model_list_path = hf_hub_download(self.repo_ids["model_list"], self.config.LIST_FILE)
270
+ shutil.copyfile(model_list_path, f"./{self.config.LIST_FILE}")
271
+
272
+ log_path = hf_hub_download(self.repo_ids["log"], self.config.LOG_FILE)
273
+ shutil.copyfile(log_path, f"./{self.config.LOG_FILE}")
274
 
275
+ with open(self.config.LOG_FILE, "r", encoding="utf-8") as f:
 
276
  lines = f.read().splitlines()
277
  old_models = json.loads(lines[0]) if len(lines)>0 else []
278
  self.repo_ids["current"] = lines[1] if len(lines)>1 else ""
 
283
  items = r.json().get("items", [])
284
  new_ids = [it["id"] for it in items if "id" in it]
285
 
 
286
  diff_ids = list(set(new_ids) - set(old_models))
287
  if diff_ids:
288
  mid = diff_ids[0]
289
  for attempt in range(1,6):
290
  try:
291
+ self.process_model(str(mid))
292
  break
293
  except Exception as e:
294
  logger.error(f"[ERR] process_model {mid} (attempt {attempt}): {e}")
 
304
 
305
  # アップロードログ
306
  self.upload_file(self.config.LOG_FILE, self.repo_ids["log"], self.config.LOG_FILE)
 
307
  else:
308
  with open(self.config.LOG_FILE,'w',encoding='utf-8') as f:
309
  f.write(json.dumps(new_ids)+"\n")
 
316
  logger.error(f"[ERR] crawl => {e}")
317
  await asyncio.sleep(300)
318
 
319
+
320
  # FastAPI
321
  config = Config()
322
  crawler = CivitAICrawler(config)