ttttdiva committed on
Commit d13d5c8 · verified · 1 Parent(s): f52b391

Update main.py

Files changed (1)
  1. main.py +153 -138
main.py CHANGED
@@ -25,9 +25,9 @@ class Config:
     LOG_FILE = "civitai_backup.log"
     LIST_FILE = "model_list.log"
     REPO_IDS = {
-        "log": "ttttdiva/CivitAI_log_test",
-        "model_list": "ttttdiva/CivitAI_model_info_test",
-        "current": ""
+        "log": "ttttdiva/CivitAI_log_test",                # log repository
+        "model_list": "ttttdiva/CivitAI_model_info_test",  # model-list repository
+        "current": ""                                      # actual upload destination
     }
     URLS = {
         "latest": "https://civitai.com/api/v1/models?sort=Newest",
@@ -55,83 +55,99 @@ class CivitAICrawler:
         self.setup_routes()
 
     def setup_rclone_conf(self):
-        # restore rclone.conf from RCLONE_CONF_BASE64
         import base64
         rclone_b64 = os.environ.get("RCLONE_CONF_BASE64", "")
-        if not rclone_b64:
-            logger.warning("[WARN] RCLONE_CONF_BASE64 is empty, rclone may fail.")
-            return
-        conf_dir = ".rclone_config"
-        os.makedirs(conf_dir, exist_ok=True)
-        conf_path = os.path.join(conf_dir, "rclone.conf")
-        with open(conf_path, "wb") as f:
-            f.write(base64.b64decode(rclone_b64))
-        os.environ["RCLONE_CONFIG"] = conf_path
-        logger.info(f"[OK] rclone.conf => {conf_path}")
+        if rclone_b64:
+            conf_dir = ".rclone_config"
+            os.makedirs(conf_dir, exist_ok=True)
+            conf_path = os.path.join(conf_dir, "rclone.conf")
+            with open(conf_path, "wb") as f:
+                f.write(base64.b64decode(rclone_b64))
+            os.environ["RCLONE_CONFIG"] = conf_path
+            logger.info(f"[OK] Created rclone.conf => {conf_path}")
+        else:
+            logger.warning("[WARN] RCLONE_CONF_BASE64 is empty. rclone may fail.")
 
     def setup_routes(self):
         @self.app.get("/")
         def read_root():
             now = str(datetime.datetime.now(self.jst))
             return {
-                "description": f"CivitAI crawler. Current time: {now}",
-                "repo_current": self.repo_ids["current"],
-                "note": "Startup event => self.crawl() loop"
+                "description": f"CivitAI crawler. Time: {now}",
+                "repo_current": self.repo_ids["current"]
             }
 
         @self.app.on_event("startup")
         async def startup_event():
             asyncio.create_task(self.crawl())
 
-    def download_file(self, url: str, destination_folder: str, default_name: str) -> Optional[str]:
-        os.makedirs(destination_folder, exist_ok=True)
+    # Support helpers (download_file, encrypt_and_upload_folder, etc.) are defined here
+
+    def download_file(self, url: str, dest_folder: str, filename: str) -> Optional[str]:
+        """Download the file and save it to dest_folder/filename."""
+        os.makedirs(dest_folder, exist_ok=True)
         try:
-            resp = requests.get(url, headers=self.config.HEADERS, stream=True)
-            resp.raise_for_status()
+            r = requests.get(url, headers=self.config.HEADERS, stream=True)
+            r.raise_for_status()
         except requests.RequestException as e:
-            logger.error(f"[ERR] download_file fail: {e}")
+            logger.error(f"[ERR] download_file: {e}")
             return None
 
-        file_path = os.path.join(destination_folder, default_name)
+        file_path = os.path.join(dest_folder, filename)
         with open(file_path, 'wb') as f:
-            for chunk in resp.iter_content(chunk_size=8192):
+            for chunk in r.iter_content(chunk_size=8192):
                 f.write(chunk)
-        logger.info(f"[OK] Downloaded: {file_path}")
+        logger.info(f"[OK] Downloaded => {file_path}")
         return file_path
 
-    def get_filename_from_cd(self, cd: Optional[str], default_name: str) -> str:
-        if cd:
-            parts = cd.split(';')
-            for p in parts:
-                if "filename=" in p:
-                    return p.split("=")[1].strip().strip('"')
-        return default_name
-
-    def get_model_info(self, model_id: str) -> dict:
-        try:
-            r = requests.get(f"{self.config.URLS['modelId']}{model_id}", headers=self.config.HEADERS)
-            r.raise_for_status()
-            return r.json()
-        except Exception as e:
-            logger.error(f"[ERR] get_model_info({model_id}): {e}")
-            return {}
+    def upload_file(self, file_path: str, repo_id: Optional[str]=None, path_in_repo: Optional[str]=None):
+        """Upload a single file."""
+        if repo_id is None:
+            repo_id = self.repo_ids["current"]
+        if path_in_repo is None:
+            path_in_repo = os.path.basename(file_path)
+
+        try:
+            self.api.upload_file(
+                path_or_fileobj=file_path,
+                repo_id=repo_id,
+                path_in_repo=path_in_repo
+            )
+            logger.info(f"[OK] Uploaded file => {repo_id}:{path_in_repo}")
+        except Exception as e:
+            logger.error(f"[ERR] upload_file: {e}")
+
+    def upload_folder(self, folder_path: str, path_in_repo: Optional[str] = None):
+        """Upload an entire folder."""
+        if path_in_repo is None:
+            path_in_repo = os.path.basename(folder_path)
+
+        try:
+            self.api.upload_folder(
+                folder_path=folder_path,
+                repo_id=self.repo_ids["current"],
+                path_in_repo=path_in_repo
+            )
+            logger.info(f"[OK] uploaded folder => {folder_path} => {self.repo_ids['current']}:{path_in_repo}")
+        except Exception as e:
+            logger.error(f"[ERR] upload_folder: {e}")
 
     def encrypt_and_upload_folder(self, local_folder: str) -> Optional[str]:
-        """
-        rclone copy local_folder => cryptLocal:
-        => diff => upload_folder => cleanup
-        """
+        """local_folder -> cryptLocal: => encrypted/??? => upload_folder => cleanup"""
         if not os.path.isdir(local_folder):
             logger.error(f"[ERR] {local_folder} is not a directory.")
             return None
+
         encrypted_dir = os.path.join(os.getcwd(), "encrypted")
         os.makedirs(encrypted_dir, exist_ok=True)
 
         before = set(os.listdir(encrypted_dir))
-
+        # rclone copy
+        cmd = ["rclone", "copy", local_folder, "cryptLocal:", "--create-empty-src-dirs"]
+        logger.info(f"[CMD] {' '.join(cmd)}")
         try:
-            subprocess.run(["rclone", "copy", local_folder, "cryptLocal:", "--create-empty-src-dirs"], check=True)
-            logger.info(f"[OK] rclone copy {local_folder} => cryptLocal:")
+            subprocess.run(cmd, check=True)
+            logger.info(f"[OK] rclone copy => cryptLocal:")
         except subprocess.CalledProcessError as e:
            logger.error(f"[ERR] rclone copy failed: {e}")
            return None
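
Note: setup_rclone_conf() expects RCLONE_CONF_BASE64 to hold a base64-encoded rclone.conf, and encrypt_and_upload_folder() assumes that config defines a crypt remote named cryptLocal: which writes into ./encrypted, so the before/after directory diff can find the new folder. A hedged sketch of producing that value; the conf path and remote settings are illustrative assumptions, not taken from this commit:

import base64

# rclone.conf is assumed to contain something like:
#   [cryptLocal]
#   type = crypt
#   remote = /path/to/app/encrypted   # assumption: must resolve to ./encrypted for the diff check
#   password = <obscured>
with open("rclone.conf", "rb") as f:  # hypothetical path to an existing rclone.conf
    print(base64.b64encode(f.read()).decode())  # paste the output into RCLONE_CONF_BASE64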
@@ -139,85 +155,47 @@ class CivitAICrawler:
         after = set(os.listdir(encrypted_dir))
         diff = after - before
         if not diff:
-            logger.error("[ERR] No new dir in ./encrypted after rclone copy.")
+            logger.error("[ERR] no new directory in ./encrypted after copy")
             return None
         if len(diff) > 1:
             logger.warning(f"[WARN] multiple new dirs => {diff}")
-        enc_folder_name = diff.pop()
-        enc_folder_path = os.path.join(encrypted_dir, enc_folder_name)
-
-        if not os.path.isdir(enc_folder_path):
-            logger.error(f"[ERR] {enc_folder_path} is not a directory.")
+        enc_name = diff.pop()
+        enc_path = os.path.join(encrypted_dir, enc_name)
+        if not os.path.isdir(enc_path):
+            logger.error(f"[ERR] {enc_path} is not a directory.")
             return None
 
-        # upload_folder to HF
+        # HF upload folder
         try:
-            self.api.upload_folder(
-                folder_path=enc_folder_path,
-                repo_id=self.repo_ids["current"],
-                path_in_repo=enc_folder_name
-            )
-            logger.info(f"[OK] uploaded {enc_folder_path} => {self.repo_ids['current']}:{enc_folder_name}")
+            self.upload_folder(enc_path, path_in_repo=enc_name)
         except Exception as e:
-            logger.error(f"[ERR] HF upload_folder fail: {e}")
+            logger.error(f"[ERR] encrypt_and_upload_folder => upload_folder: {e}")
 
-        # cleanup local
+        # cleanup
         shutil.rmtree(local_folder, ignore_errors=True)
-        shutil.rmtree(enc_folder_path, ignore_errors=True)
-        logger.info(f"[CLEANUP] removed {local_folder} & {enc_folder_path}")
-
-        return enc_folder_name
-
-    def process_model(self, model_url: str):
-        try:
-            model_id = model_url.rstrip("/").split("/")[-1]
-            model_info = self.get_model_info(model_id)
-            if not model_info or "modelVersions" not in model_info:
-                logger.error(f"No valid model info for ID {model_id}. Skipping.")
-                return
-
-            versions = model_info["modelVersions"]
-            if not versions:
-                logger.warning(f"No modelVersions found for ID {model_id}.")
-                return
-
-            # 1) simply collect everything into a directory named after the sanitized model name
-            folder_name = re.sub(r'[\\/*?:"<>|]', '_', model_info.get("name", "UnknownModel"))
-
-            # if the folder already exists, delete it and recreate it (overwrite)
-            if os.path.exists(folder_name):
-                shutil.rmtree(folder_name)
-            os.makedirs(folder_name, exist_ok=True)
-
-            # download (latest/old_versions), images, HTML, model_info.json, etc.
-            self.download_and_process_versions(versions, folder_name)
-            self.download_images(versions, folder_name)
-            self.save_html_content(f"{self.config.URLS['modelPage']}{model_id}", folder_name)
-            self.save_model_info_json(model_info, folder_name)
-
-            # encrypted upload
-            enc_subfolder = self.encrypt_and_upload_folder(folder_name)
-            if enc_subfolder is None:
-                enc_subfolder = "[ENCRYPT_FAILED]"
-
-            hf_enc_url = f"https://huggingface.co/{self.repo_ids['current']}/tree/main/{enc_subfolder}"
-            with open(self.config.LIST_FILE, "a", encoding="utf-8") as f:
-                f.write(f"{model_info.get('name', 'UnnamedModel')} (ID:{model_id}): {hf_enc_url}\n")
-
-        except Exception as e:
-            logger.error(f"Error in process_model({model_url}): {e}")
-
-    def download_versions(self, model_versions: list, folder: str):
-        # put everything under folder/ or an old_versions subfolder
-        # e.g. latest => folder, old => folder/old_versions
+        shutil.rmtree(enc_path, ignore_errors=True)
+        logger.info(f"[CLEANUP] removed {local_folder} & {enc_path}")
+        return enc_name
+
+    # download_and_process_versions (downloads everything, old versions included) is defined here
+    # => the name now matches the call site: download_and_process_versions
+
+    def download_and_process_versions(self, model_versions: list, folder: str):
+        """
+        e.g. latest version => folder
+             older versions => folder/old_versions
+        """
         if not model_versions:
             return
+
+        # latest
         latest = model_versions[0]
         for f_info in latest.get("files", []):
             url = f_info["downloadUrl"]
             fname = f_info["name"]
             self.download_file(url, folder, fname)
 
+        # older versions
         if len(model_versions) > 1:
             ov_folder = os.path.join(folder, "old_versions")
             os.makedirs(ov_folder, exist_ok=True)
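
Note: the snapshot/diff step in encrypt_and_upload_folder() is the core trick: list ./encrypted before the copy, run rclone, and whatever directory appears afterwards is the encrypted counterpart of local_folder. The same pattern in miniature (a sketch, not code from the commit; the function name is invented):

import os
import subprocess
from typing import Optional

def copy_and_detect(local_folder: str, encrypted_dir: str = "encrypted") -> Optional[str]:
    before = set(os.listdir(encrypted_dir))  # snapshot the listing first
    subprocess.run(["rclone", "copy", local_folder, "cryptLocal:", "--create-empty-src-dirs"], check=True)
    new_entries = set(os.listdir(encrypted_dir)) - before  # whatever is new was just written
    return new_entries.pop() if new_entries else None  # name of the encrypted folder, or None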
@@ -227,31 +205,67 @@ class CivitAICrawler:
             fname = f_info["name"]
             self.download_file(url, ov_folder, fname)
 
-    def download_images(self, model_versions: list, folder: str):
-        images_folder = os.path.join(folder, "images")
-        os.makedirs(images_folder, exist_ok=True)
-        for v in model_versions:
-            for im in v.get("images", []):
-                iurl = im["url"]
-                iname = os.path.basename(iurl)
-                self.download_file(iurl, images_folder, iname)
+    def get_model_info_json(self, model_info: dict, folder: str):
+        """Save model_info.json."""
+        info_path = os.path.join(folder, "model_info.json")
+        try:
+            with open(info_path, 'w', encoding='utf-8') as f:
+                json.dump(model_info, f, indent=2)
+            logger.info(f"[OK] saved model_info.json => {info_path}")
+        except Exception as e:
+            logger.error(f"[ERR] saving model_info.json: {e}")
+
+    def process_model(self, model_id: str):
+        """Main flow: download everything into one folder => encrypt => upload."""
+        try:
+            info = self.get_model_info(model_id)
+            if not info or "modelVersions" not in info:
+                logger.error(f"[ERR] no modelVersions for {model_id}")
+                return
+            versions = info["modelVersions"]
+            folder_name = re.sub(r'[\\/*?:"<>|]', '_', info.get("name","UnknownModel"))
+            # To base this on a local_folder instead, do:
+            # local_folder = "local_folder"
+            # os.makedirs(local_folder, exist_ok=True)
+            # folder_path = os.path.join(local_folder, folder_name)
+            # For now, create folder_name directly under the app root
+            folder_path = folder_name
+
+            if os.path.exists(folder_path):
+                shutil.rmtree(folder_path)
+            os.makedirs(folder_path, exist_ok=True)
+            logger.info(f"[OK] created folder => {folder_path}")
+
+            # download
+            self.download_and_process_versions(versions, folder_path)
+
+            # images
+            self.download_images(versions, folder_path)
+
+            # model_info.json
+            self.get_model_info_json(info, folder_path)
+
+            # encrypted upload
+            enc_folder = self.encrypt_and_upload_folder(folder_path)
+            if enc_folder is None:
+                enc_folder = "[ENCRYPT_FAILED]"
+            # (Optional) append to model_list.log, etc.
+        except Exception as e:
+            logger.error(f"Error in process_model({model_id}): {e}")
 
     async def crawl(self):
         while True:
             try:
-                # HF Login
                 login(token=self.config.HUGGINGFACE_API_KEY, add_to_git_credential=True)
-
-                # model_list.log
-                mlist_path = hf_hub_download(self.repo_ids["model_list"], self.config.LIST_FILE)
+                # download model_list and log_file
+                mlist_path = hf_hub_download(repo_id=self.repo_ids["model_list"], filename=self.config.LIST_FILE)
                 shutil.copyfile(mlist_path, f"./{self.config.LIST_FILE}")
 
-                # log_file
-                log_path = hf_hub_download(self.repo_ids["log"], self.config.LOG_FILE)
-                shutil.copyfile(log_path, f"./{self.config.LOG_FILE}")
+                lfile_path = hf_hub_download(repo_id=self.repo_ids["log"], filename=self.config.LOG_FILE)
+                shutil.copyfile(lfile_path, f"./{self.config.LOG_FILE}")
 
                 # read logs
-                with open(self.config.LOG_FILE, "r", encoding="utf-8") as f:
+                with open(self.config.LOG_FILE, 'r', encoding='utf-8') as f:
                     lines = f.read().splitlines()
                 old_models = json.loads(lines[0]) if len(lines)>0 else []
                 self.repo_ids["current"] = lines[1] if len(lines)>1 else ""
@@ -260,43 +274,44 @@
                 r = requests.get(self.config.URLS["latest"], headers=self.config.HEADERS)
                 r.raise_for_status()
                 items = r.json().get("items", [])
-                ids = [it["id"] for it in items if "id" in it]
+                new_ids = [it["id"] for it in items if "id" in it]
 
-                new_ids = list(set(ids)-set(old_models))
-                if new_ids:
-                    mid = new_ids[0]
+                # diff
+                diff_ids = list(set(new_ids) - set(old_models))
+                if diff_ids:
+                    mid = diff_ids[0]
                     for attempt in range(1,6):
                         try:
-                            self.process_model(str(mid))
+                            self.process_model(str(mid))  # pass the model ID as a str
                             break
                         except Exception as e:
-                            logger.error(f"[ERR] process_model {mid}, attempt {attempt}: {e}")
+                            logger.error(f"[ERR] process_model {mid} (attempt {attempt}): {e}")
                             if attempt==5:
-                                logger.error("Skipping model after 5 fails")
+                                logger.error(f"[SKIP] model {mid} after 5 fails")
                             else:
                                 await asyncio.sleep(2)
-                    # update logs
+
                     old_models.append(mid)
-                    with open(self.config.LOG_FILE, "w", encoding="utf-8") as f:
+                    with open(self.config.LOG_FILE,'w',encoding='utf-8') as f:
                         f.write(json.dumps(old_models)+"\n")
                         f.write(self.repo_ids["current"]+"\n")
 
+                    # upload logs
                    self.upload_file(self.config.LOG_FILE, self.repo_ids["log"], self.config.LOG_FILE)
                    self.upload_file(self.config.LIST_FILE, self.repo_ids["model_list"], self.config.LIST_FILE)
                 else:
-                    # no new
-                    with open(self.config.LOG_FILE,"w",encoding="utf-8") as f:
-                        f.write(json.dumps(ids)+"\n")
+                    with open(self.config.LOG_FILE,'w',encoding='utf-8') as f:
+                        f.write(json.dumps(new_ids)+"\n")
                         f.write(self.repo_ids["current"]+"\n")
                     self.upload_file(self.config.LOG_FILE, self.repo_ids["log"], self.config.LOG_FILE)
-                    logger.info("No new models => wait 60s")
+                    logger.info("[INFO] no new models => sleep(60)")
                     await asyncio.sleep(60)
                     continue
             except Exception as e:
-                logger.error(f"[ERR] crawl loop => {e}")
+                logger.error(f"[ERR] crawl => {e}")
                 await asyncio.sleep(300)
 
-# === FastAPI
+# FastAPI
 config = Config()
 crawler = CivitAICrawler(config)
 app = crawler.app
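
Note: crawl() wraps process_model() in a five-attempt retry with a 2-second pause between attempts; after the fifth failure the model is skipped, and its ID is still appended to the processed list. The same pattern factored into a standalone sketch (the helper name is invented for illustration):

import asyncio
import logging

logger = logging.getLogger(__name__)

async def try_process(process_fn, mid: str, attempts: int = 5) -> bool:
    for attempt in range(1, attempts + 1):
        try:
            process_fn(mid)       # e.g. crawler.process_model
            return True
        except Exception as e:
            logger.error(f"[ERR] process_model {mid} (attempt {attempt}): {e}")
            if attempt == attempts:
                logger.error(f"[SKIP] model {mid} after {attempts} fails")
            else:
                await asyncio.sleep(2)  # brief pause before retrying
    return False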
 