ttttdiva committed on
Commit
0261398
·
verified ·
1 Parent(s): a890d5f

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +281 -133
main.py CHANGED
@@ -1,142 +1,290 @@
 
 
 
 
1
  import os
2
- import subprocess
3
  import shutil
 
 
4
  import uuid
5
- import base64
6
- import requests
7
 
8
- from huggingface_hub import HfApi, login
 
 
 
 
9
 
10
- # ログ出力用
11
- import logging
12
  logging.basicConfig(level=logging.INFO)
13
  logger = logging.getLogger(__name__)
14
 
15
- # === 環境変数 ===
16
- HUGGINGFACE_API_KEY = os.environ.get("HUGGINGFACE_API_KEY", "")
17
- RCLONE_CONF_BASE64 = os.environ.get("RCLONE_CONF_BASE64", "")
18
- REPO_ID = os.environ.get("REPO_ID", "username/testrepo")
19
- # あなたのアップロード先リポジトリ (例: "ttttdiva/CivitAI_Auto12")
20
-
21
- def setup_rclone_conf():
22
- """RCLONE_CONF_BASE64 から rclone.conf を復元"""
23
- if not RCLONE_CONF_BASE64:
24
- logger.warning("[WARN] RCLONE_CONF_BASE64 not set. rclone may fail.")
25
- return
26
- conf_dir = ".rclone_config"
27
- os.makedirs(conf_dir, exist_ok=True)
28
- conf_path = os.path.join(conf_dir, "rclone.conf")
29
- with open(conf_path, "wb") as f:
30
- f.write(base64.b64decode(RCLONE_CONF_BASE64))
31
- os.environ["RCLONE_CONFIG"] = conf_path
32
- logger.info(f"[INFO] rclone.conf created => {conf_path}")
33
-
34
- def download_file(url: str, dest_folder: str, filename: str):
35
- """URL をダウンロードして dest_folder/filename に保存"""
36
- os.makedirs(dest_folder, exist_ok=True)
37
- try:
38
- r = requests.get(url, stream=True)
39
- r.raise_for_status()
40
- filepath = os.path.join(dest_folder, filename)
41
- with open(filepath, 'wb') as f:
42
- for chunk in r.iter_content(chunk_size=8192):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  f.write(chunk)
44
- logger.info(f"[OK] Downloaded: {filepath}")
45
- return filepath
46
- except Exception as e:
47
- logger.error(f"[ERR] download_file failed: {e}")
48
- return None
49
-
50
- def encrypt_and_upload_folder(local_folder: str):
51
- """
52
- 1) rclone copy local_folder => cryptLocal:
53
- => /home/user/app/encrypted/<暗号フォルダ>
54
- 2) その暗号フォルダを Hugging Face へアップロード
55
- 3) ローカル削除
56
- """
57
- if not os.path.isdir(local_folder):
58
- logger.error(f"[ERR] {local_folder} is not a directory.")
59
- return
60
-
61
- encrypted_dir = os.path.join(os.getcwd(), "encrypted")
62
- os.makedirs(encrypted_dir, exist_ok=True)
63
-
64
- # 差分検知
65
- before = set(os.listdir(encrypted_dir))
66
-
67
- # rclone copy
68
- try:
69
- subprocess.run(["rclone", "copy", local_folder, "cryptLocal:", "--create-empty-src-dirs"], check=True)
70
- logger.info(f"[OK] rclone copy {local_folder} => cryptLocal:")
71
- except subprocess.CalledProcessError as e:
72
- logger.error(f"[ERR] rclone copy failed: {e}")
73
- return
74
-
75
- after = set(os.listdir(encrypted_dir))
76
- diff = after - before
77
- if not diff:
78
- logger.error("[ERR] No new folder in ./encrypted after rclone copy.")
79
- return
80
- if len(diff) > 1:
81
- logger.warning(f"[WARN] multiple new folders? {diff}")
82
- enc_folder_name = diff.pop()
83
- enc_folder_path = os.path.join(encrypted_dir, enc_folder_name)
84
- logger.info(f"[DEBUG] enc_folder_path => {enc_folder_path}")
85
-
86
- if not os.path.isdir(enc_folder_path):
87
- logger.error(f"[ERR] {enc_folder_path} is not a directory.")
88
- return
89
-
90
- # Hugging Face にアップロード (フォルダ)
91
- try:
92
- api = HfApi()
93
- # subfolder_label = enc_folder_name # そのまま
94
- subfolder_label = enc_folder_name # そのまま
95
-
96
- api.upload_folder(
97
- folder_path=enc_folder_path,
98
- repo_id=REPO_ID,
99
- path_in_repo=subfolder_label
100
- )
101
- logger.info(f"[OK] uploaded folder => {enc_folder_path} to {REPO_ID}:{subfolder_label}")
102
- except Exception as e:
103
- logger.error(f"[ERR] upload_folder failed: {e}")
104
-
105
- # cleanup
106
- shutil.rmtree(local_folder, ignore_errors=True)
107
- shutil.rmtree(enc_folder_path, ignore_errors=True)
108
- logger.info(f"[CLEANUP] removed {local_folder} & {enc_folder_path}")
109
-
110
- def main():
111
- logger.info("===== Starting minimal test =====")
112
-
113
- # 1) rclone.conf 復元
114
- setup_rclone_conf()
115
-
116
- # 2) Hugging Face login
117
- if not HUGGINGFACE_API_KEY:
118
- logger.error("[ERR] HUGGINGFACE_API_KEY not set.")
119
- return
120
- login(token=HUGGINGFACE_API_KEY, add_to_git_credential=True)
121
- logger.info("[OK] HF login success")
122
-
123
- # 3) ローカルにフォルダ + ファイル
124
- test_folder = "MyTestModel"
125
- if os.path.exists(test_folder):
126
- shutil.rmtree(test_folder)
127
- os.makedirs(test_folder, exist_ok=True)
128
-
129
- # ダウンロード例 (画像2つ + info.json 1つ)
130
- download_file("https://picsum.photos/200/300", test_folder, "image1.jpg")
131
- download_file("https://picsum.photos/300/300", test_folder, "image2.jpg")
132
-
133
- info_path = os.path.join(test_folder, "model_info.json")
134
- with open(info_path, 'w', encoding='utf-8') as f:
135
- f.write('{"model":"test","desc":"some description"}')
136
- logger.info(f"[OK] Created {info_path}")
137
-
138
- # 4) 暗号化してアップロード
139
- encrypt_and_upload_folder(test_folder)
140
-
141
- if __name__ == "__main__":
142
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import datetime
3
+ import json
4
+ import logging
5
  import os
6
+ import re
7
  import shutil
8
+ import subprocess
9
+ import time
10
  import uuid
11
+ from typing import Optional
 
12
 
13
+ import requests
14
+ from bs4 import BeautifulSoup
15
+ from fake_useragent import UserAgent
16
+ from fastapi import FastAPI
17
+ from huggingface_hub import HfApi, hf_hub_download, login
18
 
 
 
19
  logging.basicConfig(level=logging.INFO)
20
  logger = logging.getLogger(__name__)
21
 
22
+ class Config:
23
+ HUGGINGFACE_API_KEY = os.environ["HUGGINGFACE_API_KEY"]
24
+ CIVITAI_API_TOKEN = os.environ["CIVITAI_API_TOKEN"]
25
+ LOG_FILE = "civitai_backup.log"
26
+ LIST_FILE = "model_list.log"
27
+ REPO_IDS = {
28
+ "log": "ttttdiva/CivitAI_log_test",
29
+ "model_list": "ttttdiva/CivitAI_model_info_test",
30
+ "current": ""
31
+ }
32
+ URLS = {
33
+ "latest": "https://civitai.com/api/v1/models?sort=Newest",
34
+ "modelPage": "https://civitai.com/models/",
35
+ "modelId": "https://civitai.com/api/v1/models/",
36
+ "modelVersionId": "https://civitai.com/api/v1/model-versions/",
37
+ "hash": "https://civitai.com/api/v1/model-versions/by-hash/"
38
+ }
39
+ JST = datetime.timezone(datetime.timedelta(hours=9))
40
+ UA = UserAgent()
41
+ HEADERS = {
42
+ 'Authorization': f'Bearer {CIVITAI_API_TOKEN}',
43
+ 'User-Agent': UA.random,
44
+ "Content-Type": "application/json"
45
+ }
46
+
47
+ class CivitAICrawler:
48
+ def __init__(self, config: Config):
49
+ self.config = config
50
+ self.api = HfApi()
51
+ self.app = FastAPI()
52
+ self.repo_ids = self.config.REPO_IDS.copy()
53
+ self.jst = self.config.JST
54
+ self.setup_rclone_conf()
55
+ self.setup_routes()
56
+
57
+ def setup_rclone_conf(self):
58
+ # RCLONE_CONF_BASE64→rclone.conf復元
59
+ import base64
60
+ rclone_b64 = os.environ.get("RCLONE_CONF_BASE64", "")
61
+ if not rclone_b64:
62
+ logger.warning("[WARN] RCLONE_CONF_BASE64 is empty, rclone may fail.")
63
+ return
64
+ conf_dir = ".rclone_config"
65
+ os.makedirs(conf_dir, exist_ok=True)
66
+ conf_path = os.path.join(conf_dir, "rclone.conf")
67
+ with open(conf_path, "wb") as f:
68
+ f.write(base64.b64decode(rclone_b64))
69
+ os.environ["RCLONE_CONFIG"] = conf_path
70
+ logger.info(f"[OK] rclone.conf => {conf_path}")
71
+
72
+ def setup_routes(self):
73
+ @self.app.get("/")
74
+ def read_root():
75
+ now = str(datetime.datetime.now(self.jst))
76
+ return {
77
+ "description": f"CivitAI crawler. Current time: {now}",
78
+ "repo_current": self.repo_ids["current"],
79
+ "note": "Startup event => self.crawl() loop"
80
+ }
81
+
82
+ @self.app.on_event("startup")
83
+ async def startup_event():
84
+ asyncio.create_task(self.crawl())
85
+
86
+ def download_file(self, url: str, destination_folder: str, default_name: str) -> Optional[str]:
87
+ os.makedirs(destination_folder, exist_ok=True)
88
+ try:
89
+ resp = requests.get(url, headers=self.config.HEADERS, stream=True)
90
+ resp.raise_for_status()
91
+ except requests.RequestException as e:
92
+ logger.error(f"[ERR] download_file fail: {e}")
93
+ return None
94
+
95
+ file_path = os.path.join(destination_folder, default_name)
96
+ with open(file_path, 'wb') as f:
97
+ for chunk in resp.iter_content(chunk_size=8192):
98
  f.write(chunk)
99
+ logger.info(f"[OK] Downloaded: {file_path}")
100
+ return file_path
101
+
102
+ def get_filename_from_cd(self, cd: Optional[str], default_name: str) -> str:
103
+ if cd:
104
+ parts = cd.split(';')
105
+ for p in parts:
106
+ if "filename=" in p:
107
+ return p.split("=")[1].strip().strip('"')
108
+ return default_name
109
+
110
+ def get_model_info(self, model_id: str) -> dict:
111
+ try:
112
+ r = requests.get(f"{self.config.URLS['modelId']}{model_id}", headers=self.config.HEADERS)
113
+ r.raise_for_status()
114
+ return r.json()
115
+ except Exception as e:
116
+ logger.error(f"[ERR] get_model_info({model_id}): {e}")
117
+ return {}
118
+
119
+ def encrypt_and_upload_folder(self, local_folder: str) -> Optional[str]:
120
+ """
121
+ rclone copy local_folder => cryptLocal:
122
+ => diff => upload_folder => cleanup
123
+ """
124
+ if not os.path.isdir(local_folder):
125
+ logger.error(f"[ERR] {local_folder} is not a directory.")
126
+ return None
127
+ encrypted_dir = os.path.join(os.getcwd(), "encrypted")
128
+ os.makedirs(encrypted_dir, exist_ok=True)
129
+
130
+ before = set(os.listdir(encrypted_dir))
131
+
132
+ try:
133
+ subprocess.run(["rclone", "copy", local_folder, "cryptLocal:", "--create-empty-src-dirs"], check=True)
134
+ logger.info(f"[OK] rclone copy {local_folder} => cryptLocal:")
135
+ except subprocess.CalledProcessError as e:
136
+ logger.error(f"[ERR] rclone copy failed: {e}")
137
+ return None
138
+
139
+ after = set(os.listdir(encrypted_dir))
140
+ diff = after - before
141
+ if not diff:
142
+ logger.error("[ERR] No new dir in ./encrypted after rclone copy.")
143
+ return None
144
+ if len(diff) > 1:
145
+ logger.warning(f"[WARN] multiple new dirs => {diff}")
146
+ enc_folder_name = diff.pop()
147
+ enc_folder_path = os.path.join(encrypted_dir, enc_folder_name)
148
+
149
+ if not os.path.isdir(enc_folder_path):
150
+ logger.error(f"[ERR] {enc_folder_path} is not a directory.")
151
+ return None
152
+
153
+ # upload_folder to HF
154
+ try:
155
+ self.api.upload_folder(
156
+ folder_path=enc_folder_path,
157
+ repo_id=self.repo_ids["current"],
158
+ path_in_repo=enc_folder_name
159
+ )
160
+ logger.info(f"[OK] uploaded {enc_folder_path} => {self.repo_ids['current']}:{enc_folder_name}")
161
+ except Exception as e:
162
+ logger.error(f"[ERR] HF upload_folder fail: {e}")
163
+
164
+ # cleanup local
165
+ shutil.rmtree(local_folder, ignore_errors=True)
166
+ shutil.rmtree(enc_folder_path, ignore_errors=True)
167
+ logger.info(f"[CLEANUP] removed {local_folder} & {enc_folder_path}")
168
+
169
+ return enc_folder_name
170
+
171
+ def process_model(self, model_id: str):
172
+ # ダウンロード + 暗号化 + アップ
173
+ try:
174
+ minfo = self.get_model_info(model_id)
175
+ if not minfo or "modelVersions" not in minfo:
176
+ logger.error(f"[ERR] No modelVersions for {model_id}")
177
+ return
178
+
179
+ versions = minfo["modelVersions"]
180
+ # local folder
181
+ folder_name = re.sub(r'[\\/*?:"<>|]', '_', minfo.get("name", "UnknownModel"))
182
+ folder_name += f"_{uuid.uuid4()[:8]}"
183
+ os.makedirs(folder_name, exist_ok=True)
184
+
185
+ # ダウンロード (最新/古い) => まとめて folder_name
186
+ # 画像 => folder_name/images
187
+ # etc...
188
+ self.download_versions(versions, folder_name)
189
+ self.download_images(versions, folder_name)
190
+ # ここでHTMLや model_info.json 保存もやるなら追加:
191
+ # ...
192
+ enc_f = self.encrypt_and_upload_folder(folder_name)
193
+ # => HF upload + cleanup
194
+
195
+ except Exception as e:
196
+ logger.error(f"[ERR] process_model({model_id}) => {e}")
197
+
198
+ def download_versions(self, model_versions: list, folder: str):
199
+ # すべて folder/ にまとめる or old_versions subfolder
200
+ # 例: 最新 => folder, old => folder/old_versions
201
+ if not model_versions:
202
+ return
203
+ latest = model_versions[0]
204
+ for f_info in latest.get("files", []):
205
+ url = f_info["downloadUrl"]
206
+ fname = f_info["name"]
207
+ self.download_file(url, folder, fname)
208
+
209
+ if len(model_versions) > 1:
210
+ ov_folder = os.path.join(folder, "old_versions")
211
+ os.makedirs(ov_folder, exist_ok=True)
212
+ for v in model_versions[1:]:
213
+ for f_info in v.get("files", []):
214
+ url = f_info["downloadUrl"]
215
+ fname = f_info["name"]
216
+ self.download_file(url, ov_folder, fname)
217
+
218
+ def download_images(self, model_versions: list, folder: str):
219
+ images_folder = os.path.join(folder, "images")
220
+ os.makedirs(images_folder, exist_ok=True)
221
+ for v in model_versions:
222
+ for im in v.get("images", []):
223
+ iurl = im["url"]
224
+ iname = os.path.basename(iurl)
225
+ self.download_file(iurl, images_folder, iname)
226
+
227
+ async def crawl(self):
228
+ while True:
229
+ try:
230
+ # HF Login
231
+ login(token=self.config.HUGGINGFACE_API_KEY, add_to_git_credential=True)
232
+
233
+ # model_list.log
234
+ mlist_path = hf_hub_download(self.repo_ids["model_list"], self.config.LIST_FILE)
235
+ shutil.copyfile(mlist_path, f"./{self.config.LIST_FILE}")
236
+
237
+ # log_file
238
+ log_path = hf_hub_download(self.repo_ids["log"], self.config.LOG_FILE)
239
+ shutil.copyfile(log_path, f"./{self.config.LOG_FILE}")
240
+
241
+ # read logs
242
+ with open(self.config.LOG_FILE, "r", encoding="utf-8") as f:
243
+ lines = f.read().splitlines()
244
+ old_models = json.loads(lines[0]) if len(lines)>0 else []
245
+ self.repo_ids["current"] = lines[1] if len(lines)>1 else ""
246
+
247
+ # get newest
248
+ r = requests.get(self.config.URLS["latest"], headers=self.config.HEADERS)
249
+ r.raise_for_status()
250
+ items = r.json().get("items", [])
251
+ ids = [it["id"] for it in items if "id" in it]
252
+
253
+ new_ids = list(set(ids)-set(old_models))
254
+ if new_ids:
255
+ mid = new_ids[0]
256
+ for attempt in range(1,6):
257
+ try:
258
+ self.process_model(str(mid))
259
+ break
260
+ except Exception as e:
261
+ logger.error(f"[ERR] process_model {mid}, attempt {attempt}: {e}")
262
+ if attempt==5:
263
+ logger.error("Skipping model after 5 fails")
264
+ else:
265
+ await asyncio.sleep(2)
266
+ # update logs
267
+ old_models.append(mid)
268
+ with open(self.config.LOG_FILE, "w", encoding="utf-8") as f:
269
+ f.write(json.dumps(old_models)+"\n")
270
+ f.write(self.repo_ids["current"]+"\n")
271
+
272
+ self.upload_file(self.config.LOG_FILE, self.repo_ids["log"], self.config.LOG_FILE)
273
+ self.upload_file(self.config.LIST_FILE, self.repo_ids["model_list"], self.config.LIST_FILE)
274
+ else:
275
+ # no new
276
+ with open(self.config.LOG_FILE,"w",encoding="utf-8") as f:
277
+ f.write(json.dumps(ids)+"\n")
278
+ f.write(self.repo_ids["current"]+"\n")
279
+ self.upload_file(self.config.LOG_FILE, self.repo_ids["log"], self.config.LOG_FILE)
280
+ logger.info("No new models => wait 60s")
281
+ await asyncio.sleep(60)
282
+ continue
283
+ except Exception as e:
284
+ logger.error(f"[ERR] crawl loop => {e}")
285
+ await asyncio.sleep(300)
286
+
287
+ # === FastAPI
288
+ config = Config()
289
+ crawler = CivitAICrawler(config)
290
+ app = crawler.app