Upload main.py
Browse files
main.py
CHANGED
@@ -128,11 +128,8 @@ class CivitAICrawler:
|
|
128 |
f"[ERROR] {self.config.ENCRYPTED_DIR} not found. Check your rclone config."
|
129 |
)
|
130 |
|
|
|
131 |
def upload_encrypted_files(self, repo_id: str, base_path_in_repo: str = ""):
|
132 |
-
"""
|
133 |
-
self.config.ENCRYPTED_DIR 以下に生成された暗号化後のファイル・フォルダ構造を再帰的に
|
134 |
-
Hugging Face にアップロードする。フォルダ名・ファイル名は rclone により暗号化済み。
|
135 |
-
"""
|
136 |
max_retries = 5
|
137 |
for root, dirs, files in os.walk(self.config.ENCRYPTED_DIR):
|
138 |
for fn in files:
|
@@ -153,9 +150,37 @@ class CivitAICrawler:
|
|
153 |
)
|
154 |
logger.info(f"[OK] Uploaded => {repo_id}/{upload_path_in_repo}")
|
155 |
break
|
|
|
156 |
except Exception as e:
|
157 |
attempt += 1
|
158 |
error_message = str(e)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
159 |
if "over the limit of 100000 files" in error_message:
|
160 |
logger.warning("Repository file limit exceeded. Creating a new repository...")
|
161 |
self.repo_ids['current'] = self.increment_repo_name(self.repo_ids['current'])
|
@@ -163,20 +188,17 @@ class CivitAICrawler:
|
|
163 |
attempt = 0
|
164 |
repo_id = self.repo_ids['current']
|
165 |
continue
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
|
|
|
|
170 |
else:
|
171 |
-
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
else:
|
176 |
-
logger.error(
|
177 |
-
f"Failed to upload after {max_retries} attempts: {encrypted_file_path}"
|
178 |
-
)
|
179 |
-
raise
|
180 |
|
181 |
@staticmethod
|
182 |
def get_filename_from_cd(content_disposition: Optional[str], default_name: str) -> str:
|
|
|
128 |
f"[ERROR] {self.config.ENCRYPTED_DIR} not found. Check your rclone config."
|
129 |
)
|
130 |
|
131 |
+
# 例: upload_encrypted_files の中の再試行処理
|
132 |
def upload_encrypted_files(self, repo_id: str, base_path_in_repo: str = ""):
|
|
|
|
|
|
|
|
|
133 |
max_retries = 5
|
134 |
for root, dirs, files in os.walk(self.config.ENCRYPTED_DIR):
|
135 |
for fn in files:
|
|
|
150 |
)
|
151 |
logger.info(f"[OK] Uploaded => {repo_id}/{upload_path_in_repo}")
|
152 |
break
|
153 |
+
|
154 |
except Exception as e:
|
155 |
attempt += 1
|
156 |
error_message = str(e)
|
157 |
+
|
158 |
+
# ================================
|
159 |
+
# 429によるrate-limit検出追加
|
160 |
+
# ================================
|
161 |
+
# "You have been rate-limited; you can retry this action in 31 minutes."
|
162 |
+
# のようなメッセージから時間を抽出し、その時間+1分だけ待機後、再試行
|
163 |
+
if "rate-limited" in error_message and "minutes" in error_message:
|
164 |
+
import re
|
165 |
+
match = re.search(r"in (\d+) minutes?", error_message)
|
166 |
+
if match:
|
167 |
+
minutes = int(match.group(1))
|
168 |
+
# +1分して待機
|
169 |
+
minutes += 1
|
170 |
+
logger.warning(f"Rate-limited. Waiting {minutes} minutes before retry...")
|
171 |
+
time.sleep(minutes * 60)
|
172 |
+
attempt -= 1 # 同じ attempt カウントで再試行
|
173 |
+
continue
|
174 |
+
|
175 |
+
# ================================
|
176 |
+
# すでにある1時間待機処理
|
177 |
+
# ================================
|
178 |
+
if "you can retry this action in about 1 hour" in error_message:
|
179 |
+
logger.warning("Encountered 'retry in 1 hour' error. Waiting 1 hour before retrying...")
|
180 |
+
time.sleep(3600)
|
181 |
+
attempt -= 1 # 再試行回数を増やさずにループを続ける
|
182 |
+
continue
|
183 |
+
|
184 |
if "over the limit of 100000 files" in error_message:
|
185 |
logger.warning("Repository file limit exceeded. Creating a new repository...")
|
186 |
self.repo_ids['current'] = self.increment_repo_name(self.repo_ids['current'])
|
|
|
188 |
attempt = 0
|
189 |
repo_id = self.repo_ids['current']
|
190 |
continue
|
191 |
+
|
192 |
+
# 上記以外のエラーの場合
|
193 |
+
if attempt < max_retries:
|
194 |
+
logger.warning(
|
195 |
+
f"Failed to upload {encrypted_file_path}, retry {attempt}/{max_retries}..."
|
196 |
+
)
|
197 |
else:
|
198 |
+
logger.error(
|
199 |
+
f"Failed to upload after {max_retries} attempts: {encrypted_file_path}"
|
200 |
+
)
|
201 |
+
raise
|
|
|
|
|
|
|
|
|
|
|
202 |
|
203 |
@staticmethod
|
204 |
def get_filename_from_cd(content_disposition: Optional[str], default_name: str) -> str:
|