ttttdiva committed on
Commit
a7e29db
·
verified ·
1 Parent(s): 5546557

Upload main.py

Browse files
Files changed (1) hide show
  1. main.py +39 -17
main.py CHANGED
@@ -128,11 +128,8 @@ class CivitAICrawler:
128
  f"[ERROR] {self.config.ENCRYPTED_DIR} not found. Check your rclone config."
129
  )
130
 
 
131
  def upload_encrypted_files(self, repo_id: str, base_path_in_repo: str = ""):
132
- """
133
- self.config.ENCRYPTED_DIR 以下に生成された暗号化後のファイル・フォルダ構造を再帰的に
134
- Hugging Face にアップロードする。フォルダ名・ファイル名は rclone により暗号化済み。
135
- """
136
  max_retries = 5
137
  for root, dirs, files in os.walk(self.config.ENCRYPTED_DIR):
138
  for fn in files:
@@ -153,9 +150,37 @@ class CivitAICrawler:
153
  )
154
  logger.info(f"[OK] Uploaded => {repo_id}/{upload_path_in_repo}")
155
  break
 
156
  except Exception as e:
157
  attempt += 1
158
  error_message = str(e)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  if "over the limit of 100000 files" in error_message:
160
  logger.warning("Repository file limit exceeded. Creating a new repository...")
161
  self.repo_ids['current'] = self.increment_repo_name(self.repo_ids['current'])
@@ -163,20 +188,17 @@ class CivitAICrawler:
163
  attempt = 0
164
  repo_id = self.repo_ids['current']
165
  continue
166
- elif "you can retry this action in about 1 hour" in error_message:
167
- logger.warning("Encountered 'retry in 1 hour' error. Waiting 1 hour before retrying...")
168
- time.sleep(3600)
169
- attempt -= 1
 
 
170
  else:
171
- if attempt < max_retries:
172
- logger.warning(
173
- f"Failed to upload {encrypted_file_path}, retry {attempt}/{max_retries}..."
174
- )
175
- else:
176
- logger.error(
177
- f"Failed to upload after {max_retries} attempts: {encrypted_file_path}"
178
- )
179
- raise
180
 
181
  @staticmethod
182
  def get_filename_from_cd(content_disposition: Optional[str], default_name: str) -> str:
 
128
  f"[ERROR] {self.config.ENCRYPTED_DIR} not found. Check your rclone config."
129
  )
130
 
131
+ # 例: upload_encrypted_files の中の再試行処理
132
  def upload_encrypted_files(self, repo_id: str, base_path_in_repo: str = ""):
 
 
 
 
133
  max_retries = 5
134
  for root, dirs, files in os.walk(self.config.ENCRYPTED_DIR):
135
  for fn in files:
 
150
  )
151
  logger.info(f"[OK] Uploaded => {repo_id}/{upload_path_in_repo}")
152
  break
153
+
154
  except Exception as e:
155
  attempt += 1
156
  error_message = str(e)
157
+
158
+ # ================================
159
+ # 429によるrate-limit検出追加
160
+ # ================================
161
+ # "You have been rate-limited; you can retry this action in 31 minutes."
162
+ # のようなメッセージから時間を抽出し、その時間+1分だけ待機後、再試行
163
+ if "rate-limited" in error_message and "minutes" in error_message:
164
+ import re
165
+ match = re.search(r"in (\d+) minutes?", error_message)
166
+ if match:
167
+ minutes = int(match.group(1))
168
+ # +1分して待機
169
+ minutes += 1
170
+ logger.warning(f"Rate-limited. Waiting {minutes} minutes before retry...")
171
+ time.sleep(minutes * 60)
172
+ attempt -= 1 # 同じ attempt カウントで再試行
173
+ continue
174
+
175
+ # ================================
176
+ # すでにある1時間待機処理
177
+ # ================================
178
+ if "you can retry this action in about 1 hour" in error_message:
179
+ logger.warning("Encountered 'retry in 1 hour' error. Waiting 1 hour before retrying...")
180
+ time.sleep(3600)
181
+ attempt -= 1 # 再試行回数を増やさずにループを続ける
182
+ continue
183
+
184
  if "over the limit of 100000 files" in error_message:
185
  logger.warning("Repository file limit exceeded. Creating a new repository...")
186
  self.repo_ids['current'] = self.increment_repo_name(self.repo_ids['current'])
 
188
  attempt = 0
189
  repo_id = self.repo_ids['current']
190
  continue
191
+
192
+ # 上記以外のエラーの場合
193
+ if attempt < max_retries:
194
+ logger.warning(
195
+ f"Failed to upload {encrypted_file_path}, retry {attempt}/{max_retries}..."
196
+ )
197
  else:
198
+ logger.error(
199
+ f"Failed to upload after {max_retries} attempts: {encrypted_file_path}"
200
+ )
201
+ raise
 
 
 
 
 
202
 
203
  @staticmethod
204
  def get_filename_from_cd(content_disposition: Optional[str], default_name: str) -> str: