jbilcke-hf HF Staff committed on
Commit
347756a
·
1 Parent(s): 5a793ee
Files changed (1) hide show
  1. vms/utils/finetrainers_utils.py +14 -6
vms/utils/finetrainers_utils.py CHANGED
@@ -136,7 +136,7 @@ def copy_files_to_training_dir(prompt_prefix: str, training_videos_path=None) ->
136
  Number of copied pairs
137
  """
138
 
139
- gr.Info("Copying assets to the training dataset..")
140
 
141
  # Get project ID from global config
142
  config = load_global_config()
@@ -162,8 +162,17 @@ def copy_files_to_training_dir(prompt_prefix: str, training_videos_path=None) ->
162
  all_files = video_files + image_files
163
 
164
  nb_copied_pairs = 0
 
165
 
166
  for file_path in all_files:
 
 
 
 
 
 
 
 
167
 
168
  caption = ""
169
  file_caption_path = file_path.with_suffix('.txt')
@@ -181,10 +190,6 @@ def copy_files_to_training_dir(prompt_prefix: str, training_videos_path=None) ->
181
  logger.debug(f"Found parent caption file: {parent_caption_path}")
182
  parent_caption = parent_caption_path.read_text().strip()
183
 
184
- target_file_path = training_videos_path / file_path.name
185
-
186
- target_caption_path = target_file_path.with_suffix('.txt')
187
-
188
  if parent_caption and not caption.endswith(parent_caption):
189
  caption = f"{caption}\n{parent_caption}"
190
 
@@ -213,7 +218,10 @@ def copy_files_to_training_dir(prompt_prefix: str, training_videos_path=None) ->
213
  training_path, _, _, _ = get_project_paths(project_id)
214
  prepare_finetrainers_dataset(training_path, training_videos_path)
215
 
216
- gr.Info(f"Successfully generated the training dataset ({nb_copied_pairs} pairs)")
 
 
 
217
 
218
  return nb_copied_pairs
219
 
 
136
  Number of copied pairs
137
  """
138
 
139
+ gr.Info("Copying new assets to the training dataset..")
140
 
141
  # Get project ID from global config
142
  config = load_global_config()
 
162
  all_files = video_files + image_files
163
 
164
  nb_copied_pairs = 0
165
+ nb_skipped_pairs = 0
166
 
167
  for file_path in all_files:
168
+ target_file_path = training_videos_path / file_path.name
169
+ target_caption_path = target_file_path.with_suffix('.txt')
170
+
171
+ # Skip if both file and caption already exist in training directory
172
+ if target_file_path.exists() and target_caption_path.exists():
173
+ logger.debug(f"Skipping {file_path.name} - already exists in training directory")
174
+ nb_skipped_pairs += 1
175
+ continue
176
 
177
  caption = ""
178
  file_caption_path = file_path.with_suffix('.txt')
 
190
  logger.debug(f"Found parent caption file: {parent_caption_path}")
191
  parent_caption = parent_caption_path.read_text().strip()
192
 
 
 
 
 
193
  if parent_caption and not caption.endswith(parent_caption):
194
  caption = f"{caption}\n{parent_caption}"
195
 
 
218
  training_path, _, _, _ = get_project_paths(project_id)
219
  prepare_finetrainers_dataset(training_path, training_videos_path)
220
 
221
+ if nb_skipped_pairs > 0:
222
+ gr.Info(f"Successfully updated the training dataset ({nb_copied_pairs} new pairs, {nb_skipped_pairs} already existed)")
223
+ else:
224
+ gr.Info(f"Successfully generated the training dataset ({nb_copied_pairs} pairs)")
225
 
226
  return nb_copied_pairs
227