Spaces:

jbilcke-hf
/

VideoModelStudio

Running

App Files Files Community

jbilcke-hf HF Staff committed on Mar 3

Commit

76eb17f

1 Parent(s): 222f539

working on adding WebDataset support

Browse files

Files changed (4) hide show

vms/services/importer.py +59 -10
vms/tabs/import_tab.py +6 -5
vms/utils/__init__.py +4 -0
vms/utils/webdataset_handler.py +143 -0

vms/services/importer.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import os
 import shutil
 import zipfile
 import tempfile
 import gradio as gr
 from pathlib import Path
@@ -8,17 +9,18 @@ from typing import List, Dict, Optional, Tuple
 from pytubefix import YouTube
 import logging
-from ..config import NORMALIZE_IMAGES_TO, TRAINING_VIDEOS_PATH, VIDEOS_TO_SPLIT_PATH, TRAINING_PATH, DEFAULT_PROMPT_PREFIX
 from ..utils import normalize_image, is_image_file, is_video_file, add_prefix_to_caption
 logger = logging.getLogger(__name__)
 class ImportService:
     def process_uploaded_files(self, file_paths: List[str]) -> str:
-        """Process uploaded file (ZIP, MP4, or image)
         Args:
-            file_paths: File paths to the ploaded files from Gradio
         Returns:
             Status message string
@@ -34,6 +36,8 @@ class ImportService:
                 if file_ext == '.zip':
                     return self.process_zip_file(file_path)
                 elif file_ext == '.mp4' or file_ext == '.webm':
                     return self.process_mp4_file(file_path, original_name)
                 elif is_image_file(file_path):
@@ -86,7 +90,7 @@ class ImportService:
             raise gr.Error(f"Error processing image file: {str(e)}")
     def process_zip_file(self, file_path: Path) -> str:
-        """Process uploaded ZIP file containing media files
         Args:
             file_path: Path to the uploaded ZIP file
@@ -97,6 +101,7 @@ class ImportService:
         try:
             video_count = 0
             image_count = 0
             # Create temporary directory
             with tempfile.TemporaryDirectory() as temp_dir:
@@ -115,7 +120,16 @@ class ImportService:
                         file_path = Path(root) / file
                         try:
-                            if is_video_file(file_path):
                                 # Copy video to videos_to_split
                                 target_path = VIDEOS_TO_SPLIT_PATH / file_path.name
                                 counter = 1
@@ -137,11 +151,13 @@ class ImportService:
                             # Copy associated caption file if it exists
                             txt_path = file_path.with_suffix('.txt')
-                            if txt_path.exists():
                                 if is_video_file(file_path):
                                     shutil.copy2(txt_path, target_path.with_suffix('.txt'))
                                 elif is_image_file(file_path):
-                                    shutil.copy2(txt_path, target_path.with_suffix('.txt'))
                         except Exception as e:
                             logger.error(f"Error processing {file_path.name}: {str(e)}")
@@ -149,21 +165,54 @@ class ImportService:
             # Generate status message
             parts = []
             if video_count > 0:
-                parts.append(f"{video_count} videos")
             if image_count > 0:
-                parts.append(f"{image_count} images")
             if not parts:
                 return "No supported media files found in ZIP"
-            status = f"Successfully stored {' and '.join(parts)}"
             gr.Info(status)
             return status
         except Exception as e:
             raise gr.Error(f"Error processing ZIP: {str(e)}")
     def process_mp4_file(self, file_path: Path, original_name: str) -> str:
         """Process a single video file

 import os
 import shutil
 import zipfile
+import tarfile
 import tempfile
 import gradio as gr
 from pathlib import Path
 from pytubefix import YouTube
 import logging
+from ..config import NORMALIZE_IMAGES_TO, TRAINING_VIDEOS_PATH, VIDEOS_TO_SPLIT_PATH, STAGING_PATH, DEFAULT_PROMPT_PREFIX
 from ..utils import normalize_image, is_image_file, is_video_file, add_prefix_to_caption
+from ..webdataset import webdataset_handler
 logger = logging.getLogger(__name__)
 class ImportService:
     def process_uploaded_files(self, file_paths: List[str]) -> str:
+        """Process uploaded file (ZIP, TAR, MP4, or image)
         Args:
+            file_paths: File paths to the uploaded files from Gradio
         Returns:
             Status message string
                 if file_ext == '.zip':
                     return self.process_zip_file(file_path)
+                elif file_ext == '.tar':
+                    return self.process_tar_file(file_path)
                 elif file_ext == '.mp4' or file_ext == '.webm':
                     return self.process_mp4_file(file_path, original_name)
                 elif is_image_file(file_path):
             raise gr.Error(f"Error processing image file: {str(e)}")
     def process_zip_file(self, file_path: Path) -> str:
+        """Process uploaded ZIP file containing media files or WebDataset tar files
         Args:
             file_path: Path to the uploaded ZIP file
         try:
             video_count = 0
             image_count = 0
+            tar_count = 0
             # Create temporary directory
             with tempfile.TemporaryDirectory() as temp_dir:
                         file_path = Path(root) / file
                         try:
+                            # Check if it's a WebDataset tar file
+                            if file.lower().endswith('.tar'):
+                                # Process WebDataset shard
+                                vid_count, img_count = webdataset_handler.process_webdataset_shard(
+                                    file_path, VIDEOS_TO_SPLIT_PATH, STAGING_PATH
+                                )
+                                video_count += vid_count
+                                image_count += img_count
+                                tar_count += 1
+                            elif is_video_file(file_path):
                                 # Copy video to videos_to_split
                                 target_path = VIDEOS_TO_SPLIT_PATH / file_path.name
                                 counter = 1
                             # Copy associated caption file if it exists
                             txt_path = file_path.with_suffix('.txt')
+                            if txt_path.exists() and not file.lower().endswith('.tar'):
                                 if is_video_file(file_path):
                                     shutil.copy2(txt_path, target_path.with_suffix('.txt'))
                                 elif is_image_file(file_path):
+                                    caption = txt_path.read_text()
+                                    caption = add_prefix_to_caption(caption, DEFAULT_PROMPT_PREFIX)
+                                    target_path.with_suffix('.txt').write_text(caption)
                         except Exception as e:
                             logger.error(f"Error processing {file_path.name}: {str(e)}")
             # Generate status message
             parts = []
+            if tar_count > 0:
+                parts.append(f"{tar_count} WebDataset shard{'s' if tar_count != 1 else ''}")
             if video_count > 0:
+                parts.append(f"{video_count} video{'s' if video_count != 1 else ''}")
             if image_count > 0:
+                parts.append(f"{image_count} image{'s' if image_count != 1 else ''}")
             if not parts:
                 return "No supported media files found in ZIP"
+            status = f"Successfully stored {', '.join(parts)}"
             gr.Info(status)
             return status
         except Exception as e:
             raise gr.Error(f"Error processing ZIP: {str(e)}")
+    def process_tar_file(self, file_path: Path) -> str:
+        """Process a WebDataset tar file
+        Args:
+            file_path: Path to the uploaded tar file
+        Returns:
+            Status message string
+        """
+        try:
+            video_count, image_count = webdataset_handler.process_webdataset_shard(
+                file_path, VIDEOS_TO_SPLIT_PATH, STAGING_PATH
+            )
+            # Generate status message
+            parts = []
+            if video_count > 0:
+                parts.append(f"{video_count} video{'s' if video_count != 1 else ''}")
+            if image_count > 0:
+                parts.append(f"{image_count} image{'s' if image_count != 1 else ''}")
+            if not parts:
+                return "No supported media files found in WebDataset"
+            status = f"Successfully extracted {' and '.join(parts)} from WebDataset"
+            gr.Info(status)
+            return status
+        except Exception as e:
+            raise gr.Error(f"Error processing WebDataset tar file: {str(e)}")
     def process_mp4_file(self, file_path: Path, original_name: str) -> str:
         """Process a single video file

vms/tabs/import_tab.py CHANGED Viewed

@@ -47,16 +47,17 @@ class ImportTab(BaseTab):
                 with gr.Column(scale=3):
                     with gr.Row():
                         with gr.Column():
-                            gr.Markdown("## Import video files")
                             gr.Markdown("You can upload either:")
                             gr.Markdown("- A single MP4 video file")
-                            gr.Markdown("- A ZIP archive containing multiple videos and optional caption files")
-                            gr.Markdown("For ZIP files: Create a folder containing videos (name is not important) and optional caption files with the same name (eg. `some_video.txt` for `some_video.mp4`)")
                     with gr.Row():
                         self.components["files"] = gr.Files(
-                            label="Upload Images, Videos or ZIP",
-                            file_types=[".jpg", ".jpeg", ".png", ".webp", ".webp", ".avif", ".heic", ".mp4", ".zip"],
                             type="filepath"
                         )

                 with gr.Column(scale=3):
                     with gr.Row():
                         with gr.Column():
+                            gr.Markdown("## Import files")
                             gr.Markdown("You can upload either:")
                             gr.Markdown("- A single MP4 video file")
+                            gr.Markdown("- A ZIP archive containing multiple videos/images and optional caption files")
+                            gr.Markdown("- A WebDataset shard (.tar file)")
+                            gr.Markdown("- A ZIP archive containing WebDataset shards (.tar files)")
                     with gr.Row():
                         self.components["files"] = gr.Files(
+                            label="Upload Images, Videos, ZIP or WebDataset",
+                            file_types=[".jpg", ".jpeg", ".png", ".webp", ".webp", ".avif", ".heic", ".mp4", ".zip", ".tar"],
                             type="filepath"
                         )

vms/utils/__init__.py CHANGED Viewed

@@ -6,6 +6,8 @@ from .image_preprocessing import normalize_image
 from .video_preprocessing import remove_black_bars
 from .finetrainers_utils import prepare_finetrainers_dataset, copy_files_to_training_dir
 __all__ = [
     'validate_model_repo',
     'make_archive',
@@ -30,4 +32,6 @@ __all__ = [
     'prepare_finetrainers_dataset',
     'copy_files_to_training_dir',
 ]

 from .video_preprocessing import remove_black_bars
 from .finetrainers_utils import prepare_finetrainers_dataset, copy_files_to_training_dir
+from . import webdataset_handler
 __all__ = [
     'validate_model_repo',
     'make_archive',
     'prepare_finetrainers_dataset',
     'copy_files_to_training_dir',
+    'webdataset_handler'
 ]

vms/utils/webdataset_handler.py ADDED Viewed

	@@ -0,0 +1,143 @@

+"""
+WebDataset format handling for Video Model Studio
+"""
+import os
+import tarfile
+import tempfile
+import logging
+from pathlib import Path
+from typing import List, Dict, Tuple, Optional
+from ..utils import is_image_file, is_video_file, extract_scene_info
+logger = logging.getLogger(__name__)
def is_webdataset_file(file_path: Path) -> bool:
    """Tell whether a path looks like a WebDataset shard.

    Only the final suffix is inspected, case-insensitively; the file's
    contents are never read.

    Args:
        file_path: Path to check

    Returns:
        bool: True if the path's last suffix is ``.tar``
    """
    extension = file_path.suffix
    return extension.lower() == '.tar'
# Extensions (everything after the first dot of the file name, lower-cased)
# recognised inside a shard.
_WDS_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.webp', '.avif', '.heic')
_WDS_VIDEO_EXTENSIONS = ('.mp4', '.webm')
# Caption-like extensions, most preferred first. When a sample carries
# several of them the first one present in this tuple is used.
_WDS_CAPTION_EXTENSIONS = ('.txt', '.caption', '.json', '.cls')


def process_webdataset_shard(
    tar_path: Path,
    videos_output_dir: Path,
    staging_output_dir: Path
) -> Tuple[int, int]:
    """Extract video/image and caption pairs from a WebDataset shard (tar file).

    Members are grouped into samples by their directory plus the part of the
    file name before the first dot. For every sample that has a media file,
    the media is written to ``videos_output_dir`` (videos) or
    ``staging_output_dir`` (images) under ``<prefix><ext>``; if that name is
    taken, ``<prefix>___<n><ext>`` is used instead. A caption member, when
    present, is written next to the media file with a ``.txt`` suffix.

    Args:
        tar_path: Path to the WebDataset tar file
        videos_output_dir: Directory to store videos for splitting
        staging_output_dir: Directory to store images and captions

    Returns:
        Tuple of (video_count, image_count)

    Raises:
        Exception: Any error raised while reading the archive or writing the
            output files is logged and re-raised unchanged.
    """
    # Local import: shutil is not among this module's top-level imports.
    import shutil

    video_count = 0
    image_count = 0
    try:
        videos_output_dir = Path(videos_output_dir)
        staging_output_dir = Path(staging_output_dir)
        # The targets may not exist yet on a fresh workspace.
        videos_output_dir.mkdir(parents=True, exist_ok=True)
        staging_output_dir.mkdir(parents=True, exist_ok=True)

        # The archive is opened once and used for both grouping and extraction.
        with tarfile.open(tar_path, 'r') as tar:
            # First pass: collect and group members by sample prefix
            grouped_files = {}
            for member in tar.getmembers():
                # Only regular files carry data; directories, symlinks,
                # hard links and device nodes cannot be safely extracted.
                if not member.isfile():
                    continue
                file_path = Path(member.name)
                file_name = file_path.name
                # Skip hidden files
                if file_name.startswith('.'):
                    continue
                # For WebDataset, the prefix is everything up to the first dot
                prefix, dot, remainder = file_name.partition('.')
                if not dot:
                    # No extension, skip
                    continue
                extension = '.' + remainder
                # Include directory in the prefix to keep samples grouped correctly
                if file_path.parent != Path('.'):
                    full_prefix = str(file_path.parent / prefix)
                else:
                    full_prefix = prefix
                grouped_files.setdefault(full_prefix, []).append((member, extension))

            # Second pass: extract and process grouped files
            for prefix, members in grouped_files.items():
                # Drop any directory part so output always lands in target_dir
                safe_prefix = Path(prefix).name

                media_file = None
                media_ext = None
                captions = {}
                for member, ext in members:
                    ext_lower = ext.lower()
                    if ext_lower in _WDS_IMAGE_EXTENSIONS or ext_lower in _WDS_VIDEO_EXTENSIONS:
                        media_file = member
                        media_ext = ext
                    elif ext_lower in _WDS_CAPTION_EXTENSIONS:
                        captions[ext_lower] = member

                if media_file is None:
                    continue

                # Deterministic caption choice, independent of tar ordering
                caption_file = next(
                    (captions[ext] for ext in _WDS_CAPTION_EXTENSIONS if ext in captions),
                    None,
                )

                is_video = media_ext.lower() in _WDS_VIDEO_EXTENSIONS
                target_dir = videos_output_dir if is_video else staging_output_dir

                # If file already exists, add number suffix
                target_path = target_dir / f"{safe_prefix}{media_ext}"
                counter = 1
                while target_path.exists():
                    target_path = target_dir / f"{safe_prefix}___{counter}{media_ext}"
                    counter += 1

                # Stream the media to disk instead of loading it fully in memory
                with tar.extractfile(media_file) as src, open(target_path, 'wb') as dst:
                    shutil.copyfileobj(src, dst)

                if caption_file is not None:
                    with tar.extractfile(caption_file) as src:
                        caption_text = src.read().decode('utf-8', errors='ignore')
                    # Caption shares the media file's stem, with a .txt suffix
                    caption_path = target_path.with_suffix('.txt')
                    with open(caption_path, 'w', encoding='utf-8') as f:
                        f.write(caption_text)

                if is_video:
                    video_count += 1
                else:
                    image_count += 1
    except Exception as e:
        logger.error(f"Error processing WebDataset file {tar_path}: {e}")
        raise
    return video_count, image_count