Spaces:
Running
Running
""" | |
Main Import Service for Video Model Studio. | |
Delegates to specialized handler classes for different import types. | |
""" | |
import logging | |
from typing import List, Dict, Optional, Tuple, Any, Union, Callable | |
from pathlib import Path | |
import gradio as gr | |
from huggingface_hub import HfApi | |
from vms.config import HF_API_TOKEN | |
from vms.ui.project.services.importing.file_upload import FileUploadHandler | |
from vms.ui.project.services.importing.youtube import YouTubeDownloader | |
from vms.ui.project.services.importing.hub_dataset import HubDatasetBrowser | |
logger = logging.getLogger(__name__) | |
class ImportingService: | |
"""Main service class for handling imports from various sources""" | |
def __init__(self): | |
"""Initialize the import service and handlers""" | |
self.hf_api = HfApi(token=HF_API_TOKEN) | |
self.file_handler = FileUploadHandler() | |
self.youtube_handler = YouTubeDownloader() | |
self.hub_browser = HubDatasetBrowser(self.hf_api) | |
def process_uploaded_files(self, file_paths: List[str], enable_splitting: bool) -> str: | |
"""Process uploaded file (ZIP, TAR, MP4, or image) | |
Args: | |
file_paths: File paths to the uploaded files from Gradio | |
enable_splitting: Whether to enable automatic video splitting | |
Returns: | |
Status message string | |
""" | |
print(f"process_uploaded_files(..., enable_splitting = { enable_splitting})") | |
if not file_paths or len(file_paths) == 0: | |
logger.warning("No files provided to process_uploaded_files") | |
return "No files provided" | |
print(f"process_uploaded_files(..., enable_splitting = {enable_splitting:})") | |
print(f"process_uploaded_files: calling self.file_handler.process_uploaded_files") | |
return self.file_handler.process_uploaded_files(file_paths, enable_splitting) | |
def download_youtube_video(self, url: str, enable_splitting: bool, progress=None) -> str: | |
"""Download a video from YouTube | |
Args: | |
url: YouTube video URL | |
enable_splitting: Whether to enable automatic video splitting | |
progress: Optional Gradio progress indicator | |
Returns: | |
Status message string | |
""" | |
return self.youtube_handler.download_video(url, enable_splitting, progress) | |
def search_datasets(self, query: str) -> List[List[str]]: | |
"""Search for datasets on the Hugging Face Hub | |
Args: | |
query: Search query string | |
Returns: | |
List of datasets matching the query [id, title, downloads] | |
""" | |
return self.hub_browser.search_datasets(query) | |
def get_dataset_info(self, dataset_id: str) -> Tuple[str, Dict[str, int], Dict[str, List[str]]]: | |
"""Get detailed information about a dataset | |
Args: | |
dataset_id: The dataset ID to get information for | |
Returns: | |
Tuple of (markdown_info, file_counts, file_groups) | |
""" | |
return self.hub_browser.get_dataset_info(dataset_id) | |
async def download_dataset( | |
self, | |
dataset_id: str, | |
enable_splitting: bool, | |
progress_callback: Optional[Callable] = None | |
) -> Tuple[str, str]: | |
"""Download a dataset and process its video/image content | |
Args: | |
dataset_id: The dataset ID to download | |
enable_splitting: Whether to enable automatic video splitting | |
progress_callback: Optional callback for progress tracking | |
Returns: | |
Tuple of (loading_msg, status_msg) | |
""" | |
return await self.hub_browser.download_dataset(dataset_id, enable_splitting, progress_callback) | |
async def download_file_group( | |
self, | |
dataset_id: str, | |
file_type: str, | |
enable_splitting: bool, | |
progress_callback: Optional[Callable] = None | |
) -> str: | |
"""Download a group of files (videos or WebDatasets) | |
Args: | |
dataset_id: The dataset ID | |
file_type: Type of file ("video" or "webdataset") | |
enable_splitting: Whether to enable automatic video splitting | |
progress_callback: Optional callback for progress tracking | |
Returns: | |
Status message | |
""" | |
return await self.hub_browser.download_file_group(dataset_id, file_type, enable_splitting, progress_callback) |