File size: 4,536 Bytes
b613c3c
 
 
 
 
 
aa1e877
b613c3c
 
 
 
 
 
 
89bbef2
 
 
 
b613c3c
 
c8cb798
b613c3c
 
 
 
 
 
 
 
 
246c64e
b613c3c
 
 
 
246c64e
b613c3c
 
 
 
246c64e
b613c3c
 
 
 
246c64e
 
 
b613c3c
246c64e
b613c3c
 
 
 
246c64e
b613c3c
 
 
 
 
246c64e
b613c3c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa1e877
 
 
246c64e
aa1e877
 
b613c3c
 
 
 
 
aa1e877
b613c3c
 
 
 
aa1e877
b613c3c
aa1e877
 
 
 
246c64e
aa1e877
 
b613c3c
 
 
 
 
 
aa1e877
b613c3c
 
 
 
aa1e877
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
"""
Main Import Service for Video Model Studio.
Delegates to specialized handler classes for different import types.
"""

import logging
from typing import List, Dict, Optional, Tuple, Any, Union, Callable
from pathlib import Path
import gradio as gr

from huggingface_hub import HfApi

from vms.config import HF_API_TOKEN

from vms.ui.project.services.importing.file_upload import FileUploadHandler
from vms.ui.project.services.importing.youtube import YouTubeDownloader
from vms.ui.project.services.importing.hub_dataset import HubDatasetBrowser

logger = logging.getLogger(__name__)

class ImportingService:
    """Facade that routes import requests to specialized handler objects.

    Every public method forwards its arguments unchanged to one of the
    handlers created in ``__init__`` and returns that handler's result:

    * ``file_handler``    -- uploaded files
    * ``youtube_handler`` -- YouTube downloads
    * ``hub_browser``     -- Hugging Face Hub dataset search and download
    """

    def __init__(self):
        """Create the Hub API client and the three import handlers."""
        self.hf_api = HfApi(token=HF_API_TOKEN)
        self.file_handler = FileUploadHandler()
        self.youtube_handler = YouTubeDownloader()
        # The dataset browser reuses this service's HfApi client.
        self.hub_browser = HubDatasetBrowser(self.hf_api)

    def process_uploaded_files(self, file_paths: List[str], enable_splitting: bool) -> str:
        """Process uploaded files (ZIP, TAR, MP4, or image).

        Args:
            file_paths: File paths to the uploaded files from Gradio
            enable_splitting: Whether to enable automatic video splitting

        Returns:
            Status message string; ``"No files provided"`` when
            ``file_paths`` is empty or ``None``, otherwise whatever
            ``self.file_handler.process_uploaded_files`` returns.
        """
        # Guard clause: nothing to hand to the file handler.
        if not file_paths:
            logger.warning("No files provided to process_uploaded_files")
            return "No files provided"

        # Diagnostic trace goes through the module logger (lazy %-args)
        # instead of unconditional print() calls on stdout.
        logger.debug(
            "process_uploaded_files: delegating %d file(s) to file handler "
            "(enable_splitting=%s)",
            len(file_paths),
            enable_splitting,
        )
        return self.file_handler.process_uploaded_files(file_paths, enable_splitting)

    def download_youtube_video(self, url: str, enable_splitting: bool, progress=None) -> str:
        """Download a video from YouTube via ``self.youtube_handler``.

        Args:
            url: YouTube video URL
            enable_splitting: Whether to enable automatic video splitting
            progress: Optional Gradio progress indicator, passed through as-is

        Returns:
            Status message string produced by the handler's ``download_video``
        """
        return self.youtube_handler.download_video(url, enable_splitting, progress)

    def search_datasets(self, query: str) -> List[List[str]]:
        """Search for datasets on the Hugging Face Hub via ``self.hub_browser``.

        Args:
            query: Search query string

        Returns:
            List of datasets matching the query [id, title, downloads]
        """
        return self.hub_browser.search_datasets(query)

    def get_dataset_info(self, dataset_id: str) -> Tuple[str, Dict[str, int], Dict[str, List[str]]]:
        """Get detailed information about a dataset via ``self.hub_browser``.

        Args:
            dataset_id: The dataset ID to get information for

        Returns:
            Tuple of (markdown_info, file_counts, file_groups)
        """
        return self.hub_browser.get_dataset_info(dataset_id)

    async def download_dataset(
        self,
        dataset_id: str,
        enable_splitting: bool,
        progress_callback: Optional[Callable] = None
    ) -> Tuple[str, str]:
        """Download a dataset and process its video/image content.

        Awaits ``self.hub_browser.download_dataset`` with the same arguments.

        Args:
            dataset_id: The dataset ID to download
            enable_splitting: Whether to enable automatic video splitting
            progress_callback: Optional callback for progress tracking

        Returns:
            Tuple of (loading_msg, status_msg)
        """
        return await self.hub_browser.download_dataset(dataset_id, enable_splitting, progress_callback)

    async def download_file_group(
        self,
        dataset_id: str,
        file_type: str,
        enable_splitting: bool,
        progress_callback: Optional[Callable] = None
    ) -> str:
        """Download a group of files (videos or WebDatasets).

        Awaits ``self.hub_browser.download_file_group`` with the same arguments.

        Args:
            dataset_id: The dataset ID
            file_type: Type of file ("video" or "webdataset")
            enable_splitting: Whether to enable automatic video splitting
            progress_callback: Optional callback for progress tracking

        Returns:
            Status message
        """
        return await self.hub_browser.download_file_group(dataset_id, file_type, enable_splitting, progress_callback)