File size: 7,428 Bytes
0ad7e2a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76eb17f
0ad7e2a
 
76eb17f
 
 
0ad7e2a
 
 
76eb17f
 
0ad7e2a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64a70c0
0ad7e2a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64a70c0
0ad7e2a
 
 
 
 
 
 
 
 
 
64a70c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ad7e2a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
"""
Import tab for Video Model Studio UI
"""

import gradio as gr
import logging
import asyncio
from pathlib import Path
from typing import Dict, Any, List, Optional

from .base_tab import BaseTab
from ..config import (
    VIDEOS_TO_SPLIT_PATH, DEFAULT_PROMPT_PREFIX, DEFAULT_CAPTIONING_BOT_INSTRUCTIONS
)

# Module-level logger; its name is this module's import path (__name__).
logger = logging.getLogger(__name__)

class ImportTab(BaseTab):
    """Import tab for uploading videos and images.

    Builds the upload / YouTube-download UI and, once an import succeeds,
    kicks off the follow-up steps: scene detection (or plain copying),
    copying files to the training directory, and optional captioning.
    """

    def __init__(self, app_state):
        """Initialize the tab's identifier and title.

        Args:
            app_state: Forwarded unchanged to ``BaseTab.__init__``.
        """
        super().__init__(app_state)
        self.id = "import_tab"
        self.title = "1️⃣  Import"
        # The event loop only keeps weak references to tasks, so fire-and-forget
        # captioning tasks are held here until they finish.
        self._background_tasks = set()

    def create(self, parent=None) -> gr.TabItem:
        """Create the Import tab UI components.

        Every interactive component is registered in ``self.components`` so
        that ``connect_events`` (and other tabs) can look it up by key.

        Args:
            parent: Not used by this method.

        Returns:
            The ``gr.TabItem`` containing the import UI.
        """
        with gr.TabItem(self.title, id=self.id) as tab:
            with gr.Row():
                gr.Markdown("## Automatic splitting and captioning")

            with gr.Row():
                self.components["enable_automatic_video_split"] = gr.Checkbox(
                    label="Automatically split videos into smaller clips",
                    info="Note: a clip is a single camera shot, usually a few seconds",
                    value=True,
                    visible=True
                )
                self.components["enable_automatic_content_captioning"] = gr.Checkbox(
                    label="Automatically caption photos and videos",
                    info="Note: this uses LlaVA and takes some extra time to load and process",
                    value=False,
                    visible=True,
                )

            with gr.Row():
                # Left column: file upload
                with gr.Column(scale=3):
                    with gr.Row():
                        with gr.Column():
                            gr.Markdown("## Import files")
                            gr.Markdown("You can upload either:")
                            gr.Markdown("- A single MP4 video file")
                            gr.Markdown("- A ZIP archive containing multiple videos/images and optional caption files")
                            gr.Markdown("- A WebDataset shard (.tar file)")
                            gr.Markdown("- A ZIP archive containing WebDataset shards (.tar files)")

                    with gr.Row():
                        self.components["files"] = gr.Files(
                            label="Upload Images, Videos, ZIP or WebDataset",
                            file_types=[".jpg", ".jpeg", ".png", ".webp", ".avif", ".heic", ".mp4", ".zip", ".tar"],
                            type="filepath"
                        )

                # Right column: YouTube import
                with gr.Column(scale=3):
                    with gr.Row():
                        with gr.Column():
                            gr.Markdown("## Import a YouTube video")
                            gr.Markdown("You can also use a YouTube video as reference, by pasting its URL here:")

                    with gr.Row():
                        self.components["youtube_url"] = gr.Textbox(
                            label="Import YouTube Video",
                            placeholder="https://www.youtube.com/watch?v=..."
                        )
                    with gr.Row():
                        self.components["youtube_download_btn"] = gr.Button("Download YouTube Video", variant="secondary")
            with gr.Row():
                self.components["import_status"] = gr.Textbox(label="Status", interactive=False)

        return tab

    def connect_events(self) -> None:
        """Connect event handlers to UI components.

        Both import paths (file upload and YouTube download) first write a
        message to the status textbox and, on success, run the post-import
        step with the same three inputs. The upload path additionally
        refreshes the split/caption/train tab titles.
        """
        split_components = self.app.tabs["split_tab"].components
        caption_components = self.app.tabs["caption_tab"].components
        train_components = self.app.tabs["train_tab"].components

        # Inputs shared by both post-import handlers.
        post_import_inputs = [
            self.components["enable_automatic_video_split"],
            self.components["enable_automatic_content_captioning"],
            caption_components["custom_prompt_prefix"],
        ]
        # Outputs shared by both post-import handlers, in the order the
        # handlers return their values.
        navigation_outputs = [
            self.app.tabs_component,  # Main tabs component
            split_components["video_list"],
            split_components["detect_status"],
        ]
        title_outputs = [
            split_components["split_title"],
            caption_components["caption_title"],
            train_components["train_title"],
        ]

        # File upload event. The bound method is passed directly (as for the
        # YouTube handler below) rather than through a lambda wrapper.
        self.components["files"].upload(
            fn=self.app.importer.process_uploaded_files,
            inputs=[self.components["files"]],
            outputs=[self.components["import_status"]]
        ).success(
            fn=self.update_titles_after_import,
            inputs=list(post_import_inputs),
            outputs=navigation_outputs + title_outputs
        )

        # YouTube download event. ``on_import_success`` returns a dict keyed
        # by strings, which Gradio cannot map onto output components, so a
        # positional adapter is used here instead.
        self.components["youtube_download_btn"].click(
            fn=self.app.importer.download_youtube_video,
            inputs=[self.components["youtube_url"]],
            outputs=[self.components["import_status"]]
        ).success(
            fn=self._on_youtube_import_success,
            inputs=list(post_import_inputs),
            outputs=list(navigation_outputs)
        )

    async def on_import_success(self, enable_splitting, enable_automatic_content_captioning, prompt_prefix):
        """Handle successful import of files.

        Args:
            enable_splitting: Value of the "automatically split" checkbox.
            enable_automatic_content_captioning: Value of the "automatically
                caption" checkbox.
            prompt_prefix: Prefix passed to the caption tab when copying files
                and when captioning.

        Returns:
            A dict with the keys ``"tabs"`` (a ``gr.Tabs`` update selecting
            the split tab), ``"video_list"`` (the unprocessed videos listed
            before any processing started) and ``"detect_status"`` (a status
            message).
        """
        videos = self.app.tabs["split_tab"].list_unprocessed_videos()

        # If scene detection isn't already running and there are videos to process,
        # and auto-splitting is enabled, start the detection
        if videos and not self.app.splitter.is_processing() and enable_splitting:
            await self.app.tabs["split_tab"].start_scene_detection(enable_splitting)
            msg = "Starting automatic scene detection..."
        else:
            # Just copy files without splitting if auto-split disabled.
            # NOTE(review): this branch also runs when detection is already in
            # progress or when no unprocessed videos were listed - confirm
            # that is intended.
            for video_file in VIDEOS_TO_SPLIT_PATH.glob("*.mp4"):
                await self.app.splitter.process_video(video_file, enable_splitting=False)
            msg = "Copying videos without splitting..."

        self.app.tabs["caption_tab"].copy_files_to_training_dir(prompt_prefix)

        # Start auto-captioning if enabled, as a background task so this
        # handler can return without waiting for captioning to finish.
        if enable_automatic_content_captioning:
            caption_task = asyncio.create_task(
                self.app.tabs["caption_tab"]._process_caption_generator(
                    DEFAULT_CAPTIONING_BOT_INSTRUCTIONS,
                    prompt_prefix
                )
            )
            # Keep a strong reference until completion; otherwise the task may
            # be garbage-collected while still running.
            self._background_tasks.add(caption_task)
            caption_task.add_done_callback(self._background_tasks.discard)

        return {
            "tabs": gr.Tabs(selected="split_tab"),
            "video_list": videos,
            "detect_status": msg
        }

    async def _on_youtube_import_success(self, enable_splitting, enable_automatic_content_captioning, prompt_prefix):
        """Run ``on_import_success`` and return its result positionally.

        Returns:
            A ``(tabs, video_list, detect_status)`` tuple matching the order
            of the outputs wired in ``connect_events``.
        """
        import_result = await self.on_import_success(
            enable_splitting, enable_automatic_content_captioning, prompt_prefix
        )
        return (
            import_result["tabs"],
            import_result["video_list"],
            import_result["detect_status"],
        )

    async def update_titles_after_import(self, enable_splitting, enable_automatic_content_captioning, prompt_prefix):
        """Handle post-import updates including titles.

        Runs ``on_import_success`` and then appends whatever
        ``self.app.update_titles()`` returns.

        Returns:
            A tuple of the tabs update, the video list and the detection
            status, followed by the unpacked title values (presumably one per
            title component wired in ``connect_events`` - verify against
            ``update_titles``).
        """
        import_result = await self.on_import_success(enable_splitting, enable_automatic_content_captioning, prompt_prefix)
        titles = self.app.update_titles()
        return (
            import_result["tabs"],
            import_result["video_list"],
            import_result["detect_status"],
            *titles
        )