jbilcke-hf HF Staff committed on
Commit
e31e7be
·
2 Parent(s): 0e1d4ae 14ba40f
CLAUDE.md ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Video Model Studio - Guidelines for Claude
2
+
3
+ ## Build & Run Commands
4
+ - Setup: `./setup.sh` (with flash attention) or `./setup_no_captions.sh` (without)
5
+ - Run: `./run.sh` or `python3.10 app.py`
6
+ - Test: `python3 tests/test_dataset.py`
7
+ - Single model test: `bash tests/scripts/dummy_cogvideox_lora.sh`
8
+
9
+ ## Code Style
10
+ - Python version: 3.10 (required for flash-attention compatibility)
11
+ - Type hints: Use typing module annotations for all functions
12
+ - Docstrings: Google style with Args/Returns sections
13
+ - Error handling: Use try/except with specific exceptions, log errors
14
+ - Imports: Group standard lib, third-party, and project imports
15
+ - Naming: snake_case for functions/variables, PascalCase for classes
16
+ - Use Path objects from pathlib instead of string paths
17
+ - Format utility functions: Extract reusable logic to separate functions
18
+ - Environment variables: Use parse_bool_env for boolean env vars
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🎥
4
  colorFrom: gray
5
  colorTo: gray
6
  sdk: gradio
7
- sdk_version: 5.20.1
8
  app_file: app.py
9
  pinned: true
10
  license: apache-2.0
@@ -133,7 +133,11 @@ That said, please see the "RUN" section for info about environment variables.
133
 
134
  ### Dev mode on Hugging Face
135
 
136
- Enable dev mode in the space, then open VSCode in local or remote and run:
 
 
 
 
137
 
138
  ```
139
  pip install -r requirements.txt
@@ -141,6 +145,8 @@ pip install -r requirements.txt
141
 
142
  As this is not automatic, click on "Restart" in the space dev mode UI widget.
143
 
 
 
144
  ### Full installation somewhere else
145
 
146
  I haven't tested it, but you can try the provided Dockerfile
@@ -217,3 +223,15 @@ By default `run.sh` will store stuff in `.data/` (located inside the current wor
217
  ```bash
218
  ./run.sh
219
  ```
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  colorFrom: gray
5
  colorTo: gray
6
  sdk: gradio
7
+ sdk_version: 5.23.3
8
  app_file: app.py
9
  pinned: true
10
  license: apache-2.0
 
133
 
134
  ### Dev mode on Hugging Face
135
 
136
+ I recommend not using dev mode for production usage (i.e. do not use dev mode when training a real model) unless you know what you are doing.
137
+
138
+ That's because the dev mode can be unstable and cause space restarts.
139
+
140
+ If you still want to use dev mode in the space, enable it, then open VSCode locally or remotely and run:
141
 
142
  ```
143
  pip install -r requirements.txt
 
145
 
146
  As this is not automatic, click on "Restart" in the space dev mode UI widget.
147
 
148
+ Important: if you see errors such as "API not found", this may indicate an issue with dev mode and Gradio rather than an issue with VMS itself.
149
+
150
  ### Full installation somewhere else
151
 
152
  I haven't tested it, but you can try the provided Dockerfile
 
223
  ```bash
224
  ./run.sh
225
  ```
226
+
227
+ ### Environment Variables
228
+
229
+ - `STORAGE_PATH`: Specifies the base storage path (default: '.data')
230
+ - `HF_API_TOKEN`: Your Hugging Face API token for accessing models and publishing
231
+ - `USE_LARGE_DATASET`: Set to "true" or "1" to enable large dataset mode, which:
232
+ - Hides the caption list in the caption tab
233
+ - Disables preview and editing of individual captions
234
+ - Disables the dataset download button
235
+ - Use this when working with large datasets that would be too slow to display in the UI
236
+ - `PRELOAD_CAPTIONING_MODEL`: Preloads the captioning model at startup
237
+ - `ASK_USER_TO_DUPLICATE_SPACE`: Prompts users to duplicate the space
requirements.txt CHANGED
@@ -1,43 +1,18 @@
1
- numpy>=1.26.4
2
 
3
- # to quote a-r-r-o-w/finetrainers:
4
- # It is recommended to use Pytorch 2.5.1 or above for training. Previous versions can lead to completely black videos, OOM errors, or other issues and are not tested.
5
-
6
- # on some system (Python 3.13+) those do not work:
7
- torch==2.5.1
8
- torchvision==0.20.1
9
- torchao>=0.7.0
10
-
11
- # datasets 3.4.0 replaces decord by torchvision
12
- # let's free it for now
13
- datasets==3.3.2
14
-
15
- huggingface_hub
16
- hf_transfer>=0.1.8
17
- diffusers @ git+https://github.com/huggingface/diffusers.git@main
18
- transformers>=4.45.2
19
-
20
- accelerate
21
- bitsandbytes
22
- peft>=0.12.0
23
 
24
  # For GPU monitoring of NVIDIA chipsets
25
  pynvml
26
 
27
- # eva-decord is missing get_batch it seems
28
- #eva-decord==0.6.1
29
- decord
30
-
31
- finetrainers @ git+https://github.com/a-r-r-o-w/finetrainers.git@main
32
  # temporary fix for pip install bug:
33
  #finetrainers @ git+https://github.com/jbilcke-hf/finetrainers-patches.git@fix_missing_sft_trainer_files
34
 
35
- wandb
36
- pandas
37
- sentencepiece>=0.2.0
38
  imageio
39
  imageio-ffmpeg
40
- torchdata==0.11.0
41
 
42
  flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
43
 
@@ -58,7 +33,7 @@ av==14.1.0
58
  git+https://github.com/LLaVA-VL/LLaVA-NeXT.git
59
 
60
  # for our frontend
61
- gradio==5.20.1
62
  gradio_toggle
63
 
64
  # used for the monitor
 
 
1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  # For GPU monitoring of NVIDIA chipsets
4
  pynvml
5
 
6
+ finetrainers==0.1.0
7
+ #finetrainers @ git+https://github.com/a-r-r-o-w/finetrainers.git@main
 
 
 
8
  # temporary fix for pip install bug:
9
  #finetrainers @ git+https://github.com/jbilcke-hf/finetrainers-patches.git@fix_missing_sft_trainer_files
10
 
11
+ # it is recommended to always use the latest version
12
+ diffusers @ git+https://github.com/huggingface/diffusers.git@main
13
+
14
  imageio
15
  imageio-ffmpeg
 
16
 
17
  flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
18
 
 
33
  git+https://github.com/LLaVA-VL/LLaVA-NeXT.git
34
 
35
  # for our frontend
36
+ gradio==5.23.3
37
  gradio_toggle
38
 
39
  # used for the monitor
requirements_without_flash_attention.txt CHANGED
@@ -1,44 +1,24 @@
1
- numpy>=1.26.4
2
 
3
- # to quote a-r-r-o-w/finetrainers:
4
- # It is recommended to use Pytorch 2.5.1 or above for training. Previous versions can lead to completely black videos, OOM errors, or other issues and are not tested.
5
-
6
- # on some system (Python 3.13+) those do not work:
7
- torch==2.5.1
8
- torchvision==0.20.1
9
- torchao>=0.7.0
10
-
11
- # datasets 3.4.0 replaces decord by torchvision
12
- # let's free it for now
13
- datasets==3.3.2
14
-
15
- huggingface_hub
16
- hf_transfer>=0.1.8
17
- diffusers @ git+https://github.com/huggingface/diffusers.git@main
18
- transformers>=4.45.2
19
-
20
- accelerate
21
- bitsandbytes
22
- peft>=0.12.0
23
 
24
  # For GPU monitoring of NVIDIA chipsets
25
- # you probably won't be able to install that on macOS
26
- # pynvml
27
 
28
  # eva-decord is missing get_batch it seems
29
- eva-decord==0.6.1
30
- # decord
31
 
32
- finetrainers @ git+https://github.com/a-r-r-o-w/finetrainers.git@main
 
33
  # temporary fix for pip install bug:
34
  #finetrainers @ git+https://github.com/jbilcke-hf/finetrainers-patches.git@fix_missing_sft_trainer_files
35
 
36
- wandb
37
- pandas
38
- sentencepiece>=0.2.0
39
  imageio
40
  imageio-ffmpeg
41
- torchdata==0.11.0
 
42
 
43
  # for youtube video download
44
  pytube
@@ -57,7 +37,7 @@ av==14.1.0
57
  git+https://github.com/LLaVA-VL/LLaVA-NeXT.git
58
 
59
  # for our frontend
60
- gradio==5.20.1
61
  gradio_toggle
62
 
63
  # used for the monitor
 
 
1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  # For GPU monitoring of NVIDIA chipsets
4
+ pynvml
 
5
 
6
  # eva-decord is missing get_batch it seems
7
+ #eva-decord==0.6.1
8
+ #decord
9
 
10
+ finetrainers==0.1.0
11
+ #finetrainers @ git+https://github.com/a-r-r-o-w/finetrainers.git@main
12
  # temporary fix for pip install bug:
13
  #finetrainers @ git+https://github.com/jbilcke-hf/finetrainers-patches.git@fix_missing_sft_trainer_files
14
 
15
+ # it is recommended to always use the latest version
16
+ diffusers @ git+https://github.com/huggingface/diffusers.git@main
17
+
18
  imageio
19
  imageio-ffmpeg
20
+
21
+ #flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
22
 
23
  # for youtube video download
24
  pytube
 
37
  git+https://github.com/LLaVA-VL/LLaVA-NeXT.git
38
 
39
  # for our frontend
40
+ gradio==5.23.3
41
  gradio_toggle
42
 
43
  # used for the monitor
vms/config.py CHANGED
@@ -22,6 +22,9 @@ def parse_bool_env(env_value: Optional[str]) -> bool:
22
  HF_API_TOKEN = os.getenv("HF_API_TOKEN")
23
  ASK_USER_TO_DUPLICATE_SPACE = parse_bool_env(os.getenv("ASK_USER_TO_DUPLICATE_SPACE"))
24
 
 
 
 
25
  # Base storage path
26
  STORAGE_PATH = Path(os.environ.get('STORAGE_PATH', '.data'))
27
 
 
22
  HF_API_TOKEN = os.getenv("HF_API_TOKEN")
23
  ASK_USER_TO_DUPLICATE_SPACE = parse_bool_env(os.getenv("ASK_USER_TO_DUPLICATE_SPACE"))
24
 
25
+ # For large datasets that would be slow to display or download
26
+ USE_LARGE_DATASET = parse_bool_env(os.getenv("USE_LARGE_DATASET"))
27
+
28
  # Base storage path
29
  STORAGE_PATH = Path(os.environ.get('STORAGE_PATH', '.data'))
30
 
vms/ui/project/tabs/caption_tab.py CHANGED
@@ -11,7 +11,7 @@ from pathlib import Path
11
  import mimetypes
12
 
13
  from vms.utils import BaseTab, is_image_file, is_video_file, copy_files_to_training_dir
14
- from vms.config import DEFAULT_CAPTIONING_BOT_INSTRUCTIONS, DEFAULT_PROMPT_PREFIX, STAGING_PATH
15
 
16
  logger = logging.getLogger(__name__)
17
 
@@ -28,7 +28,10 @@ class CaptionTab(BaseTab):
28
  """Create the Caption tab UI components"""
29
  with gr.TabItem(self.title, id=self.id) as tab:
30
  with gr.Row():
31
- self.components["caption_title"] = gr.Markdown("## Captioning of 0 files (0 bytes)")
 
 
 
32
 
33
  with gr.Row():
34
  with gr.Column():
@@ -62,7 +65,7 @@ class CaptionTab(BaseTab):
62
  interactive=False
63
  )
64
 
65
- with gr.Row():
66
  with gr.Column():
67
  self.components["training_dataset"] = gr.Dataframe(
68
  headers=["name", "status"],
@@ -95,6 +98,10 @@ class CaptionTab(BaseTab):
95
  visible=True
96
  )
97
  self.components["original_file_path"] = gr.State(value=None)
 
 
 
 
98
 
99
  return tab
100
 
@@ -174,10 +181,16 @@ class CaptionTab(BaseTab):
174
 
175
  def refresh(self) -> Dict[str, Any]:
176
  """Refresh the dataset list with current data"""
177
- training_dataset = self.list_training_files_to_caption()
178
- return {
179
- "training_dataset": training_dataset
180
- }
 
 
 
 
 
 
181
 
182
  def show_refreshing_status(self) -> List[List[str]]:
183
  """Show a 'Refreshing...' status in the dataframe"""
@@ -318,6 +331,10 @@ class CaptionTab(BaseTab):
318
 
319
  def list_training_files_to_caption(self) -> List[List[str]]:
320
  """List all clips and images - both pending and captioned"""
 
 
 
 
321
  files = []
322
  already_listed = {}
323
 
 
11
  import mimetypes
12
 
13
  from vms.utils import BaseTab, is_image_file, is_video_file, copy_files_to_training_dir
14
+ from vms.config import DEFAULT_CAPTIONING_BOT_INSTRUCTIONS, DEFAULT_PROMPT_PREFIX, STAGING_PATH, TRAINING_VIDEOS_PATH, USE_LARGE_DATASET
15
 
16
  logger = logging.getLogger(__name__)
17
 
 
28
  """Create the Caption tab UI components"""
29
  with gr.TabItem(self.title, id=self.id) as tab:
30
  with gr.Row():
31
+ if USE_LARGE_DATASET:
32
+ self.components["caption_title"] = gr.Markdown("## Captioning (Large Dataset Mode)")
33
+ else:
34
+ self.components["caption_title"] = gr.Markdown("## Captioning of 0 files (0 bytes)")
35
 
36
  with gr.Row():
37
  with gr.Column():
 
65
  interactive=False
66
  )
67
 
68
+ with gr.Row(visible=not USE_LARGE_DATASET):
69
  with gr.Column():
70
  self.components["training_dataset"] = gr.Dataframe(
71
  headers=["name", "status"],
 
98
  visible=True
99
  )
100
  self.components["original_file_path"] = gr.State(value=None)
101
+
102
+ with gr.Row(visible=USE_LARGE_DATASET):
103
+ gr.Markdown("### Large Dataset Mode Active")
104
+ gr.Markdown("Caption preview and editing is disabled to improve performance with large datasets.")
105
 
106
  return tab
107
 
 
181
 
182
  def refresh(self) -> Dict[str, Any]:
183
  """Refresh the dataset list with current data"""
184
+ if USE_LARGE_DATASET:
185
+ # In large dataset mode, we don't attempt to list files
186
+ return {
187
+ "training_dataset": [["Large dataset mode enabled", "listing skipped"]]
188
+ }
189
+ else:
190
+ training_dataset = self.list_training_files_to_caption()
191
+ return {
192
+ "training_dataset": training_dataset
193
+ }
194
 
195
  def show_refreshing_status(self) -> List[List[str]]:
196
  """Show a 'Refreshing...' status in the dataframe"""
 
331
 
332
  def list_training_files_to_caption(self) -> List[List[str]]:
333
  """List all clips and images - both pending and captioned"""
334
+ # In large dataset mode, return a placeholder message instead of listing all files
335
+ if USE_LARGE_DATASET:
336
+ return [["Large dataset mode enabled", "listing skipped"]]
337
+
338
  files = []
339
  already_listed = {}
340
 
vms/ui/project/tabs/manage_tab.py CHANGED
@@ -10,7 +10,8 @@ from typing import Dict, Any, List, Optional
10
 
11
  from vms.utils import BaseTab, validate_model_repo
12
  from vms.config import (
13
- HF_API_TOKEN, VIDEOS_TO_SPLIT_PATH, STAGING_PATH
 
14
  )
15
 
16
  logger = logging.getLogger(__name__)
@@ -35,8 +36,13 @@ class ManageTab(BaseTab):
35
  self.components["download_dataset_btn"] = gr.DownloadButton(
36
  "📦 Download training dataset (.zip)",
37
  variant="secondary",
38
- size="lg"
 
39
  )
 
 
 
 
40
  self.components["download_model_btn"] = gr.DownloadButton(
41
  "🧠 Download weights (.safetensors)",
42
  variant="secondary",
 
10
 
11
  from vms.utils import BaseTab, validate_model_repo
12
  from vms.config import (
13
+ HF_API_TOKEN, VIDEOS_TO_SPLIT_PATH, STAGING_PATH, TRAINING_VIDEOS_PATH,
14
+ TRAINING_PATH, MODEL_PATH, OUTPUT_PATH, LOG_FILE_PATH, USE_LARGE_DATASET
15
  )
16
 
17
  logger = logging.getLogger(__name__)
 
36
  self.components["download_dataset_btn"] = gr.DownloadButton(
37
  "📦 Download training dataset (.zip)",
38
  variant="secondary",
39
+ size="lg",
40
+ visible=not USE_LARGE_DATASET
41
  )
42
+ # If we have a large dataset, display a message explaining why download is disabled
43
+ if USE_LARGE_DATASET:
44
+ gr.Markdown("📦 Training dataset download disabled for large datasets")
45
+
46
  self.components["download_model_btn"] = gr.DownloadButton(
47
  "🧠 Download weights (.safetensors)",
48
  variant="secondary",