jbilcke committed
Commit 02e94ba · 1 Parent(s): aa1e877
Files changed (4)
  1. README.md +21 -0
  2. vms/config.py +1 -1
  3. vms/tabs/monitor_tab.py +1 -1
  4. vms/tabs/split_tab.py +1 -1
README.md CHANGED
@@ -73,6 +73,11 @@ VMS uses `Finetrainers` under the hood. In theory any model supported by Finetra
 
 In practice, a PR (pull request) will be necessary to adapt the UI a bit to accommodate each model's specificities.
 
+
+### Wan
+
+I am currently testing Wan LoRA training!
+
 ### LTX-Video
 
 I have tested training a LTX-Video LoRA model using videos (not images), on a single A100 instance.
@@ -90,6 +95,22 @@ Do you want support for this one? Let me know in the comments!
 
 ## Limitations
 
+### No AV1 on A100
+
+If your dataset contains videos encoded with the AV1 codec, you might not be able to decode them (e.g. during scene splitting) if your machine doesn't support hardware decoding.
+
+The Nvidia A100, for instance, doesn't support hardware AV1 decoding.
+
+It might be possible to convert them server-side or to use software decoding directly from Python, but I haven't looked into that yet (you can submit a PR if you have an idea).
+
+My recommendation is to make sure your data comes in H.264.
+
+You can use FFmpeg to do this, e.g.:
+
+```bash
+ffmpeg -i input_video_in_av1.mp4 -vcodec libx264 -acodec aac output_video_in_h264.mp4
+```
+
 ### One-user-per-space design
 
 Currently VMS can only support one training job at a time; anybody with access to your Gradio app will be able to upload or delete everything, etc.
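The README suggests software decoding directly from Python as one possible workaround. Here is a minimal sketch of that idea using PyAV (`pip install av`) — an assumption, not part of this commit; it works as long as the bundled FFmpeg includes an AV1 software decoder such as dav1d:

```python
# Minimal sketch (assumption, not part of this commit): decode an AV1 video
# entirely on the CPU with PyAV, bypassing the missing NVDEC support on A100.
import av

def count_decodable_frames(path: str) -> int:
    """Software-decode every frame and return how many were read."""
    with av.open(path) as container:
        stream = container.streams.video[0]
        stream.thread_type = "AUTO"  # let FFmpeg use multiple decode threads
        return sum(1 for _ in container.decode(stream))

if __name__ == "__main__":
    # File name is illustrative, reusing the README's example.
    print(count_decodable_frames("input_video_in_av1.mp4"))
```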
vms/config.py CHANGED
@@ -41,7 +41,7 @@ DEFAULT_PROMPT_PREFIX = "In the style of TOK, "
 # This is only use to debug things in local
 USE_MOCK_CAPTIONING_MODEL = parse_bool_env(os.environ.get('USE_MOCK_CAPTIONING_MODEL'))
 
-DEFAULT_CAPTIONING_BOT_INSTRUCTIONS = "Please write a full video description. Be synthetic, don't say things like ""this video features.."" etc. Instead, methodically list camera (close-up shot, medium-shot..), genre (music video, horror movie scene, video game footage, go pro footage, japanese anime, noir film, science-fiction, action movie, documentary..), characters (physical appearance, look, skin, facial features, haircut, clothing), scene (action, positions, movements), location (indoor, outdoor, place, building, country..), time and lighting (natural, golden hour, night time, LED lights, kelvin temperature etc), weather and climate (dusty, rainy, fog, haze, snowing..), era/settings."
+DEFAULT_CAPTIONING_BOT_INSTRUCTIONS = "Please write a full video description. Be synthetic and methodically list camera (close-up shot, medium-shot..), genre (music video, horror movie scene, video game footage, go pro footage, japanese anime, noir film, science-fiction, action movie, documentary..), characters (physical appearance, look, skin, facial features, haircut, clothing), scene (action, positions, movements), location (indoor, outdoor, place, building, country..), time and lighting (natural, golden hour, night time, LED lights, kelvin temperature etc), weather and climate (dusty, rainy, fog, haze, snowing..), era/settings."
 
 # Create directories
 STORAGE_PATH.mkdir(parents=True, exist_ok=True)
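`parse_bool_env` is a VMS helper whose body isn't shown in this diff; a plausible sketch of what it does, purely an assumption:

```python
# Hypothetical sketch of parse_bool_env (the real implementation lives
# elsewhere in the VMS codebase): map common truthy strings to True.
def parse_bool_env(value: str | None) -> bool:
    if value is None:
        return False
    return value.strip().lower() in {"1", "true", "yes", "on"}
```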
vms/tabs/monitor_tab.py CHANGED
@@ -44,7 +44,7 @@ class MonitorTab(BaseTab):
     def __init__(self, app_state):
         super().__init__(app_state)
         self.id = "monitor_tab"
-        self.title = "4️⃣ Monitor"
+        self.title = "5️⃣ Monitor"
        self.refresh_interval = 8
 
     def create(self, parent=None) -> gr.TabItem:
vms/tabs/split_tab.py CHANGED
@@ -32,7 +32,7 @@ class SplitTab(BaseTab):
         with gr.Column():
             self.components["video_list"] = gr.Dataframe(
                 headers=["name", "status"],
-                label="Videos to split",
+                label="Videos to split (note: Nvidia A100 cannot split videos encoded in AV1)",
                 interactive=False,
                 wrap=True
             )
  )