Committed by jbilcke-hf (HF Staff)
Commit: cb66746 · Parent: 9846dba

finetrainers is broken, reverting

finetrainers/patches/__init__.py CHANGED
@@ -17,12 +17,7 @@ def perform_patches_for_training(args: "BaseArgs", parallel_backend: "ParallelBa
     if parallel_backend.tensor_parallel_enabled:
         patch.patch_apply_rotary_emb_for_tp_compatibility()
 
-    if args.model_name == ModelType.WAN:
-        from .models.wan import patch
-
-        patch.patch_time_text_image_embedding_forward()
-
     if args.training_type == TrainingType.LORA and len(args.layerwise_upcasting_modules) > 0:
-        from .dependencies.peft import patch
+        from dependencies.peft import patch
 
         patch.patch_peft_move_adapter_to_device_of_base_layer()
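
Besides dropping the WAN-specific branch, the hunk changes the import spelling from the package-relative ".dependencies.peft" to the bare "dependencies.peft". A minimal, hypothetical sketch (throwaway package names, not finetrainers code) of how the two forms resolve differently:

import sys
import tempfile
from pathlib import Path

# Build a throwaway package layout loosely mirroring finetrainers/patches/:
# pkg/__init__.py uses a package-relative import of pkg/dependencies/peft/patch.py
root = Path(tempfile.mkdtemp())
(root / "pkg" / "dependencies" / "peft").mkdir(parents=True)
(root / "pkg" / "__init__.py").write_text("from .dependencies.peft import patch\n")
(root / "pkg" / "dependencies" / "__init__.py").write_text("")
(root / "pkg" / "dependencies" / "peft" / "__init__.py").write_text("")
(root / "pkg" / "dependencies" / "peft" / "patch.py").write_text("")

sys.path.insert(0, str(root))
import pkg  # the relative import inside pkg/__init__.py resolves against the package

try:
    from dependencies.peft import patch  # bare form: needs a top-level "dependencies" package
except ModuleNotFoundError as err:
    print("bare import failed:", err)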
finetrainers/patches/models/ltx_video/patch.py CHANGED
@@ -16,7 +16,7 @@ def patch_apply_rotary_emb_for_tp_compatibility() -> None:
 
 
 def _perform_ltx_transformer_forward_patch() -> None:
-    LTXVideoTransformer3DModel.forward = _patched_LTXVideoTransformer3D_forward
+    LTXVideoTransformer3DModel.forward = _patched_LTXVideoTransformer3Dforward
 
 
 def _perform_ltx_apply_rotary_emb_tensor_parallel_compatibility_patch() -> None:
@@ -35,7 +35,7 @@ def _perform_ltx_apply_rotary_emb_tensor_parallel_compatibility_patch() -> None:
     diffusers.models.transformers.transformer_ltx.apply_rotary_emb = apply_rotary_emb
 
 
-def _patched_LTXVideoTransformer3D_forward(
+def _patched_LTXVideoTransformer3Dforward(
     self,
     hidden_states: torch.Tensor,
     encoder_hidden_states: torch.Tensor,
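
Both hunks adjust the same class-level monkey patch: the transformer's forward is reassigned to a module-level replacement, and this commit renames that replacement back to _patched_LTXVideoTransformer3Dforward. A minimal, self-contained sketch of the pattern (illustrative class, not the diffusers LTXVideoTransformer3DModel):

import torch

class TinyTransformer:
    def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
        return hidden_states + 1

def _patched_forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
    # Replacement behaviour; the real patch reimplements the forward pass.
    return hidden_states * 2

def perform_patch() -> None:
    # Assigning on the class (not an instance) affects every instance,
    # including ones created before the patch was applied.
    TinyTransformer.forward = _patched_forward

model = TinyTransformer()
print(model.forward(torch.tensor([3.0])))  # tensor([4.])
perform_patch()
print(model.forward(torch.tensor([3.0])))  # tensor([6.])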
finetrainers/patches/models/wan/patch.py DELETED
@@ -1,33 +0,0 @@
-from typing import Optional
-
-import diffusers
-import torch
-
-
-def patch_time_text_image_embedding_forward() -> None:
-    _patch_time_text_image_embedding_forward()
-
-
-def _patch_time_text_image_embedding_forward() -> None:
-    diffusers.models.transformers.transformer_wan.WanTimeTextImageEmbedding.forward = (
-        _patched_WanTimeTextImageEmbedding_forward
-    )
-
-
-def _patched_WanTimeTextImageEmbedding_forward(
-    self,
-    timestep: torch.Tensor,
-    encoder_hidden_states: torch.Tensor,
-    encoder_hidden_states_image: Optional[torch.Tensor] = None,
-):
-    # Some code has been removed compared to original implementation in Diffusers
-    # Also, timestep is typed as that of encoder_hidden_states
-    timestep = self.timesteps_proj(timestep).type_as(encoder_hidden_states)
-    temb = self.time_embedder(timestep).type_as(encoder_hidden_states)
-    timestep_proj = self.time_proj(self.act_fn(temb))
-
-    encoder_hidden_states = self.text_embedder(encoder_hidden_states)
-    if encoder_hidden_states_image is not None:
-        encoder_hidden_states_image = self.image_embedder(encoder_hidden_states_image)
-
-    return temb, timestep_proj, encoder_hidden_states, encoder_hidden_states_image
finetrainers/trainer/sft_trainer/trainer.py CHANGED
@@ -334,7 +334,6 @@ class SFTTrainer:
         parallel_backend = self.state.parallel_backend
         train_state = self.state.train_state
         device = parallel_backend.device
-        dtype = self.args.transformer_dtype
 
         memory_statistics = utils.get_memory_statistics()
         logger.info(f"Memory before training start: {json.dumps(memory_statistics, indent=4)}")
@@ -448,8 +447,8 @@
 
             logger.debug(f"Starting training step ({train_state.step}/{self.args.train_steps})")
 
-            latent_model_conditions = utils.align_device_and_dtype(latent_model_conditions, device, dtype)
-            condition_model_conditions = utils.align_device_and_dtype(condition_model_conditions, device, dtype)
+            utils.align_device_and_dtype(latent_model_conditions, device, self.args.transformer_dtype)
+            utils.align_device_and_dtype(condition_model_conditions, device, self.args.transformer_dtype)
             latent_model_conditions = utils.make_contiguous(latent_model_conditions)
             condition_model_conditions = utils.make_contiguous(condition_model_conditions)
 
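
The reverted training-step code calls utils.align_device_and_dtype without keeping its return value. Whether the condition dicts still end up on the target device and dtype then depends on whether that helper mutates its argument in place or returns a new mapping; the sketch below uses a hypothetical stand-in (not the finetrainers implementation) to show why the assignment matters when a new dict is returned:

from typing import Dict
import torch

def align_device_and_dtype(tensors: Dict[str, torch.Tensor], device, dtype) -> Dict[str, torch.Tensor]:
    # Hypothetical stand-in: returns a new dict and leaves the argument untouched.
    return {k: v.to(device=device, dtype=dtype) for k, v in tensors.items()}

conditions = {"latents": torch.zeros(2, 4, dtype=torch.float32)}

align_device_and_dtype(conditions, "cpu", torch.bfloat16)    # return value discarded
print(conditions["latents"].dtype)                           # still torch.float32

conditions = align_device_and_dtype(conditions, "cpu", torch.bfloat16)
print(conditions["latents"].dtype)                           # torch.bfloat16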
requirements.txt CHANGED
@@ -19,7 +19,8 @@ eva-decord==0.6.1
 wandb
 pandas
 sentencepiece>=0.2.0
-imageio-ffmpeg>=0.5.1
+imageio
+imageio-ffmpeg
 torchdata==0.11.0
 
 flash-attn @ https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
requirements_without_flash_attention.txt CHANGED
@@ -20,7 +20,8 @@ eva-decord==0.6.1
 wandb
 pandas
 sentencepiece>=0.2.0
-imageio-ffmpeg>=0.5.1
+imageio
+imageio-ffmpeg
 torchdata==0.11.0
 
 # for youtube video download
vms/ui/project/tabs/preview_tab.py CHANGED
@@ -33,6 +33,25 @@ class PreviewTab(BaseTab):
 
         with gr.Row():
             with gr.Column(scale=2):
+
+                # Add dropdown to choose between LoRA and original model
+                has_lora = self.check_lora_model_exists()
+                lora_choices = []
+                default_lora_choice = ""
+
+                if has_lora:
+                    lora_choices = ["Use LoRA model", "Use original model"]
+                    default_lora_choice = "Use LoRA model"
+                else:
+                    lora_choices = ["Cannot find LoRA model", "Use original model"]
+                    default_lora_choice = "Use original model"
+
+                self.components["use_lora"] = gr.Dropdown(
+                    choices=lora_choices,
+                    label="Model Selection",
+                    value=default_lora_choice
+                )
+
                 self.components["prompt"] = gr.Textbox(
                     label="Prompt",
                     placeholder="Enter your prompt here...",
@@ -82,25 +101,7 @@
                     choices=self.get_model_version_choices(default_model),
                     value=self.get_default_model_version(default_model)
                 )
-
-                # Add dropdown to choose between LoRA and original model
-                has_lora = self.check_lora_model_exists()
-                lora_choices = []
-                default_lora_choice = ""
-
-                if has_lora:
-                    lora_choices = ["Use LoRA model", "Use original model"]
-                    default_lora_choice = "Use LoRA model"
-                else:
-                    lora_choices = ["Cannot find LoRA model", "Use original model"]
-                    default_lora_choice = "Use original model"
 
-                self.components["use_lora"] = gr.Dropdown(
-                    choices=lora_choices,
-                    label="Model Selection",
-                    value=default_lora_choice
-                )
-
                 # Add image input for image-to-video models
                 self.components["conditioning_image"] = gr.Image(
                     label="Conditioning Image (for Image-to-Video models)",