caohy666 committed on
Commit 0e76ca0 · 1 Parent(s): a0ee85f

<fix> only leave lora weights loading in process_image_and_text.

Files changed (1):
  app.py +16 -19
app.py CHANGED
@@ -52,6 +52,9 @@ def init_basemodel():
     current_task = None
 
     # init models
+    transformer = HunyuanVideoTransformer3DModel.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
+                                                                 subfolder="transformer",
+                                                                 inference_subject_driven=False)
     scheduler = diffusers.FlowMatchEulerDiscreteScheduler()
     vae = diffusers.AutoencoderKLHunyuanVideo.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
                                                               subfolder="vae")
@@ -69,12 +72,25 @@ def init_basemodel():
     device = "cuda" if torch.cuda.is_available() else "cpu"
     weight_dtype = torch.bfloat16
 
+    transformer.requires_grad_(False)
     vae.requires_grad_(False).to(device, dtype=weight_dtype)
     text_encoder.requires_grad_(False).to(device, dtype=weight_dtype)
     text_encoder_2.requires_grad_(False).to(device, dtype=weight_dtype)
+    transformer.to(device, dtype=weight_dtype)
     vae.enable_tiling()
     vae.enable_slicing()
 
+    pipe = HunyuanVideoImageToVideoPipeline(
+        text_encoder=text_encoder,
+        tokenizer=tokenizer,
+        transformer=transformer,
+        vae=vae,
+        scheduler=copy.deepcopy(scheduler),
+        text_encoder_2=text_encoder_2,
+        tokenizer_2=tokenizer_2,
+        image_processor=image_processor,
+    )
+
 
 @spaces.GPU
 def process_image_and_text(condition_image, target_prompt, condition_image_prompt, task, random_seed, inpainting, fill_x1, fill_x2, fill_y1, fill_y2):
@@ -83,14 +99,6 @@ def process_image_and_text(condition_image, target_prompt, condition_image_promp
     if pipe is None or current_task != task:
         current_task = task
 
-        # init transformer
-        global transformer
-        transformer = HunyuanVideoTransformer3DModel.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
-                                                                     subfolder="transformer",
-                                                                     inference_subject_driven=task in ['subject_driven'])
-        transformer.requires_grad_(False)
-        transformer.to("cuda" if torch.cuda.is_available() else "cpu", dtype=torch.bfloat16)
-
         # insert LoRA
         lora_config = LoraConfig(
             r=16,
@@ -164,17 +172,6 @@ def process_image_and_text(condition_image, target_prompt, condition_image_promp
 
     transformer.requires_grad_(False)
 
-    pipe = HunyuanVideoImageToVideoPipeline(
-        text_encoder=text_encoder,
-        tokenizer=tokenizer,
-        transformer=transformer,
-        vae=vae,
-        scheduler=copy.deepcopy(scheduler),
-        text_encoder_2=text_encoder_2,
-        tokenizer_2=tokenizer_2,
-        image_processor=image_processor,
-    )
-
     # start generation
     c_txt = None if condition_image_prompt == "" else condition_image_prompt
     c_img = condition_image.resize((512, 512))
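
After this commit, init_basemodel creates the frozen transformer, VAE, text encoders, and the HunyuanVideoImageToVideoPipeline once at startup, so process_image_and_text only has to attach task-specific LoRA weights when the requested task changes. The sketch below shows that flow under stated assumptions: it uses peft's add_adapter / set_peft_model_state_dict API for the weight swap, and load_lora_state_dict_for_task, lora_alpha, and target_modules are hypothetical stand-ins — the diff itself only shows LoraConfig(r=16, ...).

    # Minimal sketch of the post-commit flow; not the Space's actual code.
    import torch
    from peft import LoraConfig
    from peft.utils import set_peft_model_state_dict

    pipe = None              # built once in init_basemodel(), reused across requests
    current_task = None      # which task's LoRA adapter is currently active
    loaded_adapters = set()  # adapter names already injected into the transformer

    def load_lora_state_dict_for_task(task):
        """Hypothetical helper: return the peft-format LoRA state dict for `task`."""
        return torch.load(f"loras/{task}.pt", map_location="cpu")

    def process_image_and_text(condition_image, target_prompt, task, **kwargs):
        global current_task
        if pipe is None or current_task != task:
            current_task = task
            if task not in loaded_adapters:
                # Only the LoRA weights are task-specific; the transformer, VAE,
                # text encoders, and pipeline are all frozen at startup.
                lora_config = LoraConfig(
                    r=16,
                    lora_alpha=16,                                        # assumption
                    target_modules=["to_q", "to_k", "to_v", "to_out.0"],  # assumption
                )
                pipe.transformer.add_adapter(lora_config, adapter_name=task)
                set_peft_model_state_dict(pipe.transformer,
                                          load_lora_state_dict_for_task(task),
                                          adapter_name=task)
                loaded_adapters.add(task)
            pipe.transformer.set_adapter(task)      # activate this task's LoRA
            pipe.transformer.requires_grad_(False)  # keep everything frozen
        # ... generation then proceeds with the cached `pipe` ...

Kept global, the heavyweight modules survive across requests, so switching tasks costs only a LoRA state-dict load rather than re-instantiating and re-casting the full transformer.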