Spaces: Running on Zero
<fix> leave only the LoRA weight loading in process_image_and_text.
app.py CHANGED
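In short: all task-independent setup moves out of the GPU handler. The transformer is now loaded once in `init_basemodel()` (with `inference_subject_driven` fixed to `False` instead of derived from the requested task), and the `HunyuanVideoImageToVideoPipeline` is built there as well, so `process_image_and_text` only has to attach task-specific LoRA weights. A minimal sketch of the resulting structure, assuming the `spaces` package the app already imports (signatures trimmed, bodies elided):

```python
import spaces

def init_basemodel():
    # Heavy, task-independent work, done once at startup: load the
    # transformer/VAE/text encoders, freeze them, build the pipeline.
    ...

init_basemodel()

@spaces.GPU  # ZeroGPU attaches a GPU only for the duration of each call
def process_image_and_text(condition_image, target_prompt, task):
    # Cheap, task-specific work: insert LoRA adapters, load the weights
    # for the requested task, then run the already-built pipeline.
    ...
```

The full diff follows.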
```diff
@@ -52,6 +52,9 @@ def init_basemodel():
     current_task = None
 
     # init models
+    transformer = HunyuanVideoTransformer3DModel.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
+                                                                 subfolder="transformer",
+                                                                 inference_subject_driven=False)
     scheduler = diffusers.FlowMatchEulerDiscreteScheduler()
     vae = diffusers.AutoencoderKLHunyuanVideo.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
                                                               subfolder="vae")
@@ -69,12 +72,25 @@ def init_basemodel():
     device = "cuda" if torch.cuda.is_available() else "cpu"
     weight_dtype = torch.bfloat16
 
+    transformer.requires_grad_(False)
     vae.requires_grad_(False).to(device, dtype=weight_dtype)
     text_encoder.requires_grad_(False).to(device, dtype=weight_dtype)
     text_encoder_2.requires_grad_(False).to(device, dtype=weight_dtype)
+    transformer.to(device, dtype=weight_dtype)
     vae.enable_tiling()
     vae.enable_slicing()
 
+    pipe = HunyuanVideoImageToVideoPipeline(
+        text_encoder=text_encoder,
+        tokenizer=tokenizer,
+        transformer=transformer,
+        vae=vae,
+        scheduler=copy.deepcopy(scheduler),
+        text_encoder_2=text_encoder_2,
+        tokenizer_2=tokenizer_2,
+        image_processor=image_processor,
+    )
+
 
 @spaces.GPU
 def process_image_and_text(condition_image, target_prompt, condition_image_prompt, task, random_seed, inpainting, fill_x1, fill_x2, fill_y1, fill_y2):
@@ -83,14 +99,6 @@ def process_image_and_text(condition_image, target_prompt, condition_image_promp
     if pipe is None or current_task != task:
         current_task = task
 
-        # init transformer
-        global transformer
-        transformer = HunyuanVideoTransformer3DModel.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
-                                                                     subfolder="transformer",
-                                                                     inference_subject_driven=task in ['subject_driven'])
-        transformer.requires_grad_(False)
-        transformer.to("cuda" if torch.cuda.is_available() else "cpu", dtype=torch.bfloat16)
-
         # insert LoRA
         lora_config = LoraConfig(
             r=16,
@@ -164,17 +172,6 @@ def process_image_and_text(condition_image, target_prompt, condition_image_promp
 
         transformer.requires_grad_(False)
 
-        pipe = HunyuanVideoImageToVideoPipeline(
-            text_encoder=text_encoder,
-            tokenizer=tokenizer,
-            transformer=transformer,
-            vae=vae,
-            scheduler=copy.deepcopy(scheduler),
-            text_encoder_2=text_encoder_2,
-            tokenizer_2=tokenizer_2,
-            image_processor=image_processor,
-        )
-
     # start generation
     c_txt = None if condition_image_prompt == "" else condition_image_prompt
     c_img = condition_image.resize((512, 512))
```
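What stays inside the handler is the per-task LoRA step (the `lora_config = LoraConfig(r=16, ...)` visible in the diff). A hedged sketch of how that step can attach adapter weights with peft; `lora_alpha`, `target_modules`, `TASK_LORA_PATHS`, and the `attach_task_lora` helper are illustrative assumptions, not the app's actual code:

```python
from peft import LoraConfig, inject_adapter_in_model
from safetensors.torch import load_file

# Hypothetical mapping from task name to adapter checkpoint path.
TASK_LORA_PATHS = {"subject_driven": "loras/subject_driven.safetensors"}

def attach_task_lora(transformer, task):
    lora_config = LoraConfig(
        r=16,                                                 # rank, as in the diff
        lora_alpha=16,                                        # assumed
        target_modules=["to_q", "to_k", "to_v", "to_out.0"],  # assumed
    )
    # Wrap the matching linear layers with LoRA adapters in place.
    transformer = inject_adapter_in_model(lora_config, transformer)
    # Load only the small adapter weights; the frozen base weights are
    # already on the device from init_basemodel().
    state_dict = load_file(TASK_LORA_PATHS[task])
    transformer.load_state_dict(state_dict, strict=False)
    return transformer
```

Keeping only this step in the `@spaces.GPU` call is the point of the fix: on ZeroGPU, everything inside the decorated function runs within the temporarily attached GPU slot, so the less setup it repeats per request, the faster each call starts.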
|