Spaces: Running on Zero
<fix> move to gpu earlier.
app.py CHANGED
@@ -52,20 +52,30 @@ def init_basemodel():
     current_task = None
 
     # init models
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    weight_dtype = torch.bfloat16
     transformer = HunyuanVideoTransformer3DModel.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
                                                                  subfolder="transformer",
                                                                  inference_subject_driven=False,
-                                                                 low_cpu_mem_usage=True)
+                                                                 low_cpu_mem_usage=True).requires_grad_(False).to(device, dtype=weight_dtype)
+    torch.cuda.empty_cache()
+    gc.collect()
     scheduler = diffusers.FlowMatchEulerDiscreteScheduler()
     vae = diffusers.AutoencoderKLHunyuanVideo.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
                                                               subfolder="vae",
-                                                              low_cpu_mem_usage=True)
+                                                              low_cpu_mem_usage=True).requires_grad_(False).to(device, dtype=weight_dtype)
+    torch.cuda.empty_cache()
+    gc.collect()
     text_encoder = transformers.LlavaForConditionalGeneration.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
                                                                               subfolder="text_encoder",
-                                                                              low_cpu_mem_usage=True)
+                                                                              low_cpu_mem_usage=True).requires_grad_(False).to(device, dtype=weight_dtype)
+    torch.cuda.empty_cache()
+    gc.collect()
     text_encoder_2 = transformers.CLIPTextModel.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
                                                                 subfolder="text_encoder_2",
-                                                                low_cpu_mem_usage=True)
+                                                                low_cpu_mem_usage=True).requires_grad_(False).to(device, dtype=weight_dtype)
+    torch.cuda.empty_cache()
+    gc.collect()
     tokenizer = transformers.AutoTokenizer.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
                                                            subfolder="tokenizer")
     tokenizer_2 = transformers.CLIPTokenizer.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
@@ -73,14 +83,6 @@ def init_basemodel():
     image_processor = transformers.CLIPImageProcessor.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
                                                                       subfolder="image_processor")
 
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    weight_dtype = torch.bfloat16
-
-    transformer.requires_grad_(False)
-    vae.requires_grad_(False).to(device, dtype=weight_dtype)
-    text_encoder.requires_grad_(False).to(device, dtype=weight_dtype)
-    text_encoder_2.requires_grad_(False).to(device, dtype=weight_dtype)
-    transformer.to(device, dtype=weight_dtype)
     vae.enable_tiling()
     vae.enable_slicing()
 
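The whole change is one pattern applied component by component: instead of loading every checkpoint on the CPU and migrating the models to the GPU at the end of init_basemodel(), each model is frozen and moved to the GPU the moment it finishes loading, and transient CPU-side memory is reclaimed (torch.cuda.empty_cache() plus gc.collect()) before the next checkpoint is read — presumably capping peak host RAM at roughly one model's worth rather than all five. A minimal sketch of that pattern, assuming the same Hugging Face repo; the load_frozen helper is illustrative and not part of app.py:

import gc

import torch
from transformers import CLIPTextModel

def load_frozen(cls, repo, subfolder, device, dtype):
    # Hypothetical helper mirroring the commit's pattern: load one component,
    # freeze it, and move it to the target device immediately, then release
    # leftover CPU/GPU-cache memory before the next large checkpoint is read.
    model = cls.from_pretrained(repo, subfolder=subfolder, low_cpu_mem_usage=True)
    model = model.requires_grad_(False).to(device, dtype=dtype)
    torch.cuda.empty_cache()
    gc.collect()
    return model

device = "cuda" if torch.cuda.is_available() else "cpu"
text_encoder_2 = load_frozen(CLIPTextModel, 'hunyuanvideo-community/HunyuanVideo-I2V',
                             "text_encoder_2", device, torch.bfloat16)

Note that torch.cuda.empty_cache() only returns cached, unused GPU blocks to the driver, and gc.collect() sweeps lingering Python references; neither is strictly required for correctness, but both are cheap insurance between multi-gigabyte loads.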