caohy666 committed on
Commit
b187df6
·
1 Parent(s): 734f79a

<feat> lower RAM usage when loading models

Browse files
Files changed (1) hide show
  1. app.py +8 -4
app.py CHANGED
@@ -59,23 +59,27 @@ def init_basemodel():
59
  transformer = HunyuanVideoTransformer3DModel.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
60
  subfolder="transformer",
61
  inference_subject_driven=False,
62
- low_cpu_mem_usage=True).requires_grad_(False).to(device, dtype=weight_dtype)
 
63
  torch.cuda.empty_cache()
64
  gc.collect()
65
  scheduler = diffusers.FlowMatchEulerDiscreteScheduler()
66
  vae = diffusers.AutoencoderKLHunyuanVideo.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
67
  subfolder="vae",
68
- low_cpu_mem_usage=True).requires_grad_(False).to(device, dtype=weight_dtype)
 
69
  torch.cuda.empty_cache()
70
  gc.collect()
71
  text_encoder = transformers.LlavaForConditionalGeneration.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
72
  subfolder="text_encoder",
73
- low_cpu_mem_usage=True).requires_grad_(False).to(device, dtype=weight_dtype)
 
74
  torch.cuda.empty_cache()
75
  gc.collect()
76
  text_encoder_2 = transformers.CLIPTextModel.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
77
  subfolder="text_encoder_2",
78
- low_cpu_mem_usage=True).requires_grad_(False).to(device, dtype=weight_dtype)
 
79
  torch.cuda.empty_cache()
80
  gc.collect()
81
  tokenizer = transformers.AutoTokenizer.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
 
59
  transformer = HunyuanVideoTransformer3DModel.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
60
  subfolder="transformer",
61
  inference_subject_driven=False,
62
+ low_cpu_mem_usage=True,
63
+ torch_dtype=weight_dtype).requires_grad_(False).to(device)
64
  torch.cuda.empty_cache()
65
  gc.collect()
66
  scheduler = diffusers.FlowMatchEulerDiscreteScheduler()
67
  vae = diffusers.AutoencoderKLHunyuanVideo.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
68
  subfolder="vae",
69
+ low_cpu_mem_usage=True,
70
+ torch_dtype=weight_dtype).requires_grad_(False).to(device)
71
  torch.cuda.empty_cache()
72
  gc.collect()
73
  text_encoder = transformers.LlavaForConditionalGeneration.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
74
  subfolder="text_encoder",
75
+ low_cpu_mem_usage=True,
76
+ torch_dtype=weight_dtype).requires_grad_(False).to(device)
77
  torch.cuda.empty_cache()
78
  gc.collect()
79
  text_encoder_2 = transformers.CLIPTextModel.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',
80
  subfolder="text_encoder_2",
81
+ low_cpu_mem_usage=True,
82
+ torch_dtype=weight_dtype).requires_grad_(False).to(device)
83
  torch.cuda.empty_cache()
84
  gc.collect()
85
  tokenizer = transformers.AutoTokenizer.from_pretrained('hunyuanvideo-community/HunyuanVideo-I2V',