|
import os |
|
import torch |
|
import onnx |
|
from pathlib import Path |
|
from diffusers import DiffusionPipeline, StableDiffusionPipeline |
|
import torch |
|
from utilities import load_calib_prompts |
|
from utilities import get_smoothquant_config |
|
import ammo.torch.quantization as atq |
|
import ammo.torch.opt as ato |
|
from utilities import filter_func, quantize_lvl |
|
|
|
|
|
|
|
|
|
|
|
|
|
# Load Stable Diffusion 1.5 weights in fp16 and move the whole pipeline to the
# GPU — calibration inference below runs on CUDA; the UNet is cast back to
# fp32/CPU later for the ONNX export.
pipeline = StableDiffusionPipeline.from_pretrained("wyyadd/sd-1.5", torch_dtype=torch.float16)

pipeline.to("cuda")

# Number of prompts fed through the pipeline per calibration step.
BATCH_SIZE = 4

# NOTE(review): presumably returns the prompt file grouped into
# BATCH_SIZE-sized lists of strings — confirm against utilities.py.
cali_prompts = load_calib_prompts(batch_size=BATCH_SIZE, calib_data_path="./calibration-prompts.txt")

# SmoothQuant (INT8) quantization config for the UNet; quant_level=3.0 here
# matches the quantize_lvl(..., quant_level=3.0) call applied after calibration.
quant_config = get_smoothquant_config(pipeline.unet, quant_level=3.0)
|
|
|
def do_calibrate(base, calibration_prompts, **kwargs):
    """Drive `base` over calibration prompt batches to collect activation stats.

    Iterates `calibration_prompts` (an iterable of prompt batches), printing
    each batch, and stops once ``kwargs["calib_size"]`` batches have been
    processed.  Each call runs ``kwargs["n_steps"]`` denoising steps with a
    fixed quality-filtering negative prompt repeated once per prompt in the
    batch.  Generated images are discarded; only the forward passes matter.
    """
    for batch_index, batch in enumerate(calibration_prompts):
        print(batch)
        if batch_index >= kwargs["calib_size"]:
            return
        negative = "normal quality, low quality, worst quality, low res, blurry, nsfw, nude"
        base(
            prompt=batch,
            num_inference_steps=kwargs["n_steps"],
            negative_prompt=[negative] * len(batch),
        ).images
|
|
|
def calibration_loop():
    """Forward loop handed to atq.quantize.

    Runs 384 calibration batches through the global `pipeline` at 50
    denoising steps each, via `do_calibrate`.
    """
    calib_kwargs = {"calib_size": 384, "n_steps": 50}
    do_calibrate(base=pipeline, calibration_prompts=cali_prompts, **calib_kwargs)
|
|
|
|
|
# Calibrate and quantize the UNet; atq.quantize invokes `calibration_loop`
# to feed sample activations through the model while observers record ranges.
quantized_model = atq.quantize(pipeline.unet, quant_config, forward_loop = calibration_loop)

# Checkpoint the quantized UNet (AMMO state) before any post-processing.
ato.save(quantized_model, 'base.unet15_2.int8.pt')

# Apply level-3.0 layer selection, then disable the quantizers matched by
# filter_func — both helpers come from the project-local utilities module.
quantize_lvl(quantized_model, quant_level=3.0)

atq.disable_quantizer(quantized_model, filter_func)

# Cast back to fp32 on CPU for the ONNX export below.
device1 = "cpu"

quantized_model = quantized_model.to(torch.float32).to(device1)
|
|
|
|
|
# Dummy inputs for tracing the UNet during ONNX export, on the CPU device
# chosen above.
# NOTE(review): the latent is 128x128 here; SD 1.5's default latent is 64x64
# (512px images) — confirm the larger spatial size is intentional.
_opts = {"dtype": torch.float32, "device": device1}
sample = torch.randn(1, 4, 128, 128, **_opts)
timestep = torch.rand(1, **_opts)
encoder_hidden_state = torch.randn(1, 77, 768, **_opts)
|
|
|
# `onnx` and `pathlib.Path` are already imported at the top of the file; the
# redundant re-imports that previously sat here have been removed.

# Directory that will hold the exported ONNX UNet.
output_path = Path('/home/tiennv/trang/Convert-_Unet_int8_Rebuild/Diffusion/onnx_unet15')
output_path.mkdir(parents=True, exist_ok=True)

# Positional inputs in the order the UNet forward expects:
# (sample, timestep, encoder_hidden_states).
dummy_inputs = (sample, timestep, encoder_hidden_state)

onnx_output_path = output_path / "unet" / "model.onnx"
onnx_output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Export the quantized UNet to ONNX (opset 18). Input names mirror the UNet
# forward signature: latent sample, timestep, text-encoder hidden states.
# NOTE(review): "timestep" is given no dynamic axes even though "sample" has a
# dynamic batch axis — confirm batched inference works, or add {0: "B"} for it.
torch.onnx.export(
    quantized_model,
    dummy_inputs,
    str(onnx_output_path),
    export_params=True,  # embed weights in the exported graph
    opset_version=18,
    do_constant_folding=True,
    input_names=['sample', 'timestep', 'encoder_hidden_state'],
    output_names=['predict_noise'],
    # B = batch, W/H = latent spatial dims, S = text sequence length,
    # D = text embedding dim.
    dynamic_axes={
        "sample": {0: "B", 2: "W", 3: 'H'},
        "encoder_hidden_state": {0: "B", 1: "S", 2: 'D'},
        "predict_noise": {0: 'B', 2: "W", 3: 'H'}
    }
)
|
|
|
|
|
# Reload the exported graph and re-save it with every tensor gathered into a
# single external data file — the usual workaround when a model approaches or
# exceeds the 2 GB protobuf size limit.
unet_opt_graph = onnx.load(str(onnx_output_path))
unet_optimize_path = output_path / "unet_optimize"
unet_optimize_path.mkdir(parents=True, exist_ok=True)
unet_optimize_file = unet_optimize_path / "model.onnx"

onnx.save_model(
    unet_opt_graph,
    str(unet_optimize_file),
    save_as_external_data=True,
    all_tensors_to_one_file=True,
    location="weights.pb",  # weight file path, relative to the model file
)
|
|
|
|
|
|