|
import torch |
|
import pytorch_lightning as pl |
|
|
|
from pixel_generator.mage.taming.modules.diffusionmodules.model import Encoder, Decoder |
|
from pixel_generator.mage.taming.modules.vqvae.quantize import VectorQuantizer2 as VectorQuantizer |
|
|
|
|
|
class VQModel(pl.LightningModule):
    """Vector-quantized autoencoder built from an encoder, a quantizer and a decoder.

    The module wires together three sub-modules:

    * ``self.encoder``  -- ``Encoder(**ddconfig)``
    * ``self.quantize`` -- ``VectorQuantizer(n_embed, embed_dim, ...)``
    * ``self.decoder``  -- ``Decoder(**ddconfig)``

    ``forward`` runs encode -> quantize -> decode and returns the decoder
    output together with the second value produced by the quantizer.
    """

    def __init__(self,
                 ddconfig,
                 n_embed,
                 embed_dim,
                 ckpt_path=None,
                 ignore_keys=(),
                 image_key="image",
                 colorize_nlabels=None,
                 monitor=None,
                 remap=None,
                 sane_index_shape=False,
                 ):
        """Build the sub-modules and optionally restore weights from a checkpoint.

        Args:
            ddconfig: Mapping of keyword arguments; unpacked into both
                ``Encoder`` and ``Decoder``.
            n_embed: First positional argument of ``VectorQuantizer``
                (presumably the codebook size -- confirm against the quantizer).
            embed_dim: Second positional argument of ``VectorQuantizer``
                (presumably the code dimensionality -- confirm).
            ckpt_path: If not ``None``, weights are loaded from this path via
                :meth:`init_from_ckpt` right after the sub-modules are built.
            ignore_keys: Iterable of key prefixes dropped from the checkpoint
                before loading. Only used when ``ckpt_path`` is given.
            image_key: Stored as ``self.image_key``; not read inside this class.
            colorize_nlabels: If not ``None``, an int ``n``; a random buffer
                named ``"colorize"`` of shape ``(3, n, 1, 1)`` is registered.
            monitor: If not ``None``, stored as ``self.monitor``. The attribute
                is not created otherwise.
            remap: Forwarded unchanged to ``VectorQuantizer``.
            sane_index_shape: Forwarded unchanged to ``VectorQuantizer``.
        """
        super().__init__()
        self.image_key = image_key
        self.encoder = Encoder(**ddconfig)
        self.decoder = Decoder(**ddconfig)
        self.quantize = VectorQuantizer(n_embed, embed_dim, beta=0.25,
                                        remap=remap, sane_index_shape=sane_index_shape)

        # The checkpoint is loaded strictly *before* the optional "colorize"
        # buffer exists, so that buffer must not appear in the checkpoint
        # (or must be covered by ``ignore_keys``).
        if ckpt_path is not None:
            self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys)

        if colorize_nlabels is not None:
            # Kept as an assertion so the raised exception type is unchanged.
            assert isinstance(colorize_nlabels, int)
            self.register_buffer("colorize", torch.randn(3, colorize_nlabels, 1, 1))

        if monitor is not None:
            self.monitor = monitor

    def init_from_ckpt(self, path, ignore_keys=()):
        """Load weights from ``path`` into this module with ``strict=True``.

        If the loaded object contains a ``"state_dict"`` entry, that entry is
        used as the state dict; otherwise the loaded object itself is used.
        Every key that starts with one of ``ignore_keys`` is removed (and
        reported on stdout) before loading.

        Args:
            path: Checkpoint location passed to ``torch.load``.
            ignore_keys: Iterable of string prefixes; matching keys are
                dropped from the state dict.

        Raises:
            RuntimeError: From ``load_state_dict`` when the remaining keys do
                not match this module exactly (strict loading).
        """
        # NOTE(security): torch.load unpickles the file. Only load checkpoints
        # from trusted sources; unpickling untrusted data can execute code.
        sd = torch.load(path, map_location="cpu")
        if "state_dict" in sd:
            sd = sd["state_dict"]

        # One tuple-based prefix test per key: a key matching several prefixes
        # is deleted exactly once (a nested loop would ``del`` it twice and
        # raise KeyError). An empty tuple matches nothing.
        prefixes = tuple(ignore_keys)
        for k in list(sd):  # snapshot keys; the dict is mutated below
            if k.startswith(prefixes):
                print("Deleting key {} from state_dict.".format(k))
                del sd[k]

        print("Strict load")
        self.load_state_dict(sd, strict=True)
        print(f"Restored from {path}")

    def encode(self, x):
        """Run ``x`` through the encoder and then the quantizer.

        Returns:
            The 3-tuple produced by ``self.quantize``, named here
            ``(quant, emb_loss, info)``.
        """
        h = self.encoder(x)
        quant, emb_loss, info = self.quantize(h)
        return quant, emb_loss, info

    def decode(self, quant):
        """Return the decoder output for the (quantized) latent ``quant``."""
        dec = self.decoder(quant)
        return dec

    def decode_code(self, code_b):
        """Decode from codes: look them up with ``quantize.embed_code``, then decode."""
        quant_b = self.quantize.embed_code(code_b)
        dec = self.decode(quant_b)
        return dec

    def forward(self, input):
        """Encode, quantize and decode ``input``.

        Returns:
            ``(dec, diff)`` -- the decoder output and the second value
            returned by :meth:`encode` (called ``emb_loss`` there).
        """
        # The parameter name ``input`` shadows the builtin but is part of the
        # public signature, so it is kept for keyword-call compatibility.
        quant, diff, _ = self.encode(input)
        dec = self.decode(quant)
        return dec, diff
|
|