# TensorRT engine wrappers for Stable Diffusion UNet and VAE (diffusers-compatible interfaces).
from typing import *
import torch
from diffusers.models.autoencoder_tiny import AutoencoderTinyOutput
from diffusers.models.unet_2d_condition import UNet2DConditionOutput
from diffusers.models.vae import DecoderOutput
from polygraphy import cuda
from .utilities import Engine
class UNet2DConditionModelEngine:
    """TensorRT-backed drop-in replacement for a diffusers ``UNet2DConditionModel``.

    Loads and activates a serialized engine at construction time; calling the
    instance runs inference and wraps the result in ``UNet2DConditionOutput``.
    """

    def __init__(self, filepath: str, stream: cuda.Stream, use_cuda_graph: bool = False):
        self.engine = Engine(filepath)
        self.stream = stream
        self.use_cuda_graph = use_cuda_graph
        # Deserialize and activate the TensorRT engine eagerly so the first
        # __call__ only pays for buffer allocation and inference.
        self.engine.load()
        self.engine.activate()

    def __call__(
        self,
        latent_model_input: torch.Tensor,
        timestep: torch.Tensor,
        encoder_hidden_states: torch.Tensor,
        **kwargs,
    ) -> Any:
        # The engine's "timestep" binding is float32; cast anything else.
        timestep = timestep if timestep.dtype == torch.float32 else timestep.float()

        shapes = {
            "sample": latent_model_input.shape,
            "timestep": timestep.shape,
            "encoder_hidden_states": encoder_hidden_states.shape,
            # Output buffer ("latent") has the same shape as the input sample.
            "latent": latent_model_input.shape,
        }
        self.engine.allocate_buffers(shape_dict=shapes, device=latent_model_input.device)

        feed_dict = {
            "sample": latent_model_input,
            "timestep": timestep,
            "encoder_hidden_states": encoder_hidden_states,
        }
        outputs = self.engine.infer(
            feed_dict,
            self.stream,
            use_cuda_graph=self.use_cuda_graph,
        )
        return UNet2DConditionOutput(sample=outputs["latent"])

    def to(self, *args, **kwargs):
        # Device placement is owned by the TensorRT engine; ignore pipeline .to() calls.
        pass

    def forward(self, *args, **kwargs):
        # Intentional no-op stub; use __call__ for inference.
        pass
class AutoencoderKLEngine:
    """TensorRT-backed VAE replacement exposing diffusers-style ``encode``/``decode``.

    Parameters
    ----------
    encoder_path, decoder_path:
        Paths to the serialized TensorRT encoder/decoder engines.
    stream:
        CUDA stream used for inference.
    scaling_factor:
        Spatial down/upsampling factor between image and latent space
        (e.g. 8 for the standard Stable Diffusion VAE).
    use_cuda_graph:
        Forwarded to ``Engine.infer`` to enable CUDA-graph capture.
    latent_channels, image_channels:
        Channel counts used to size the output buffers. Defaults (4, 3)
        match the standard Stable Diffusion VAE, preserving previous
        hard-coded behavior.
    """

    def __init__(
        self,
        encoder_path: str,
        decoder_path: str,
        stream: cuda.Stream,
        scaling_factor: int,
        use_cuda_graph: bool = False,
        latent_channels: int = 4,
        image_channels: int = 3,
    ):
        self.encoder = Engine(encoder_path)
        self.decoder = Engine(decoder_path)
        self.stream = stream
        self.vae_scale_factor = scaling_factor
        self.use_cuda_graph = use_cuda_graph
        self.latent_channels = latent_channels
        self.image_channels = image_channels
        # Deserialize and activate both engines eagerly.
        self.encoder.load()
        self.decoder.load()
        self.encoder.activate()
        self.decoder.activate()

    def encode(self, images: torch.Tensor, **kwargs):
        """Encode NCHW images to latents; returns ``AutoencoderTinyOutput``."""
        self.encoder.allocate_buffers(
            shape_dict={
                "images": images.shape,
                # Latents are spatially downscaled by the VAE scale factor.
                "latent": (
                    images.shape[0],
                    self.latent_channels,
                    images.shape[2] // self.vae_scale_factor,
                    images.shape[3] // self.vae_scale_factor,
                ),
            },
            device=images.device,
        )
        latents = self.encoder.infer(
            {"images": images},
            self.stream,
            use_cuda_graph=self.use_cuda_graph,
        )["latent"]
        return AutoencoderTinyOutput(latents=latents)

    def decode(self, latent: torch.Tensor, **kwargs):
        """Decode NCHW latents to images; returns ``DecoderOutput``."""
        self.decoder.allocate_buffers(
            shape_dict={
                "latent": latent.shape,
                # Images are spatially upscaled by the VAE scale factor.
                "images": (
                    latent.shape[0],
                    self.image_channels,
                    latent.shape[2] * self.vae_scale_factor,
                    latent.shape[3] * self.vae_scale_factor,
                ),
            },
            device=latent.device,
        )
        images = self.decoder.infer(
            {"latent": latent},
            self.stream,
            use_cuda_graph=self.use_cuda_graph,
        )["images"]
        return DecoderOutput(sample=images)

    def to(self, *args, **kwargs):
        # Device placement is owned by the TensorRT engines; ignore .to() calls.
        pass

    def forward(self, *args, **kwargs):
        # Intentional no-op stub; use encode()/decode() instead.
        pass