"""Autoencoder configuration for Hugging Face Transformers."""

from typing import List, Optional

from transformers import PretrainedConfig


class AutoencoderConfig(PretrainedConfig):
    """
    Configuration class for Autoencoder models.

    This configuration class stores the configuration of an autoencoder model. It is
    used to instantiate an autoencoder model according to the specified arguments,
    defining the model architecture.

    Args:
        input_dim (int, optional): Dimensionality of the input data. Defaults to 784.
        hidden_dims (List[int], optional): List of hidden layer dimensions for the
            encoder. The decoder will use the reverse of this list. Defaults to
            [512, 256, 128].
        latent_dim (int, optional): Dimensionality of the latent space. Defaults to 64.
        activation (str, optional): Activation function to use. Options: "relu",
            "tanh", "sigmoid", "leaky_relu", "gelu", "swish", "silu", "elu", "prelu",
            "relu6", "hardtanh", "hardsigmoid", "hardswish", "mish", "softplus",
            "softsign", "tanhshrink", "threshold". Defaults to "relu".
        dropout_rate (float, optional): Dropout rate for regularization. Defaults to 0.1.
        use_batch_norm (bool, optional): Whether to use batch normalization.
            Defaults to True.
        tie_weights (bool, optional): Whether to tie encoder and decoder weights.
            Defaults to False.
        reconstruction_loss (str, optional): Type of reconstruction loss. Options:
            "mse", "bce", "l1", "huber", "smooth_l1", "kl_div", "cosine", "focal",
            "dice", "tversky", "ssim", "perceptual". Defaults to "mse".
        autoencoder_type (str, optional): Type of autoencoder architecture. Options:
            "classic", "variational", "beta_vae", "denoising", "sparse",
            "contractive", "recurrent". Defaults to "classic".
        beta (float, optional): Beta parameter for beta-VAE. Defaults to 1.0.
        temperature (float, optional): Temperature parameter for Gumbel softmax or
            other operations. Defaults to 1.0.
        noise_factor (float, optional): Noise factor for denoising autoencoders.
            Defaults to 0.1.
        rnn_type (str, optional): Type of RNN cell for recurrent autoencoders.
            Options: "lstm", "gru", "rnn". Defaults to "lstm".
        num_layers (int, optional): Number of RNN layers for recurrent autoencoders.
            Defaults to 2.
        bidirectional (bool, optional): Whether to use a bidirectional RNN for
            encoding. Defaults to True.
        sequence_length (int, optional): Fixed sequence length. If None,
            variable-length sequences are supported. Defaults to None.
        teacher_forcing_ratio (float, optional): Ratio of teacher forcing during
            training for recurrent decoders. Defaults to 0.5.
        use_learnable_preprocessing (bool, optional): Whether to use learnable
            preprocessing. Defaults to False.
        preprocessing_type (str, optional): Type of learnable preprocessing. Options:
            "none", "neural_scaler", "normalizing_flow", "minmax_scaler",
            "robust_scaler", "yeo_johnson". Defaults to "none".
        preprocessing_hidden_dim (int, optional): Hidden dimension for preprocessing
            networks. Defaults to 64.
        preprocessing_num_layers (int, optional): Number of layers in preprocessing
            networks. Defaults to 2.
        learn_inverse_preprocessing (bool, optional): Whether to learn inverse
            preprocessing for reconstruction. Defaults to True.
        flow_coupling_layers (int, optional): Number of coupling layers for
            normalizing flows. Defaults to 4.
        **kwargs: Additional keyword arguments passed to the parent class.
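
    Example:
        A minimal usage sketch (assumes `AutoencoderConfig` is importable from
        wherever this module lives in your package):

        >>> config = AutoencoderConfig(input_dim=784, hidden_dims=[256, 128], latent_dim=32)
        >>> config.decoder_dims
        [128, 256]
        >>> recurrent = AutoencoderConfig(autoencoder_type="recurrent", rnn_type="gru")
        >>> recurrent.is_recurrent
        True
        >>> scaled = AutoencoderConfig(use_learnable_preprocessing=True, preprocessing_type="neural_scaler")
        >>> scaled.has_preprocessing
        True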
    """

    model_type = "autoencoder"

    def __init__(
        self,
        input_dim: int = 784,
        hidden_dims: Optional[List[int]] = None,
        latent_dim: int = 64,
        activation: str = "relu",
        dropout_rate: float = 0.1,
        use_batch_norm: bool = True,
        tie_weights: bool = False,
        reconstruction_loss: str = "mse",
        autoencoder_type: str = "classic",
        beta: float = 1.0,
        temperature: float = 1.0,
        noise_factor: float = 0.1,
        # Recurrent autoencoder options.
        rnn_type: str = "lstm",
        num_layers: int = 2,
        bidirectional: bool = True,
        sequence_length: Optional[int] = None,
        teacher_forcing_ratio: float = 0.5,
        # Learnable preprocessing options.
        use_learnable_preprocessing: bool = False,
        preprocessing_type: str = "none",
        preprocessing_hidden_dim: int = 64,
        preprocessing_num_layers: int = 2,
        learn_inverse_preprocessing: bool = True,
        flow_coupling_layers: int = 4,
        **kwargs,
    ):
        # Fall back to the standard pyramid when no hidden dims are provided
        # (a mutable default in the signature would be shared across instances).
        if hidden_dims is None:
            hidden_dims = [512, 256, 128]

        valid_activations = [
            "relu", "tanh", "sigmoid", "leaky_relu", "gelu", "swish", "silu",
            "elu", "prelu", "relu6", "hardtanh", "hardsigmoid", "hardswish",
            "mish", "softplus", "softsign", "tanhshrink", "threshold",
        ]
        if activation not in valid_activations:
            raise ValueError(
                f"`activation` must be one of {valid_activations}, got {activation}."
            )

        valid_losses = [
            "mse", "bce", "l1", "huber", "smooth_l1", "kl_div", "cosine",
            "focal", "dice", "tversky", "ssim", "perceptual",
        ]
        if reconstruction_loss not in valid_losses:
            raise ValueError(
                f"`reconstruction_loss` must be one of {valid_losses}, got {reconstruction_loss}."
            )

        valid_types = [
            "classic", "variational", "beta_vae", "denoising",
            "sparse", "contractive", "recurrent",
        ]
        if autoencoder_type not in valid_types:
            raise ValueError(
                f"`autoencoder_type` must be one of {valid_types}, got {autoencoder_type}."
            )

        valid_rnn_types = ["lstm", "gru", "rnn"]
        if rnn_type not in valid_rnn_types:
            raise ValueError(
                f"`rnn_type` must be one of {valid_rnn_types}, got {rnn_type}."
            )

        if not (0.0 <= dropout_rate <= 1.0):
            raise ValueError(f"`dropout_rate` must be between 0.0 and 1.0, got {dropout_rate}.")

        if input_dim <= 0:
            raise ValueError(f"`input_dim` must be positive, got {input_dim}.")

        if latent_dim <= 0:
            raise ValueError(f"`latent_dim` must be positive, got {latent_dim}.")

        if not all(dim > 0 for dim in hidden_dims):
            raise ValueError("All dimensions in `hidden_dims` must be positive.")

        if beta <= 0:
            raise ValueError(f"`beta` must be positive, got {beta}.")

        if num_layers <= 0:
            raise ValueError(f"`num_layers` must be positive, got {num_layers}.")

        if not (0.0 <= teacher_forcing_ratio <= 1.0):
            raise ValueError(f"`teacher_forcing_ratio` must be between 0.0 and 1.0, got {teacher_forcing_ratio}.")

        if sequence_length is not None and sequence_length <= 0:
            raise ValueError(f"`sequence_length` must be positive when specified, got {sequence_length}.")

        valid_preprocessing = [
            "none",
            "neural_scaler",
            "normalizing_flow",
            "minmax_scaler",
            "robust_scaler",
            "yeo_johnson",
        ]
        if preprocessing_type not in valid_preprocessing:
            raise ValueError(
                f"`preprocessing_type` must be one of {valid_preprocessing}, got {preprocessing_type}."
            )

        if preprocessing_hidden_dim <= 0:
            raise ValueError(f"`preprocessing_hidden_dim` must be positive, got {preprocessing_hidden_dim}.")

        if preprocessing_num_layers <= 0:
            raise ValueError(f"`preprocessing_num_layers` must be positive, got {preprocessing_num_layers}.")

        if flow_coupling_layers <= 0:
            raise ValueError(f"`flow_coupling_layers` must be positive, got {flow_coupling_layers}.")

        self.input_dim = input_dim
        self.hidden_dims = hidden_dims
        self.latent_dim = latent_dim
        self.activation = activation
        self.dropout_rate = dropout_rate
        self.use_batch_norm = use_batch_norm
        self.tie_weights = tie_weights
        self.reconstruction_loss = reconstruction_loss
        self.autoencoder_type = autoencoder_type
        self.beta = beta
        self.temperature = temperature
        self.noise_factor = noise_factor
        self.rnn_type = rnn_type
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.sequence_length = sequence_length
        self.teacher_forcing_ratio = teacher_forcing_ratio
        self.use_learnable_preprocessing = use_learnable_preprocessing
        self.preprocessing_type = preprocessing_type
        self.preprocessing_hidden_dim = preprocessing_hidden_dim
        self.preprocessing_num_layers = preprocessing_num_layers
        self.learn_inverse_preprocessing = learn_inverse_preprocessing
        self.flow_coupling_layers = flow_coupling_layers

        super().__init__(**kwargs)

    @property
    def decoder_dims(self) -> List[int]:
        """Get decoder dimensions (reverse of encoder hidden dims)."""
        return list(reversed(self.hidden_dims))

    @property
    def is_variational(self) -> bool:
        """Check if this is a variational autoencoder."""
        return self.autoencoder_type in ["variational", "beta_vae"]

    @property
    def is_denoising(self) -> bool:
        """Check if this is a denoising autoencoder."""
        return self.autoencoder_type == "denoising"

    @property
    def is_sparse(self) -> bool:
        """Check if this is a sparse autoencoder."""
        return self.autoencoder_type == "sparse"

    @property
    def is_contractive(self) -> bool:
        """Check if this is a contractive autoencoder."""
        return self.autoencoder_type == "contractive"

    @property
    def is_recurrent(self) -> bool:
        """Check if this is a recurrent autoencoder."""
        return self.autoencoder_type == "recurrent"

    @property
    def rnn_hidden_size(self) -> int:
        """Get the RNN hidden size (same as `latent_dim` for recurrent autoencoders)."""
        return self.latent_dim

    @property
    def rnn_output_size(self) -> int:
        """Get the RNN output size, accounting for bidirectionality."""
        return self.latent_dim * (2 if self.bidirectional else 1)

    @property
    def has_preprocessing(self) -> bool:
        """Check if learnable preprocessing is enabled."""
        return self.use_learnable_preprocessing and self.preprocessing_type != "none"

    @property
    def is_neural_scaler(self) -> bool:
        """Check if using neural scaler preprocessing."""
        return self.preprocessing_type == "neural_scaler"

    @property
    def is_normalizing_flow(self) -> bool:
        """Check if using normalizing flow preprocessing."""
        return self.preprocessing_type == "normalizing_flow"

    @property
    def is_minmax_scaler(self) -> bool:
        """Check if using learnable MinMax scaler preprocessing."""
        return self.preprocessing_type == "minmax_scaler"

    @property
    def is_robust_scaler(self) -> bool:
        """Check if using learnable Robust scaler preprocessing."""
        return self.preprocessing_type == "robust_scaler"

    @property
    def is_yeo_johnson(self) -> bool:
        """Check if using learnable Yeo-Johnson power transform preprocessing."""
        return self.preprocessing_type == "yeo_johnson"

    def to_dict(self):
        """Serializes this instance to a Python dictionary."""
        return super().to_dict()
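

# A minimal usage sketch (illustrative only, not part of the module's API): build
# a config, read derived properties, trigger a validation error, and round-trip
# the config through `to_dict`/`from_dict`, both inherited from
# `transformers.PretrainedConfig`.
if __name__ == "__main__":
    config = AutoencoderConfig(input_dim=784, hidden_dims=[256, 128], latent_dim=32)
    print(config.decoder_dims)    # [128, 256]
    print(config.is_variational)  # False: the default type is "classic"

    # Invalid options are rejected at construction time.
    try:
        AutoencoderConfig(activation="not_an_activation")
    except ValueError as err:
        print(f"Rejected: {err}")

    # Round-trip through a plain dictionary.
    restored = AutoencoderConfig.from_dict(config.to_dict())
    assert restored.hidden_dims == config.hidden_dims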