{ "_class_name": "AutoencoderDC", "_diffusers_version": "0.32.0.dev0", "attention_head_dim": 32, "decoder_act_fns": "silu", "decoder_block_out_channels": [ 128, 256, 512, 512, 1024, 1024 ], "decoder_block_types": [ "ResBlock", "ResBlock", "ResBlock", "EfficientViTBlock", "EfficientViTBlock", "EfficientViTBlock" ], "decoder_layers_per_block": [ 3, 3, 3, 3, 3, 3 ], "decoder_norm_types": "rms_norm", "decoder_qkv_multiscales": [ [], [], [], [ 5 ], [ 5 ], [ 5 ] ], "downsample_block_type": "Conv", "encoder_block_out_channels": [ 128, 256, 512, 512, 1024, 1024 ], "encoder_block_types": [ "ResBlock", "ResBlock", "ResBlock", "EfficientViTBlock", "EfficientViTBlock", "EfficientViTBlock" ], "encoder_layers_per_block": [ 2, 2, 2, 3, 3, 3 ], "encoder_qkv_multiscales": [ [], [], [], [ 5 ], [ 5 ], [ 5 ] ], "in_channels": 3, "latent_channels": 32, "scaling_factor": 0.41407, "upsample_block_type": "interpolate" }