from transformers import PretrainedConfig


class CSDConfig(PretrainedConfig):
    """Configuration object for CSD that records ViT hyperparameters.

    Each constructor argument is stored unchanged as an instance attribute
    of the same name. Any extra keyword arguments are passed through to
    ``PretrainedConfig.__init__``.

    NOTE(review): the per-argument meanings below are inferred from the
    parameter names only; confirm them against the model code that
    consumes this config.

    Args:
        vit_input_resolution: Presumably the input image side length in
            pixels. Defaults to 224.
        vit_patch_size: Presumably the side length of one image patch.
            Defaults to 14.
        vit_width: Presumably the transformer hidden width. Defaults to 1024.
        vit_layers: Presumably the number of transformer layers.
            Defaults to 24.
        vit_heads: Presumably the number of attention heads. Defaults to 16.
        vit_output_dim: Presumably the dimensionality of the output
            embedding. Defaults to 768.
        **kwargs: Forwarded verbatim to ``PretrainedConfig.__init__``.
    """

    def __init__(
        self,
        vit_input_resolution: int = 224,
        vit_patch_size: int = 14,
        vit_width: int = 1024,
        vit_layers: int = 24,
        vit_heads: int = 16,
        vit_output_dim: int = 768,
        **kwargs
    ) -> None:
        # Let the base class consume the remaining keyword arguments first,
        # then record the ViT-specific values on the instance.
        super().__init__(**kwargs)

        # Input geometry.
        self.vit_input_resolution = vit_input_resolution
        self.vit_patch_size = vit_patch_size

        # Transformer sizing.
        self.vit_width = vit_width
        self.vit_layers = vit_layers
        self.vit_heads = vit_heads

        # Output size.
        self.vit_output_dim = vit_output_dim