thaicapgen-clip-wangchan / configuration_clipcap.py
from transformers import AutoConfig, PretrainedConfig


class CLIPEncoderDecoderConfig(PretrainedConfig):
    model_type = "clip-encoder-decoder"

    def __init__(self, **kwargs):
        # Pop the nested sub-configs before the parent constructor sets the
        # remaining kwargs as plain attributes.
        encoder_config = kwargs.pop("encoder", None)
        decoder_config = kwargs.pop("decoder", None)
        super().__init__(**kwargs)

        # Rebuild the sub-configs from their dicts when provided (e.g. via
        # `from_encoder_decoder_configs` or a reloaded saved config);
        # otherwise fall back to the default SwinV2 encoder and
        # WangchanBERTa decoder checkpoints.
        if isinstance(encoder_config, dict):
            self.encoder = AutoConfig.for_model(**encoder_config)
        else:
            self.encoder = encoder_config or AutoConfig.from_pretrained('microsoft/swinv2-large-patch4-window12to16-192to256-22kto1k-ft')
        if isinstance(decoder_config, dict):
            self.decoder = AutoConfig.for_model(**decoder_config)
        else:
            self.decoder = decoder_config or AutoConfig.from_pretrained('airesearch/wangchanberta-base-att-spm-uncased')
        self.is_encoder_decoder = True

    @classmethod
    def from_encoder_decoder_configs(
        cls, encoder_config: PretrainedConfig, decoder_config: PretrainedConfig, **kwargs
    ) -> PretrainedConfig:
        r"""
        Instantiate a [`CLIPEncoderDecoderConfig`] (or a derived class) from a pre-trained encoder model
        configuration and decoder model configuration.

        Returns:
            [`CLIPEncoderDecoderConfig`]: An instance of a configuration object
        """
        # The decoder attends to the encoder's hidden states via cross-attention.
        decoder_config.is_decoder = True
        decoder_config.add_cross_attention = True
        return cls(encoder=encoder_config.to_dict(), decoder=decoder_config.to_dict(), **kwargs)
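

# Minimal usage sketch: build the combined config from explicit encoder and
# decoder configs (the same checkpoints hard-coded as defaults in `__init__`)
# via `from_encoder_decoder_configs`, then inspect the resulting sub-configs.
# The `__main__` guard and the printed fields below are illustrative only.
if __name__ == "__main__":
    encoder_cfg = AutoConfig.from_pretrained('microsoft/swinv2-large-patch4-window12to16-192to256-22kto1k-ft')
    decoder_cfg = AutoConfig.from_pretrained('airesearch/wangchanberta-base-att-spm-uncased')
    config = CLIPEncoderDecoderConfig.from_encoder_decoder_configs(encoder_cfg, decoder_cfg)
    # The decoder sub-config keeps the flags set by `from_encoder_decoder_configs`.
    print(config.encoder.model_type, config.decoder.model_type, config.decoder.add_cross_attention)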