# t5-base-fa / model_config.yaml
# arxyzan's picture
# Update model_config.yaml
# b1ad6b7
# raw
# history blame
# 475 Bytes
---
# Model configuration for the entry named `t5_text_generation`.
# NOTE(review): the key names look like a T5-style encoder-decoder setup; the
# exact meaning of each key is defined by the consuming library — confirm there.

# Identity of this config entry.
name: t5_text_generation
config_type: model

# Size settings. d_kv * num_heads (64 * 12) equals d_model (768).
vocab_size: 32103
d_model: 768
d_kv: 64
num_heads: 12
d_ff: 2048
num_layers: 12
num_decoder_layers: 12

# Relative-attention settings.
relative_attention_num_buckets: 32
relative_attention_max_distance: 128

# Numeric hyperparameters (all floats).
dropout_rate: 0.1
initializer_factor: 1.0
layer_norm_epsilon: 1.0e-06

# Architecture switches.
feed_forward_proj: gated-gelu
is_encoder_decoder: true
tie_word_embeddings: false
use_cache: true

# Special token ids. The pad id and the decoder start id are both 0.
pad_token_id: 0
decoder_start_token_id: 0
eos_token_id: 1

# Length bounds.
# NOTE(review): presumably applied to generated output — verify against caller.
min_length: 0
max_length: 100