# Model configuration for `t5_text_generation`.
# NOTE(review): the hyperparameter names below match Hugging Face `T5Config`
# fields; `name` and `config_type` look loader-specific — confirm against the
# code that consumes this file.
name: t5_text_generation
config_type: model

# --- Architecture ---
# NOTE(review): 32103 differs from the stock T5Config default (32128);
# presumably sized to a custom tokenizer — confirm it matches the tokenizer in use.
vocab_size: 32103
d_model: 768
# d_kv * num_heads = 64 * 12 = 768, i.e. equal to d_model.
d_kv: 64
d_ff: 2048
num_layers: 12
num_decoder_layers: 12
num_heads: 12
relative_attention_num_buckets: 32
relative_attention_max_distance: 128

# --- Regularisation / initialisation ---
dropout_rate: 0.1
layer_norm_epsilon: 1.0e-06
initializer_factor: 1.0

# --- Model behaviour ---
feed_forward_proj: gated-gelu
is_encoder_decoder: true
tie_word_embeddings: false
use_cache: true

# --- Special token ids ---
# decoder_start_token_id is set to the same id as pad_token_id (both 0).
pad_token_id: 0
decoder_start_token_id: 0
eos_token_id: 1

# --- Generation defaults ---
# NOTE(review): presumably output-length bounds counted in tokens — confirm
# with the generation code that reads these.
min_length: 0
max_length: 100