t5-large-nl36-finnish / large_nl36.gin
aapot
Add 150k train step and HF flax model
c262fb6
raw
history blame contribute delete
731 Bytes
# T5.1.1 Efficient large nl36 model.
import seqio
include 't5x/examples/t5/t5_1_1/base.gin' # imports vocab, optimizer and model.
# ------------------- Network specification overrides --------------------------
# The Transformer is configured from a T5Config instance. Each T5Config field
# is overridden below with a fully-qualified binding, so the result does not
# depend on the indentation of a `network.T5Config:` block.
network.Transformer.config = @network.T5Config()
# Embedding width and attention geometry.
network.T5Config.emb_dim = 1024
network.T5Config.num_heads = 16
network.T5Config.head_dim = 64
# Stack depth: 36 layers on both the encoder and the decoder side ("nl36").
network.T5Config.num_encoder_layers = 36
network.T5Config.num_decoder_layers = 36
# Feed-forward (MLP) hidden width.
network.T5Config.mlp_dim = 4096
# ------------------- Model specification overrides --------------------------
# The vocabulary is a SentencePiece vocabulary read from "spiece.model"
# (relative path). The VOCABULARY macro is then bound to that configurable.
seqio.SentencePieceVocabulary.sentencepiece_model_file = "spiece.model"
VOCABULARY = @seqio.SentencePieceVocabulary()
# MODEL is an EncoderDecoderModel that uses the same vocabulary for inputs and
# outputs. NOTE(review): `models` is not imported in this file; presumably it
# is brought into scope by the included base.gin — confirm.
MODEL = @models.EncoderDecoderModel()
models.EncoderDecoderModel.input_vocabulary = %VOCABULARY
models.EncoderDecoderModel.output_vocabulary = %VOCABULARY