t5-small-nl16-finnish / model-info.txt
aapot
Add 50k train step model
7460603
raw
history blame
26.1 kB
Variable decoder/decoder/encoder_decoder_attention/key/kernel size 4194304 shape (embed=512, layers=16, heads=8, kv=64) partition spec (None, None, 'model', None)
Variable decoder/decoder/encoder_decoder_attention/out/kernel size 4194304 shape (heads=8, layers=16, kv=64, embed=512) partition spec ('model', None, None, None)
Variable decoder/decoder/encoder_decoder_attention/query/kernel size 4194304 shape (embed=512, layers=16, heads=8, kv=64) partition spec (None, None, 'model', None)
Variable decoder/decoder/encoder_decoder_attention/value/kernel size 4194304 shape (embed=512, layers=16, heads=8, kv=64) partition spec (None, None, 'model', None)
Variable decoder/decoder/mlp/wi_0/kernel size 16777216 shape (embed=512, layers=16, mlp=2048) partition spec (None, None, 'model')
Variable decoder/decoder/mlp/wi_1/kernel size 16777216 shape (embed=512, layers=16, mlp=2048) partition spec (None, None, 'model')
Variable decoder/decoder/mlp/wo/kernel size 16777216 shape (mlp=2048, layers=16, embed=512) partition spec ('model', None, None)
Variable decoder/decoder/pre_cross_attention_layer_norm/scale size 8192 shape (embed=512, layers=16) partition spec (None, None)
Variable decoder/decoder/pre_mlp_layer_norm/scale size 8192 shape (embed=512, layers=16) partition spec (None, None)
Variable decoder/decoder/pre_self_attention_layer_norm/scale size 8192 shape (embed=512, layers=16) partition spec (None, None)
Variable decoder/decoder/relpos_bias/rel_embedding size 4096 shape (heads=8, layers=16, relpos_buckets=32) partition spec ('model', None, None)
Variable decoder/decoder/self_attention/key/kernel size 4194304 shape (embed=512, layers=16, heads=8, kv=64) partition spec (None, None, 'model', None)
Variable decoder/decoder/self_attention/out/kernel size 4194304 shape (heads=8, layers=16, kv=64, embed=512) partition spec ('model', None, None, None)
Variable decoder/decoder/self_attention/query/kernel size 4194304 shape (embed=512, layers=16, heads=8, kv=64) partition spec (None, None, 'model', None)
Variable decoder/decoder/self_attention/value/kernel size 4194304 shape (embed=512, layers=16, heads=8, kv=64) partition spec (None, None, 'model', None)
Variable decoder/decoder_norm/scale size 512 shape (embed=512) partition spec (None,)
Variable decoder/logits_dense/kernel size 16449536 shape (embed=512, vocab=32128) partition spec (None, 'model')
Variable encoder/encoder/attention/key/kernel size 4194304 shape (embed=512, layers=16, heads=8, kv=64) partition spec (None, None, 'model', None)
Variable encoder/encoder/attention/out/kernel size 4194304 shape (heads=8, layers=16, kv=64, embed=512) partition spec ('model', None, None, None)
Variable encoder/encoder/attention/query/kernel size 4194304 shape (embed=512, layers=16, heads=8, kv=64) partition spec (None, None, 'model', None)
Variable encoder/encoder/attention/value/kernel size 4194304 shape (embed=512, layers=16, heads=8, kv=64) partition spec (None, None, 'model', None)
Variable encoder/encoder/mlp/wi_0/kernel size 16777216 shape (embed=512, layers=16, mlp=2048) partition spec (None, None, 'model')
Variable encoder/encoder/mlp/wi_1/kernel size 16777216 shape (embed=512, layers=16, mlp=2048) partition spec (None, None, 'model')
Variable encoder/encoder/mlp/wo/kernel size 16777216 shape (mlp=2048, layers=16, embed=512) partition spec ('model', None, None)
Variable encoder/encoder/pre_attention_layer_norm/scale size 8192 shape (embed=512, layers=16) partition spec (None, None)
Variable encoder/encoder/pre_mlp_layer_norm/scale size 8192 shape (embed=512, layers=16) partition spec (None, None)
Variable encoder/encoder/relpos_bias/rel_embedding size 4096 shape (heads=8, layers=16, relpos_buckets=32) partition spec ('model', None, None)
Variable encoder/encoder_norm/scale size 512 shape (embed=512) partition spec (None,)
Variable token_embedder/embedding size 16449536 shape (vocab=32128, embed=512) partition spec ('model', None)
Total number of parameters: 183944192
Variable param_states/decoder/decoder/encoder_decoder_attention/key/kernel/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/key/kernel/v size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/key/kernel/v_col size 8192 shape (16, 8, 64) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/key/kernel/v_row size 8192 shape (512, 16) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/out/kernel/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/out/kernel/v size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/out/kernel/v_col size 8192 shape (8, 16, 64) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/out/kernel/v_row size 8192 shape (16, 512) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/query/kernel/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/query/kernel/v size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/query/kernel/v_col size 8192 shape (16, 8, 64) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/query/kernel/v_row size 8192 shape (512, 16) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/value/kernel/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/value/kernel/v size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/value/kernel/v_col size 8192 shape (16, 8, 64) partition spec None
Variable param_states/decoder/decoder/encoder_decoder_attention/value/kernel/v_row size 8192 shape (512, 16) partition spec None
Variable param_states/decoder/decoder/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/mlp/wi_0/kernel/v_col size 32768 shape (16, 2048) partition spec None
Variable param_states/decoder/decoder/mlp/wi_0/kernel/v_row size 8192 shape (512, 16) partition spec None
Variable param_states/decoder/decoder/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/mlp/wi_1/kernel/v_col size 32768 shape (16, 2048) partition spec None
Variable param_states/decoder/decoder/mlp/wi_1/kernel/v_row size 8192 shape (512, 16) partition spec None
Variable param_states/decoder/decoder/mlp/wo/kernel/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/mlp/wo/kernel/v size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/mlp/wo/kernel/v_col size 32768 shape (2048, 16) partition spec None
Variable param_states/decoder/decoder/mlp/wo/kernel/v_row size 8192 shape (16, 512) partition spec None
Variable param_states/decoder/decoder/pre_cross_attention_layer_norm/scale/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/pre_cross_attention_layer_norm/scale/v size 8192 shape (embed=512, layers=16) partition spec (None, None)
Variable param_states/decoder/decoder/pre_cross_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/pre_cross_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/pre_mlp_layer_norm/scale/v size 8192 shape (embed=512, layers=16) partition spec (None, None)
Variable param_states/decoder/decoder/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/pre_self_attention_layer_norm/scale/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/pre_self_attention_layer_norm/scale/v size 8192 shape (embed=512, layers=16) partition spec (None, None)
Variable param_states/decoder/decoder/pre_self_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/pre_self_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/relpos_bias/rel_embedding/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/relpos_bias/rel_embedding/v size 4096 shape (heads=8, layers=16, relpos_buckets=32) partition spec ('model', None, None)
Variable param_states/decoder/decoder/relpos_bias/rel_embedding/v_col size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/relpos_bias/rel_embedding/v_row size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/self_attention/key/kernel/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/self_attention/key/kernel/v size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/self_attention/key/kernel/v_col size 8192 shape (16, 8, 64) partition spec None
Variable param_states/decoder/decoder/self_attention/key/kernel/v_row size 8192 shape (512, 16) partition spec None
Variable param_states/decoder/decoder/self_attention/out/kernel/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/self_attention/out/kernel/v size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/self_attention/out/kernel/v_col size 8192 shape (8, 16, 64) partition spec None
Variable param_states/decoder/decoder/self_attention/out/kernel/v_row size 8192 shape (16, 512) partition spec None
Variable param_states/decoder/decoder/self_attention/query/kernel/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/self_attention/query/kernel/v size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/self_attention/query/kernel/v_col size 8192 shape (16, 8, 64) partition spec None
Variable param_states/decoder/decoder/self_attention/query/kernel/v_row size 8192 shape (512, 16) partition spec None
Variable param_states/decoder/decoder/self_attention/value/kernel/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/self_attention/value/kernel/v size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder/self_attention/value/kernel/v_col size 8192 shape (16, 8, 64) partition spec None
Variable param_states/decoder/decoder/self_attention/value/kernel/v_row size 8192 shape (512, 16) partition spec None
Variable param_states/decoder/decoder_norm/scale/m size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder_norm/scale/v size 512 shape (embed=512) partition spec (None,)
Variable param_states/decoder/decoder_norm/scale/v_col size 1 shape (1,) partition spec None
Variable param_states/decoder/decoder_norm/scale/v_row size 1 shape (1,) partition spec None
Variable param_states/decoder/logits_dense/kernel/m size 1 shape (1,) partition spec None
Variable param_states/decoder/logits_dense/kernel/v size 1 shape (1,) partition spec None
Variable param_states/decoder/logits_dense/kernel/v_col size 32128 shape (32128,) partition spec None
Variable param_states/decoder/logits_dense/kernel/v_row size 512 shape (512,) partition spec None
Variable param_states/encoder/encoder/attention/key/kernel/m size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/attention/key/kernel/v size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/attention/key/kernel/v_col size 8192 shape (16, 8, 64) partition spec None
Variable param_states/encoder/encoder/attention/key/kernel/v_row size 8192 shape (512, 16) partition spec None
Variable param_states/encoder/encoder/attention/out/kernel/m size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/attention/out/kernel/v size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/attention/out/kernel/v_col size 8192 shape (8, 16, 64) partition spec None
Variable param_states/encoder/encoder/attention/out/kernel/v_row size 8192 shape (16, 512) partition spec None
Variable param_states/encoder/encoder/attention/query/kernel/m size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/attention/query/kernel/v size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/attention/query/kernel/v_col size 8192 shape (16, 8, 64) partition spec None
Variable param_states/encoder/encoder/attention/query/kernel/v_row size 8192 shape (512, 16) partition spec None
Variable param_states/encoder/encoder/attention/value/kernel/m size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/attention/value/kernel/v size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/attention/value/kernel/v_col size 8192 shape (16, 8, 64) partition spec None
Variable param_states/encoder/encoder/attention/value/kernel/v_row size 8192 shape (512, 16) partition spec None
Variable param_states/encoder/encoder/mlp/wi_0/kernel/m size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/mlp/wi_0/kernel/v size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/mlp/wi_0/kernel/v_col size 32768 shape (16, 2048) partition spec None
Variable param_states/encoder/encoder/mlp/wi_0/kernel/v_row size 8192 shape (512, 16) partition spec None
Variable param_states/encoder/encoder/mlp/wi_1/kernel/m size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/mlp/wi_1/kernel/v size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/mlp/wi_1/kernel/v_col size 32768 shape (16, 2048) partition spec None
Variable param_states/encoder/encoder/mlp/wi_1/kernel/v_row size 8192 shape (512, 16) partition spec None
Variable param_states/encoder/encoder/mlp/wo/kernel/m size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/mlp/wo/kernel/v size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/mlp/wo/kernel/v_col size 32768 shape (2048, 16) partition spec None
Variable param_states/encoder/encoder/mlp/wo/kernel/v_row size 8192 shape (16, 512) partition spec None
Variable param_states/encoder/encoder/pre_attention_layer_norm/scale/m size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/pre_attention_layer_norm/scale/v size 8192 shape (embed=512, layers=16) partition spec (None, None)
Variable param_states/encoder/encoder/pre_attention_layer_norm/scale/v_col size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/pre_attention_layer_norm/scale/v_row size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/pre_mlp_layer_norm/scale/m size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/pre_mlp_layer_norm/scale/v size 8192 shape (embed=512, layers=16) partition spec (None, None)
Variable param_states/encoder/encoder/pre_mlp_layer_norm/scale/v_col size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/pre_mlp_layer_norm/scale/v_row size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/relpos_bias/rel_embedding/m size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/relpos_bias/rel_embedding/v size 4096 shape (heads=8, layers=16, relpos_buckets=32) partition spec ('model', None, None)
Variable param_states/encoder/encoder/relpos_bias/rel_embedding/v_col size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder/relpos_bias/rel_embedding/v_row size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder_norm/scale/m size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder_norm/scale/v size 512 shape (embed=512) partition spec (None,)
Variable param_states/encoder/encoder_norm/scale/v_col size 1 shape (1,) partition spec None
Variable param_states/encoder/encoder_norm/scale/v_row size 1 shape (1,) partition spec None
Variable param_states/token_embedder/embedding/m size 1 shape (1,) partition spec None
Variable param_states/token_embedder/embedding/v size 1 shape (1,) partition spec None
Variable param_states/token_embedder/embedding/v_col size 32128 shape (32128,) partition spec None
Variable param_states/token_embedder/embedding/v_row size 512 shape (512,) partition spec None
Variable step size 1 shape () partition spec None