Variable decoder/decoder_norm/scale                                                       size 1536         shape (embed=1536)                             partition spec (None,)
Variable decoder/layers_0/encoder_decoder_attention/key/kernel                            size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_0/encoder_decoder_attention/out/kernel                            size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable decoder/layers_0/encoder_decoder_attention/query/kernel                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_0/encoder_decoder_attention/value/kernel                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_0/mlp/wi_0/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable decoder/layers_0/mlp/wi_1/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable decoder/layers_0/mlp/wo/kernel                                                   size 6094848      shape (mlp=3968, embed=1536)                   partition spec ('model', None)
Variable decoder/layers_0/pre_cross_attention_layer_norm/scale                            size 1536         shape (embed=1536)                             partition spec (None,)
Variable decoder/layers_0/pre_mlp_layer_norm/scale                                        size 1536         shape (embed=1536)                             partition spec (None,)
Variable decoder/layers_0/pre_self_attention_layer_norm/scale                             size 1536         shape (embed=1536)                             partition spec (None,)
Variable decoder/layers_0/self_attention/key/kernel                                       size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_0/self_attention/out/kernel                                       size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable decoder/layers_0/self_attention/query/kernel                                     size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_0/self_attention/value/kernel                                     size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_1/encoder_decoder_attention/key/kernel                            size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_1/encoder_decoder_attention/out/kernel                            size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable decoder/layers_1/encoder_decoder_attention/query/kernel                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_1/encoder_decoder_attention/value/kernel                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_1/mlp/wi_0/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable decoder/layers_1/mlp/wi_1/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable decoder/layers_1/mlp/wo/kernel                                                   size 6094848      shape (mlp=3968, embed=1536)                   partition spec ('model', None)
Variable decoder/layers_1/pre_cross_attention_layer_norm/scale                            size 1536         shape (embed=1536)                             partition spec (None,)
Variable decoder/layers_1/pre_mlp_layer_norm/scale                                        size 1536         shape (embed=1536)                             partition spec (None,)
Variable decoder/layers_1/pre_self_attention_layer_norm/scale                             size 1536         shape (embed=1536)                             partition spec (None,)
Variable decoder/layers_1/self_attention/key/kernel                                       size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_1/self_attention/out/kernel                                       size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable decoder/layers_1/self_attention/query/kernel                                     size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_1/self_attention/value/kernel                                     size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_2/encoder_decoder_attention/key/kernel                            size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_2/encoder_decoder_attention/out/kernel                            size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable decoder/layers_2/encoder_decoder_attention/query/kernel                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_2/encoder_decoder_attention/value/kernel                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_2/mlp/wi_0/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable decoder/layers_2/mlp/wi_1/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable decoder/layers_2/mlp/wo/kernel                                                   size 6094848      shape (mlp=3968, embed=1536)                   partition spec ('model', None)
Variable decoder/layers_2/pre_cross_attention_layer_norm/scale                            size 1536         shape (embed=1536)                             partition spec (None,)
Variable decoder/layers_2/pre_mlp_layer_norm/scale                                        size 1536         shape (embed=1536)                             partition spec (None,)
Variable decoder/layers_2/pre_self_attention_layer_norm/scale                             size 1536         shape (embed=1536)                             partition spec (None,)
Variable decoder/layers_2/self_attention/key/kernel                                       size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_2/self_attention/out/kernel                                       size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable decoder/layers_2/self_attention/query/kernel                                     size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_2/self_attention/value/kernel                                     size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_3/encoder_decoder_attention/key/kernel                            size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_3/encoder_decoder_attention/out/kernel                            size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable decoder/layers_3/encoder_decoder_attention/query/kernel                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_3/encoder_decoder_attention/value/kernel                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_3/mlp/wi_0/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable decoder/layers_3/mlp/wi_1/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable decoder/layers_3/mlp/wo/kernel                                                   size 6094848      shape (mlp=3968, embed=1536)                   partition spec ('model', None)
Variable decoder/layers_3/pre_cross_attention_layer_norm/scale                            size 1536         shape (embed=1536)                             partition spec (None,)
Variable decoder/layers_3/pre_mlp_layer_norm/scale                                        size 1536         shape (embed=1536)                             partition spec (None,)
Variable decoder/layers_3/pre_self_attention_layer_norm/scale                             size 1536         shape (embed=1536)                             partition spec (None,)
Variable decoder/layers_3/self_attention/key/kernel                                       size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_3/self_attention/out/kernel                                       size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable decoder/layers_3/self_attention/query/kernel                                     size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_3/self_attention/value/kernel                                     size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_4/encoder_decoder_attention/key/kernel                            size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_4/encoder_decoder_attention/out/kernel                            size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable decoder/layers_4/encoder_decoder_attention/query/kernel                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_4/encoder_decoder_attention/value/kernel                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_4/mlp/wi_0/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable decoder/layers_4/mlp/wi_1/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable decoder/layers_4/mlp/wo/kernel                                                   size 6094848      shape (mlp=3968, embed=1536)                   partition spec ('model', None)
Variable decoder/layers_4/pre_cross_attention_layer_norm/scale                            size 1536         shape (embed=1536)                             partition spec (None,)
Variable decoder/layers_4/pre_mlp_layer_norm/scale                                        size 1536         shape (embed=1536)                             partition spec (None,)
Variable decoder/layers_4/pre_self_attention_layer_norm/scale                             size 1536         shape (embed=1536)                             partition spec (None,)
Variable decoder/layers_4/self_attention/key/kernel                                       size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_4/self_attention/out/kernel                                       size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable decoder/layers_4/self_attention/query/kernel                                     size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_4/self_attention/value/kernel                                     size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_5/encoder_decoder_attention/key/kernel                            size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_5/encoder_decoder_attention/out/kernel                            size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable decoder/layers_5/encoder_decoder_attention/query/kernel                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_5/encoder_decoder_attention/value/kernel                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_5/mlp/wi_0/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable decoder/layers_5/mlp/wi_1/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable decoder/layers_5/mlp/wo/kernel                                                   size 6094848      shape (mlp=3968, embed=1536)                   partition spec ('model', None)
Variable decoder/layers_5/pre_cross_attention_layer_norm/scale                            size 1536         shape (embed=1536)                             partition spec (None,)
Variable decoder/layers_5/pre_mlp_layer_norm/scale                                        size 1536         shape (embed=1536)                             partition spec (None,)
Variable decoder/layers_5/pre_self_attention_layer_norm/scale                             size 1536         shape (embed=1536)                             partition spec (None,)
Variable decoder/layers_5/self_attention/key/kernel                                       size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_5/self_attention/out/kernel                                       size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable decoder/layers_5/self_attention/query/kernel                                     size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/layers_5/self_attention/value/kernel                                     size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable decoder/logits_dense/kernel                                                      size 589824       shape (embed=1536, vocab=384)                  partition spec (None, 'model')
Variable decoder/relpos_bias/rel_embedding                                                size 384          shape (heads=12, relpos_buckets=32)            partition spec ('model', None)
Variable encoder/encoder_norm/scale                                                       size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_0/attention/key/kernel                                            size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_0/attention/out/kernel                                            size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable encoder/layers_0/attention/query/kernel                                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_0/attention/value/kernel                                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_0/mlp/wi_0/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_0/mlp/wi_1/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_0/mlp/wo/kernel                                                   size 6094848      shape (mlp=3968, embed=1536)                   partition spec ('model', None)
Variable encoder/layers_0/pre_attention_layer_norm/scale                                  size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_0/pre_mlp_layer_norm/scale                                        size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_1/attention/key/kernel                                            size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_1/attention/out/kernel                                            size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable encoder/layers_1/attention/query/kernel                                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_1/attention/value/kernel                                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_1/mlp/wi_0/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_1/mlp/wi_1/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_1/mlp/wo/kernel                                                   size 6094848      shape (mlp=3968, embed=1536)                   partition spec ('model', None)
Variable encoder/layers_1/pre_attention_layer_norm/scale                                  size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_1/pre_mlp_layer_norm/scale                                        size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_10/attention/key/kernel                                           size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_10/attention/out/kernel                                           size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable encoder/layers_10/attention/query/kernel                                         size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_10/attention/value/kernel                                         size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_10/mlp/wi_0/kernel                                                size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_10/mlp/wi_1/kernel                                                size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_10/mlp/wo/kernel                                                  size 6094848      shape (mlp=3968, embed=1536)                   partition spec ('model', None)
Variable encoder/layers_10/pre_attention_layer_norm/scale                                 size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_10/pre_mlp_layer_norm/scale                                       size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_11/attention/key/kernel                                           size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_11/attention/out/kernel                                           size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable encoder/layers_11/attention/query/kernel                                         size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_11/attention/value/kernel                                         size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_11/mlp/wi_0/kernel                                                size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_11/mlp/wi_1/kernel                                                size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_11/mlp/wo/kernel                                                  size 6094848      shape (mlp=3968, embed=1536)                   partition spec ('model', None)
Variable encoder/layers_11/pre_attention_layer_norm/scale                                 size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_11/pre_mlp_layer_norm/scale                                       size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_12/attention/key/kernel                                           size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_12/attention/out/kernel                                           size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable encoder/layers_12/attention/query/kernel                                         size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_12/attention/value/kernel                                         size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_12/mlp/wi_0/kernel                                                size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_12/mlp/wi_1/kernel                                                size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_12/mlp/wo/kernel                                                  size 6094848      shape (mlp=3968, embed=1536)                   partition spec ('model', None)
Variable encoder/layers_12/pre_attention_layer_norm/scale                                 size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_12/pre_mlp_layer_norm/scale                                       size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_13/attention/key/kernel                                           size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_13/attention/out/kernel                                           size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable encoder/layers_13/attention/query/kernel                                         size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_13/attention/value/kernel                                         size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_13/mlp/wi_0/kernel                                                size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_13/mlp/wi_1/kernel                                                size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_13/mlp/wo/kernel                                                  size 6094848      shape (mlp=3968, embed=1536)                   partition spec ('model', None)
Variable encoder/layers_13/pre_attention_layer_norm/scale                                 size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_13/pre_mlp_layer_norm/scale                                       size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_14/attention/key/kernel                                           size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_14/attention/out/kernel                                           size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable encoder/layers_14/attention/query/kernel                                         size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_14/attention/value/kernel                                         size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_14/mlp/wi_0/kernel                                                size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_14/mlp/wi_1/kernel                                                size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_14/mlp/wo/kernel                                                  size 6094848      shape (mlp=3968, embed=1536)                   partition spec ('model', None)
Variable encoder/layers_14/pre_attention_layer_norm/scale                                 size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_14/pre_mlp_layer_norm/scale                                       size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_15/attention/key/kernel                                           size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_15/attention/out/kernel                                           size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable encoder/layers_15/attention/query/kernel                                         size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_15/attention/value/kernel                                         size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_15/mlp/wi_0/kernel                                                size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_15/mlp/wi_1/kernel                                                size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_15/mlp/wo/kernel                                                  size 6094848      shape (mlp=3968, embed=1536)                   partition spec ('model', None)
Variable encoder/layers_15/pre_attention_layer_norm/scale                                 size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_15/pre_mlp_layer_norm/scale                                       size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_16/attention/key/kernel                                           size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_16/attention/out/kernel                                           size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable encoder/layers_16/attention/query/kernel                                         size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_16/attention/value/kernel                                         size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_16/mlp/wi_0/kernel                                                size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_16/mlp/wi_1/kernel                                                size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_16/mlp/wo/kernel                                                  size 6094848      shape (mlp=3968, embed=1536)                   partition spec ('model', None)
Variable encoder/layers_16/pre_attention_layer_norm/scale                                 size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_16/pre_mlp_layer_norm/scale                                       size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_17/attention/key/kernel                                           size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_17/attention/out/kernel                                           size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable encoder/layers_17/attention/query/kernel                                         size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_17/attention/value/kernel                                         size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_17/mlp/wi_0/kernel                                                size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_17/mlp/wi_1/kernel                                                size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_17/mlp/wo/kernel                                                  size 6094848      shape (mlp=3968, embed=1536)                   partition spec ('model', None)
Variable encoder/layers_17/pre_attention_layer_norm/scale                                 size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_17/pre_mlp_layer_norm/scale                                       size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_2/attention/key/kernel                                            size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_2/attention/out/kernel                                            size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable encoder/layers_2/attention/query/kernel                                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_2/attention/value/kernel                                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_2/mlp/wi_0/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_2/mlp/wi_1/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_2/mlp/wo/kernel                                                   size 6094848      shape (mlp=3968, embed=1536)                   partition spec ('model', None)
Variable encoder/layers_2/pre_attention_layer_norm/scale                                  size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_2/pre_mlp_layer_norm/scale                                        size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_3/attention/key/kernel                                            size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_3/attention/out/kernel                                            size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable encoder/layers_3/attention/query/kernel                                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_3/attention/value/kernel                                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_3/mlp/wi_0/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_3/mlp/wi_1/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_3/mlp/wo/kernel                                                   size 6094848      shape (mlp=3968, embed=1536)                   partition spec ('model', None)
Variable encoder/layers_3/pre_attention_layer_norm/scale                                  size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_3/pre_mlp_layer_norm/scale                                        size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_4/attention/key/kernel                                            size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_4/attention/out/kernel                                            size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable encoder/layers_4/attention/query/kernel                                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_4/attention/value/kernel                                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_4/mlp/wi_0/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_4/mlp/wi_1/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_4/mlp/wo/kernel                                                   size 6094848      shape (mlp=3968, embed=1536)                   partition spec ('model', None)
Variable encoder/layers_4/pre_attention_layer_norm/scale                                  size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_4/pre_mlp_layer_norm/scale                                        size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_5/attention/key/kernel                                            size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_5/attention/out/kernel                                            size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable encoder/layers_5/attention/query/kernel                                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_5/attention/value/kernel                                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_5/mlp/wi_0/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_5/mlp/wi_1/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_5/mlp/wo/kernel                                                   size 6094848      shape (mlp=3968, embed=1536)                   partition spec ('model', None)
Variable encoder/layers_5/pre_attention_layer_norm/scale                                  size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_5/pre_mlp_layer_norm/scale                                        size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_6/attention/key/kernel                                            size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_6/attention/out/kernel                                            size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable encoder/layers_6/attention/query/kernel                                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_6/attention/value/kernel                                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_6/mlp/wi_0/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_6/mlp/wi_1/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_6/mlp/wo/kernel                                                   size 6094848      shape (mlp=3968, embed=1536)                   partition spec ('model', None)
Variable encoder/layers_6/pre_attention_layer_norm/scale                                  size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_6/pre_mlp_layer_norm/scale                                        size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_7/attention/key/kernel                                            size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_7/attention/out/kernel                                            size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable encoder/layers_7/attention/query/kernel                                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_7/attention/value/kernel                                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_7/mlp/wi_0/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_7/mlp/wi_1/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_7/mlp/wo/kernel                                                   size 6094848      shape (mlp=3968, embed=1536)                   partition spec ('model', None)
Variable encoder/layers_7/pre_attention_layer_norm/scale                                  size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_7/pre_mlp_layer_norm/scale                                        size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_8/attention/key/kernel                                            size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_8/attention/out/kernel                                            size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable encoder/layers_8/attention/query/kernel                                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_8/attention/value/kernel                                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_8/mlp/wi_0/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_8/mlp/wi_1/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_8/mlp/wo/kernel                                                   size 6094848      shape (mlp=3968, embed=1536)                   partition spec ('model', None)
Variable encoder/layers_8/pre_attention_layer_norm/scale                                  size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_8/pre_mlp_layer_norm/scale                                        size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_9/attention/key/kernel                                            size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_9/attention/out/kernel                                            size 1179648      shape (joined_kv=768, embed=1536)              partition spec ('model', None)
Variable encoder/layers_9/attention/query/kernel                                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_9/attention/value/kernel                                          size 1179648      shape (embed=1536, joined_kv=768)              partition spec (None, 'model')
Variable encoder/layers_9/mlp/wi_0/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_9/mlp/wi_1/kernel                                                 size 6094848      shape (embed=1536, mlp=3968)                   partition spec (None, 'model')
Variable encoder/layers_9/mlp/wo/kernel                                                   size 6094848      shape (mlp=3968, embed=1536)                   partition spec ('model', None)
Variable encoder/layers_9/pre_attention_layer_norm/scale                                  size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/layers_9/pre_mlp_layer_norm/scale                                        size 1536         shape (embed=1536)                             partition spec (None,)
Variable encoder/relpos_bias/rel_embedding                                                size 384          shape (heads=12, relpos_buckets=32)            partition spec ('model', None)
Variable token_embedder/embedding                                                         size 589824       shape (vocab=384, embed=1536)                  partition spec ('model', None)
Total number of parameters: 581653248

Variable param_states/decoder/decoder_norm/scale/m                                        size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder_norm/scale/v                                        size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/decoder/decoder_norm/scale/v_col                                    size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/decoder_norm/scale/v_row                                    size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/encoder_decoder_attention/key/kernel/m             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/encoder_decoder_attention/key/kernel/v             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/encoder_decoder_attention/key/kernel/v_col         size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_0/encoder_decoder_attention/key/kernel/v_row         size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_0/encoder_decoder_attention/out/kernel/m             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/encoder_decoder_attention/out/kernel/v             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/encoder_decoder_attention/out/kernel/v_col         size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_0/encoder_decoder_attention/out/kernel/v_row         size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_0/encoder_decoder_attention/query/kernel/m           size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/encoder_decoder_attention/query/kernel/v           size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/encoder_decoder_attention/query/kernel/v_col       size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_0/encoder_decoder_attention/query/kernel/v_row       size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_0/encoder_decoder_attention/value/kernel/m           size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/encoder_decoder_attention/value/kernel/v           size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/encoder_decoder_attention/value/kernel/v_col       size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_0/encoder_decoder_attention/value/kernel/v_row       size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_0/mlp/wi_0/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/mlp/wi_0/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/mlp/wi_0/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/decoder/layers_0/mlp/wi_0/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_0/mlp/wi_1/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/mlp/wi_1/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/mlp/wi_1/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/decoder/layers_0/mlp/wi_1/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_0/mlp/wo/kernel/m                                    size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/mlp/wo/kernel/v                                    size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/mlp/wo/kernel/v_col                                size 3968         shape (3968,)                                  partition spec None
Variable param_states/decoder/layers_0/mlp/wo/kernel/v_row                                size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_0/pre_cross_attention_layer_norm/scale/m             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/pre_cross_attention_layer_norm/scale/v             size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/decoder/layers_0/pre_cross_attention_layer_norm/scale/v_col         size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/pre_cross_attention_layer_norm/scale/v_row         size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/pre_mlp_layer_norm/scale/m                         size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/pre_mlp_layer_norm/scale/v                         size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/decoder/layers_0/pre_mlp_layer_norm/scale/v_col                     size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/pre_mlp_layer_norm/scale/v_row                     size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/pre_self_attention_layer_norm/scale/m              size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/pre_self_attention_layer_norm/scale/v              size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/decoder/layers_0/pre_self_attention_layer_norm/scale/v_col          size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/pre_self_attention_layer_norm/scale/v_row          size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/self_attention/key/kernel/m                        size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/self_attention/key/kernel/v                        size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/self_attention/key/kernel/v_col                    size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_0/self_attention/key/kernel/v_row                    size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_0/self_attention/out/kernel/m                        size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/self_attention/out/kernel/v                        size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/self_attention/out/kernel/v_col                    size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_0/self_attention/out/kernel/v_row                    size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_0/self_attention/query/kernel/m                      size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/self_attention/query/kernel/v                      size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/self_attention/query/kernel/v_col                  size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_0/self_attention/query/kernel/v_row                  size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_0/self_attention/value/kernel/m                      size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/self_attention/value/kernel/v                      size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_0/self_attention/value/kernel/v_col                  size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_0/self_attention/value/kernel/v_row                  size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_1/encoder_decoder_attention/key/kernel/m             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/encoder_decoder_attention/key/kernel/v             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/encoder_decoder_attention/key/kernel/v_col         size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_1/encoder_decoder_attention/key/kernel/v_row         size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_1/encoder_decoder_attention/out/kernel/m             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/encoder_decoder_attention/out/kernel/v             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/encoder_decoder_attention/out/kernel/v_col         size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_1/encoder_decoder_attention/out/kernel/v_row         size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_1/encoder_decoder_attention/query/kernel/m           size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/encoder_decoder_attention/query/kernel/v           size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/encoder_decoder_attention/query/kernel/v_col       size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_1/encoder_decoder_attention/query/kernel/v_row       size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_1/encoder_decoder_attention/value/kernel/m           size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/encoder_decoder_attention/value/kernel/v           size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/encoder_decoder_attention/value/kernel/v_col       size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_1/encoder_decoder_attention/value/kernel/v_row       size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_1/mlp/wi_0/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/mlp/wi_0/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/mlp/wi_0/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/decoder/layers_1/mlp/wi_0/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_1/mlp/wi_1/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/mlp/wi_1/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/mlp/wi_1/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/decoder/layers_1/mlp/wi_1/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_1/mlp/wo/kernel/m                                    size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/mlp/wo/kernel/v                                    size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/mlp/wo/kernel/v_col                                size 3968         shape (3968,)                                  partition spec None
Variable param_states/decoder/layers_1/mlp/wo/kernel/v_row                                size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_1/pre_cross_attention_layer_norm/scale/m             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/pre_cross_attention_layer_norm/scale/v             size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/decoder/layers_1/pre_cross_attention_layer_norm/scale/v_col         size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/pre_cross_attention_layer_norm/scale/v_row         size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/pre_mlp_layer_norm/scale/m                         size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/pre_mlp_layer_norm/scale/v                         size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/decoder/layers_1/pre_mlp_layer_norm/scale/v_col                     size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/pre_mlp_layer_norm/scale/v_row                     size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/pre_self_attention_layer_norm/scale/m              size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/pre_self_attention_layer_norm/scale/v              size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/decoder/layers_1/pre_self_attention_layer_norm/scale/v_col          size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/pre_self_attention_layer_norm/scale/v_row          size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/self_attention/key/kernel/m                        size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/self_attention/key/kernel/v                        size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/self_attention/key/kernel/v_col                    size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_1/self_attention/key/kernel/v_row                    size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_1/self_attention/out/kernel/m                        size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/self_attention/out/kernel/v                        size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/self_attention/out/kernel/v_col                    size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_1/self_attention/out/kernel/v_row                    size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_1/self_attention/query/kernel/m                      size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/self_attention/query/kernel/v                      size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/self_attention/query/kernel/v_col                  size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_1/self_attention/query/kernel/v_row                  size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_1/self_attention/value/kernel/m                      size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/self_attention/value/kernel/v                      size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_1/self_attention/value/kernel/v_col                  size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_1/self_attention/value/kernel/v_row                  size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_2/encoder_decoder_attention/key/kernel/m             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/encoder_decoder_attention/key/kernel/v             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/encoder_decoder_attention/key/kernel/v_col         size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_2/encoder_decoder_attention/key/kernel/v_row         size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_2/encoder_decoder_attention/out/kernel/m             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/encoder_decoder_attention/out/kernel/v             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/encoder_decoder_attention/out/kernel/v_col         size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_2/encoder_decoder_attention/out/kernel/v_row         size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_2/encoder_decoder_attention/query/kernel/m           size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/encoder_decoder_attention/query/kernel/v           size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/encoder_decoder_attention/query/kernel/v_col       size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_2/encoder_decoder_attention/query/kernel/v_row       size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_2/encoder_decoder_attention/value/kernel/m           size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/encoder_decoder_attention/value/kernel/v           size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/encoder_decoder_attention/value/kernel/v_col       size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_2/encoder_decoder_attention/value/kernel/v_row       size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_2/mlp/wi_0/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/mlp/wi_0/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/mlp/wi_0/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/decoder/layers_2/mlp/wi_0/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_2/mlp/wi_1/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/mlp/wi_1/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/mlp/wi_1/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/decoder/layers_2/mlp/wi_1/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_2/mlp/wo/kernel/m                                    size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/mlp/wo/kernel/v                                    size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/mlp/wo/kernel/v_col                                size 3968         shape (3968,)                                  partition spec None
Variable param_states/decoder/layers_2/mlp/wo/kernel/v_row                                size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_2/pre_cross_attention_layer_norm/scale/m             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/pre_cross_attention_layer_norm/scale/v             size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/decoder/layers_2/pre_cross_attention_layer_norm/scale/v_col         size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/pre_cross_attention_layer_norm/scale/v_row         size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/pre_mlp_layer_norm/scale/m                         size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/pre_mlp_layer_norm/scale/v                         size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/decoder/layers_2/pre_mlp_layer_norm/scale/v_col                     size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/pre_mlp_layer_norm/scale/v_row                     size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/pre_self_attention_layer_norm/scale/m              size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/pre_self_attention_layer_norm/scale/v              size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/decoder/layers_2/pre_self_attention_layer_norm/scale/v_col          size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/pre_self_attention_layer_norm/scale/v_row          size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/self_attention/key/kernel/m                        size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/self_attention/key/kernel/v                        size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/self_attention/key/kernel/v_col                    size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_2/self_attention/key/kernel/v_row                    size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_2/self_attention/out/kernel/m                        size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/self_attention/out/kernel/v                        size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/self_attention/out/kernel/v_col                    size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_2/self_attention/out/kernel/v_row                    size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_2/self_attention/query/kernel/m                      size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/self_attention/query/kernel/v                      size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/self_attention/query/kernel/v_col                  size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_2/self_attention/query/kernel/v_row                  size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_2/self_attention/value/kernel/m                      size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/self_attention/value/kernel/v                      size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_2/self_attention/value/kernel/v_col                  size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_2/self_attention/value/kernel/v_row                  size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_3/encoder_decoder_attention/key/kernel/m             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/encoder_decoder_attention/key/kernel/v             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/encoder_decoder_attention/key/kernel/v_col         size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_3/encoder_decoder_attention/key/kernel/v_row         size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_3/encoder_decoder_attention/out/kernel/m             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/encoder_decoder_attention/out/kernel/v             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/encoder_decoder_attention/out/kernel/v_col         size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_3/encoder_decoder_attention/out/kernel/v_row         size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_3/encoder_decoder_attention/query/kernel/m           size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/encoder_decoder_attention/query/kernel/v           size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/encoder_decoder_attention/query/kernel/v_col       size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_3/encoder_decoder_attention/query/kernel/v_row       size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_3/encoder_decoder_attention/value/kernel/m           size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/encoder_decoder_attention/value/kernel/v           size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/encoder_decoder_attention/value/kernel/v_col       size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_3/encoder_decoder_attention/value/kernel/v_row       size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_3/mlp/wi_0/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/mlp/wi_0/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/mlp/wi_0/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/decoder/layers_3/mlp/wi_0/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_3/mlp/wi_1/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/mlp/wi_1/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/mlp/wi_1/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/decoder/layers_3/mlp/wi_1/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_3/mlp/wo/kernel/m                                    size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/mlp/wo/kernel/v                                    size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/mlp/wo/kernel/v_col                                size 3968         shape (3968,)                                  partition spec None
Variable param_states/decoder/layers_3/mlp/wo/kernel/v_row                                size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_3/pre_cross_attention_layer_norm/scale/m             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/pre_cross_attention_layer_norm/scale/v             size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/decoder/layers_3/pre_cross_attention_layer_norm/scale/v_col         size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/pre_cross_attention_layer_norm/scale/v_row         size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/pre_mlp_layer_norm/scale/m                         size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/pre_mlp_layer_norm/scale/v                         size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/decoder/layers_3/pre_mlp_layer_norm/scale/v_col                     size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/pre_mlp_layer_norm/scale/v_row                     size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/pre_self_attention_layer_norm/scale/m              size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/pre_self_attention_layer_norm/scale/v              size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/decoder/layers_3/pre_self_attention_layer_norm/scale/v_col          size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/pre_self_attention_layer_norm/scale/v_row          size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/self_attention/key/kernel/m                        size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/self_attention/key/kernel/v                        size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/self_attention/key/kernel/v_col                    size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_3/self_attention/key/kernel/v_row                    size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_3/self_attention/out/kernel/m                        size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/self_attention/out/kernel/v                        size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/self_attention/out/kernel/v_col                    size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_3/self_attention/out/kernel/v_row                    size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_3/self_attention/query/kernel/m                      size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/self_attention/query/kernel/v                      size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/self_attention/query/kernel/v_col                  size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_3/self_attention/query/kernel/v_row                  size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_3/self_attention/value/kernel/m                      size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/self_attention/value/kernel/v                      size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_3/self_attention/value/kernel/v_col                  size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_3/self_attention/value/kernel/v_row                  size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_4/encoder_decoder_attention/key/kernel/m             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/encoder_decoder_attention/key/kernel/v             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/encoder_decoder_attention/key/kernel/v_col         size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_4/encoder_decoder_attention/key/kernel/v_row         size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_4/encoder_decoder_attention/out/kernel/m             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/encoder_decoder_attention/out/kernel/v             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/encoder_decoder_attention/out/kernel/v_col         size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_4/encoder_decoder_attention/out/kernel/v_row         size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_4/encoder_decoder_attention/query/kernel/m           size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/encoder_decoder_attention/query/kernel/v           size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/encoder_decoder_attention/query/kernel/v_col       size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_4/encoder_decoder_attention/query/kernel/v_row       size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_4/encoder_decoder_attention/value/kernel/m           size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/encoder_decoder_attention/value/kernel/v           size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/encoder_decoder_attention/value/kernel/v_col       size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_4/encoder_decoder_attention/value/kernel/v_row       size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_4/mlp/wi_0/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/mlp/wi_0/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/mlp/wi_0/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/decoder/layers_4/mlp/wi_0/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_4/mlp/wi_1/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/mlp/wi_1/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/mlp/wi_1/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/decoder/layers_4/mlp/wi_1/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_4/mlp/wo/kernel/m                                    size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/mlp/wo/kernel/v                                    size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/mlp/wo/kernel/v_col                                size 3968         shape (3968,)                                  partition spec None
Variable param_states/decoder/layers_4/mlp/wo/kernel/v_row                                size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_4/pre_cross_attention_layer_norm/scale/m             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/pre_cross_attention_layer_norm/scale/v             size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/decoder/layers_4/pre_cross_attention_layer_norm/scale/v_col         size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/pre_cross_attention_layer_norm/scale/v_row         size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/pre_mlp_layer_norm/scale/m                         size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/pre_mlp_layer_norm/scale/v                         size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/decoder/layers_4/pre_mlp_layer_norm/scale/v_col                     size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/pre_mlp_layer_norm/scale/v_row                     size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/pre_self_attention_layer_norm/scale/m              size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/pre_self_attention_layer_norm/scale/v              size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/decoder/layers_4/pre_self_attention_layer_norm/scale/v_col          size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/pre_self_attention_layer_norm/scale/v_row          size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/self_attention/key/kernel/m                        size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/self_attention/key/kernel/v                        size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/self_attention/key/kernel/v_col                    size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_4/self_attention/key/kernel/v_row                    size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_4/self_attention/out/kernel/m                        size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/self_attention/out/kernel/v                        size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/self_attention/out/kernel/v_col                    size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_4/self_attention/out/kernel/v_row                    size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_4/self_attention/query/kernel/m                      size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/self_attention/query/kernel/v                      size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/self_attention/query/kernel/v_col                  size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_4/self_attention/query/kernel/v_row                  size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_4/self_attention/value/kernel/m                      size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/self_attention/value/kernel/v                      size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_4/self_attention/value/kernel/v_col                  size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_4/self_attention/value/kernel/v_row                  size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_5/encoder_decoder_attention/key/kernel/m             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/encoder_decoder_attention/key/kernel/v             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/encoder_decoder_attention/key/kernel/v_col         size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_5/encoder_decoder_attention/key/kernel/v_row         size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_5/encoder_decoder_attention/out/kernel/m             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/encoder_decoder_attention/out/kernel/v             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/encoder_decoder_attention/out/kernel/v_col         size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_5/encoder_decoder_attention/out/kernel/v_row         size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_5/encoder_decoder_attention/query/kernel/m           size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/encoder_decoder_attention/query/kernel/v           size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/encoder_decoder_attention/query/kernel/v_col       size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_5/encoder_decoder_attention/query/kernel/v_row       size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_5/encoder_decoder_attention/value/kernel/m           size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/encoder_decoder_attention/value/kernel/v           size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/encoder_decoder_attention/value/kernel/v_col       size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_5/encoder_decoder_attention/value/kernel/v_row       size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_5/mlp/wi_0/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/mlp/wi_0/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/mlp/wi_0/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/decoder/layers_5/mlp/wi_0/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_5/mlp/wi_1/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/mlp/wi_1/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/mlp/wi_1/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/decoder/layers_5/mlp/wi_1/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_5/mlp/wo/kernel/m                                    size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/mlp/wo/kernel/v                                    size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/mlp/wo/kernel/v_col                                size 3968         shape (3968,)                                  partition spec None
Variable param_states/decoder/layers_5/mlp/wo/kernel/v_row                                size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_5/pre_cross_attention_layer_norm/scale/m             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/pre_cross_attention_layer_norm/scale/v             size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/decoder/layers_5/pre_cross_attention_layer_norm/scale/v_col         size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/pre_cross_attention_layer_norm/scale/v_row         size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/pre_mlp_layer_norm/scale/m                         size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/pre_mlp_layer_norm/scale/v                         size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/decoder/layers_5/pre_mlp_layer_norm/scale/v_col                     size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/pre_mlp_layer_norm/scale/v_row                     size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/pre_self_attention_layer_norm/scale/m              size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/pre_self_attention_layer_norm/scale/v              size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/decoder/layers_5/pre_self_attention_layer_norm/scale/v_col          size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/pre_self_attention_layer_norm/scale/v_row          size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/self_attention/key/kernel/m                        size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/self_attention/key/kernel/v                        size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/self_attention/key/kernel/v_col                    size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_5/self_attention/key/kernel/v_row                    size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_5/self_attention/out/kernel/m                        size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/self_attention/out/kernel/v                        size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/self_attention/out/kernel/v_col                    size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_5/self_attention/out/kernel/v_row                    size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_5/self_attention/query/kernel/m                      size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/self_attention/query/kernel/v                      size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/self_attention/query/kernel/v_col                  size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_5/self_attention/query/kernel/v_row                  size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/layers_5/self_attention/value/kernel/m                      size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/self_attention/value/kernel/v                      size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/layers_5/self_attention/value/kernel/v_col                  size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/layers_5/self_attention/value/kernel/v_row                  size 768          shape (768,)                                   partition spec None
Variable param_states/decoder/logits_dense/kernel/m                                       size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/logits_dense/kernel/v                                       size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/logits_dense/kernel/v_col                                   size 1536         shape (1536,)                                  partition spec None
Variable param_states/decoder/logits_dense/kernel/v_row                                   size 384          shape (384,)                                   partition spec None
Variable param_states/decoder/relpos_bias/rel_embedding/m                                 size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/relpos_bias/rel_embedding/v                                 size 384          shape (heads=12, relpos_buckets=32)            partition spec ('model', None)
Variable param_states/decoder/relpos_bias/rel_embedding/v_col                             size 1            shape (1,)                                     partition spec None
Variable param_states/decoder/relpos_bias/rel_embedding/v_row                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder_norm/scale/m                                        size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder_norm/scale/v                                        size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/encoder_norm/scale/v_col                                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/encoder_norm/scale/v_row                                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_0/attention/key/kernel/m                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_0/attention/key/kernel/v                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_0/attention/key/kernel/v_col                         size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_0/attention/key/kernel/v_row                         size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_0/attention/out/kernel/m                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_0/attention/out/kernel/v                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_0/attention/out/kernel/v_col                         size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_0/attention/out/kernel/v_row                         size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_0/attention/query/kernel/m                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_0/attention/query/kernel/v                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_0/attention/query/kernel/v_col                       size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_0/attention/query/kernel/v_row                       size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_0/attention/value/kernel/m                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_0/attention/value/kernel/v                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_0/attention/value/kernel/v_col                       size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_0/attention/value/kernel/v_row                       size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_0/mlp/wi_0/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_0/mlp/wi_0/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_0/mlp/wi_0/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_0/mlp/wi_0/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_0/mlp/wi_1/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_0/mlp/wi_1/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_0/mlp/wi_1/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_0/mlp/wi_1/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_0/mlp/wo/kernel/m                                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_0/mlp/wo/kernel/v                                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_0/mlp/wo/kernel/v_col                                size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_0/mlp/wo/kernel/v_row                                size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_0/pre_attention_layer_norm/scale/m                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_0/pre_attention_layer_norm/scale/v                   size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_0/pre_attention_layer_norm/scale/v_col               size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_0/pre_attention_layer_norm/scale/v_row               size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_0/pre_mlp_layer_norm/scale/m                         size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_0/pre_mlp_layer_norm/scale/v                         size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_0/pre_mlp_layer_norm/scale/v_col                     size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_0/pre_mlp_layer_norm/scale/v_row                     size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_1/attention/key/kernel/m                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_1/attention/key/kernel/v                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_1/attention/key/kernel/v_col                         size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_1/attention/key/kernel/v_row                         size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_1/attention/out/kernel/m                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_1/attention/out/kernel/v                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_1/attention/out/kernel/v_col                         size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_1/attention/out/kernel/v_row                         size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_1/attention/query/kernel/m                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_1/attention/query/kernel/v                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_1/attention/query/kernel/v_col                       size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_1/attention/query/kernel/v_row                       size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_1/attention/value/kernel/m                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_1/attention/value/kernel/v                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_1/attention/value/kernel/v_col                       size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_1/attention/value/kernel/v_row                       size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_1/mlp/wi_0/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_1/mlp/wi_0/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_1/mlp/wi_0/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_1/mlp/wi_0/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_1/mlp/wi_1/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_1/mlp/wi_1/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_1/mlp/wi_1/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_1/mlp/wi_1/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_1/mlp/wo/kernel/m                                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_1/mlp/wo/kernel/v                                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_1/mlp/wo/kernel/v_col                                size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_1/mlp/wo/kernel/v_row                                size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_1/pre_attention_layer_norm/scale/m                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_1/pre_attention_layer_norm/scale/v                   size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_1/pre_attention_layer_norm/scale/v_col               size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_1/pre_attention_layer_norm/scale/v_row               size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_1/pre_mlp_layer_norm/scale/m                         size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_1/pre_mlp_layer_norm/scale/v                         size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_1/pre_mlp_layer_norm/scale/v_col                     size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_1/pre_mlp_layer_norm/scale/v_row                     size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_10/attention/key/kernel/m                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_10/attention/key/kernel/v                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_10/attention/key/kernel/v_col                        size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_10/attention/key/kernel/v_row                        size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_10/attention/out/kernel/m                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_10/attention/out/kernel/v                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_10/attention/out/kernel/v_col                        size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_10/attention/out/kernel/v_row                        size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_10/attention/query/kernel/m                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_10/attention/query/kernel/v                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_10/attention/query/kernel/v_col                      size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_10/attention/query/kernel/v_row                      size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_10/attention/value/kernel/m                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_10/attention/value/kernel/v                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_10/attention/value/kernel/v_col                      size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_10/attention/value/kernel/v_row                      size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_10/mlp/wi_0/kernel/m                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_10/mlp/wi_0/kernel/v                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_10/mlp/wi_0/kernel/v_col                             size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_10/mlp/wi_0/kernel/v_row                             size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_10/mlp/wi_1/kernel/m                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_10/mlp/wi_1/kernel/v                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_10/mlp/wi_1/kernel/v_col                             size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_10/mlp/wi_1/kernel/v_row                             size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_10/mlp/wo/kernel/m                                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_10/mlp/wo/kernel/v                                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_10/mlp/wo/kernel/v_col                               size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_10/mlp/wo/kernel/v_row                               size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_10/pre_attention_layer_norm/scale/m                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_10/pre_attention_layer_norm/scale/v                  size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_10/pre_attention_layer_norm/scale/v_col              size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_10/pre_attention_layer_norm/scale/v_row              size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_10/pre_mlp_layer_norm/scale/m                        size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_10/pre_mlp_layer_norm/scale/v                        size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_10/pre_mlp_layer_norm/scale/v_col                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_10/pre_mlp_layer_norm/scale/v_row                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_11/attention/key/kernel/m                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_11/attention/key/kernel/v                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_11/attention/key/kernel/v_col                        size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_11/attention/key/kernel/v_row                        size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_11/attention/out/kernel/m                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_11/attention/out/kernel/v                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_11/attention/out/kernel/v_col                        size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_11/attention/out/kernel/v_row                        size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_11/attention/query/kernel/m                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_11/attention/query/kernel/v                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_11/attention/query/kernel/v_col                      size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_11/attention/query/kernel/v_row                      size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_11/attention/value/kernel/m                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_11/attention/value/kernel/v                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_11/attention/value/kernel/v_col                      size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_11/attention/value/kernel/v_row                      size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_11/mlp/wi_0/kernel/m                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_11/mlp/wi_0/kernel/v                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_11/mlp/wi_0/kernel/v_col                             size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_11/mlp/wi_0/kernel/v_row                             size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_11/mlp/wi_1/kernel/m                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_11/mlp/wi_1/kernel/v                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_11/mlp/wi_1/kernel/v_col                             size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_11/mlp/wi_1/kernel/v_row                             size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_11/mlp/wo/kernel/m                                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_11/mlp/wo/kernel/v                                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_11/mlp/wo/kernel/v_col                               size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_11/mlp/wo/kernel/v_row                               size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_11/pre_attention_layer_norm/scale/m                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_11/pre_attention_layer_norm/scale/v                  size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_11/pre_attention_layer_norm/scale/v_col              size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_11/pre_attention_layer_norm/scale/v_row              size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_11/pre_mlp_layer_norm/scale/m                        size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_11/pre_mlp_layer_norm/scale/v                        size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_11/pre_mlp_layer_norm/scale/v_col                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_11/pre_mlp_layer_norm/scale/v_row                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_12/attention/key/kernel/m                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_12/attention/key/kernel/v                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_12/attention/key/kernel/v_col                        size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_12/attention/key/kernel/v_row                        size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_12/attention/out/kernel/m                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_12/attention/out/kernel/v                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_12/attention/out/kernel/v_col                        size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_12/attention/out/kernel/v_row                        size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_12/attention/query/kernel/m                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_12/attention/query/kernel/v                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_12/attention/query/kernel/v_col                      size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_12/attention/query/kernel/v_row                      size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_12/attention/value/kernel/m                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_12/attention/value/kernel/v                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_12/attention/value/kernel/v_col                      size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_12/attention/value/kernel/v_row                      size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_12/mlp/wi_0/kernel/m                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_12/mlp/wi_0/kernel/v                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_12/mlp/wi_0/kernel/v_col                             size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_12/mlp/wi_0/kernel/v_row                             size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_12/mlp/wi_1/kernel/m                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_12/mlp/wi_1/kernel/v                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_12/mlp/wi_1/kernel/v_col                             size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_12/mlp/wi_1/kernel/v_row                             size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_12/mlp/wo/kernel/m                                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_12/mlp/wo/kernel/v                                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_12/mlp/wo/kernel/v_col                               size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_12/mlp/wo/kernel/v_row                               size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_12/pre_attention_layer_norm/scale/m                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_12/pre_attention_layer_norm/scale/v                  size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_12/pre_attention_layer_norm/scale/v_col              size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_12/pre_attention_layer_norm/scale/v_row              size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_12/pre_mlp_layer_norm/scale/m                        size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_12/pre_mlp_layer_norm/scale/v                        size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_12/pre_mlp_layer_norm/scale/v_col                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_12/pre_mlp_layer_norm/scale/v_row                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_13/attention/key/kernel/m                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_13/attention/key/kernel/v                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_13/attention/key/kernel/v_col                        size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_13/attention/key/kernel/v_row                        size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_13/attention/out/kernel/m                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_13/attention/out/kernel/v                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_13/attention/out/kernel/v_col                        size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_13/attention/out/kernel/v_row                        size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_13/attention/query/kernel/m                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_13/attention/query/kernel/v                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_13/attention/query/kernel/v_col                      size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_13/attention/query/kernel/v_row                      size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_13/attention/value/kernel/m                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_13/attention/value/kernel/v                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_13/attention/value/kernel/v_col                      size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_13/attention/value/kernel/v_row                      size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_13/mlp/wi_0/kernel/m                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_13/mlp/wi_0/kernel/v                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_13/mlp/wi_0/kernel/v_col                             size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_13/mlp/wi_0/kernel/v_row                             size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_13/mlp/wi_1/kernel/m                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_13/mlp/wi_1/kernel/v                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_13/mlp/wi_1/kernel/v_col                             size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_13/mlp/wi_1/kernel/v_row                             size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_13/mlp/wo/kernel/m                                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_13/mlp/wo/kernel/v                                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_13/mlp/wo/kernel/v_col                               size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_13/mlp/wo/kernel/v_row                               size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_13/pre_attention_layer_norm/scale/m                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_13/pre_attention_layer_norm/scale/v                  size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_13/pre_attention_layer_norm/scale/v_col              size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_13/pre_attention_layer_norm/scale/v_row              size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_13/pre_mlp_layer_norm/scale/m                        size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_13/pre_mlp_layer_norm/scale/v                        size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_13/pre_mlp_layer_norm/scale/v_col                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_13/pre_mlp_layer_norm/scale/v_row                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_14/attention/key/kernel/m                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_14/attention/key/kernel/v                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_14/attention/key/kernel/v_col                        size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_14/attention/key/kernel/v_row                        size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_14/attention/out/kernel/m                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_14/attention/out/kernel/v                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_14/attention/out/kernel/v_col                        size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_14/attention/out/kernel/v_row                        size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_14/attention/query/kernel/m                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_14/attention/query/kernel/v                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_14/attention/query/kernel/v_col                      size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_14/attention/query/kernel/v_row                      size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_14/attention/value/kernel/m                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_14/attention/value/kernel/v                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_14/attention/value/kernel/v_col                      size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_14/attention/value/kernel/v_row                      size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_14/mlp/wi_0/kernel/m                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_14/mlp/wi_0/kernel/v                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_14/mlp/wi_0/kernel/v_col                             size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_14/mlp/wi_0/kernel/v_row                             size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_14/mlp/wi_1/kernel/m                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_14/mlp/wi_1/kernel/v                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_14/mlp/wi_1/kernel/v_col                             size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_14/mlp/wi_1/kernel/v_row                             size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_14/mlp/wo/kernel/m                                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_14/mlp/wo/kernel/v                                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_14/mlp/wo/kernel/v_col                               size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_14/mlp/wo/kernel/v_row                               size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_14/pre_attention_layer_norm/scale/m                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_14/pre_attention_layer_norm/scale/v                  size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_14/pre_attention_layer_norm/scale/v_col              size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_14/pre_attention_layer_norm/scale/v_row              size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_14/pre_mlp_layer_norm/scale/m                        size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_14/pre_mlp_layer_norm/scale/v                        size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_14/pre_mlp_layer_norm/scale/v_col                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_14/pre_mlp_layer_norm/scale/v_row                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_15/attention/key/kernel/m                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_15/attention/key/kernel/v                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_15/attention/key/kernel/v_col                        size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_15/attention/key/kernel/v_row                        size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_15/attention/out/kernel/m                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_15/attention/out/kernel/v                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_15/attention/out/kernel/v_col                        size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_15/attention/out/kernel/v_row                        size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_15/attention/query/kernel/m                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_15/attention/query/kernel/v                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_15/attention/query/kernel/v_col                      size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_15/attention/query/kernel/v_row                      size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_15/attention/value/kernel/m                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_15/attention/value/kernel/v                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_15/attention/value/kernel/v_col                      size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_15/attention/value/kernel/v_row                      size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_15/mlp/wi_0/kernel/m                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_15/mlp/wi_0/kernel/v                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_15/mlp/wi_0/kernel/v_col                             size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_15/mlp/wi_0/kernel/v_row                             size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_15/mlp/wi_1/kernel/m                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_15/mlp/wi_1/kernel/v                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_15/mlp/wi_1/kernel/v_col                             size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_15/mlp/wi_1/kernel/v_row                             size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_15/mlp/wo/kernel/m                                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_15/mlp/wo/kernel/v                                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_15/mlp/wo/kernel/v_col                               size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_15/mlp/wo/kernel/v_row                               size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_15/pre_attention_layer_norm/scale/m                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_15/pre_attention_layer_norm/scale/v                  size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_15/pre_attention_layer_norm/scale/v_col              size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_15/pre_attention_layer_norm/scale/v_row              size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_15/pre_mlp_layer_norm/scale/m                        size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_15/pre_mlp_layer_norm/scale/v                        size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_15/pre_mlp_layer_norm/scale/v_col                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_15/pre_mlp_layer_norm/scale/v_row                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_16/attention/key/kernel/m                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_16/attention/key/kernel/v                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_16/attention/key/kernel/v_col                        size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_16/attention/key/kernel/v_row                        size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_16/attention/out/kernel/m                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_16/attention/out/kernel/v                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_16/attention/out/kernel/v_col                        size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_16/attention/out/kernel/v_row                        size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_16/attention/query/kernel/m                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_16/attention/query/kernel/v                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_16/attention/query/kernel/v_col                      size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_16/attention/query/kernel/v_row                      size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_16/attention/value/kernel/m                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_16/attention/value/kernel/v                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_16/attention/value/kernel/v_col                      size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_16/attention/value/kernel/v_row                      size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_16/mlp/wi_0/kernel/m                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_16/mlp/wi_0/kernel/v                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_16/mlp/wi_0/kernel/v_col                             size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_16/mlp/wi_0/kernel/v_row                             size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_16/mlp/wi_1/kernel/m                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_16/mlp/wi_1/kernel/v                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_16/mlp/wi_1/kernel/v_col                             size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_16/mlp/wi_1/kernel/v_row                             size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_16/mlp/wo/kernel/m                                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_16/mlp/wo/kernel/v                                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_16/mlp/wo/kernel/v_col                               size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_16/mlp/wo/kernel/v_row                               size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_16/pre_attention_layer_norm/scale/m                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_16/pre_attention_layer_norm/scale/v                  size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_16/pre_attention_layer_norm/scale/v_col              size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_16/pre_attention_layer_norm/scale/v_row              size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_16/pre_mlp_layer_norm/scale/m                        size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_16/pre_mlp_layer_norm/scale/v                        size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_16/pre_mlp_layer_norm/scale/v_col                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_16/pre_mlp_layer_norm/scale/v_row                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_17/attention/key/kernel/m                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_17/attention/key/kernel/v                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_17/attention/key/kernel/v_col                        size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_17/attention/key/kernel/v_row                        size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_17/attention/out/kernel/m                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_17/attention/out/kernel/v                            size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_17/attention/out/kernel/v_col                        size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_17/attention/out/kernel/v_row                        size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_17/attention/query/kernel/m                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_17/attention/query/kernel/v                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_17/attention/query/kernel/v_col                      size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_17/attention/query/kernel/v_row                      size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_17/attention/value/kernel/m                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_17/attention/value/kernel/v                          size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_17/attention/value/kernel/v_col                      size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_17/attention/value/kernel/v_row                      size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_17/mlp/wi_0/kernel/m                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_17/mlp/wi_0/kernel/v                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_17/mlp/wi_0/kernel/v_col                             size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_17/mlp/wi_0/kernel/v_row                             size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_17/mlp/wi_1/kernel/m                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_17/mlp/wi_1/kernel/v                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_17/mlp/wi_1/kernel/v_col                             size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_17/mlp/wi_1/kernel/v_row                             size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_17/mlp/wo/kernel/m                                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_17/mlp/wo/kernel/v                                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_17/mlp/wo/kernel/v_col                               size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_17/mlp/wo/kernel/v_row                               size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_17/pre_attention_layer_norm/scale/m                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_17/pre_attention_layer_norm/scale/v                  size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_17/pre_attention_layer_norm/scale/v_col              size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_17/pre_attention_layer_norm/scale/v_row              size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_17/pre_mlp_layer_norm/scale/m                        size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_17/pre_mlp_layer_norm/scale/v                        size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_17/pre_mlp_layer_norm/scale/v_col                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_17/pre_mlp_layer_norm/scale/v_row                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_2/attention/key/kernel/m                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_2/attention/key/kernel/v                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_2/attention/key/kernel/v_col                         size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_2/attention/key/kernel/v_row                         size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_2/attention/out/kernel/m                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_2/attention/out/kernel/v                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_2/attention/out/kernel/v_col                         size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_2/attention/out/kernel/v_row                         size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_2/attention/query/kernel/m                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_2/attention/query/kernel/v                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_2/attention/query/kernel/v_col                       size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_2/attention/query/kernel/v_row                       size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_2/attention/value/kernel/m                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_2/attention/value/kernel/v                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_2/attention/value/kernel/v_col                       size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_2/attention/value/kernel/v_row                       size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_2/mlp/wi_0/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_2/mlp/wi_0/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_2/mlp/wi_0/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_2/mlp/wi_0/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_2/mlp/wi_1/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_2/mlp/wi_1/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_2/mlp/wi_1/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_2/mlp/wi_1/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_2/mlp/wo/kernel/m                                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_2/mlp/wo/kernel/v                                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_2/mlp/wo/kernel/v_col                                size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_2/mlp/wo/kernel/v_row                                size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_2/pre_attention_layer_norm/scale/m                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_2/pre_attention_layer_norm/scale/v                   size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_2/pre_attention_layer_norm/scale/v_col               size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_2/pre_attention_layer_norm/scale/v_row               size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_2/pre_mlp_layer_norm/scale/m                         size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_2/pre_mlp_layer_norm/scale/v                         size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_2/pre_mlp_layer_norm/scale/v_col                     size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_2/pre_mlp_layer_norm/scale/v_row                     size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_3/attention/key/kernel/m                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_3/attention/key/kernel/v                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_3/attention/key/kernel/v_col                         size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_3/attention/key/kernel/v_row                         size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_3/attention/out/kernel/m                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_3/attention/out/kernel/v                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_3/attention/out/kernel/v_col                         size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_3/attention/out/kernel/v_row                         size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_3/attention/query/kernel/m                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_3/attention/query/kernel/v                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_3/attention/query/kernel/v_col                       size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_3/attention/query/kernel/v_row                       size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_3/attention/value/kernel/m                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_3/attention/value/kernel/v                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_3/attention/value/kernel/v_col                       size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_3/attention/value/kernel/v_row                       size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_3/mlp/wi_0/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_3/mlp/wi_0/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_3/mlp/wi_0/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_3/mlp/wi_0/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_3/mlp/wi_1/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_3/mlp/wi_1/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_3/mlp/wi_1/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_3/mlp/wi_1/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_3/mlp/wo/kernel/m                                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_3/mlp/wo/kernel/v                                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_3/mlp/wo/kernel/v_col                                size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_3/mlp/wo/kernel/v_row                                size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_3/pre_attention_layer_norm/scale/m                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_3/pre_attention_layer_norm/scale/v                   size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_3/pre_attention_layer_norm/scale/v_col               size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_3/pre_attention_layer_norm/scale/v_row               size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_3/pre_mlp_layer_norm/scale/m                         size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_3/pre_mlp_layer_norm/scale/v                         size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_3/pre_mlp_layer_norm/scale/v_col                     size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_3/pre_mlp_layer_norm/scale/v_row                     size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_4/attention/key/kernel/m                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_4/attention/key/kernel/v                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_4/attention/key/kernel/v_col                         size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_4/attention/key/kernel/v_row                         size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_4/attention/out/kernel/m                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_4/attention/out/kernel/v                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_4/attention/out/kernel/v_col                         size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_4/attention/out/kernel/v_row                         size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_4/attention/query/kernel/m                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_4/attention/query/kernel/v                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_4/attention/query/kernel/v_col                       size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_4/attention/query/kernel/v_row                       size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_4/attention/value/kernel/m                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_4/attention/value/kernel/v                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_4/attention/value/kernel/v_col                       size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_4/attention/value/kernel/v_row                       size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_4/mlp/wi_0/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_4/mlp/wi_0/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_4/mlp/wi_0/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_4/mlp/wi_0/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_4/mlp/wi_1/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_4/mlp/wi_1/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_4/mlp/wi_1/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_4/mlp/wi_1/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_4/mlp/wo/kernel/m                                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_4/mlp/wo/kernel/v                                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_4/mlp/wo/kernel/v_col                                size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_4/mlp/wo/kernel/v_row                                size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_4/pre_attention_layer_norm/scale/m                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_4/pre_attention_layer_norm/scale/v                   size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_4/pre_attention_layer_norm/scale/v_col               size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_4/pre_attention_layer_norm/scale/v_row               size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_4/pre_mlp_layer_norm/scale/m                         size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_4/pre_mlp_layer_norm/scale/v                         size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_4/pre_mlp_layer_norm/scale/v_col                     size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_4/pre_mlp_layer_norm/scale/v_row                     size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_5/attention/key/kernel/m                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_5/attention/key/kernel/v                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_5/attention/key/kernel/v_col                         size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_5/attention/key/kernel/v_row                         size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_5/attention/out/kernel/m                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_5/attention/out/kernel/v                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_5/attention/out/kernel/v_col                         size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_5/attention/out/kernel/v_row                         size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_5/attention/query/kernel/m                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_5/attention/query/kernel/v                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_5/attention/query/kernel/v_col                       size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_5/attention/query/kernel/v_row                       size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_5/attention/value/kernel/m                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_5/attention/value/kernel/v                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_5/attention/value/kernel/v_col                       size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_5/attention/value/kernel/v_row                       size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_5/mlp/wi_0/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_5/mlp/wi_0/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_5/mlp/wi_0/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_5/mlp/wi_0/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_5/mlp/wi_1/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_5/mlp/wi_1/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_5/mlp/wi_1/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_5/mlp/wi_1/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_5/mlp/wo/kernel/m                                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_5/mlp/wo/kernel/v                                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_5/mlp/wo/kernel/v_col                                size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_5/mlp/wo/kernel/v_row                                size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_5/pre_attention_layer_norm/scale/m                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_5/pre_attention_layer_norm/scale/v                   size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_5/pre_attention_layer_norm/scale/v_col               size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_5/pre_attention_layer_norm/scale/v_row               size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_5/pre_mlp_layer_norm/scale/m                         size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_5/pre_mlp_layer_norm/scale/v                         size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_5/pre_mlp_layer_norm/scale/v_col                     size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_5/pre_mlp_layer_norm/scale/v_row                     size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_6/attention/key/kernel/m                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_6/attention/key/kernel/v                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_6/attention/key/kernel/v_col                         size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_6/attention/key/kernel/v_row                         size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_6/attention/out/kernel/m                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_6/attention/out/kernel/v                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_6/attention/out/kernel/v_col                         size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_6/attention/out/kernel/v_row                         size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_6/attention/query/kernel/m                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_6/attention/query/kernel/v                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_6/attention/query/kernel/v_col                       size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_6/attention/query/kernel/v_row                       size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_6/attention/value/kernel/m                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_6/attention/value/kernel/v                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_6/attention/value/kernel/v_col                       size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_6/attention/value/kernel/v_row                       size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_6/mlp/wi_0/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_6/mlp/wi_0/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_6/mlp/wi_0/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_6/mlp/wi_0/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_6/mlp/wi_1/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_6/mlp/wi_1/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_6/mlp/wi_1/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_6/mlp/wi_1/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_6/mlp/wo/kernel/m                                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_6/mlp/wo/kernel/v                                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_6/mlp/wo/kernel/v_col                                size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_6/mlp/wo/kernel/v_row                                size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_6/pre_attention_layer_norm/scale/m                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_6/pre_attention_layer_norm/scale/v                   size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_6/pre_attention_layer_norm/scale/v_col               size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_6/pre_attention_layer_norm/scale/v_row               size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_6/pre_mlp_layer_norm/scale/m                         size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_6/pre_mlp_layer_norm/scale/v                         size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_6/pre_mlp_layer_norm/scale/v_col                     size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_6/pre_mlp_layer_norm/scale/v_row                     size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_7/attention/key/kernel/m                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_7/attention/key/kernel/v                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_7/attention/key/kernel/v_col                         size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_7/attention/key/kernel/v_row                         size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_7/attention/out/kernel/m                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_7/attention/out/kernel/v                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_7/attention/out/kernel/v_col                         size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_7/attention/out/kernel/v_row                         size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_7/attention/query/kernel/m                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_7/attention/query/kernel/v                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_7/attention/query/kernel/v_col                       size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_7/attention/query/kernel/v_row                       size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_7/attention/value/kernel/m                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_7/attention/value/kernel/v                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_7/attention/value/kernel/v_col                       size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_7/attention/value/kernel/v_row                       size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_7/mlp/wi_0/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_7/mlp/wi_0/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_7/mlp/wi_0/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_7/mlp/wi_0/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_7/mlp/wi_1/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_7/mlp/wi_1/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_7/mlp/wi_1/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_7/mlp/wi_1/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_7/mlp/wo/kernel/m                                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_7/mlp/wo/kernel/v                                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_7/mlp/wo/kernel/v_col                                size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_7/mlp/wo/kernel/v_row                                size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_7/pre_attention_layer_norm/scale/m                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_7/pre_attention_layer_norm/scale/v                   size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_7/pre_attention_layer_norm/scale/v_col               size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_7/pre_attention_layer_norm/scale/v_row               size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_7/pre_mlp_layer_norm/scale/m                         size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_7/pre_mlp_layer_norm/scale/v                         size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_7/pre_mlp_layer_norm/scale/v_col                     size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_7/pre_mlp_layer_norm/scale/v_row                     size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_8/attention/key/kernel/m                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_8/attention/key/kernel/v                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_8/attention/key/kernel/v_col                         size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_8/attention/key/kernel/v_row                         size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_8/attention/out/kernel/m                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_8/attention/out/kernel/v                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_8/attention/out/kernel/v_col                         size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_8/attention/out/kernel/v_row                         size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_8/attention/query/kernel/m                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_8/attention/query/kernel/v                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_8/attention/query/kernel/v_col                       size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_8/attention/query/kernel/v_row                       size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_8/attention/value/kernel/m                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_8/attention/value/kernel/v                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_8/attention/value/kernel/v_col                       size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_8/attention/value/kernel/v_row                       size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_8/mlp/wi_0/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_8/mlp/wi_0/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_8/mlp/wi_0/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_8/mlp/wi_0/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_8/mlp/wi_1/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_8/mlp/wi_1/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_8/mlp/wi_1/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_8/mlp/wi_1/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_8/mlp/wo/kernel/m                                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_8/mlp/wo/kernel/v                                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_8/mlp/wo/kernel/v_col                                size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_8/mlp/wo/kernel/v_row                                size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_8/pre_attention_layer_norm/scale/m                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_8/pre_attention_layer_norm/scale/v                   size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_8/pre_attention_layer_norm/scale/v_col               size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_8/pre_attention_layer_norm/scale/v_row               size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_8/pre_mlp_layer_norm/scale/m                         size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_8/pre_mlp_layer_norm/scale/v                         size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_8/pre_mlp_layer_norm/scale/v_col                     size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_8/pre_mlp_layer_norm/scale/v_row                     size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_9/attention/key/kernel/m                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_9/attention/key/kernel/v                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_9/attention/key/kernel/v_col                         size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_9/attention/key/kernel/v_row                         size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_9/attention/out/kernel/m                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_9/attention/out/kernel/v                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_9/attention/out/kernel/v_col                         size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_9/attention/out/kernel/v_row                         size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_9/attention/query/kernel/m                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_9/attention/query/kernel/v                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_9/attention/query/kernel/v_col                       size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_9/attention/query/kernel/v_row                       size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_9/attention/value/kernel/m                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_9/attention/value/kernel/v                           size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_9/attention/value/kernel/v_col                       size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_9/attention/value/kernel/v_row                       size 768          shape (768,)                                   partition spec None
Variable param_states/encoder/layers_9/mlp/wi_0/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_9/mlp/wi_0/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_9/mlp/wi_0/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_9/mlp/wi_0/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_9/mlp/wi_1/kernel/m                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_9/mlp/wi_1/kernel/v                                  size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_9/mlp/wi_1/kernel/v_col                              size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_9/mlp/wi_1/kernel/v_row                              size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_9/mlp/wo/kernel/m                                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_9/mlp/wo/kernel/v                                    size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_9/mlp/wo/kernel/v_col                                size 3968         shape (3968,)                                  partition spec None
Variable param_states/encoder/layers_9/mlp/wo/kernel/v_row                                size 1536         shape (1536,)                                  partition spec None
Variable param_states/encoder/layers_9/pre_attention_layer_norm/scale/m                   size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_9/pre_attention_layer_norm/scale/v                   size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_9/pre_attention_layer_norm/scale/v_col               size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_9/pre_attention_layer_norm/scale/v_row               size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_9/pre_mlp_layer_norm/scale/m                         size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_9/pre_mlp_layer_norm/scale/v                         size 1536         shape (embed=1536)                             partition spec (None,)
Variable param_states/encoder/layers_9/pre_mlp_layer_norm/scale/v_col                     size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/layers_9/pre_mlp_layer_norm/scale/v_row                     size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/relpos_bias/rel_embedding/m                                 size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/relpos_bias/rel_embedding/v                                 size 384          shape (heads=12, relpos_buckets=32)            partition spec ('model', None)
Variable param_states/encoder/relpos_bias/rel_embedding/v_col                             size 1            shape (1,)                                     partition spec None
Variable param_states/encoder/relpos_bias/rel_embedding/v_row                             size 1            shape (1,)                                     partition spec None
Variable param_states/token_embedder/embedding/m                                          size 1            shape (1,)                                     partition spec None
Variable param_states/token_embedder/embedding/v                                          size 1            shape (1,)                                     partition spec None
Variable param_states/token_embedder/embedding/v_col                                      size 1536         shape (1536,)                                  partition spec None
Variable param_states/token_embedder/embedding/v_row                                      size 384          shape (384,)                                   partition spec None
Variable step                                                                             size 1            shape ()                                       partition spec None