add t5x checkpoint
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +2 -0
- README.md +16 -1
- t5x/checkpoint_1500000/checkpoint +3 -0
- t5x/checkpoint_1500000/state.param_states.decoder.decoder_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1500000/state.param_states.decoder.decoder_norm.scale.v/0 +0 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_0.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_0.pre_cross_attention_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_0.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_0.pre_mlp_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_0.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_0.pre_self_attention_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_1.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_1.pre_cross_attention_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_1.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_1.pre_mlp_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_1.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_1.pre_self_attention_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_2.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_2.pre_cross_attention_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_2.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_2.pre_mlp_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_2.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_2.pre_self_attention_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_3.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_3.pre_cross_attention_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_3.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_3.pre_mlp_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_3.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_3.pre_self_attention_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_4.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_4.pre_cross_attention_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_4.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_4.pre_mlp_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_4.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_4.pre_self_attention_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_5.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_5.pre_cross_attention_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_5.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_5.pre_mlp_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_5.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1500000/state.param_states.decoder.layers_5.pre_self_attention_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1500000/state.param_states.decoder.relpos_bias.rel_embedding.v/.zarray +1 -0
- t5x/checkpoint_1500000/state.param_states.decoder.relpos_bias.rel_embedding.v/0.0 +3 -0
- t5x/checkpoint_1500000/state.param_states.encoder.encoder_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1500000/state.param_states.encoder.encoder_norm.scale.v/0 +0 -0
- t5x/checkpoint_1500000/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1500000/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1500000/state.param_states.encoder.layers_0.pre_mlp_layer_norm.scale.v/.zarray +1 -0
- t5x/checkpoint_1500000/state.param_states.encoder.layers_0.pre_mlp_layer_norm.scale.v/0 +0 -0
- t5x/checkpoint_1500000/state.param_states.encoder.layers_1.pre_attention_layer_norm.scale.v/.zarray +1 -0
.gitattributes
CHANGED
@@ -33,4 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
|
|
|
|
|
36 |
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
|
|
33 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
34 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
35 |
|
36 |
+
*.0 filter=lfs diff=lfs merge=lfs -text
|
37 |
+
checkpoint filter=lfs diff=lfs merge=lfs -text
|
38 |
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
@@ -72,4 +72,19 @@ pipe = pipeline("text2text-generation", "benjamin/compoundpiece")
|
|
72 |
|
73 |
pipe("Hauswirtschaftslehre", max_length=32)
|
74 |
# [{'generated_text': 'Haus-Wirtschaft-Lehre'}]
|
75 |
-
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
pipe("Hauswirtschaftslehre", max_length=32)
|
74 |
# [{'generated_text': 'Haus-Wirtschaft-Lehre'}]
|
75 |
+
```
|
76 |
+
|
77 |
+
# Citation
|
78 |
+
|
79 |
+
```
|
80 |
+
@article{minixhofer2023compoundpiece,
|
81 |
+
title={CompoundPiece: Evaluating and Improving Decompounding Performance of Language Models},
|
82 |
+
author={Minixhofer, Benjamin and Pfeiffer, Jonas and Vuli{\'c}, Ivan},
|
83 |
+
journal={arXiv preprint arXiv:2305.14214},
|
84 |
+
year={2023}
|
85 |
+
}
|
86 |
+
```
|
87 |
+
|
88 |
+
# License
|
89 |
+
|
90 |
+
MIT
|
t5x/checkpoint_1500000/checkpoint
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b735c62bdd032a03100c813ffeb6e1361c648c9730c1fab11bd2f8ccbea18153
|
3 |
+
size 2799177
|
t5x/checkpoint_1500000/state.param_states.decoder.decoder_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
|
t5x/checkpoint_1500000/state.param_states.decoder.decoder_norm.scale.v/0
ADDED
Binary file (5.46 kB). View file
|
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_0.pre_cross_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_0.pre_cross_attention_layer_norm.scale.v/0
ADDED
Binary file (5.52 kB). View file
|
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_0.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_0.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (5.49 kB). View file
|
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_0.pre_self_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_0.pre_self_attention_layer_norm.scale.v/0
ADDED
Binary file (5.57 kB). View file
|
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_1.pre_cross_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_1.pre_cross_attention_layer_norm.scale.v/0
ADDED
Binary file (5.33 kB). View file
|
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_1.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_1.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (5.28 kB). View file
|
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_1.pre_self_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_1.pre_self_attention_layer_norm.scale.v/0
ADDED
Binary file (5.32 kB). View file
|
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_2.pre_cross_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_2.pre_cross_attention_layer_norm.scale.v/0
ADDED
Binary file (5.32 kB). View file
|
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_2.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_2.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (5.27 kB). View file
|
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_2.pre_self_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_2.pre_self_attention_layer_norm.scale.v/0
ADDED
Binary file (5.43 kB). View file
|
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_3.pre_cross_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_3.pre_cross_attention_layer_norm.scale.v/0
ADDED
Binary file (5.37 kB). View file
|
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_3.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_3.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (5.25 kB). View file
|
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_3.pre_self_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_3.pre_self_attention_layer_norm.scale.v/0
ADDED
Binary file (5.43 kB). View file
|
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_4.pre_cross_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_4.pre_cross_attention_layer_norm.scale.v/0
ADDED
Binary file (5.33 kB). View file
|
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_4.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_4.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (5.24 kB). View file
|
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_4.pre_self_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_4.pre_self_attention_layer_norm.scale.v/0
ADDED
Binary file (5.41 kB). View file
|
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_5.pre_cross_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_5.pre_cross_attention_layer_norm.scale.v/0
ADDED
Binary file (5.45 kB). View file
|
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_5.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_5.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (5.31 kB). View file
|
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_5.pre_self_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
|
t5x/checkpoint_1500000/state.param_states.decoder.layers_5.pre_self_attention_layer_norm.scale.v/0
ADDED
Binary file (5.4 kB). View file
|
|
t5x/checkpoint_1500000/state.param_states.decoder.relpos_bias.rel_embedding.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[12,32],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[12,32],"zarr_format":2}
|
t5x/checkpoint_1500000/state.param_states.decoder.relpos_bias.rel_embedding.v/0.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e83028768d50ce1afbd2e2622478d89f86ab3b5cc3b67db7febd8dd06e9a5f0f
|
3 |
+
size 1486
|
t5x/checkpoint_1500000/state.param_states.encoder.encoder_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
|
t5x/checkpoint_1500000/state.param_states.encoder.encoder_norm.scale.v/0
ADDED
Binary file (5.39 kB). View file
|
|
t5x/checkpoint_1500000/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
|
t5x/checkpoint_1500000/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/0
ADDED
Binary file (5.57 kB). View file
|
|
t5x/checkpoint_1500000/state.param_states.encoder.layers_0.pre_mlp_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
|
t5x/checkpoint_1500000/state.param_states.encoder.layers_0.pre_mlp_layer_norm.scale.v/0
ADDED
Binary file (5.52 kB). View file
|
|
t5x/checkpoint_1500000/state.param_states.encoder.layers_1.pre_attention_layer_norm.scale.v/.zarray
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
|