benjamin committed on
Commit
aeb59fc
1 Parent(s): 38d2519

add t5x checkpoint

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitattributes +2 -0
  2. README.md +16 -1
  3. t5x/checkpoint_1500000/checkpoint +3 -0
  4. t5x/checkpoint_1500000/state.param_states.decoder.decoder_norm.scale.v/.zarray +1 -0
  5. t5x/checkpoint_1500000/state.param_states.decoder.decoder_norm.scale.v/0 +0 -0
  6. t5x/checkpoint_1500000/state.param_states.decoder.layers_0.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
  7. t5x/checkpoint_1500000/state.param_states.decoder.layers_0.pre_cross_attention_layer_norm.scale.v/0 +0 -0
  8. t5x/checkpoint_1500000/state.param_states.decoder.layers_0.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  9. t5x/checkpoint_1500000/state.param_states.decoder.layers_0.pre_mlp_layer_norm.scale.v/0 +0 -0
  10. t5x/checkpoint_1500000/state.param_states.decoder.layers_0.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
  11. t5x/checkpoint_1500000/state.param_states.decoder.layers_0.pre_self_attention_layer_norm.scale.v/0 +0 -0
  12. t5x/checkpoint_1500000/state.param_states.decoder.layers_1.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
  13. t5x/checkpoint_1500000/state.param_states.decoder.layers_1.pre_cross_attention_layer_norm.scale.v/0 +0 -0
  14. t5x/checkpoint_1500000/state.param_states.decoder.layers_1.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  15. t5x/checkpoint_1500000/state.param_states.decoder.layers_1.pre_mlp_layer_norm.scale.v/0 +0 -0
  16. t5x/checkpoint_1500000/state.param_states.decoder.layers_1.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
  17. t5x/checkpoint_1500000/state.param_states.decoder.layers_1.pre_self_attention_layer_norm.scale.v/0 +0 -0
  18. t5x/checkpoint_1500000/state.param_states.decoder.layers_2.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
  19. t5x/checkpoint_1500000/state.param_states.decoder.layers_2.pre_cross_attention_layer_norm.scale.v/0 +0 -0
  20. t5x/checkpoint_1500000/state.param_states.decoder.layers_2.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  21. t5x/checkpoint_1500000/state.param_states.decoder.layers_2.pre_mlp_layer_norm.scale.v/0 +0 -0
  22. t5x/checkpoint_1500000/state.param_states.decoder.layers_2.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
  23. t5x/checkpoint_1500000/state.param_states.decoder.layers_2.pre_self_attention_layer_norm.scale.v/0 +0 -0
  24. t5x/checkpoint_1500000/state.param_states.decoder.layers_3.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
  25. t5x/checkpoint_1500000/state.param_states.decoder.layers_3.pre_cross_attention_layer_norm.scale.v/0 +0 -0
  26. t5x/checkpoint_1500000/state.param_states.decoder.layers_3.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  27. t5x/checkpoint_1500000/state.param_states.decoder.layers_3.pre_mlp_layer_norm.scale.v/0 +0 -0
  28. t5x/checkpoint_1500000/state.param_states.decoder.layers_3.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
  29. t5x/checkpoint_1500000/state.param_states.decoder.layers_3.pre_self_attention_layer_norm.scale.v/0 +0 -0
  30. t5x/checkpoint_1500000/state.param_states.decoder.layers_4.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
  31. t5x/checkpoint_1500000/state.param_states.decoder.layers_4.pre_cross_attention_layer_norm.scale.v/0 +0 -0
  32. t5x/checkpoint_1500000/state.param_states.decoder.layers_4.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  33. t5x/checkpoint_1500000/state.param_states.decoder.layers_4.pre_mlp_layer_norm.scale.v/0 +0 -0
  34. t5x/checkpoint_1500000/state.param_states.decoder.layers_4.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
  35. t5x/checkpoint_1500000/state.param_states.decoder.layers_4.pre_self_attention_layer_norm.scale.v/0 +0 -0
  36. t5x/checkpoint_1500000/state.param_states.decoder.layers_5.pre_cross_attention_layer_norm.scale.v/.zarray +1 -0
  37. t5x/checkpoint_1500000/state.param_states.decoder.layers_5.pre_cross_attention_layer_norm.scale.v/0 +0 -0
  38. t5x/checkpoint_1500000/state.param_states.decoder.layers_5.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  39. t5x/checkpoint_1500000/state.param_states.decoder.layers_5.pre_mlp_layer_norm.scale.v/0 +0 -0
  40. t5x/checkpoint_1500000/state.param_states.decoder.layers_5.pre_self_attention_layer_norm.scale.v/.zarray +1 -0
  41. t5x/checkpoint_1500000/state.param_states.decoder.layers_5.pre_self_attention_layer_norm.scale.v/0 +0 -0
  42. t5x/checkpoint_1500000/state.param_states.decoder.relpos_bias.rel_embedding.v/.zarray +1 -0
  43. t5x/checkpoint_1500000/state.param_states.decoder.relpos_bias.rel_embedding.v/0.0 +3 -0
  44. t5x/checkpoint_1500000/state.param_states.encoder.encoder_norm.scale.v/.zarray +1 -0
  45. t5x/checkpoint_1500000/state.param_states.encoder.encoder_norm.scale.v/0 +0 -0
  46. t5x/checkpoint_1500000/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/.zarray +1 -0
  47. t5x/checkpoint_1500000/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/0 +0 -0
  48. t5x/checkpoint_1500000/state.param_states.encoder.layers_0.pre_mlp_layer_norm.scale.v/.zarray +1 -0
  49. t5x/checkpoint_1500000/state.param_states.encoder.layers_0.pre_mlp_layer_norm.scale.v/0 +0 -0
  50. t5x/checkpoint_1500000/state.param_states.encoder.layers_1.pre_attention_layer_norm.scale.v/.zarray +1 -0
.gitattributes CHANGED
@@ -33,4 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
 
 
 
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
 
36
+ *.0 filter=lfs diff=lfs merge=lfs -text
37
+ checkpoint filter=lfs diff=lfs merge=lfs -text
38
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -72,4 +72,19 @@ pipe = pipeline("text2text-generation", "benjamin/compoundpiece")
72
 
73
  pipe("Hauswirtschaftslehre", max_length=32)
74
  # [{'generated_text': 'Haus-Wirtschaft-Lehre'}]
75
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
  pipe("Hauswirtschaftslehre", max_length=32)
74
  # [{'generated_text': 'Haus-Wirtschaft-Lehre'}]
75
+ ```
76
+
77
+ # Citation
78
+
79
+ ```
80
+ @article{minixhofer2023compoundpiece,
81
+ title={CompoundPiece: Evaluating and Improving Decompounding Performance of Language Models},
82
+ author={Minixhofer, Benjamin and Pfeiffer, Jonas and Vuli{\'c}, Ivan},
83
+ journal={arXiv preprint arXiv:2305.14214},
84
+ year={2023}
85
+ }
86
+ ```
87
+
88
+ # License
89
+
90
+ MIT
t5x/checkpoint_1500000/checkpoint ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b735c62bdd032a03100c813ffeb6e1361c648c9730c1fab11bd2f8ccbea18153
3
+ size 2799177
t5x/checkpoint_1500000/state.param_states.decoder.decoder_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1500000/state.param_states.decoder.decoder_norm.scale.v/0 ADDED
Binary file (5.46 kB). View file
 
t5x/checkpoint_1500000/state.param_states.decoder.layers_0.pre_cross_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1500000/state.param_states.decoder.layers_0.pre_cross_attention_layer_norm.scale.v/0 ADDED
Binary file (5.52 kB). View file
 
t5x/checkpoint_1500000/state.param_states.decoder.layers_0.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1500000/state.param_states.decoder.layers_0.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (5.49 kB). View file
 
t5x/checkpoint_1500000/state.param_states.decoder.layers_0.pre_self_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1500000/state.param_states.decoder.layers_0.pre_self_attention_layer_norm.scale.v/0 ADDED
Binary file (5.57 kB). View file
 
t5x/checkpoint_1500000/state.param_states.decoder.layers_1.pre_cross_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1500000/state.param_states.decoder.layers_1.pre_cross_attention_layer_norm.scale.v/0 ADDED
Binary file (5.33 kB). View file
 
t5x/checkpoint_1500000/state.param_states.decoder.layers_1.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1500000/state.param_states.decoder.layers_1.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (5.28 kB). View file
 
t5x/checkpoint_1500000/state.param_states.decoder.layers_1.pre_self_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1500000/state.param_states.decoder.layers_1.pre_self_attention_layer_norm.scale.v/0 ADDED
Binary file (5.32 kB). View file
 
t5x/checkpoint_1500000/state.param_states.decoder.layers_2.pre_cross_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1500000/state.param_states.decoder.layers_2.pre_cross_attention_layer_norm.scale.v/0 ADDED
Binary file (5.32 kB). View file
 
t5x/checkpoint_1500000/state.param_states.decoder.layers_2.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1500000/state.param_states.decoder.layers_2.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (5.27 kB). View file
 
t5x/checkpoint_1500000/state.param_states.decoder.layers_2.pre_self_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1500000/state.param_states.decoder.layers_2.pre_self_attention_layer_norm.scale.v/0 ADDED
Binary file (5.43 kB). View file
 
t5x/checkpoint_1500000/state.param_states.decoder.layers_3.pre_cross_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1500000/state.param_states.decoder.layers_3.pre_cross_attention_layer_norm.scale.v/0 ADDED
Binary file (5.37 kB). View file
 
t5x/checkpoint_1500000/state.param_states.decoder.layers_3.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1500000/state.param_states.decoder.layers_3.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (5.25 kB). View file
 
t5x/checkpoint_1500000/state.param_states.decoder.layers_3.pre_self_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1500000/state.param_states.decoder.layers_3.pre_self_attention_layer_norm.scale.v/0 ADDED
Binary file (5.43 kB). View file
 
t5x/checkpoint_1500000/state.param_states.decoder.layers_4.pre_cross_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1500000/state.param_states.decoder.layers_4.pre_cross_attention_layer_norm.scale.v/0 ADDED
Binary file (5.33 kB). View file
 
t5x/checkpoint_1500000/state.param_states.decoder.layers_4.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1500000/state.param_states.decoder.layers_4.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (5.24 kB). View file
 
t5x/checkpoint_1500000/state.param_states.decoder.layers_4.pre_self_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1500000/state.param_states.decoder.layers_4.pre_self_attention_layer_norm.scale.v/0 ADDED
Binary file (5.41 kB). View file
 
t5x/checkpoint_1500000/state.param_states.decoder.layers_5.pre_cross_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1500000/state.param_states.decoder.layers_5.pre_cross_attention_layer_norm.scale.v/0 ADDED
Binary file (5.45 kB). View file
 
t5x/checkpoint_1500000/state.param_states.decoder.layers_5.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1500000/state.param_states.decoder.layers_5.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (5.31 kB). View file
 
t5x/checkpoint_1500000/state.param_states.decoder.layers_5.pre_self_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1500000/state.param_states.decoder.layers_5.pre_self_attention_layer_norm.scale.v/0 ADDED
Binary file (5.4 kB). View file
 
t5x/checkpoint_1500000/state.param_states.decoder.relpos_bias.rel_embedding.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[12,32],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[12,32],"zarr_format":2}
t5x/checkpoint_1500000/state.param_states.decoder.relpos_bias.rel_embedding.v/0.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e83028768d50ce1afbd2e2622478d89f86ab3b5cc3b67db7febd8dd06e9a5f0f
3
+ size 1486
t5x/checkpoint_1500000/state.param_states.encoder.encoder_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1500000/state.param_states.encoder.encoder_norm.scale.v/0 ADDED
Binary file (5.39 kB). View file
 
t5x/checkpoint_1500000/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1500000/state.param_states.encoder.layers_0.pre_attention_layer_norm.scale.v/0 ADDED
Binary file (5.57 kB). View file
 
t5x/checkpoint_1500000/state.param_states.encoder.layers_0.pre_mlp_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}
t5x/checkpoint_1500000/state.param_states.encoder.layers_0.pre_mlp_layer_norm.scale.v/0 ADDED
Binary file (5.52 kB). View file
 
t5x/checkpoint_1500000/state.param_states.encoder.layers_1.pre_attention_layer_norm.scale.v/.zarray ADDED
@@ -0,0 +1 @@
 
 
1
+ {"chunks":[1536],"compressor":{"id":"gzip","level":1},"dimension_separator":".","dtype":"<f4","fill_value":null,"filters":null,"order":"C","shape":[1536],"zarr_format":2}