add python model, readme

Files changed (3) hide show

README.md ADDED Viewed

+---
+license: mit
+language:
+- af
+- az
+- be
+- bg
+- bn
+- ca
+- cs
+- cy
+- da
+- de
+- el
+- en
+- eo
+- es
+- et
+- eu
+- fa
+- fi
+- fr
+- fy
+- ga
+- gl
+- gu
+- he
+- hi
+- hu
+- hy
+- id
+- is
+- it
+- ka
+- kk
+- ky
+- la
+- lt
+- lv
+- mg
+- mk
+- ml
+- mt
+- nl
+- pa
+- pl
+- pt
+- ro
+- ru
+- sk
+- sq
+- sv
+- ta
+- te
+- th
+- tr
+- uk
+- yi
+- yo
+datasets:
+- benjamin/compoundpiece
+---
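+Compound normalization model from the paper [CompoundPiece: Evaluating and Improving Decompounding Performance of Language Models](https://arxiv.org/abs/2305.14214). Given a compound word, it generates the word's normalized constituents joined by hyphens, as shown in the example below.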
+## Usage
+```python
+from transformers import pipeline
+pipe = pipeline("text2text-generation", "benjamin/compoundpiece")
+pipe("Hauswirtschaftslehre", max_length=32)
+# [{'generated_text': 'Haus-Wirtschaft-Lehre'}]
+```

config.json CHANGED Viewed

@@ -1,5 +1,4 @@
 {
-  "_name_or_path": "/home/patrick/t5/byt5-base",
   "architectures": [
     "T5ForConditionalGeneration"
   ],
@@ -26,6 +25,7 @@
   "relative_attention_num_buckets": 32,
   "tie_word_embeddings": false,
   "tokenizer_class": "ByT5Tokenizer",
   "transformers_version": "4.26.0",
   "use_cache": true,
   "vocab_size": 384

 {
   "architectures": [
     "T5ForConditionalGeneration"
   ],
   "relative_attention_num_buckets": 32,
   "tie_word_embeddings": false,
   "tokenizer_class": "ByT5Tokenizer",
+  "torch_dtype": "float32",
   "transformers_version": "4.26.0",
   "use_cache": true,
   "vocab_size": 384

pytorch_model.bin ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:0776ceaa762993d843dc71ae3c7f0688477855533d367423b345aafcc5fca828
+size 2326696509