Update the model and add an eval section
Browse files- README.md +17 -0
- config.json +2 -2
- pytorch_model.bin +2 -2
- source.spm +2 -2
- target.spm +2 -2
- vocab.json +0 -0
README.md
CHANGED
@@ -41,3 +41,20 @@ from transformers import pipeline
|
|
41 |
tako_translator = pipeline('translation', model='staka/takomt')
|
42 |
tako_translator('This is a cat.')
|
43 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
tako_translator = pipeline('translation', model='staka/takomt')
|
42 |
tako_translator('This is a cat.')
|
43 |
```
|
44 |
+
|
45 |
+
### Eval results
|
46 |
+
|
47 |
+
The results of the evaluation using [Tatoeba](https://tatoeba.org/ja) (500 randomly selected sentences) are as follows:
|
48 |
+
|
49 |
+
|source |target |BLEU(*1)|
|
50 |
+
|-------|-------|--------|
|
51 |
+
|de |ja |27.8 |
|
52 |
+
|en |ja |28.4 |
|
53 |
+
|es |ja |32.0 |
|
54 |
+
|fr |ja |27.9 |
|
55 |
+
|it |ja |24.3 |
|
56 |
+
|ru |ja |27.3 |
|
57 |
+
|uk |ja |29.8 |
|
58 |
+
|
59 |
+
|
60 |
+
(*1) sacrebleu --tokenize ja-mecab
|
config.json
CHANGED
@@ -22,7 +22,7 @@
|
|
22 |
"encoder_attention_heads": 8,
|
23 |
"encoder_ffn_dim": 2048,
|
24 |
"encoder_layerdrop": 0.0,
|
25 |
-
"encoder_layers":
|
26 |
"eos_token_id": 0,
|
27 |
"forced_eos_token_id": 0,
|
28 |
"gradient_checkpointing": false,
|
@@ -33,7 +33,7 @@
|
|
33 |
"model_type": "marian",
|
34 |
"normalize_embedding": false,
|
35 |
"num_beams": 12,
|
36 |
-
"num_hidden_layers":
|
37 |
"pad_token_id": 96000,
|
38 |
"scale_embedding": true,
|
39 |
"static_position_embeddings": true,
|
|
|
22 |
"encoder_attention_heads": 8,
|
23 |
"encoder_ffn_dim": 2048,
|
24 |
"encoder_layerdrop": 0.0,
|
25 |
+
"encoder_layers": 6,
|
26 |
"eos_token_id": 0,
|
27 |
"forced_eos_token_id": 0,
|
28 |
"gradient_checkpointing": false,
|
|
|
33 |
"model_type": "marian",
|
34 |
"normalize_embedding": false,
|
35 |
"num_beams": 12,
|
36 |
+
"num_hidden_layers": 6,
|
37 |
"pad_token_id": 96000,
|
38 |
"scale_embedding": true,
|
39 |
"static_position_embeddings": true,
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:62833e6e935aee7a15a0a5b7cd3c3a4f136d858b8d5b598993062b72f7f9fd2d
|
3 |
+
size 285160131
|
source.spm
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7269a36b6dc795a71c9f52e747a55623caa5a729c06a939ed42544e76546af50
|
3 |
+
size 2100964
|
target.spm
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7269a36b6dc795a71c9f52e747a55623caa5a729c06a939ed42544e76546af50
|
3 |
+
size 2100964
|
vocab.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|