Allow only unidic_lite (#2)
Browse files- Allow only unidic_lite (1c892637d6d1ff8c268547b8188aca6c3a0cbd7d)
- Delete line (bd73a41d9ffdf9b62d0b44cbe28567fa99d7e3fb)
- README.md +8 -0
- distilbert_japanese_tokenizer.py +0 -16
README.md
CHANGED
@@ -28,6 +28,14 @@ sentence = "LINE株式会社で[MASK]の研究・開発をしている。"
|
|
28 |
print(model(**tokenizer(sentence, return_tensors="pt")))
|
29 |
```
|
30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
## Model architecture
|
32 |
|
33 |
The model architecture is the DistilBERT base model: 6 layers, 768 dimensions of hidden states, 12 attention heads, 66M parameters.
|
|
|
28 |
print(model(**tokenizer(sentence, return_tensors="pt")))
|
29 |
```
|
30 |
|
31 |
+
### Requirements
|
32 |
+
|
33 |
+
```txt
|
34 |
+
fugashi
|
35 |
+
sentencepiece
|
36 |
+
unidic-lite
|
37 |
+
```
|
38 |
+
|
39 |
## Model architecture
|
40 |
|
41 |
The model architecture is the DistilBERT base model: 6 layers, 768 dimensions of hidden states, 12 attention heads, 66M parameters.
|
distilbert_japanese_tokenizer.py
CHANGED
@@ -485,22 +485,6 @@ class MecabTokenizer:
|
|
485 |
)
|
486 |
|
487 |
dic_dir = unidic_lite.DICDIR
|
488 |
-
elif mecab_dic == "unidic":
|
489 |
-
try:
|
490 |
-
import unidic
|
491 |
-
except ModuleNotFoundError as error:
|
492 |
-
raise error.__class__(
|
493 |
-
"The unidic dictionary is not installed. "
|
494 |
-
"See https://github.com/polm/unidic-py for installation."
|
495 |
-
)
|
496 |
-
|
497 |
-
dic_dir = unidic.DICDIR
|
498 |
-
if not os.path.isdir(dic_dir):
|
499 |
-
raise RuntimeError(
|
500 |
-
"The unidic dictionary itself is not found. "
|
501 |
-
"See https://github.com/polm/unidic-py for installation."
|
502 |
-
)
|
503 |
-
|
504 |
else:
|
505 |
raise ValueError("Invalid mecab_dic is specified.")
|
506 |
|
|
|
485 |
)
|
486 |
|
487 |
dic_dir = unidic_lite.DICDIR
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
488 |
else:
|
489 |
raise ValueError("Invalid mecab_dic is specified.")
|
490 |
|