Update README.md
Browse files
README.md
CHANGED
@@ -2,8 +2,6 @@
|
|
2 |
language: ja
|
3 |
thumbnail: https://github.com/rinnakk/japanese-gpt2/blob/master/rinna.png
|
4 |
tags:
|
5 |
-
- ja
|
6 |
-
- japanese
|
7 |
- roberta
|
8 |
- masked-lm
|
9 |
- nlp
|
@@ -114,22 +112,24 @@ The model was trained on [Japanese CC-100](http://data.statmt.org/cc-100/ja.txt.
|
|
114 |
The model uses a [sentencepiece](https://github.com/google/sentencepiece)-based tokenizer; the vocabulary was trained on the Japanese Wikipedia using the official sentencepiece training script.
|
115 |
|
116 |
# How to cite
|
117 |
-
|
118 |
@misc{rinna-japanese-roberta-base,
|
119 |
title = {rinna/japanese-roberta-base},
|
120 |
author = {Zhao, Tianyu and Sawada, Kei},
|
121 |
-
url = {https://huggingface.co/rinna/japanese-roberta-base}
|
122 |
}
|
123 |
|
124 |
-
@inproceedings{
|
125 |
title = {Release of Pre-Trained Models for the {J}apanese Language},
|
126 |
author = {Sawada, Kei and Zhao, Tianyu and Shing, Makoto and Mitsui, Kentaro and Kaga, Akio and Hono, Yukiya and Wakatsuki, Toshiaki and Mitsuda, Koh},
|
127 |
booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
|
128 |
month = {5},
|
129 |
year = {2024},
|
130 |
-
|
|
|
|
|
131 |
}
|
132 |
-
|
133 |
|
134 |
# License
|
135 |
[The MIT license](https://opensource.org/licenses/MIT)
|
|
|
2 |
language: ja
|
3 |
thumbnail: https://github.com/rinnakk/japanese-gpt2/blob/master/rinna.png
|
4 |
tags:
|
|
|
|
|
5 |
- roberta
|
6 |
- masked-lm
|
7 |
- nlp
|
|
|
112 |
The model uses a [sentencepiece](https://github.com/google/sentencepiece)-based tokenizer; the vocabulary was trained on the Japanese Wikipedia using the official sentencepiece training script.
|
113 |
|
114 |
# How to cite
|
115 |
+
```bibtex
|
116 |
@misc{rinna-japanese-roberta-base,
|
117 |
title = {rinna/japanese-roberta-base},
|
118 |
author = {Zhao, Tianyu and Sawada, Kei},
|
119 |
+
url = {https://huggingface.co/rinna/japanese-roberta-base}
|
120 |
}
|
121 |
|
122 |
+
@inproceedings{sawada-etal-2024-release-pre,
|
123 |
title = {Release of Pre-Trained Models for the {J}apanese Language},
|
124 |
author = {Sawada, Kei and Zhao, Tianyu and Shing, Makoto and Mitsui, Kentaro and Kaga, Akio and Hono, Yukiya and Wakatsuki, Toshiaki and Mitsuda, Koh},
|
125 |
booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
|
126 |
month = {5},
|
127 |
year = {2024},
|
128 |
+
pages = {13898--13905},
|
129 |
+
url = {https://aclanthology.org/2024.lrec-main.1213},
|
130 |
+
note = {\url{https://arxiv.org/abs/2404.01657}}
|
131 |
}
|
132 |
+
```
|
133 |
|
134 |
# License
|
135 |
[The MIT license](https://opensource.org/licenses/MIT)
|