Update README.md
Browse files
README.md
CHANGED
@@ -113,5 +113,23 @@ The model was trained on [Japanese CC-100](http://data.statmt.org/cc-100/ja.txt.
|
|
113 |
# Tokenization
|
114 |
The model uses a [sentencepiece](https://github.com/google/sentencepiece)-based tokenizer. The vocabulary was trained on the Japanese Wikipedia using the official sentencepiece training script.
|
115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
# License
|
117 |
[The MIT license](https://opensource.org/licenses/MIT)
|
|
|
113 |
# Tokenization
|
114 |
The model uses a [sentencepiece](https://github.com/google/sentencepiece)-based tokenizer. The vocabulary was trained on the Japanese Wikipedia using the official sentencepiece training script.
|
115 |
|
116 |
+
# How to cite
|
117 |
+
~~~
|
118 |
+
@misc{rinna-japanese-roberta-base,
|
119 |
+
title = {rinna/japanese-roberta-base},
|
120 |
+
author = {Zhao, Tianyu and Sawada, Kei},
|
121 |
+
url = {https://huggingface.co/rinna/japanese-roberta-base},
|
122 |
+
}
|
123 |
+
|
124 |
+
@inproceedings{sawada2024release,
|
125 |
+
title = {Release of Pre-Trained Models for the {J}apanese Language},
|
126 |
+
author = {Sawada, Kei and Zhao, Tianyu and Shing, Makoto and Mitsui, Kentaro and Kaga, Akio and Hono, Yukiya and Wakatsuki, Toshiaki and Mitsuda, Koh},
|
127 |
+
booktitle = {Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)},
|
128 |
+
month = {5},
|
129 |
+
year = {2024},
|
130 |
+
url = {https://arxiv.org/abs/2404.01657},
|
131 |
+
}
|
132 |
+
~~~
|
133 |
+
|
134 |
# License
|
135 |
[The MIT license](https://opensource.org/licenses/MIT)
|