kargaranamir
committed on
Commit
•
634773e
1
Parent(s):
2f01166
Update README.md
Browse files
README.md
CHANGED
@@ -24,41 +24,31 @@ You can use this model directly with a pipeline for masked language modeling:
|
|
24 |
Here is how to use this model to get the features of a given text in PyTorch:
|
25 |
|
26 |
```python
|
27 |
-
from transformers import AutoTokenizer, AutoModelForMaskedLM
|
28 |
|
29 |
-
tokenizer = AutoTokenizer.from_pretrained('cis-lmu/glot500-base')
|
30 |
-
model = AutoModelForMaskedLM.from_pretrained("cis-lmu/glot500-base")
|
31 |
|
32 |
-
# prepare input
|
33 |
-
text = "Replace me by any text you'd like."
|
34 |
-
encoded_input = tokenizer(text, return_tensors='pt')
|
35 |
|
36 |
-
# forward pass
|
37 |
-
output = model(**encoded_input)
|
38 |
```
|
39 |
|
40 |
### BibTeX entry and citation info
|
41 |
|
42 |
```bibtex
|
43 |
@inproceedings{imanigooghari-etal-2023-glot500,
|
44 |
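-
title = "Glot500: Scaling Multilingual Corpora and Language Models to 500 Languages",
|
45 |
-
author = {ImaniGooghari, Ayyoob and
|
46 |
-
Lin, Peiqin and
|
47 |
-
Kargaran, Amir Hossein and
|
48 |
-
Severini, Silvia and
|
49 |
-
Jalili Sabet, Masoud and
|
50 |
-
Kassner, Nora and
|
51 |
-
Ma, Chunlan and
|
52 |
-
Schmid, Helmut and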
|
53 |
-
Martins, Andr{\'e} and
|
54 |
-
Yvon, Fran{\c{c}}ois and
|
55 |
-
Sch{\"u}tze, Hinrich},
|
56 |
-
booktitle = "Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)",
|
57 |
-
month = jul,
|
58 |
-
year = "2023",
|
59 |
-
address = "Toronto, Canada",
|
60 |
-
publisher = "Association for Computational Linguistics",
|
61 |
-
url = "https://aclanthology.org/2023.acl-long.61",
|
62 |
-
pages = "1082--1117",
|
63 |
}
|
64 |
```
|
|
|
24 |
Here is how to use this model to get the features of a given text in PyTorch:
|
25 |
|
26 |
```python
|
27 |
+
>>> from transformers import AutoTokenizer, AutoModelForMaskedLM
|
28 |
|
29 |
+
>>> tokenizer = AutoTokenizer.from_pretrained('cis-lmu/glot500-base')
|
30 |
+
>>> model = AutoModelForMaskedLM.from_pretrained("cis-lmu/glot500-base")
|
31 |
|
32 |
+
>>> # prepare input
|
33 |
+
>>> text = "Replace me by any text you'd like."
|
34 |
+
>>> encoded_input = tokenizer(text, return_tensors='pt')
|
35 |
|
36 |
+
>>> # forward pass
|
37 |
+
>>> output = model(**encoded_input)
|
38 |
```
|
39 |
|
40 |
### BibTeX entry and citation info
|
41 |
|
42 |
```bibtex
|
43 |
@inproceedings{imanigooghari-etal-2023-glot500,
|
44 |
+
title = {Glot500: Scaling Multilingual Corpora and Language Models to 500 Languages},
|
45 |
+
author = {ImaniGooghari, Ayyoob and Lin, Peiqin and Kargaran, Amir Hossein and Severini, Silvia and Jalili Sabet, Masoud and Kassner, Nora and Ma, Chunlan and Schmid, Helmut and Martins, Andr{\'e} and Yvon, Fran{\c{c}}ois and Sch{\"u}tze, Hinrich},
|
46 |
+
year = 2023,
|
47 |
+
month = jul,
|
48 |
+
booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
|
49 |
+
publisher = {Association for Computational Linguistics},
|
50 |
+
address = {Toronto, Canada},
|
51 |
+
pages = {1082--1117},
|
52 |
+
url = {https://aclanthology.org/2023.acl-long.61}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
}
|
54 |
```
|