Update README.md
Browse files
README.md
CHANGED
@@ -21,4 +21,24 @@ Result from default T5 tokenizer (just as an example):
|
|
21 |
Result from this tokenizer:
|
22 |
```
|
23 |
['▁SELECT', '▁?answer', '▁WHERE', '▁{', '▁wd:Q8', '259', '46', '▁wdt:P371', '▁?X', '▁.', '▁?X', '▁wdt:P2048', '▁?answer', '}']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
```
|
|
|
21 |
Result from this tokenizer:
|
22 |
```
|
23 |
['▁SELECT', '▁?answer', '▁WHERE', '▁{', '▁wd:Q8', '259', '46', '▁wdt:P371', '▁?X', '▁.', '▁?X', '▁wdt:P2048', '▁?answer', '}']
|
24 |
+
```
|
25 |
+
|
26 |
+
# How to use
|
27 |
+
|
28 |
+
```python
|
29 |
+
from transformers import AutoTokenizer
|
30 |
+
tokenizer = AutoTokenizer.from_pretrained("InfAI/sparql-tokenizer")
|
31 |
+
tokenizer.tokenize("SELECT ?answer WHERE { wd:Q825946 wdt:P371 ?X . ?X wdt:P2048 ?answer}")
|
32 |
+
```
|
33 |
+
|
34 |
+
```
|
35 |
+
['▁SELECT', '▁?answer', '▁WHERE', '▁{', '▁wd:Q8', '259', '46', '▁wdt:P371', '▁?X', '▁.', '▁?X', '▁wdt:P2048', '▁?answer', '}']
|
36 |
+
```
|
37 |
+
|
38 |
+
```python
|
39 |
+
tokenizer("SELECT ?answer WHERE { wd:Q825946 wdt:P371 ?X . ?X wdt:P2048 ?answer}")
|
40 |
+
```
|
41 |
+
|
42 |
+
```
|
43 |
+
{'input_ids': [441, 444, 431, 422, 606, 1388, 720, 1791, 456, 418, 456, 3657, 444, 185], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
|
44 |
```
|