finiteautomata
committed on
Commit
•
d9fc116
1
Parent(s):
4738326
improve tokenization
Browse files
- added_tokens.json +1 -1
- config.json +1 -1
- pytorch_model.bin +2 -2
- special_tokens_map.json +1 -1
- tokenizer.json +0 -0
added_tokens.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"
|
|
|
1 |
+
{"url": 31003, "@usuario": 31002, "emoji": 31005, "hashtag": 31004}
|
config.json
CHANGED
@@ -32,5 +32,5 @@
|
|
32 |
"transformers_version": "4.6.1",
|
33 |
"type_vocab_size": 2,
|
34 |
"use_cache": true,
|
35 |
-
"vocab_size":
|
36 |
}
|
|
|
32 |
"transformers_version": "4.6.1",
|
33 |
"type_vocab_size": 2,
|
34 |
"use_cache": true,
|
35 |
+
"vocab_size": 31006
|
36 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6301fc83af4c375b6b6c1d8916ec87bd853c2715250b91645da1e859ba6e000a
|
3 |
+
size 439512342
|
special_tokens_map.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"
|
|
|
1 |
+
{"unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
|
tokenizer.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|