nioushasadjadi committed · Commit eb1e311 · 1 Parent(s): f15abb2
Download the tokenizer from Hugging Face Hub

Files changed: tokenizer.py (+3 −1)

tokenizer.py CHANGED
@@ -1,4 +1,5 @@
 from transformers import PreTrainedTokenizer
+from huggingface_hub import hf_hub_download
 import json
 import os
 from itertools import product
@@ -109,7 +110,8 @@ class KmerTokenizer(PreTrainedTokenizer):
     @classmethod
     def from_pretrained(cls, pretrained_dir, **kwargs):
         # Load vocabulary
-        vocab_file = os.path.join(pretrained_dir, "tokenizer.json")
+        vocab_file = hf_hub_download(repo_id=pretrained_dir, filename="tokenizer.json")
+        # vocab_file = os.path.join(pretrained_dir, "tokenizer.json")
         with open(vocab_file, "r", encoding="utf-8") as f:
             vocab_content = json.load(f)
         vocab = vocab_content["model"]["vocab"]
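After this change, from_pretrained resolves tokenizer.json via hf_hub_download, which fetches the file from the Hub repository named by pretrained_dir and returns the path of the locally cached copy; the previous local-path lookup is kept only as a comment. A minimal usage sketch, assuming the class is importable from tokenizer.py and using a placeholder repo id:

from tokenizer import KmerTokenizer

# pretrained_dir is now treated as a Hub repo id: hf_hub_download caches
# tokenizer.json locally and returns its path before the vocab is loaded.
tokenizer = KmerTokenizer.from_pretrained("username/repo-name")  # placeholder repo id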