nioushasadjadi committed on
Commit
eb1e311
·
1 Parent(s): f15abb2

Download the tokenizer from Hugging Face Hub

Browse files
Files changed (1) hide show
  1. tokenizer.py +3 -1
tokenizer.py CHANGED
@@ -1,4 +1,5 @@
1
  from transformers import PreTrainedTokenizer
 
2
  import json
3
  import os
4
  from itertools import product
@@ -109,7 +110,8 @@ class KmerTokenizer(PreTrainedTokenizer):
109
  @classmethod
110
  def from_pretrained(cls, pretrained_dir, **kwargs):
111
  # Load vocabulary
112
- vocab_file = os.path.join(pretrained_dir, "tokenizer.json")
 
113
  with open(vocab_file, "r", encoding="utf-8") as f:
114
  vocab_content = json.load(f)
115
  vocab = vocab_content["model"]["vocab"]
 
1
  from transformers import PreTrainedTokenizer
2
+ from huggingface_hub import hf_hub_download
3
  import json
4
  import os
5
  from itertools import product
 
110
  @classmethod
111
  def from_pretrained(cls, pretrained_dir, **kwargs):
112
  # Load vocabulary
113
+ vocab_file = hf_hub_download(repo_id=pretrained_dir, filename="tokenizer.json")
114
+ # vocab_file = os.path.join(pretrained_dir, "tokenizer.json")
115
  with open(vocab_file, "r", encoding="utf-8") as f:
116
  vocab_content = json.load(f)
117
  vocab = vocab_content["model"]["vocab"]