lst-nectec
/

HoogBERTa

Inference Endpoints

Model card | Files and versions | Community

new5558 committed on Apr 5, 2023

Commit

69669b4

•

1 Parent(s): c94d6ae

Upload tokenizer

Files changed (2) hide show

tokenizer.json +1 -0
tokenizer_config.json +1 -0

tokenizer.json CHANGED Viewed

@@ -128,6 +128,7 @@
     "continuing_subword_prefix": null,
     "end_of_word_suffix": "</w>",
     "fuse_unk": false,
     "vocab": {
       "<s>": 0,
       "<pad>": 1,

     "continuing_subword_prefix": null,
     "end_of_word_suffix": "</w>",
     "fuse_unk": false,
+    "byte_fallback": false,
     "vocab": {
       "<s>": 0,
       "<pad>": 1,

tokenizer_config.json CHANGED Viewed

@@ -5,5 +5,6 @@
   "mask_token": "<mask>",
   "model_max_length": 512,
   "pad_token": "<pad>",
   "tokenizer_class": "PreTrainedTokenizerFast"
 }

   "mask_token": "<mask>",
   "model_max_length": 512,
   "pad_token": "<pad>",
+  "special_tokens_map_file": "data/converted_model_mlm_huggingface/special_tokens_map.json",
   "tokenizer_class": "PreTrainedTokenizerFast"
 }