Upload 5 files

Browse files

Files changed (4) hide show

added_tokens.json +4 -0
special_tokens_map.json +6 -0
tokenizer_config.json +48 -0
vocab.json +1 -0

added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "</s>": 47,
+  "<s>": 46
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "pad_token": "[PAD]",
+  "unk_token": "[UNK]"
+}

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,48 @@

+{
+  "added_tokens_decoder": {
+    "44": {
+      "content": "[UNK]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "45": {
+      "content": "[PAD]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "46": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "47": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": true,
+  "do_lower_case": false,
+  "eos_token": "</s>",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "processor_class": "Wav2Vec2Processor",
+  "replace_word_delimiter_char": " ",
+  "target_lang": null,
+  "tokenizer_class": "Wav2Vec2CTCTokenizer",
+  "unk_token": "[UNK]",
+  "word_delimiter_token": "|"
+}

vocab.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"g": 0, "k": 1, "\u00ec": 2, "w": 3, "d": 4, "p": 5, "\u00f3": 6, "o": 7, "n": 8, "\u0127": 9, "x": 10, "\u0121": 11, "\u00f9": 12, "b": 13, "f": 14, "\u00e9": 15, "j": 16, "4": 17, "a": 18, "m": 19, "e": 20, "\u0107": 21, "\u017c": 22, "q": 23, "s": 24, "y": 25, "\u010b": 26, "\u00ed": 27, "-": 28, "t": 29, "h": 30, "r": 31, "i": 32, "\u00e1": 33, "u": 34, "v": 35, "c": 37, "\u00f2": 38, "`": 39, "l": 40, "\u00e0": 41, "\u00e8": 42, "z": 43, "|": 36, "[UNK]": 44, "[PAD]": 45}