santoshpandey
/

wav2vec2-large-xls-r-300m-nepali-colab

Automatic Speech Recognition

Generated from Trainer

Model card Files Files and versions Metrics Training metrics Community

santoshpandey committed on Sep 25, 2024

Commit

8ce7bcd

·

verified ·

1 Parent(s): 1d93adb

Upload tokenizer

Files changed (3) hide show

added_tokens.json +1 -1
tokenizer_config.json +17 -1
vocab.json +2 -2

added_tokens.json CHANGED Viewed

@@ -1,4 +1,4 @@
 {
   "</s>": 50,
-  "[PAD]": 49
 }

 {
   "</s>": 50,
+  "<s>": 49
 }

tokenizer_config.json CHANGED Viewed

@@ -1,6 +1,14 @@
 {
   "added_tokens_decoder": {
-    "49": {
       "content": "[PAD]",
       "lstrip": true,
       "normalized": false,
@@ -8,6 +16,14 @@
       "single_word": false,
       "special": false
     },
     "50": {
       "content": "</s>",
       "lstrip": false,

 {
   "added_tokens_decoder": {
+    "47": {
+      "content": "[UNK]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "48": {
       "content": "[PAD]",
       "lstrip": true,
       "normalized": false,
       "single_word": false,
       "special": false
     },
+    "49": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
     "50": {
       "content": "</s>",
       "lstrip": false,

vocab.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
-  "[PAD]": 49,
-  "[UNK]": 49,
   "|": 0,
   "ँ": 1,
   "ं": 2,

 {
+  "[PAD]": 48,
+  "[UNK]": 47,
   "|": 0,
   "ँ": 1,
   "ं": 2,