Commit c273f23 by UmarRamzan (parent: 04687ec)

w2v2-bert-ngram-urdu
alphabet.json ADDED
@@ -0,0 +1 @@
+{"labels": [" ", "\u0623", "\u0624", "\u0626", "\u0627", "\u0628", "\u062a", "\u062b", "\u062c", "\u062d", "\u062e", "\u062f", "\u0630", "\u0631", "\u0632", "\u0633", "\u0634", "\u0635", "\u0636", "\u0637", "\u0638", "\u0639", "\u063a", "\u0641", "\u0642", "\u0643", "\u0644", "\u0645", "\u0646", "\u0647", "\u0648", "\u064a", "\u0679", "\u067e", "\u0686", "\u0688", "\u0691", "\u0698", "\u06a9", "\u06af", "\u06ba", "\u06be", "\u06c1", "\u06c2", "\u06c3", "\u06cc", "\u06d2", "\u06d3", "\ufb68", "\ufbad", "\ufbaf", "\ufbfe", "\ufdf2", "\ufdfa", "\ufe85", "\ufe97", "\ufe98", "\ufea9", "\ufeb2", "\ufee7", "\ufeee", "\u2047", "", "<s>", "</s>"], "is_bpe": false}
language_model/5gram.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e5454facbffe62a26c6781b6b779953e12aad7815b5eb3b03bdd7082f17181a
+size 6366106
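The 5gram.bin blob itself lives in Git LFS; the pointer above records only its hash and size (about 6.4 MB). It is a binary KenLM model, presumably built along the lines of KenLM's lmplz -o 5 followed by build_binary on an Urdu text corpus. A sketch that loads and queries it with the kenlm Python bindings, assuming LFS content has been pulled:

import kenlm  # pip install kenlm (needs the KenLM build dependencies)

# Load the LFS-resolved binary from a local checkout.
lm = kenlm.Model("language_model/5gram.bin")
print(lm.order)                    # expected: 5
print(lm.score("یہ ایک مثال ہے"))  # log10 probability of an Urdu sentence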
language_model/attrs.json ADDED
@@ -0,0 +1 @@
+{"alpha": 0.5, "beta": 1.5, "unk_score_offset": -10.0, "score_boundary": true}
language_model/unigrams.txt ADDED
(Diff too large to render.)
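attrs.json carries the beam-search hyperparameters (alpha weighs the LM score, beta is the word-insertion bonus, unk_score_offset penalizes out-of-vocabulary words, score_boundary controls sentence-boundary scoring), and unigrams.txt lists the LM vocabulary; the stored values match pyctcdecode's defaults. The decoder can be rebuilt by hand from these three files, as in the sketch below, though normally Wav2Vec2ProcessorWithLM.from_pretrained does this for you:

import json
from pyctcdecode import build_ctcdecoder

labels = json.load(open("alphabet.json", encoding="utf-8"))["labels"]
with open("language_model/unigrams.txt", encoding="utf-8") as f:
    unigrams = [line.strip() for line in f if line.strip()]

decoder = build_ctcdecoder(
    labels,
    kenlm_model_path="language_model/5gram.bin",
    unigrams=unigrams,
    alpha=0.5,               # LM weight, from attrs.json
    beta=1.5,                # word-insertion bonus
    unk_score_offset=-10.0,  # out-of-vocabulary penalty
    lm_score_boundary=True,  # "score_boundary" in attrs.json
)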
 
preprocessor_config.json CHANGED
@@ -4,7 +4,7 @@
   "num_mel_bins": 80,
   "padding_side": "right",
   "padding_value": 1,
-  "processor_class": "Wav2Vec2BertProcessor",
+  "processor_class": "Wav2Vec2ProcessorWithLM",
   "return_attention_mask": true,
   "sampling_rate": 16000,
   "stride": 2
special_tokens_map.json CHANGED
@@ -1,6 +1,30 @@
 {
-  "bos_token": "<s>",
-  "eos_token": "</s>",
-  "pad_token": "[PAD]",
-  "unk_token": "[UNK]"
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": true,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": true,
+    "single_word": false
+  }
 }
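The expanded special-token entries mirror the fields of the tokenizers library's AddedToken; notably, [PAD] and [UNK] now set lstrip/rstrip so surrounding whitespace is absorbed when they are matched. A sketch of the equivalent in-memory object, assuming the tokenizers package:

from tokenizers import AddedToken

# Equivalent form of the new "pad_token" entry.
pad = AddedToken("[PAD]", lstrip=True, rstrip=True,
                 normalized=False, single_word=False)
print(pad)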
tokenizer_config.json CHANGED
@@ -39,6 +39,7 @@
   "eos_token": "</s>",
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "[PAD]",
+  "processor_class": "Wav2Vec2ProcessorWithLM",
   "replace_word_delimiter_char": " ",
   "target_lang": null,
   "tokenizer_class": "Wav2Vec2CTCTokenizer",