vitouphy committed on
Commit
796d933
·
1 Parent(s): 2c5d6e2

add evaluation

Browse files
README.md CHANGED
@@ -9,13 +9,38 @@ tags:
9
  - en
10
  - generated_from_trainer
11
  model-index:
12
- - name: ''
13
- results: []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  ---
15
 
16
- <!-- This model card has been generated automatically according to the information the Trainer had access to. You
17
- should probably proofread and complete it, then remove this comment. -->
18
-
19
  #
20
 
21
  This model is a fine-tuned version of [facebook/wav2vec2-xls-r-300m](https://huggingface.co/facebook/wav2vec2-xls-r-300m) on the librispeech_asr dataset.
@@ -23,6 +48,7 @@ It achieves the following results on the evaluation set:
23
  - Loss: 0.1444
24
  - Wer: 0.1167
25
 
 
26
  ## Model description
27
 
28
  More information needed
 
9
  - en
10
  - generated_from_trainer
11
  model-index:
12
+ - name: XLS-R-300M - English
13
+ results:
14
+ - task:
15
+ name: Automatic Speech Recognition
16
+ type: automatic-speech-recognition
17
+ dataset:
18
+ name: LibriSpeech ASR
19
+ type: librispeech_asr
20
+ args: clean
21
+ metrics:
22
+ - name: Test WER
23
+ type: wer
24
+ value: 12.29
25
+ - name: Test CER
26
+ type: cer
27
+ value: 3.34
28
+ - task:
29
+ name: Automatic Speech Recognition
30
+ type: automatic-speech-recognition
31
+ dataset:
32
+ name: Robust Speech Event - Dev Data
33
+ type: speech-recognition-community-v2/dev_data
34
+ args: en
35
+ metrics:
36
+ - name: Validation WER
37
+ type: wer
38
+ value: 36.35
39
+ - name: Validation CER
40
+ type: cer
41
+ value: 14.83
42
  ---
43
 
 
 
 
44
  #
45
 
46
  This model is a fine-tuned version of [facebook/wav2vec2-xls-r-300m](https://huggingface.co/facebook/wav2vec2-xls-r-300m) on the librispeech_asr dataset.
 
48
  - Loss: 0.1444
49
  - Wer: 0.1167
50
 
51
+
52
  ## Model description
53
 
54
  More information needed
eval.py CHANGED
@@ -44,7 +44,7 @@ def log_results(result: Dataset, args: Dict[str, str]):
44
  p.write(f"{i}" + "\n")
45
  p.write(batch["prediction"] + "\n")
46
  t.write(f"{i}" + "\n")
47
- t.write(batch["target"] + "\n")
48
 
49
  result.map(write_to_file, with_indices=True)
50
 
@@ -62,11 +62,6 @@ def normalize_text(text: str) -> str:
62
 
63
  for t in token_sequences_to_ignore:
64
  text = " ".join(text.split(t))
65
-
66
- kakasi = pykakasi.kakasi()
67
- tagger = fugashi.Tagger()
68
- text = "".join([item['hira'] for item in kakasi.convert(text)])
69
- text = " ".join([word.surface for word in tagger(text)])
70
 
71
  return text
72
 
@@ -97,7 +92,7 @@ def main(args):
97
  )
98
 
99
  batch["prediction"] = prediction["text"]
100
- batch["target"] = normalize_text(batch["text"])
101
  return batch
102
 
103
  # run inference on all examples
@@ -123,6 +118,9 @@ if __name__ == "__main__":
123
  parser.add_argument(
124
  "--config", type=str, required=True, help="Config of the dataset. *E.g.* `'en'` for Common Voice"
125
  )
 
 
 
126
  parser.add_argument("--split", type=str, required=True, help="Split of the dataset. *E.g.* `'test'`")
127
  parser.add_argument(
128
  "--chunk_length_s", type=float, default=None, help="Chunk length in seconds. Defaults to 5 seconds."
 
44
  p.write(f"{i}" + "\n")
45
  p.write(batch["prediction"] + "\n")
46
  t.write(f"{i}" + "\n")
47
+ t.write(batch['target'] + "\n")
48
 
49
  result.map(write_to_file, with_indices=True)
50
 
 
62
 
63
  for t in token_sequences_to_ignore:
64
  text = " ".join(text.split(t))
 
 
 
 
 
65
 
66
  return text
67
 
 
92
  )
93
 
94
  batch["prediction"] = prediction["text"]
95
+ batch["target"] = normalize_text(batch[args.sentence_column])
96
  return batch
97
 
98
  # run inference on all examples
 
118
  parser.add_argument(
119
  "--config", type=str, required=True, help="Config of the dataset. *E.g.* `'en'` for Common Voice"
120
  )
121
+ parser.add_argument(
122
+ "--sentence_column", type=str, required=True, help="Name of column that holds text label"
123
+ )
124
  parser.add_argument("--split", type=str, required=True, help="Split of the dataset. *E.g.* `'test'`")
125
  parser.add_argument(
126
  "--chunk_length_s", type=float, default=None, help="Chunk length in seconds. Defaults to 5 seconds."
eval.sh CHANGED
@@ -1,17 +1,17 @@
 
 
 
 
 
 
 
 
1
  ./eval.py \
2
  --model_id . \
3
- --dataset "librispeech_asr" \
4
- --config clean \
5
- --split test \
 
6
  --chunk_length_s 5.0 \
7
  --stride_length_s 1.0 \
8
- --log_outputs
9
-
10
- # ./eval.py \
11
- # --model_id vitouphy/xls-r-300m-ja \
12
- # --dataset "speech-recognition-community-v2/dev_data" \
13
- # --config ja \
14
- # --split validation \
15
- # --chunk_length_s 5.0 \
16
- # --stride_length_s 1.0 \
17
- # --log_outputs
 
1
+ # ./eval.py \
2
+ # --model_id . \
3
+ # --dataset "librispeech_asr" \
4
+ # --config clean \
5
+ # --split test \
6
+ # --sentence_column "text" \
7
+ # --log_outputs
8
+
9
  ./eval.py \
10
  --model_id . \
11
+ --dataset "speech-recognition-community-v2/dev_data" \
12
+ --config en \
13
+ --split validation \
14
+ --sentence_column "sentence" \
15
  --chunk_length_s 5.0 \
16
  --stride_length_s 1.0 \
17
+ --log_outputs
 
 
 
 
 
 
 
 
 
librispeech_asr_clean_test_eval_results.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ WER: 0.12285073037127206
2
+ CER: 0.033364117500799206
log_librispeech_asr_clean_test_predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
log_librispeech_asr_clean_test_targets.txt ADDED
The diff for this file is too large to render. See raw diff
 
log_speech-recognition-community-v2_dev_data_en_validation_predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
log_speech-recognition-community-v2_dev_data_en_validation_targets.txt ADDED
The diff for this file is too large to render. See raw diff
 
speech-recognition-community-v2_dev_data_en_validation_eval_results.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ WER: 0.36347459029961926
2
+ CER: 0.14828747083722804