SMa2021 committed
Commit 73f635f
1 Parent(s): c242078

adding wil metric

Files changed (4)
  1. README.md +25 -2
  2. app.py +6 -0
  3. requirements.txt +2 -0
  4. wil.py +92 -0
README.md CHANGED
@@ -1,5 +1,5 @@
  ---
- title: Wil
+ title: WIL
  emoji: 🐠
  colorFrom: red
  colorTo: purple
@@ -7,6 +7,29 @@ sdk: gradio
  sdk_version: 3.27.0
  app_file: app.py
  pinned: false
+ tags:
+ - evaluate
+ - metric
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ Word Information Loss can be used to evaluate the performance of an automatic speech recognizer. It has information-theoretic underpinnings, is symmetric between predictions and targets, and is bounded between 0 and 1.
+
+ The formula for WIL is
+
+ WIL = 1 - (C/P)(C/T)
+
+ where
+ C is the number of correct words,
+ P is the number of words in the prediction,
+ T is the number of words in the target.
+
+ This value measures the amount of information loss between two sentences. A score of 0 indicates that the prediction and target match perfectly.
+
+ Here is a comparison of WER and WIL (assuming that X, Y, and Z each represent a different word):
+ | Target | Prediction | WER | WIL |
+ | ------------- | ------------- | ------------- | ------------- |
+ | X | X | 0 | 0 |
+ | X | Y | 1 | 1 |
+ | X | XZZZ | 3 | 0.75 |
+ | XYYY | X | 0.75 | 0.75 |
+ | XYY | XZ | 0.67 | 0.83 |
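As a quick sanity check of the table above, here is a small illustrative sketch (not part of the commit) that recomputes the rows with jiwer's `process_words`, the same function the new wil.py calls; the single-letter tokens simply stand in for arbitrary distinct words.

```python
# Illustrative only: recompute the WER/WIL comparison table with jiwer.
from jiwer import process_words

pairs = [
    ("X", "X"),        # perfect match            -> WER 0,    WIL 0
    ("X", "Y"),        # one substitution         -> WER 1,    WIL 1
    ("X", "X Z Z Z"),  # three insertions         -> WER 3,    WIL 0.75
    ("X Y Y Y", "X"),  # three deletions          -> WER 0.75, WIL 0.75
    ("X Y Y", "X Z"),  # substitution + deletion  -> WER 0.67, WIL 0.83
]

for target, prediction in pairs:
    out = process_words(target, prediction)  # (reference, hypothesis)
    print(f"{target:>8} | {prediction:>8} | WER={out.wer:.2f} | WIL={out.wil:.2f}")
```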
app.py CHANGED
@@ -0,0 +1,6 @@
+ import evaluate
+ from evaluate.utils import launch_gradio_widget
+
+
+ module = evaluate.load("wil")
+ launch_gradio_widget(module)
requirements.txt ADDED
@@ -0,0 +1,2 @@
+ git+https://github.com/huggingface/evaluate@0ca575d7aa0764ea646dcd5a27cb952e587ce9eb
+ jiwer
wil.py ADDED
@@ -0,0 +1,92 @@
+ # Copyright 2021 The HuggingFace Evaluate Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ """Word Information Loss (WIL) metric."""
+
+ import datasets
+ from jiwer import process_words
+
+ import evaluate
+
+
+ _CITATION = """\
+ @inproceedings{inproceedings,
+   author = {Morris, Andrew and Maier, Viktoria and Green, Phil},
+   year = {2004},
+   month = {01},
+   pages = {},
+   title = {From WER and RIL to MER and WIL: improved evaluation measures for connected speech recognition.}
+ }
+ """
+
+ _DESCRIPTION = """\
+ Word Information Loss can be used to evaluate the performance of an automatic speech recognizer. It has information-theoretic underpinnings, is symmetric between predictions and targets, and is bounded between 0 and 1.
+ The formula for WIL is
+ WIL = 1 - (C/P)(C/T)
+ where
+ C is the number of correct words,
+ P is the number of words in the prediction,
+ T is the number of words in the target.
+
+ This value measures the amount of information loss between two sentences. A score of 0 indicates that the prediction and target match perfectly.
+ """
+
+ _KWARGS_DESCRIPTION = """
+ Compute the WIL between a list of predictions and a list of references.
+ Args:
+     references: List of reference texts (targets).
+     predictions: List of transcriptions to evaluate.
+     concatenate_texts (bool, default=False): Whether to compute the WIL over all texts pooled together, or to average the WIL of each (prediction, reference) pair.
+ Returns:
+     (float): the word information loss
+ Examples:
+     >>> predictions = ["this is a prediction", "there is an other sample"]
+     >>> references = ["this is the target", "there is another one"]
+     >>> wil = evaluate.load("wil")
+     >>> wil_score = wil.compute(predictions=predictions, references=references)
+     >>> print(wil_score)
+     0.775
+     >>> wil_score = wil.compute(predictions=references, references=predictions)
+     >>> print(wil_score)
+     0.775
+ """
+
+
+ @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
+ class WIL(evaluate.Metric):
+     def _info(self):
+         return evaluate.MetricInfo(
+             description=_DESCRIPTION,
+             citation=_CITATION,
+             inputs_description=_KWARGS_DESCRIPTION,
+             features=datasets.Features(
+                 {
+                     "predictions": datasets.Value("string", id="sequence"),
+                     "references": datasets.Value("string", id="sequence"),
+                 }
+             ),
+             codebase_urls=["https://github.com/jitsi/jiwer/"],
+             reference_urls=[
+                 "https://en.wikipedia.org/wiki/Word_error_rate",
+             ],
+         )
+
+     def _compute(self, predictions=None, references=None, concatenate_texts=False):
+         if concatenate_texts:
+             # Pool all references/predictions into a single alignment and take its WIL.
+             return process_words(references, predictions).wil
+         else:
+             # Average the WIL computed separately for each (reference, prediction) pair.
+             total = 0
+             for prediction, reference in zip(predictions, references):
+                 total += process_words(reference, prediction).wil
+             return total / len(predictions)
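For completeness, a minimal usage sketch mirroring the docstring example; `evaluate.load("wil")` is what app.py does inside this Space, and loading the metric from outside the Space would likely need its full Hub id instead (an assumption, not part of the commit).

```python
# Minimal usage sketch, mirroring the docstring example in wil.py.
import evaluate

wil = evaluate.load("wil")  # inside this Space; elsewhere the full Space id may be needed

predictions = ["this is a prediction", "there is an other sample"]
references = ["this is the target", "there is another one"]

# Default: average the WIL of each (prediction, reference) pair.
print(wil.compute(predictions=predictions, references=references))  # 0.775

# Pool all pairs into a single alignment before computing WIL.
print(wil.compute(predictions=predictions, references=references, concatenate_texts=True))
```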