Spaces:
Runtime error
Runtime error
Update segmentation_scores.py
Browse files- segmentation_scores.py +7 -7
segmentation_scores.py
CHANGED
@@ -38,10 +38,10 @@ Calculates how good are predicted segmentations, using boundary, token and type
|
|
38 |
Args:
|
39 |
predictions: list of segmented utterances to score. Each prediction
|
40 |
should be a string with phonemes separated by spaces and estimated word boundaries
|
41 |
-
denoted by the token '
|
42 |
references: list of segmented utterances to score. Each reference
|
43 |
should be a string with phonemes separated by spaces and gold word boundaries
|
44 |
-
denoted by the token '
|
45 |
Returns:
|
46 |
type_fscore: lexicon f1 score
|
47 |
type_precision: lexicon precision
|
@@ -57,7 +57,7 @@ Returns:
|
|
57 |
boundary_noedge_recall: boundary recall, excluding utterance boundaries
|
58 |
Examples:
|
59 |
>>> segmentation_scores = evaluate.load("transformersegmentation/segmentation_scores")
|
60 |
-
>>> results = segmentation_scores.compute(references=["w ɛ ɹ
|
61 |
>>> print(results)
|
62 |
{'type_fscore': 1.0, 'type_precision': 1.0, 'type_recall': 1.0, 'token_fscore': 1.0, 'token_precision': 1.0, 'token_recall': 1.0, 'boundary_all_fscore': 1.0, 'boundary_all_precision': 1.0, 'boundary_all_recall': 1.0, 'boundary_noedge_fscore': 1.0, 'boundary_noedge_precision': 1.0, 'boundary_noedge_recall': 1.0}
|
63 |
"""
|
@@ -227,11 +227,11 @@ class segmentation_scores(evaluate.Metric):
|
|
227 |
for utt in (utt for utt in text if utt.strip()):
|
228 |
# list of phones in the utterance with word separator removed
|
229 |
phone_in_utterance = [
|
230 |
-
phone for phone in utt.split(" ") if phone != "
|
231 |
]
|
232 |
words_in_utterance = (
|
233 |
"".join(
|
234 |
-
" " if phone == "
|
235 |
)
|
236 |
.strip()
|
237 |
.split(" ")
|
@@ -252,9 +252,9 @@ class segmentation_scores(evaluate.Metric):
|
|
252 |
Parameters
|
253 |
----------
|
254 |
predictions : sequence of str
|
255 |
-
A suite of word utterances, each string using '
|
256 |
references : sequence of str
|
257 |
-
A suite of word utterances, each string using '
|
258 |
|
259 |
Returns
|
260 |
-------
|
|
|
38 |
Args:
|
39 |
predictions: list of segmented utterances to score. Each prediction
|
40 |
should be a string with phonemes separated by spaces and estimated word boundaries
|
41 |
+
denoted by the token 'WORD_BOUNDARY'.
|
42 |
references: list of segmented utterances to score. Each reference
|
43 |
should be a string with phonemes separated by spaces and gold word boundaries
|
44 |
+
denoted by the token 'WORD_BOUNDARY'.
|
45 |
Returns:
|
46 |
type_fscore: lexicon f1 score
|
47 |
type_precision: lexicon precision
|
|
|
57 |
boundary_noedge_recall: boundary recall, excluding utterance boundaries
|
58 |
Examples:
|
59 |
>>> segmentation_scores = evaluate.load("transformersegmentation/segmentation_scores")
|
60 |
+
>>> results = segmentation_scores.compute(references=["w ɛ ɹ WORD_BOUNDARY ɪ z WORD_BOUNDARY ð ɪ s WORD_BOUNDARY", "l ɪ ɾ əl WORD_BOUNDARY aɪ z WORD_BOUNDARY"], predictions=["w ɛ ɹ WORD_BOUNDARY ɪ z WORD_BOUNDARY ð ɪ s WORD_BOUNDARY", "l ɪ ɾ əl WORD_BOUNDARY aɪ z WORD_BOUNDARY"])
|
61 |
>>> print(results)
|
62 |
{'type_fscore': 1.0, 'type_precision': 1.0, 'type_recall': 1.0, 'token_fscore': 1.0, 'token_precision': 1.0, 'token_recall': 1.0, 'boundary_all_fscore': 1.0, 'boundary_all_precision': 1.0, 'boundary_all_recall': 1.0, 'boundary_noedge_fscore': 1.0, 'boundary_noedge_precision': 1.0, 'boundary_noedge_recall': 1.0}
|
63 |
"""
|
|
|
227 |
for utt in (utt for utt in text if utt.strip()):
|
228 |
# list of phones in the utterance with word separator removed
|
229 |
phone_in_utterance = [
|
230 |
+
phone for phone in utt.split(" ") if phone != "WORD_BOUNDARY"
|
231 |
]
|
232 |
words_in_utterance = (
|
233 |
"".join(
|
234 |
+
" " if phone == "WORD_BOUNDARY" else phone for phone in utt.split(" ")
|
235 |
)
|
236 |
.strip()
|
237 |
.split(" ")
|
|
|
252 |
Parameters
|
253 |
----------
|
254 |
predictions : sequence of str
|
255 |
+
A suite of word utterances, each string using 'WORD_BOUNDARY' as a word separator.
|
256 |
references : sequence of str
|
257 |
+
A suite of word utterances, each string using 'WORD_BOUNDARY' as a word separator.
|
258 |
|
259 |
Returns
|
260 |
-------
|