phone_errors / phone_distance.py
ginic's picture
Initial attempt at implementing phone distances
a91c31a
raw
history blame
5.68 kB
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Edit distances between Unicode International Phonetic Alphabet strings.
This is basically a Hugging Face wrapper around the panphon library's distance module.
"""
import evaluate
import datasets
import numpy as np
import panphone.distance
# BibTeX entry for the PanPhon paper; handed to the metric info as its `citation`.
_CITATION = """\
@inproceedings{Mortensen-et-al:2016,
author = {David R. Mortensen and
Patrick Littell and
Akash Bharadwaj and
Kartik Goyal and
Chris Dyer and
Lori S. Levin},
title = {PanPhon: {A} Resource for Mapping {IPA} Segments to Articulatory Feature Vectors},
booktitle = {Proceedings of {COLING} 2016, the 26th International Conference on Computational Linguistics: Technical Papers},
pages = {3475--3484},
publisher = {{ACL}},
year = {2016}
}
"""
# Short summary shown on the module page and prepended to the metric class docstring.
_DESCRIPTION = """\
Edit distances between Unicode International Phonetic Alphabet (IPA) strings,
computed with the distance module of the PanPhon library: phoneme error rate,
phone feature error rate (Hamming feature edit distance) and feature error rate.
"""
# Description of the arguments and return values of the metric's compute step.
_KWARGS_DESCRIPTION = """
Computes phoneme error rates, phone feature error rates (Hamming feature edit
distance) and feature error rates between predicted and reference IPA strings.
Args:
    predictions: list of predicted transcriptions to score. Each prediction
        should be a string of Unicode IPA symbols.
    references: list of reference transcriptions, one for each prediction.
        Each reference should be a string of Unicode IPA symbols.
    feature_set: feature set used by the feature model. Defaults to "spe+".
    feature_model: feature parsing model, one of "strict", "permissive" or
        "segment". Defaults to "segment".
    is_normalize_max_length: set to True to normalize the phone feature error
        rates by maximum length (the measure is then not a true metric).
        Defaults to False.
Returns:
    phoneme_error_rates: list with the phoneme error rate of each pair,
    mean_phoneme_error_rate: mean of phoneme_error_rates,
    phone_feature_error_rates: list with the Hamming feature edit distance of each pair,
    mean_phone_feature_error_rates: mean of phone_feature_error_rates,
    feature_error_rates: list with the feature error rate of each pair,
    mean_feature_error_rates: mean of feature_error_rates,
Examples:
    >>> phone_distance = evaluate.load("ginic/phone_distance")
    >>> results = phone_distance.compute(predictions=["bob", "ði"], references=["pop", "ðə"])
"""
# TODO: Define external resources urls if needed
# BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class PhoneDistance(evaluate.Metric):
    """Metric that scores predicted Unicode IPA strings against references.

    For every (prediction, reference) pair it reports a phoneme error rate, a
    Hamming feature edit distance and a feature error rate, together with the
    mean of each measure over all pairs.
    """

    def _info(self):
        """Return the `evaluate.MetricInfo` describing this metric.

        The info carries the module description, citation, input description,
        the expected input features (one string per prediction and one per
        reference) and links to the code base and reference material.
        """
        return evaluate.MetricInfo(
            # This is the description that will appear on the modules page.
            module_type="metric",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # This defines the format of each prediction and reference
            features=datasets.Features({
                'predictions': datasets.Value('string', id="sequence"),
                'references': datasets.Value('string', id="sequence"),
            }),
            # Additional links to the codebase or references
            codebase_urls=["https://github.com/dmort27/panphon", "https://huggingface.co/spaces/ginic/phone_distance/tree/main"],
            reference_urls=["https://pypi.org/project/panphon/", "https://arxiv.org/abs/2308.03917"]
        )

    def _compute(self, predictions:list[str]|None=None, references:list[str]|None=None, feature_set:str="spe+", feature_model:str="segment", is_normalize_max_length:bool=False):
        """Compute phoneme error rates, phone feature error rates (Hamming feature edit distance) and feature error rates between prediction and reference strings.

        Args:
            predictions (list[str], optional): Predicted transcriptions. Defaults to None.
            references (list[str], optional): Reference transcriptions, one per prediction. Defaults to None.
            feature_set (str, optional): Feature set to use in the feature model, see the panphon documentation for details. Defaults to "spe+".
            feature_model (str, optional): panphon.distance.Distance feature parsing model to be used, choose from "strict", "permissive", "segment". Defaults to "segment".
            is_normalize_max_length (bool, optional): Set to true to normalize phone feature error rates by maximum length (measure won't be a true metric). Defaults to False.

        Returns:
            dict: Six entries. "phoneme_error_rates", "phone_feature_error_rates"
            and "feature_error_rates" are lists holding one value per
            (prediction, reference) pair, in input order.
            "mean_phoneme_error_rate", "mean_phone_feature_error_rates" and
            "mean_feature_error_rates" are the `np.mean` of the corresponding
            list (NaN when the inputs are empty).

        Raises:
            ValueError: If predictions and references differ in length.
        """
        # NOTE(review): the URLs in _info and the docstring above name the
        # library `panphon`, while this file imports and references `panphone`.
        # Confirm the module-level import resolves; if it does not, rename the
        # import and this reference together.
        distance_computer = panphone.distance.Distance(feature_set=feature_set, feature_model=feature_model)

        # The Hamming variant does not depend on the pair, so pick it once.
        if is_normalize_max_length:
            hamming_distance = distance_computer.hamming_feature_edit_distance_div_maxlen
        else:
            hamming_distance = distance_computer.hamming_feature_edit_distance

        # strict=True: a length mismatch is a caller error, so raise instead of
        # silently scoring only the overlapping prefix of the two lists.
        pairs = list(zip(predictions, references, strict=True))

        hamming_distances = [hamming_distance(p, r) for p, r in pairs]
        # The Distance method is named `phoneme_error_rate`; the previous
        # `phone_error_rate` spelling is not part of the panphon API.
        phoneme_error_rates = [distance_computer.phoneme_error_rate(p, r) for p, r in pairs]
        feature_error_rates = [distance_computer.feature_error_rate(p, r) for p, r in pairs]

        return {
            "phoneme_error_rates": phoneme_error_rates,
            "mean_phoneme_error_rate": np.mean(phoneme_error_rates),
            "phone_feature_error_rates": hamming_distances,
            "mean_phone_feature_error_rates": np.mean(hamming_distances),
            "feature_error_rates": feature_error_rates,
            "mean_feature_error_rates": np.mean(feature_error_rates)
        }