Spaces:
Sleeping
Sleeping
# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
"""An implementation for calculating CharCut, a character-based machine translation evaluation metric.""" | |
from typing import Iterable, Union | |
import datasets | |
from charcut import calculate_charcut | |
from datasets import Sequence, Value | |
import evaluate | |
_CITATION = """\ | |
@inproceedings{lardilleux-lepage-2017-charcut, | |
title = "{CHARCUT}: Human-Targeted Character-Based {MT} Evaluation with Loose Differences", | |
author = "Lardilleux, Adrien and | |
Lepage, Yves", | |
booktitle = "Proceedings of the 14th International Conference on Spoken Language Translation", | |
month = dec # " 14-15", | |
year = "2017", | |
address = "Tokyo, Japan", | |
publisher = "International Workshop on Spoken Language Translation", | |
url = "https://aclanthology.org/2017.iwslt-1.20", | |
pages = "146--153" | |
} | |
""" | |
_DESCRIPTION = """\ | |
CharCut compares outputs of MT systems with reference translations. The matching algorithm is based on an iterative | |
search for longest common substrings, combined with a length-based threshold that limits short and noisy character | |
matches. As a similarity metric this is not new, but to the best of our knowledge it was never applied to highlighting | |
and scoring of MT outputs. It has the neat effect of keeping character-based differences readable by humans.""" | |
_KWARGS_DESCRIPTION = """ | |
Calculates how good predictions are given some references. | |
Args: | |
predictions: a list of predictions to score. Each prediction should be a string with | |
tokens separated by spaces. | |
references: a list of reference for each prediction. Each reference should be a string with | |
tokens separated by spaces. | |
Returns: | |
charcut_mt: the CharCut score | |
Examples: | |
>>> charcut_mt = evaluate.load("charcut_mt") | |
>>> preds = ["this week the saudis denied information published in the new york times", | |
... "this is in fact an estimate"] | |
>>> refs = ["saudi arabia denied this week information published in the american new york times", | |
... "this is actually an estimate"] | |
>>> charcut_mt.compute(references=refs, predictions=preds) | |
{'charcut_mt': 0.1971153846153846} | |
""" | |
class Charcut(evaluate.Metric): | |
"""Character-based MT evaluation.""" | |
def _info(self): | |
return evaluate.MetricInfo( | |
# This is the description that will appear on the modules page. | |
module_type="metric", | |
description=_DESCRIPTION, | |
citation=_CITATION, | |
inputs_description=_KWARGS_DESCRIPTION, | |
# This defines the format of each prediction and reference | |
features=[ | |
datasets.Features( | |
{"predictions": Value("string", id="prediction"), "references": Value("string", id="reference")} | |
), | |
], | |
# Homepage of the module for documentation | |
homepage="https://github.com/BramVanroy/CharCut", | |
# Additional links to the codebase or references | |
codebase_urls=["https://github.com/BramVanroy/CharCut", "https://github.com/alardill/CharCut"], | |
) | |
def _compute(self, predictions: Iterable[str], references: Iterable[str]): | |
return {"charcut_mt": calculate_charcut(predictions, references)[0]} | |