# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""HTER metric."""

import datasets

import evaluate


_DESCRIPTION = """
HTER (Half Total Error Rate) combines the False Accept Rate (FAR) and the False Reject Rate (FRR) into a single score summarizing a verification or anti-spoofing system's performance. It is computed as:
  HTER = (FAR + FRR) / 2
  Where, with the label convention used by this metric (1 = attack, the positive class; 0 = genuine, the negative class):
  FAR (False Accept Rate) = FN / (FN + TP), the fraction of attack samples wrongly accepted as genuine
  FRR (False Reject Rate) = FP / (FP + TN), the fraction of genuine samples wrongly rejected as attacks
  TP: True positive (attack predicted as attack)
  TN: True negative (genuine predicted as genuine)
  FP: False positive (genuine predicted as attack)
  FN: False negative (attack predicted as genuine)
"""


_KWARGS_DESCRIPTION = """
Args:
    predictions (`list` of `int`): Predicted labels (1 = attack, 0 = genuine).
    references (`list` of `int`): Ground truth labels (1 = attack, 0 = genuine).

Returns:
    HTER (`float`): HTER score. Minimum possible value is 0. Maximum possible value is 1.0.

Examples:

    Example 1: a simple example
        >>> hter_metric = evaluate.load("murinj/hter")
        >>> results = hter_metric.compute(references=[0, 0], predictions=[0, 1])
        >>> print(results)
        {'HTER': 0.25}

"""


_CITATION = """
"""


@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class Hter(evaluate.Metric):
    def _info(self):
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features(
                {
                    "predictions": datasets.Value("int32"),
                    "references": datasets.Value("int32"),
                }
            ),
        )

    def _compute(self, predictions, references):
        TP = 0  # predicted attack (1), actually attack (1): attack correctly rejected
        TN = 0  # predicted genuine (0), actually genuine (0): genuine correctly accepted
        FP = 0  # predicted attack (1), actually genuine (0): genuine wrongly rejected (drives FRR)
        FN = 0  # predicted genuine (0), actually attack (1): attack wrongly accepted (drives FAR)

        for pred, ref in zip(predictions, references):
            if pred == 1 and ref == 1:
                TP += 1
            elif pred == 0 and ref == 0:
                TN += 1
            elif pred == 1 and ref == 0:
                FP += 1
            elif pred == 0 and ref == 1:
                FN += 1

        # FAR: fraction of attack samples that were wrongly accepted.
        FAR = FN / (TP + FN) if (TP + FN) > 0 else 0
        # FRR: fraction of genuine samples that were wrongly rejected.
        FRR = FP / (TN + FP) if (TN + FP) > 0 else 0

        HTER = (FAR + FRR) / 2.0
        return {"HTER": HTER}
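

if __name__ == "__main__":
    # Minimal sanity check, a sketch assuming this script is executed directly;
    # in normal use the metric is loaded with `evaluate.load("murinj/hter")`.
    # _compute is called on a fresh instance, so no hub access is needed.
    demo = Hter()._compute(predictions=[0, 1, 1, 0], references=[0, 0, 1, 1])
    # TP=1, TN=1, FP=1, FN=1 -> FAR = 1/2, FRR = 1/2, HTER = 0.5
    print(demo)  # expected: {'HTER': 0.5}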