# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""HTER metric."""

import datasets

import evaluate


_DESCRIPTION = """
HTER (Half Total Error Rate) combines the False Accept Rate (FAR) and the False Reject Rate (FRR) into a single score summarizing a verification or anti-spoofing system's performance. It is computed as:
  HTER = (FAR + FRR) / 2
  Where, with the label convention used by this metric (1 = attack, the positive class; 0 = genuine, the negative class):
  FAR (False Accept Rate) = FN / (FN + TP), the fraction of attack samples wrongly accepted as genuine
  FRR (False Reject Rate) = FP / (FP + TN), the fraction of genuine samples wrongly rejected as attacks
  TP: True positive (attack predicted as attack)
  TN: True negative (genuine predicted as genuine)
  FP: False positive (genuine predicted as attack)
  FN: False negative (attack predicted as genuine)
"""


_KWARGS_DESCRIPTION = """
Args:
    predictions (`list` of `int`): Predicted labels (1 = attack, 0 = genuine).
    references (`list` of `int`): Ground truth labels (1 = attack, 0 = genuine).

Returns:
    HTER (`float`): HTER score. Minimum possible value is 0. Maximum possible value is 1.0.

Examples:

    Example 1: a simple example
        >>> hter_metric = evaluate.load("murinj/hter")
        >>> results = hter_metric.compute(references=[0, 0], predictions=[0, 1])
        >>> print(results)
        {'HTER': 0.25}

"""


_CITATION = """
"""


@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class Hter(evaluate.Metric):
    def _info(self):
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features(
                {
                    "predictions": datasets.Value("int32"),
                    "references": datasets.Value("int32"),
                }
            ),
        )

    def _compute(self, predictions, references):
        TP = 0  # predicted attack (1), actually attack (1): attack correctly rejected
        TN = 0  # predicted genuine (0), actually genuine (0): genuine correctly accepted
        FP = 0  # predicted attack (1), actually genuine (0): genuine wrongly rejected (drives FRR)
        FN = 0  # predicted genuine (0), actually attack (1): attack wrongly accepted (drives FAR)

        for pred, ref in zip(predictions, references):
            if pred == 1 and ref == 1:
                TP += 1
            elif pred == 0 and ref == 0:
                TN += 1
            elif pred == 1 and ref == 0:
                FP += 1
            elif pred == 0 and ref == 1:
                FN += 1

        # FAR: fraction of attack samples that were wrongly accepted.
        FAR = FN / (TP + FN) if (TP + FN) > 0 else 0
        # FRR: fraction of genuine samples that were wrongly rejected.
        FRR = FP / (TN + FP) if (TN + FP) > 0 else 0

        HTER = (FAR + FRR) / 2.0
        return {"HTER": HTER}
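

if __name__ == "__main__":
    # Minimal sanity check, a sketch assuming this script is executed directly;
    # in normal use the metric is loaded with `evaluate.load("murinj/hter")`.
    # _compute is called on a fresh instance, so no hub access is needed.
    demo = Hter()._compute(predictions=[0, 1, 1, 0], references=[0, 0, 1, 1])
    # TP=1, TN=1, FP=1, FN=1 -> FAR = 1/2, FRR = 1/2, HTER = 0.5
    print(demo)  # expected: {'HTER': 0.5}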