from typing import Any, Dict, List, Tuple, Union

import numpy as np
import pandas as pd
from numpy.typing import NDArray
from sklearn.metrics import log_loss, roc_auc_score

from llm_studio.python_configs.base import DefaultConfigProblemBase


def accuracy_score(
    cfg: DefaultConfigProblemBase,
    results: Dict,
    val_df: pd.DataFrame,
    raw_results: bool = False,
) -> Union[NDArray, Tuple[NDArray, List[str]]]:
    """Calculate accuracy score.

    Only considers the predicted value (results["predictions"]) and target value
    (results["target_text"]).
    It supports both binary and multiclass classification.

    Args:
        cfg: DefaultConfigProblemBase, ignored
        results: Dict, model results including 'predictions' and 'target_text'
        val_df: pd.DataFrame, validation dataframe
        raw_results: bool, ignored

    Returns:
        Numpy array of per-sample accuracies: 1.0 where the prediction matches
        the target and 0.0 otherwise, averaged across labels when there are
        multiple answer columns

    Raises:
        ValueError: If input data is invalid or inconsistent
    """
    predictions = np.array(results["predictions"])
    target = np.array(
        [[int(t) for t in text.split(",")] for text in results["target_text"]]
    )

    # Input validation
    if len(target) != len(predictions):
        raise ValueError(
            f"Length of target ({len(target)}) and predicted ({len(predictions)}) "
            "should be the same."
        )
    if len(target) == 0:
        raise ValueError("No data to calculate accuracy score")

    return (predictions == target).mean(axis=1).reshape(-1).astype("float")
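
# Usage sketch for accuracy_score (illustrative data only; in practice the
# `results` dict is produced by the validation loop, and `cfg` is the
# experiment config, which this metric ignores):
#
#     results = {"predictions": [[1], [0], [1]], "target_text": ["1", "0", "0"]}
#     accuracy_score(cfg=None, results=results, val_df=pd.DataFrame())
#     # -> array([1., 1., 0.])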


def auc_score(
    cfg: DefaultConfigProblemBase,
    results: Dict,
    val_df: pd.DataFrame,
    raw_results: bool = False,
) -> Union[NDArray, Tuple[NDArray, List[str]]]:
    """Calculate Area Under the ROC Curve (AUC) score.

    This function computes the AUC score using the predicted logits and target values.
    It supports both binary and multiclass classification.

    Args:
        cfg: DefaultConfigProblemBase, configuration
        results: Dict, model results including 'logits' and 'target_text'
        val_df: pd.DataFrame, ignored
        raw_results: bool, ignored

    Returns:
        float: AUC score for binary classification, or the macro-averaged
        one-vs-rest AUC score for multiclass classification

    Raises:
        ValueError: If input data is invalid or inconsistent
    """
    logits = np.array(results["logits"])
    target = np.array(
        [[int(t) for t in text.split(",")] for text in results["target_text"]]
    )

    # Input validation
    if len(target) != len(logits):
        raise ValueError(
            f"Length of target ({len(target)}) and logits ({len(logits)}) "
            "should be the same."
        )
    if len(target) == 0:
        raise ValueError("No data to calculate AUC score.")

    if target.shape[1] == 1 and cfg.dataset.num_classes > 1:
        target = np.eye(cfg.dataset.num_classes)[target.reshape(-1)]
    return roc_auc_score(target, logits, multi_class="ovr")
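
# Usage sketch for auc_score (illustrative; SimpleNamespace stands in for the
# real DefaultConfigProblemBase, of which only `dataset.num_classes` is read):
#
#     from types import SimpleNamespace
#     cfg = SimpleNamespace(dataset=SimpleNamespace(num_classes=3))
#     results = {
#         "logits": [[2.1, 0.3, 0.1], [0.2, 0.1, 1.9],
#                    [0.3, 1.8, 0.4], [1.7, 0.2, 0.6]],
#         "target_text": ["0", "2", "1", "0"],
#     }
#     auc_score(cfg, results, val_df=pd.DataFrame())  # -> 1.0 (perfect ranking)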


def logloss_score(
    cfg: DefaultConfigProblemBase,
    results: Dict,
    val_df: pd.DataFrame,
    raw_results: bool = False,
) -> Union[NDArray, Tuple[NDArray, List[str]]]:
    """Calculate the Log Loss (Cross-Entropy Loss) score.

    This function computes the log loss using the predicted probabilities and target.
    It supports binary, multiclass, and multilabel classification.

    Args:
        cfg: DefaultConfigProblemBase, configuration
        results: Dict, model results including 'probabilities' and 'target_text'
        val_df: pd.DataFrame, ignored
        raw_results: bool, ignored

    Returns:
        float: Log Loss score

    Raises:
        ValueError: If input data is invalid or inconsistent
    """
    predictions = np.array(results["probabilities"])
    target = np.array(
        [[int(t) for t in text.split(",")] for text in results["target_text"]]
    )

    # Input validation
    if len(target) != len(predictions):
        raise ValueError(
            f"Length of target ({len(target)}) and predictions ({len(predictions)}) "
            "should be the same."
        )
    if len(target) == 0:
        raise ValueError("No data to calculate log loss.")

    # Handle multilabel case
    if len(cfg.dataset.answer_column) > 1:
        log_losses = []
        for col in range(len(cfg.dataset.answer_column)):
            log_losses.append(log_loss(target[:, col], predictions[:, col]))
        return np.mean(log_losses)

    # Handle binary and multiclass cases
    if cfg.dataset.num_classes > 1:
        target = np.eye(cfg.dataset.num_classes)[target.reshape(-1)]
    return log_loss(target, predictions)
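
# Usage sketch for logloss_score (illustrative; only `dataset.answer_column`
# and `dataset.num_classes` are read from the config):
#
#     from types import SimpleNamespace
#     cfg = SimpleNamespace(
#         dataset=SimpleNamespace(answer_column=["label"], num_classes=2)
#     )
#     results = {
#         "probabilities": [[0.8, 0.2], [0.3, 0.7]],
#         "target_text": ["0", "1"],
#     }
#     logloss_score(cfg, results, val_df=pd.DataFrame())
#     # -> ~0.29, i.e. -(log(0.8) + log(0.7)) / 2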


class Metrics:
    """
    Metrics factory. Returns:
        - metric value
        - should it be maximized or minimized
        - Reduce function

    Maximized or minimized is needed for early stopping (saving best checkpoint)
    Reduce function to generate a single metric value, usually "mean" or "none"
    """

    _metrics = {
        "AUC": (auc_score, "max", "mean"),
        "Accuracy": (accuracy_score, "max", "mean"),
        "LogLoss": (logloss_score, "min", "mean"),
    }

    @classmethod
    def names(cls) -> List[str]:
        return sorted(cls._metrics.keys())

    @classmethod
    def get(cls, name: str) -> Any:
        """Access to Metrics.

        Args:
            name: metrics name

        Returns:
            Tuple of (metric function, optimization direction, reduce
            function); falls back to the LogLoss entry if the name is unknown.
        """
        return cls._metrics.get(name, cls._metrics["LogLoss"])
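

# Minimal smoke test of the metrics factory (hand-made data for illustration;
# in training, `results` comes from the validation loop and `cfg` from the
# experiment configuration):
if __name__ == "__main__":
    from types import SimpleNamespace

    cfg = SimpleNamespace(
        dataset=SimpleNamespace(num_classes=2, answer_column=["label"])
    )
    results = {
        "predictions": [[1], [0]],
        "probabilities": [[0.2, 0.8], [0.7, 0.3]],
        "logits": [[-1.0, 1.0], [0.5, -0.5]],
        "target_text": ["1", "0"],
    }
    for name in Metrics.names():
        score_fn, direction, reduce_fn = Metrics.get(name)
        value = score_fn(cfg, results, val_df=pd.DataFrame())
        print(f"{name}: {np.mean(value):.4f} ({direction}, reduce={reduce_fn})")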