Upload 3 files
Browse files- app.py +10 -0
- mc_auroc.py +70 -0
- requirements.txt +2 -0
app.py
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: UTF-8 -*-
|
2 |
+
"""
|
3 |
+
Created on 27.02.24
|
4 |
+
:author: Martin Fajčík
|
5 |
+
"""
|
6 |
+
import evaluate
|
7 |
+
from evaluate.utils import launch_gradio_widget
|
8 |
+
|
9 |
+
# Load the metric by its Hub identifier and pass the loaded module to
# evaluate's Gradio widget launcher.
_METRIC_PATH = "CZLC/mc_auroc"
module = evaluate.load(path=_METRIC_PATH)
launch_gradio_widget(module)
|
mc_auroc.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Sequence
|
2 |
+
|
3 |
+
import numpy
|
4 |
+
from sklearn.metrics import roc_curve, auc
|
5 |
+
|
6 |
+
import datasets
|
7 |
+
import evaluate
|
8 |
+
|
9 |
+
_DESCRIPTION = """
|
10 |
+
MC-AUROC (Multi-class Area Under the Receiver Operating Characteristic Curve) is a performance metric used in multiclass classification tasks.
|
11 |
+
It evaluates the ability of a model to distinguish between positive and negative classes across different threshold values.
|
12 |
+
The curve is generated by plotting the true positive rate (sensitivity) against the false positive rate (1-specificity) at various threshold settings.
|
13 |
+
AUROC provides a single scalar value indicating the overall discriminatory power of the model, with higher values suggesting better performance.
|
14 |
+
"""
|
15 |
+
|
16 |
+
_KWARGS_DESCRIPTION = """
|
17 |
+
AUROC metric for binary classification predictions. Here we use one-vs-all strategy to calculate the AUROC for multi-class classification problems.
|
18 |
+
The multi-class AUROC is calculated by treating each class as the positive class and the rest as the negative class.
|
19 |
+
The final score is the average of the AUROC scores for each class.
|
20 |
+
|
21 |
+
Args:
|
22 |
+
probabilities: list-like. Predicted probabilities or decision scores for the each class.
|
23 |
+
true_labels: list-like. True labels indicating the actual class memberships (must be ordinal, starting from 0).
|
24 |
+
Returns:
|
25 |
+
auroc_score: float. Multi-class Area Under the Receiver Operating Characteristic Curve (MC-AUROC) score.
|
26 |
+
"""
|
27 |
+
|
28 |
+
|
29 |
+
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class AVG_MULTICLASS_AUROC(evaluate.Metric):
    """Average of per-class one-vs-rest AUROC scores for multi-class problems."""

    def _info(self):
        """Return the metric's description and the input features it accepts."""
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            inputs_description=_KWARGS_DESCRIPTION,
            citation="",
            features=[
                datasets.Features(
                    {
                        # One score per class for every example.
                        "predictions": datasets.Sequence(datasets.Value("float")),
                        # Class index of the example; declared as int8, so
                        # indices above 127 cannot be represented.
                        "references": datasets.Value("int8"),
                    }
                ),
            ],
            reference_urls=[
                "https://en.wikipedia.org/wiki/Receiver_operating_characteristic"
            ],
        )

    def _compute(self, predictions: Sequence[Sequence[float]], references: Sequence[int]):
        """
        Computes the average AUROC score for multi-class classification problems.

        Column ``k`` of ``predictions`` is scored against the binary indicator
        ``references == k`` (one-vs-rest), and the per-class AUROC values are
        averaged with equal weight.

        Args:
            predictions: One row per example, each row holding one score per
                class. All rows must have the same, non-zero length.
            references: True class index of each example; label ``k`` refers
                to column ``k`` of ``predictions``.

        Returns:
            dict with ``"mc_auroc_score"`` (the averaged AUROC) and
            ``"mc_auroc_ci"`` (a placeholder string).

        Raises:
            ValueError: If ``predictions`` is empty, is not a rectangular
                2-D structure with at least one class column, or its length
                differs from that of ``references``.
        """
        if len(predictions) == 0:
            raise ValueError("`predictions` must contain at least one example.")

        # A rectangular float matrix lets each class column be sliced directly;
        # ragged rows are rejected here instead of being silently truncated.
        scores = numpy.asarray(predictions, dtype=float)
        if scores.ndim != 2 or scores.shape[1] == 0:
            raise ValueError(
                "`predictions` must hold one non-empty row of per-class scores per example."
            )
        if len(references) != scores.shape[0]:
            raise ValueError(
                "`predictions` and `references` must contain the same number of examples."
            )
        labels = numpy.asarray(references)

        # NOTE(review): when a class never occurs in `references`, roc_curve
        # has no positive samples for it; the resulting per-class score is
        # presumably undefined (NaN) and would propagate into the mean --
        # confirm the desired handling.
        per_class_auc = []
        for class_idx in range(scores.shape[1]):
            fpr, tpr, _ = roc_curve(
                y_true=(labels == class_idx).astype(int),
                y_score=scores[:, class_idx],
            )
            per_class_auc.append(auc(fpr, tpr))

        # Compute average AUC
        average_auc = numpy.mean(per_class_auc)

        return {
            "mc_auroc_score": average_auc,
            "mc_auroc_ci": "Not implemented yet."
        }
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
evaluate~=0.4.1
|
2 |
+
datasets~=2.15.0
|