helena-balabin committed on
Commit
7679afc
1 Parent(s): 7240cf3

Create youden index metric

Browse files
Files changed (1) hide show
  1. youden_index.py +89 -61
youden_index.py CHANGED
@@ -11,85 +11,113 @@
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
- """TODO: Add a description here."""
15
-
16
- import evaluate
17
  import datasets
 
 
 
 
 
 
 
18
 
19
-
20
- # TODO: Add BibTeX citation
21
- _CITATION = """\
22
- @InProceedings{huggingface:module,
23
- title = {A great new module},
24
- authors={huggingface, Inc.},
25
- year={2020}
26
- }
27
- """
28
-
29
- # TODO: Add description of the module here
30
- _DESCRIPTION = """\
31
- This new module is designed to solve this great ML task and is crafted with a lot of care.
32
  """
33
 
34
-
35
- # TODO: Add description of the arguments of the module here
36
  _KWARGS_DESCRIPTION = """
37
- Calculates how good are predictions given some references, using certain scores
38
  Args:
39
- predictions: list of predictions to score. Each predictions
40
- should be a string with tokens separated by spaces.
41
- references: list of reference for each prediction. Each
42
- reference should be a string with tokens separated by spaces.
43
  Returns:
44
- accuracy: description of the first score,
45
- another_score: description of the second score,
46
- Examples:
47
- Examples should be written in doctest format, and should illustrate how
48
- to use the function.
49
-
50
- >>> my_new_module = evaluate.load("my_new_module")
51
- >>> results = my_new_module.compute(references=[0, 1], predictions=[0, 1])
52
- >>> print(results)
53
- {'accuracy': 1.0}
54
  """
55
 
56
- # TODO: Define external resources urls if needed
57
- BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
 
60
  @evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
61
- class youden_index(evaluate.Metric):
62
- """TODO: Short description of my evaluation module."""
63
-
64
  def _info(self):
65
- # TODO: Specifies the evaluate.EvaluationModuleInfo object
66
  return evaluate.MetricInfo(
67
- # This is the description that will appear on the modules page.
68
- module_type="metric",
69
  description=_DESCRIPTION,
70
  citation=_CITATION,
71
  inputs_description=_KWARGS_DESCRIPTION,
72
- # This defines the format of each prediction and reference
73
- features=datasets.Features({
74
- 'predictions': datasets.Value('int64'),
75
- 'references': datasets.Value('int64'),
76
- }),
77
- # Homepage of the module for documentation
78
- homepage="http://module.homepage",
79
- # Additional links to the codebase or references
80
- codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
81
- reference_urls=["http://path.to.reference.url/new_module"]
82
  )
83
 
84
- def _download_and_prepare(self, dl_manager):
85
- """Optional: download external resources useful to compute the scores"""
86
- # TODO: Download external resources if needed
87
- pass
 
 
 
 
 
 
88
 
89
- def _compute(self, predictions, references):
90
- """Returns the scores"""
91
- # TODO: Compute the different scores of the module
92
- accuracy = sum(i == j for i, j in zip(predictions, references)) / len(predictions)
 
 
 
 
 
 
 
 
 
 
93
  return {
94
- "accuracy": accuracy,
95
- }
 
 
 
 
 
 
11
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
  # See the License for the specific language governing permissions and
13
  # limitations under the License.
14
+ """Youden index metric."""
 
 
15
  import datasets
16
+ import evaluate
17
+ import numpy as np
18
+ from sklearn.metrics import (
19
+ precision_score,
20
+ roc_auc_score,
21
+ roc_curve,
22
+ )
23
 
24
+ _DESCRIPTION = """
25
+ This metric computes the Youden index based on the area under the curve (AUC) for the Receiver Operating Characteristic Curve (ROC).
26
+ The return values represent the ideal point in the ROC curve, where max(TPR - FPR) holds true (across all points in the curve).
27
+ This metric only works with binary labels: The case in which there are only two different label classes, and each example gets only one label.
 
 
 
 
 
 
 
 
 
28
  """
29
 
 
 
30
  _KWARGS_DESCRIPTION = """
 
31
  Args:
32
+ - references (array-like of shape (n_samples,)): Ground truth labels.
33
+ - binary: expects an array-like of shape (n_samples,)
34
+ - prediction_scores (array-like of shape (n_samples,)): Model predictions, probailities of the positive class.
35
+ - binary: expects an array-like of shape (n_samples,)
36
  Returns:
37
+ Dict[str, float]: Returns threshold (for classification), sensitivity and specificity at the optimal Youden index.
 
 
 
 
 
 
 
 
 
38
  """
39
 
40
+ _CITATION = """\
41
+ @article{youden1950index,
42
+ title={Index for rating diagnostic tests},
43
+ author={Youden, William J},
44
+ journal={Cancer},
45
+ volume={3},
46
+ number={1},
47
+ pages={32--35},
48
+ year={1950},
49
+ publisher={Wiley Online Library}
50
+ }
51
+ @article{fluss2005estimation,
52
+ title={Estimation of the Youden Index and its associated cutoff point},
53
+ author={Fluss, Ronen and Faraggi, David and Reiser, Benjamin},
54
+ journal={Biometrical Journal: Journal of Mathematical Methods in Biosciences},
55
+ volume={47},
56
+ number={4},
57
+ pages={458--472},
58
+ year={2005},
59
+ publisher={Wiley Online Library}
60
+ }
61
+ @article{scikit-learn,
62
+ title={Scikit-learn: Machine Learning in {P}ython},
63
+ author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V. and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P. and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
64
+ journal={Journal of Machine Learning Research},
65
+ volume={12},
66
+ pages={2825--2830},
67
+ year={2011}
68
+ }
69
+ """
70
 
71
 
72
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class YoudenIndex(evaluate.Metric):
    """Youden index metric: optimal ROC threshold plus derived binary-classification statistics."""

    def _info(self):
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features(
                {
                    # One probability of the positive class per example (binary setting
                    # only), matching what _compute consumes — not a sequence per example.
                    "prediction_scores": datasets.Value("float"),
                    "references": datasets.Value("int32"),
                }
            ),
            reference_urls=[
                "https://en.wikipedia.org/wiki/Youden%27s_J_statistic",
                "https://gist.github.com/twolodzko/4fae2980a1f15f8682d243808e5859bb",
            ],
        )

    def _compute(
        self,
        references,
        prediction_scores,
    ):
        """Compute the Youden index to determine the optimal threshold for classification.

        Args:
            references (array-like of shape (n_samples,)): Binary ground truth labels.
            prediction_scores (array-like of shape (n_samples,)): Model predictions,
                probabilities of the positive class.

        Returns:
            Dict[str, float]: Returns threshold (for classification), sensitivity,
            specificity, PPV and NPV at the optimal Youden index and the ROC AUC score.
        """
        # 1. Compute ROC AUC over the raw scores.
        roc_auc = roc_auc_score(references, prediction_scores)
        # 2. Determine the optimal threshold based on the Youden index,
        # i.e. the ROC point maximizing J = TPR - FPR.
        fpr, tpr, thresholds = roc_curve(references, prediction_scores)
        idx = np.argmax(tpr - fpr)
        optimal_threshold = thresholds[idx]
        # 3. Calculate PPV (precision) and NPV based on the optimal threshold.
        # sklearn's roc_curve treats a score equal to the threshold as positive,
        # so use >= (not >) to stay consistent with the sensitivity/specificity
        # reported at the same index.
        optimal_predictions = np.where(
            np.asarray(prediction_scores) >= optimal_threshold, 1, 0
        )
        # zero_division=0 keeps degenerate thresholds (no predicted positives or
        # negatives) warning-free while returning the same 0.0 value.
        ppv = precision_score(references, optimal_predictions, zero_division=0)
        npv = precision_score(references, optimal_predictions, pos_label=0, zero_division=0)
        # Cast numpy scalars to plain floats for a JSON-serializable result.
        return {
            "youden_threshold": float(optimal_threshold),
            "sensitivity": float(tpr[idx]),
            "specificity": float(1 - fpr[idx]),
            "roc_auc": float(roc_auc),
            "ppv": float(ppv),
            "npv": float(npv),
        }