unnati committed on
Commit
1ef2f1f
1 Parent(s): 060bdb5

Add Kendall's tau distance

Browse files

- Add kendall tau distance as module
- Add the compute function
- Add the description

Reference: [Metric Request] Fuzzy reordering and Kendall tau distance #389

Files changed (1) hide show
  1. kendall_tau_distance.py +21 -12
kendall_tau_distance.py CHANGED
@@ -41,16 +41,16 @@ Args:
41
  references: list of reference for each prediction. Each
42
  reference should be a string with tokens separated by spaces.
43
  Returns:
44
- accuracy: description of the first score,
45
- another_score: description of the second score,
46
  Examples:
47
  Examples should be written in doctest format, and should illustrate how
48
  to use the function.
49
 
50
- >>> my_new_module = evaluate.load("my_new_module")
51
- >>> results = my_new_module.compute(references=[0, 1], predictions=[0, 1])
52
  >>> print(results)
53
- {'accuracy': 1.0}
54
  """
55
 
56
  # TODO: Define external resources urls if needed
@@ -81,15 +81,24 @@ class kendalltaudistance(evaluate.Metric):
81
  reference_urls=["http://path.to.reference.url/new_module"]
82
  )
83
 
84
- def _download_and_prepare(self, dl_manager):
85
- """Optional: download external resources useful to compute the scores"""
86
- # TODO: Download external resources if needed
87
- pass
88
-
89
  def _compute(self, predictions, references):
90
  """Returns the scores"""
91
  # TODO: Compute the different scores of the module
92
- accuracy = sum(i == j for i, j in zip(predictions, references)) / len(predictions)
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  return {
94
- "accuracy": accuracy,
 
95
  }
 
41
  references: list of reference for each prediction. Each
42
  reference should be a string with tokens separated by spaces.
43
  Returns:
44
+ kendall_tau_distance: Kendall's tau distance between predictions and references
45
+ normalized_kendall_tau_distance: Kendall's tau distance between predictions and references normalized by the number of pairs
46
  Examples:
47
  Examples should be written in doctest format, and should illustrate how
48
  to use the function.
49
 
50
+ >>> kendall_tau_distance = evaluate.load("kendall_tau_distance")
51
+ >>> results = kendall_tau_distance.compute(references=[0, 1], predictions=[0, 1])
52
  >>> print(results)
53
+ {'kendall_tau_distance': 0, 'normalized_kendall_tau_distance': 0}
54
  """
55
 
56
  # TODO: Define external resources urls if needed
 
81
  reference_urls=["http://path.to.reference.url/new_module"]
82
  )
83
 
 
 
 
 
 
84
  def _compute(self, predictions, references):
85
  """Returns the scores"""
86
  # TODO: Compute the different scores of the module
87
+
88
+ n = len(predictions)
89
+ assert n == len(references), "The number of predictions and references should be the same"
90
+
91
+ n_discordant_pairs = 0
92
+
93
+ for i in range(len(predictions)):
94
+ j = references.index(predictions[i])
95
+ n_discordant_pairs += len(set(predictions[:i]).intersection(set(references[j:]))) + len(set(predictions[i+1:]).intersection(set(references[:j])))
96
+
97
+ n_discordant_pairs = n_discordant_pairs / 2
98
+
99
+ num_pairs = n * (n - 1) / 2
100
+
101
  return {
102
+ 'kendall_tau_distance': n_discordant_pairs,
103
+ 'normalized_kendall_tau_distance': n_discordant_pairs / num_pairs,
104
  }