unnati committed on
Commit
1ef2f1f
1 Parent(s): 060bdb5

Add Kendall's tau distance

Browse files

- Add kendall tau distance as module
- Add the compute function
- Add the description

Reference: [Metric Request] Fuzzy reordering and Kendall tau distance #389

Files changed (1) hide show
  1. kendall_tau_distance.py +21 -12
kendall_tau_distance.py CHANGED
@@ -41,16 +41,16 @@ Args:
41
  references: list of reference for each prediction. Each
42
  reference should be a string with tokens separated by spaces.
43
  Returns:
44
- accuracy: description of the first score,
45
- another_score: description of the second score,
46
  Examples:
47
  Examples should be written in doctest format, and should illustrate how
48
  to use the function.
49
 
50
- >>> my_new_module = evaluate.load("my_new_module")
51
- >>> results = my_new_module.compute(references=[0, 1], predictions=[0, 1])
52
  >>> print(results)
53
- {'accuracy': 1.0}
54
  """
55
 
56
  # TODO: Define external resources urls if needed
@@ -81,15 +81,24 @@ class kendalltaudistance(evaluate.Metric):
81
  reference_urls=["http://path.to.reference.url/new_module"]
82
  )
83
 
84
- def _download_and_prepare(self, dl_manager):
85
- """Optional: download external resources useful to compute the scores"""
86
- # TODO: Download external resources if needed
87
- pass
88
-
89
  def _compute(self, predictions, references):
90
  """Returns the scores"""
91
  # TODO: Compute the different scores of the module
92
- accuracy = sum(i == j for i, j in zip(predictions, references)) / len(predictions)
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  return {
94
- "accuracy": accuracy,
 
95
  }
 
41
  references: list of reference for each prediction. Each
42
  reference should be a string with tokens separated by spaces.
43
  Returns:
44
+ kendall_tau_distance: Kendall's tau distance between predictions and references
45
+ normalized_kendall_tau_distance: Kendall's tau distance between predictions and references normalized by the number of pairs
46
  Examples:
47
  Examples should be written in doctest format, and should illustrate how
48
  to use the function.
49
 
50
+ >>> kendall_tau_distance = evaluate.load("kendall_tau_distance")
51
+ >>> results = kendall_tau_distance.compute(references=[0, 1], predictions=[0, 1])
52
  >>> print(results)
53
+ {'kendall_tau_distance': 0, 'normalized_kendall_tau_distance': 0}
54
  """
55
 
56
  # TODO: Define external resources urls if needed
 
81
  reference_urls=["http://path.to.reference.url/new_module"]
82
  )
83
 
 
 
 
 
 
84
  def _compute(self, predictions, references):
85
  """Returns the scores"""
86
  # TODO: Compute the different scores of the module
87
+
88
+ n = len(predictions)
89
+ assert n == len(references), "The number of predictions and references should be the same"
90
+
91
+ n_discordant_pairs = 0
92
+
93
+ for i in range(len(predictions)):
94
+ j = references.index(predictions[i])
95
+ n_discordant_pairs += len(set(predictions[:i]).intersection(set(references[j:]))) + len(set(predictions[i+1:]).intersection(set(references[:j])))
96
+
97
+ n_discordant_pairs = n_discordant_pairs / 2
98
+
99
+ num_pairs = n * (n - 1) / 2
100
+
101
  return {
102
+ 'kendall_tau_distance': n_discordant_pairs,
103
+ 'normalized_kendall_tau_distance': n_discordant_pairs / num_pairs,
104
  }