venkatasg committed
Commit 35d2d2e
1 Parent(s): 24fa801

Added examples and fixed errors
Files changed (2):
  1. README.md +1 -1
  2. gleu.py +35 -24
README.md CHANGED
@@ -5,7 +5,7 @@ tags:
 - metric
 description: "Generalized Language Evaluation Understanding (GLEU) is a metric initially developed for Grammatical Error Correction (GEC), that builds upon BLEU by rewarding corrections while also correctly crediting unchanged source text."
 sdk: gradio
-sdk_version: 3.19.1
+sdk_version: 4.37.2
 app_file: app.py
 pinned: false
 ---
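The README description above summarizes the metric in one line. As a rough illustration of that idea (a toy sketch, not the gec-ranking algorithm; every name here is made up), a GLEU-style precision credits n-grams the hypothesis shares with the reference while penalizing n-grams it shares only with the uncorrected source:

from collections import Counter

def ngrams(tokens, n):
    # Multiset of n-grams in a token list
    return Counter(tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1))

def gleu_like_precision(source, reference, hypothesis, n=2):
    # Toy GLEU-style precision: reward reference matches, penalize
    # n-grams the hypothesis kept from the source but the reference dropped.
    src, ref, hyp = (ngrams(s.split(), n) for s in (source, reference, hypothesis))
    reward = sum((hyp & ref).values())
    penalty = sum(((hyp & src) - ref).values())
    return max(reward - penalty, 0) / max(sum(hyp.values()), 1)

# Unchanged source text still earns credit, because it also matches the reference:
print(gleu_like_precision("she go to school every day .",
                          "she goes to school every day .",
                          "she goes to school every day ."))  # 1.0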
gleu.py CHANGED
@@ -18,6 +18,7 @@ import datasets
 from collections import Counter
 from math import log, exp
 from random import seed, randint
+import ipdb
 
 
 # TODO: Add BibTeX citation
@@ -45,20 +46,28 @@ _DESCRIPTION = """\
 _KWARGS_DESCRIPTION = """
 Calculates how good predictions are given some references, using certain scores
 Args:
-    sources: Source language reference sentences. This is assumed to be same as references if not provided
-    references: list of reference for each prediction. Each reference should be a string with tokens separated by spaces.
-    predictions: list of predictions to score. Each predictions should be a string with tokens separated by spaces.
+    sources: Source language reference sentences. These are assumed to be the same as references if not provided.
+    references: Reference for each prediction. Each reference should be a string with tokens separated by spaces.
+    predictions: List of predictions to score. Each prediction should be a string with tokens separated by spaces.
 Returns:
-    gleu_score: description of the first score,
+    gleu_score: Average GLEU score over all predictions.
 
 Examples:
-    Examples should be written in doctest format, and should illustrate how
-    to use the function.
 
     >>> my_new_module = evaluate.load("my_new_module")
-    >>> results = my_new_module.compute(references=[0, 1], predictions=[0, 1])
+    >>> references = ["We may in actual fact be communicating with a hoax Facebook acccount of a cyberfriend , which we assume to be real but in reality , it is a fake account ."]
+    >>> results = my_new_module.compute(references=references, predictions=["We may of actual fact communicating with a hoax Facebook acccount of a cyber friend , which we assumed to be real but in reality , it is a fake account ."])
     >>> print(results)
-    {'accuracy': 1.0}
+    {'gleu_score': 0.6}
+
+    >>> results = my_new_module.compute(references=references, predictions=["We may be in actual fact communicating with a hoax Facebook acccount of a cyber friend , we assume to be real but in reality , it is a fake account ."])
+    >>> print(results)
+    {'gleu_score': 0.62}
+
+    >>> results = my_new_module.compute(references=references, predictions=["We may in actual fact communicating with a hoax Facebook account of a cyber friend , which we assume to be real but in reality , it is a fake accounts ."])
+    >>> print(results)
+    {'gleu_score': 0.64}
+
 """
 
 # TODO: Define external resources urls if needed
@@ -171,15 +180,13 @@ class gleu(evaluate.Metric):
             citation=_CITATION,
             inputs_description=_KWARGS_DESCRIPTION,
             # This defines the format of each prediction and reference
-            features=datasets.Features({
-                'predictions': datasets.Value('int64'),
-                'references': datasets.Value('int64'),
-            }),
-            # Homepage of the module for documentation
-            homepage="http://module.homepage",
-            # Additional links to the codebase or references
-            codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
-            reference_urls=["http://path.to.reference.url/new_module"]
+            features=datasets.Features(
+                {
+                    "predictions": datasets.Value("string", id="sequence"),
+                    "references": datasets.Value("string", id="sequence"),
+                }
+            ),
+            codebase_urls=["https://github.com/cnap/gec-ranking/"],
         )
 
     def _download_and_prepare(self, dl_manager):
@@ -190,7 +197,7 @@ class gleu(evaluate.Metric):
     def __init__(self, order=4):
         self.order = order
 
-    def _compute(self, sources=None, references, predictions):
+    def _compute(self, references, predictions):
        """Returns the scores"""
 
        num_iterations = 500
@@ -200,11 +207,11 @@ class gleu(evaluate.Metric):
 
        gleu_calculator = GLEU(self.order)
 
-        if sources:
-            gleu_calculator.load_sources(sources)
-        else:
-            gleu_calculator.load_sources(references)
-
+        # if sources:
+        #     gleu_calculator.load_sources(sources)
+        # else:
+        #
+        gleu_calculator.load_sources(references)
        gleu_calculator.load_references(references)
 
        # first generate a random list of indices, using a different seed
@@ -231,4 +238,8 @@ class gleu(evaluate.Metric):
                stats_by_ref[ref] = this_stats
 
            iter_stats[j] = [sum(scores) for scores in zip(iter_stats[j], this_stats)]
-
+
+        final_gleu_score = get_gleu_stats([gleu_calculator.compute_gleu(stats)
+                                           for stats in iter_stats])[0]
+        ipdb.set_trace()
+        return {"gleu_score": final_gleu_score}
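Note the aggregation line in the last hunk: it must read `for scores in zip(...)`, not `or` as the extracted text had it (fixed above). It elementwise-sums the current sentence's n-gram statistics into the running totals for one bootstrap iteration. A minimal sketch with made-up numbers:

# Hypothetical running totals for bootstrap iteration j, plus the stats
# of the sentence being added (real values come from the GLEU calculator).
iter_stats_j = [4, 3, 2, 1]
this_stats = [2, 2, 1, 0]

# Elementwise sum, exactly as in the fixed comprehension
iter_stats_j = [sum(scores) for scores in zip(iter_stats_j, this_stats)]
print(iter_stats_j)  # [6, 5, 3, 1]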
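The doctest still loads the template placeholder "my_new_module". Against the published metric the call would presumably look like the sketch below; the load path and the printed score are assumptions, not verified output:

import evaluate

# Assumed load path for this Space; adjust if the metric lives elsewhere.
gleu = evaluate.load("venkatasg/gleu")

results = gleu.compute(
    references=["the cat sat on the mat ."],
    predictions=["the cat sit on the mat ."],
)
print(results)  # e.g. {'gleu_score': ...}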