Added examples and fixed errors
README.md CHANGED
@@ -5,7 +5,7 @@ tags:
 - metric
 description: "Generalized Language Evaluation Understanding (GLEU) is a metric initially developed for Grammatical Error Correction (GEC), that builds upon BLEU by rewarding corrections while also correctly crediting unchanged source text."
 sdk: gradio
-sdk_version:
+sdk_version: 4.37.2
 app_file: app.py
 pinned: false
 ---
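To make the description above concrete: GLEU scores a correction by counting candidate n-grams that match the reference (clipped counts, as in BLEU) and penalizing candidate n-grams that match the source where the reference made a change. The following is a minimal, sentence-level sketch of that idea, assuming whitespace-tokenized strings; the helper names and the 0.1 smoothing floor are illustrative choices, not the exact statistics computed in gleu.py below.

from collections import Counter
from math import exp, log

def ngram_counts(tokens, n):
    """Counter of all n-grams in a token list."""
    return Counter(tuple(tokens[i:i + n]) for i in range(len(tokens) - n + 1))

def sentence_gleu(source, reference, prediction, order=4):
    """Illustrative sentence-level GLEU for GEC (simplified sketch)."""
    src, ref, hyp = source.split(), reference.split(), prediction.split()
    log_precisions = []
    for n in range(1, order + 1):
        s, r, h = ngram_counts(src, n), ngram_counts(ref, n), ngram_counts(hyp, n)
        reward = sum((h & r).values())          # candidate n-grams found in the reference
        penalty = sum((h & (s - r)).values())   # candidate n-grams kept from the source in excess of the reference
        total = max(sum(h.values()), 1)
        precision = max(reward - penalty, 0.1) / total  # 0.1 floor keeps log() defined
        log_precisions.append(log(precision))
    brevity_penalty = min(0.0, 1.0 - len(ref) / max(len(hyp), 1))  # BLEU-style, in log space
    return exp(brevity_penalty + sum(log_precisions) / order)

source = "We may of actual fact be communicate with a fake account ."
reference = "We may in actual fact be communicating with a fake account ."
print(sentence_gleu(source, reference, reference))  # corrected output is rewarded
print(sentence_gleu(source, reference, source))     # unchanged source loses credit for uncorrected n-grams

The module itself works at the corpus level and resamples references across iterations, as the gleu.py changes below show.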
gleu.py CHANGED
@@ -18,6 +18,7 @@ import datasets
 from collections import Counter
 from math import log, exp
 from random import seed, randint
+import ipdb
 
 
 # TODO: Add BibTeX citation
@@ -45,20 +46,28 @@ _DESCRIPTION = """\
 _KWARGS_DESCRIPTION = """
 Calculates how good are predictions given some references, using certain scores
 Args:
-    sources: Source language reference sentences. This is assumed to be same as references if not provided
-    references:
-    predictions: list of predictions to score. Each
+    sources: Source language reference sentences. This is assumed to be the same as references if not provided.
+    references: Reference for each prediction. Each reference should be a string with tokens separated by spaces.
+    predictions: list of predictions to score. Each prediction should be a string with tokens separated by spaces.
 Returns:
-    gleu_score:
+    gleu_score: Average gleu_score over all predictions.
 
 Examples:
-    Examples should be written in doctest format, and should illustrate how
-    to use the function.
 
     >>> my_new_module = evaluate.load("my_new_module")
-    >>>
+    >>> references=["We may in actual fact be communicating with a hoax Facebook acccount of a cyberfriend , which we assume to be real but in reality , it is a fake account ."]
+    >>> results = my_new_module.compute(references=references, predictions=["We may of actual fact communicating with a hoax Facebook acccount of a cyber friend , which we assumed to be real but in reality , it is a fake account ."])
    >>> print(results)
-    {'
+    {'gleu_score': 0.6}
+
+    >>> results = my_new_module.compute(references=references, predictions=["We may be in actual fact communicating with a hoax Facebook acccount of a cyber friend , we assume to be real but in reality , it is a fake account ."])
+    >>> print(results)
+    {'gleu_score': 0.62}
+
+    >>> results = my_new_module.compute(references=references, predictions=["We may in actual fact communicating with a hoax Facebook account of a cyber friend , which we assume to be real but in reality , it is a fake accounts ."])
+    >>> print(results)
+    {'gleu_score': 0.64}
+
 """
 
 # TODO: Define external resources urls if needed
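Outside a doctest, the metric is used through the standard evaluate API. A minimal sketch, assuming the placeholder module name "my_new_module" from the docstring above and whitespace-tokenized inputs (the actual load path depends on where the Space is published):

import evaluate

# "my_new_module" is the placeholder from the docstring; substitute the real module path.
gleu_metric = evaluate.load("my_new_module")

# One whitespace-tokenized reference string per prediction.
predictions = [
    "She went to school yesterday .",
    "He have three cat .",
]
references = [
    "She went to school yesterday .",
    "He has three cats .",
]

results = gleu_metric.compute(predictions=predictions, references=references)
# Per the Returns section, the value is the average GLEU score over all predictions.
print(results["gleu_score"])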
@@ -171,15 +180,13 @@ class gleu(evaluate.Metric):
             citation=_CITATION,
             inputs_description=_KWARGS_DESCRIPTION,
             # This defines the format of each prediction and reference
-            features=datasets.Features(
-                ...
-            ),
-            codebase_urls=["http://github.com/path/to/codebase/of/new_module"],
-            reference_urls=["http://path.to.reference.url/new_module"]
+            features=datasets.Features(
+                {
+                    "predictions": datasets.Value("string", id="sequence"),
+                    "references": datasets.Value("string", id="sequence"),
+                }
+            ),
+            codebase_urls=["https://github.com/cnap/gec-ranking/"],
         )
 
     def _download_and_prepare(self, dl_manager):
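The Features schema above declares predictions and references as plain strings, so each prediction is paired with exactly one whitespace-tokenized reference rather than a list of alternatives. A short sketch of feeding examples incrementally through the generic evaluate API (the module name is again the docstring placeholder):

import evaluate

gleu_metric = evaluate.load("my_new_module")  # placeholder name from the docstring

pairs = [
    ("She go to school yesterday .", "She went to school yesterday ."),
    ("I am agree with you .", "I agree with you ."),
]
for prediction, reference in pairs:
    # one reference string per prediction, matching the Features schema
    gleu_metric.add_batch(predictions=[prediction], references=[reference])

print(gleu_metric.compute())  # {'gleu_score': ...}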
@@ -190,7 +197,7 @@ class gleu(evaluate.Metric):
     def __init__(self, order=4) :
         self.order = order
 
-    def _compute(self,
+    def _compute(self, references, predictions):
         """Returns the scores"""
 
         num_iterations = 500
@@ -200,11 +207,11 @@
 
         gleu_calculator = GLEU(self.order)
 
-        if sources:
-            gleu_calculator.load_sources(sources)
-        else:
-            gleu_calculator.load_sources(references)
-
+        # if sources:
+        #     gleu_calculator.load_sources(sources)
+        # else:
+        #
+        gleu_calculator.load_sources(references)
         gleu_calculator.load_references(references)
 
         # first generate a random list of indices, using a different seed
@@ -231,4 +238,8 @@ class gleu(evaluate.Metric):
                 stats_by_ref[ref] = this_stats
 
             iter_stats[j] = [sum(scores) for scores in zip(iter_stats[j], this_stats)]
-
+
+        final_gleu_score = get_gleu_stats([gleu_calculator.compute_gleu(stats)
+                                           for stats in iter_stats])[0]
+        ipdb.set_trace()
+        return {"gleu_score": final_gleu_score}
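GLEU, compute_gleu, and get_gleu_stats are not shown in this diff; they come from the gec-ranking code listed in codebase_urls. Under that assumption, the sketch below illustrates what the loop above is building toward: each of the 500 iterations samples one reference per sentence, sums corpus-level n-gram statistics, turns them into a BLEU-style score (geometric mean of n-gram precisions with a brevity penalty), and the mean over iterations is what taking element [0] of get_gleu_stats corresponds to in that code. Names such as corpus_gleu_from_stats and per_sentence_stats are illustrative, not part of the module.

from math import exp, log
from random import seed, randint

def corpus_gleu_from_stats(stats, order=4):
    """stats = [hyp_len, ref_len, match_1, total_1, ..., match_order, total_order]."""
    hyp_len, ref_len = stats[0], stats[1]
    log_precisions = []
    for n in range(order):
        matches, total = stats[2 + 2 * n], stats[3 + 2 * n]
        log_precisions.append(log(max(matches, 0.1) / max(total, 0.1)))  # smooth zero counts
    brevity_penalty = min(0.0, 1.0 - ref_len / max(hyp_len, 1))  # log-space BLEU-style BP
    return exp(brevity_penalty + sum(log_precisions) / order)

def average_gleu(per_sentence_stats, num_iterations=500, order=4):
    """per_sentence_stats[i][k]: stats of sentence i scored against its k-th reference."""
    iteration_scores = []
    for j in range(num_iterations):
        seed(j)  # a different seed per iteration, as the comment in _compute says
        totals = [0] * (2 + 2 * order)
        for refs in per_sentence_stats:
            chosen = refs[randint(0, len(refs) - 1)]  # sample one reference for this sentence
            totals = [t + s for t, s in zip(totals, chosen)]
        iteration_scores.append(corpus_gleu_from_stats(totals, order))
    return sum(iteration_scores) / len(iteration_scores)  # mean over iterations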
|