|
|
|
""" |
|
Created on 02.02.24 |
|
Module for raw ROUGE score calculation from: |
|
@inproceedings{straka-etal-2018-sumeczech, |
|
title = "{S}ume{C}zech: Large {C}zech News-Based Summarization Dataset", |
|
author = "Straka, Milan and |
|
Mediankin, Nikita and |
|
Kocmi, Tom and |
|
{\v{Z}}abokrtsk{\'y}, Zden{\v{e}}k and |
|
Hude{\v{c}}ek, Vojt{\v{e}}ch and |
|
Haji{\v{c}}, Jan", |
|
editor = "Calzolari, Nicoletta and |
|
Choukri, Khalid and |
|
Cieri, Christopher and |
|
Declerck, Thierry and |
|
Goggi, Sara and |
|
Hasida, Koiti and |
|
Isahara, Hitoshi and |
|
Maegaard, Bente and |
|
Mariani, Joseph and |
|
Mazo, H{\'e}l{\`e}ne and |
|
Moreno, Asuncion and |
|
Odijk, Jan and |
|
Piperidis, Stelios and |
|
Tokunaga, Takenobu", |
|
booktitle = "Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)", |
|
month = may, |
|
year = "2018", |
|
address = "Miyazaki, Japan", |
|
publisher = "European Language Resources Association (ELRA)", |
|
url = "https://aclanthology.org/L18-1551", |
|
} |
|
|
|
|
|
:author: Martin Dočekal |
|
""" |
|
|
|
import re |
|
from typing import Sequence |
|
|
|
import datasets |
|
import evaluate |
|
|
|
|
|
class RougeRaw: |
|
""" |
|
    This is the original implementation of the RougeRAW metric.

    It computes the RougeRAW-1, RougeRAW-2 and RougeRAW-L metrics.
|
""" |
|
|
|
class FScore: |
|
"""F1 score representation.""" |
|
def __init__(self, correct, gold, system): |
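            # precision = correct / system, recall = correct / gold and
            # f = harmonic mean of the two; all fall back to 0 on empty denominators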
|
self.p = correct / system if system else 0. |
|
self.r = correct / gold if gold else 0. |
|
self.f = 2 * correct / (system + gold) if system + gold else 0. |
|
|
|
def _rouge_n(self, n, gold_words, system_words): |
|
"""Compute Rouge-n for given words.""" |
|
def n_grams(n, words): |
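            # count n-grams (joined with tabs so they can serve as dictionary keys)
            # together with the total number of n-grams in the word list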
|
ngrams = {} |
|
total = 0 |
|
for i in range(len(words) - n + 1): |
|
ngram = "\t".join(words[i:i + n]) |
|
ngrams[ngram] = 1 + ngrams.get(ngram, 0) |
|
total += 1 |
|
return ngrams, total |
|
|
|
gold_ngrams, gold_total = n_grams(n, gold_words) |
|
system_ngrams, system_total = n_grams(n, system_words) |
|
|
|
intersection = 0 |
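        # clipped overlap: each system n-gram is credited at most as many times
        # as it occurs in the gold document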
|
for ngram in system_ngrams: |
|
intersection += min(system_ngrams[ngram], gold_ngrams.get(ngram, 0)) |
|
|
|
return self.FScore(intersection, gold_total, system_total) |
|
|
|
def _rouge_l(self, gold_words, system_words): |
|
"""Compute Rouge-L for given words.""" |
|
lcs = [[0] * len(system_words) for _ in gold_words] |
|
for r in range(len(gold_words)): |
|
for s in range(len(system_words)): |
|
if gold_words[r] == system_words[s]: |
|
lcs[r][s] = 1 + (lcs[r - 1][s - 1] if r and s else 0) |
|
lcs[r][s] = max(lcs[r][s], lcs[r - 1][s] if r else 0) |
|
lcs[r][s] = max(lcs[r][s], lcs[r][s - 1] if s else 0) |
|
|
|
return self.FScore(lcs[-1][-1], len(gold_words), len(system_words)) |
|
|
|
def _tokenize(self, text): |
|
"""Tokenize given text.""" |
|
        return re.sub(r"\s+", " ", re.sub(r"\b", " ", text, flags=re.UNICODE), flags=re.UNICODE).strip().split(" ")
|
|
|
def document(self, gold, system): |
|
"""Compute RougeRAW-1, RougeRAW-2, RougeRAW-L for given documents. |
|
Each document should be a string. |
|
""" |
|
|
|
assert isinstance(gold, str) and isinstance(system, str), "Expected string arguments" |
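        # matching is case-insensitive: both documents are lowercased before scoring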
|
|
|
lc_gold_words = [word.lower() for word in self._tokenize(gold)] |
|
lc_system_words = [word.lower() for word in self._tokenize(system)] |
|
|
|
return { |
|
"1": self._rouge_n(1, lc_gold_words, lc_system_words), |
|
"2": self._rouge_n(2, lc_gold_words, lc_system_words), |
|
"L": self._rouge_l(lc_gold_words, lc_system_words), |
|
} |
|
|
|
def corpus(self, gold, system): |
|
"""Compute RougeRAW-1, RougeRAW-2, RougeRAW-L for given corpora. |
|
Each corpus should be a collection of documents, each document a string. |
|
""" |
|
|
|
assert isinstance(gold, list) and isinstance(system, list), "Expected list arguments" |
|
assert len(gold) == len(system), "Given corpora should be of the same length" |
|
|
|
rouge = {key: self.FScore(0, 0, 0) for key in ["1", "2", "L"]} |
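        # per-document precision, recall and F1 are accumulated and then macro-averaged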
|
|
|
if len(gold): |
|
for gold_document, system_document in zip(gold, system): |
|
for key, value in self.document(gold_document, system_document).items(): |
|
rouge[key].p += value.p |
|
rouge[key].r += value.r |
|
rouge[key].f += value.f |
|
|
|
for key in rouge: |
|
rouge[key].p /= len(gold) |
|
rouge[key].r /= len(gold) |
|
rouge[key].f /= len(gold) |
|
|
|
return rouge |
|
|
|
|
|
_CITATION = """\ |
|
@inproceedings{straka-etal-2018-sumeczech, |
|
title = "{S}ume{C}zech: Large {C}zech News-Based Summarization Dataset", |
|
author = "Straka, Milan and |
|
Mediankin, Nikita and |
|
Kocmi, Tom and |
|
{\v{Z}}abokrtsk{\'y}, Zden{\v{e}}k and |
|
Hude{\v{c}}ek, Vojt{\v{e}}ch and |
|
Haji{\v{c}}, Jan", |
|
editor = "Calzolari, Nicoletta and |
|
Choukri, Khalid and |
|
Cieri, Christopher and |
|
Declerck, Thierry and |
|
Goggi, Sara and |
|
Hasida, Koiti and |
|
Isahara, Hitoshi and |
|
Maegaard, Bente and |
|
Mariani, Joseph and |
|
Mazo, H{\'e}l{\`e}ne and |
|
Moreno, Asuncion and |
|
Odijk, Jan and |
|
Piperidis, Stelios and |
|
Tokunaga, Takenobu", |
|
booktitle = "Proceedings of the Eleventh International Conference on Language Resources and Evaluation ({LREC} 2018)", |
|
month = may, |
|
year = "2018", |
|
address = "Miyazaki, Japan", |
|
publisher = "European Language Resources Association (ELRA)", |
|
url = "https://aclanthology.org/L18-1551", |
|
} |
|
""" |
|
|
|
_DESCRIPTION = """\ |
|
ROUGE RAW is a language-agnostic variant of ROUGE without a stemmer, stop words and synonym matching.
|
This is a wrapper around the original http://hdl.handle.net/11234/1-2615 script. |
|
""" |
|
|
|
_KWARGS_DESCRIPTION = """ |
|
ROUGE RAW metric for lists of predictions and references.
|
Args: |
|
predictions: list of predictions to evaluate. Each prediction should be a string with tokens separated by spaces. |
|
    references: list of references, one for each prediction. Each reference should be a string with tokens separated by spaces.
|
Returns: |
|
rougeraw1_precision |
|
rougeraw1_recall |
|
rougeraw1_fmeasure |
|
rougeraw2_precision |
|
rougeraw2_recall |
|
rougeraw2_fmeasure |
|
rougerawl_precision |
|
rougerawl_recall |
|
rougerawl_fmeasure |
|
Examples: |
|
>>> rougeraw = evaluate.load('CZLC/rouge_raw') |
|
>>> predictions = ["the cat is on the mat", "hello there"] |
|
>>> references = ["the cat is on the mat", "hello there"] |
|
>>> results = rougeraw.compute(predictions=predictions, references=references) |
|
>>> print(results) |
|
{'rougeraw1_precision': 1.0, 'rougeraw1_recall': 1.0, 'rougeraw1_fmeasure': 1.0, 'rougeraw2_precision': 1.0, 'rougeraw2_recall': 1.0, 'rougeraw2_fmeasure': 1.0, 'rougerawl_precision': 1.0, 'rougerawl_recall': 1.0, 'rougerawl_fmeasure': 1.0} |
|
""" |
|
|
|
|
|
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION) |
|
class Rouge(evaluate.Metric): |
|
def _info(self): |
|
return evaluate.MetricInfo( |
|
description=_DESCRIPTION, |
|
citation=_CITATION, |
|
inputs_description=_KWARGS_DESCRIPTION, |
|
features=[ |
|
datasets.Features( |
|
{ |
|
"predictions": datasets.Value("string", id="sequence"), |
|
"references": datasets.Value("string", id="sequence"), |
|
} |
|
), |
|
], |
|
reference_urls=[ |
|
"http://hdl.handle.net/11234/1-2615", |
|
], |
|
) |
|
|
|
def _compute(self, predictions: Sequence[str], references: Sequence[str]): |
|
        res = RougeRaw().corpus(list(references), list(predictions))
|
return { |
|
"rougeraw1_precision": res["1"].p, |
|
"rougeraw1_recall": res["1"].r, |
|
"rougeraw1_fmeasure": res["1"].f, |
|
"rougeraw2_precision": res["2"].p, |
|
"rougeraw2_recall": res["2"].r, |
|
"rougeraw2_fmeasure": res["2"].f, |
|
"rougerawl_precision": res["L"].p, |
|
"rougerawl_recall": res["L"].r, |
|
"rougerawl_fmeasure": res["L"].f, |
|
} |
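

# A minimal self-test sketch, not part of the original SumeCzech script: it exercises the
# raw implementation directly (without evaluate.load); the example sentences are made up.
if __name__ == "__main__":
    scores = RougeRaw().corpus(
        gold=["the cat is on the mat", "hello there"],
        system=["the cat sat on the mat", "hello there"],
    )
    for name, score in scores.items():
        print(f"RougeRAW-{name}: P={score.p:.3f} R={score.r:.3f} F={score.f:.3f}")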
|
|
|
|