Remeris committed on
Commit
a16c41f
·
verified ·
1 Parent(s): 1d42cb1

Update rouge_ru.py

Browse files
Files changed (1) hide show
  1. rouge_ru.py +200 -0
rouge_ru.py CHANGED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2020 The HuggingFace Evaluate Authors.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ """ ROUGE metric from Google Research github repo. """
15
+
16
+ # The dependencies in https://github.com/google-research/google-research/blob/master/rouge/requirements.txt
17
+ from collections.abc import Callable
18
+ from string import punctuation
19
+ from typing import List
20
+
21
+ import absl # Here to have a nice missing dependency error message early on
22
+ import datasets
23
+ import evaluate
24
+ import nltk # Here to have a nice missing dependency error message early on
25
+ import numpy # Here to have a nice missing dependency error message early on
26
+ import six # Here to have a nice missing dependency error message early on
27
+ from nltk.corpus import stopwords
28
+ from nltk.tokenize import word_tokenize
29
+ from rouge_score import rouge_scorer, scoring
30
+
31
+ _CITATION = """\
32
+ @inproceedings{lin-2004-rouge,
33
+ title = "{ROUGE}: A Package for Automatic Evaluation of Summaries",
34
+ author = "Lin, Chin-Yew",
35
+ booktitle = "Text Summarization Branches Out",
36
+ month = jul,
37
+ year = "2004",
38
+ address = "Barcelona, Spain",
39
+ publisher = "Association for Computational Linguistics",
40
+ url = "https://www.aclweb.org/anthology/W04-1013",
41
+ pages = "74--81",
42
+ }
43
+ """
44
+
45
+ _DESCRIPTION = """\
46
+ ROUGE, or Recall-Oriented Understudy for Gisting Evaluation, is a set of metrics and a software package used for
47
+ evaluating automatic summarization and machine translation software in natural language processing.
48
+ The metrics compare an automatically produced summary or translation against a reference or a set of references (human-produced) summary or translation.
49
+
50
+ Note that ROUGE is case insensitive, meaning that upper case letters are treated the same way as lower case letters.
51
+
52
+ This metrics is a wrapper around Google Research reimplementation of ROUGE:
53
+ https://github.com/google-research/google-research/tree/master/rouge
54
+ """
55
+
56
+ _KWARGS_DESCRIPTION = """
57
+ Calculates average rouge scores for a list of hypotheses and references
58
+ Args:
59
+ predictions: list of predictions to score. Each prediction
60
+ should be a string with tokens separated by spaces.
61
+ references: list of reference for each prediction. Each
62
+ reference should be a string with tokens separated by spaces.
63
+ rouge_types: A list of rouge types to calculate.
64
+ Valid names:
65
+ `"rouge{n}"` (e.g. `"rouge1"`, `"rouge2"`) where: {n} is the n-gram based scoring,
66
+ `"rougeL"`: Longest common subsequence based scoring.
67
+ `"rougeLsum"`: rougeLsum splits text using `"\n"`.
68
+ See details in https://github.com/huggingface/datasets/issues/617
69
+ use_stemmer: Bool indicating whether Porter stemmer should be used to strip word suffixes.
70
+ use_aggregator: Return aggregates if this is set to True
71
+ Returns:
72
+ rouge1: rouge_1 (f1),
73
+ rouge2: rouge_2 (f1),
74
+ rougeL: rouge_l (f1),
75
+ rougeLsum: rouge_lsum (f1)
76
+ Examples:
77
+
78
+ >>> rouge = evaluate.load('rouge')
79
+ >>> predictions = ["hello there", "general kenobi"]
80
+ >>> references = ["hello there", "general kenobi"]
81
+ >>> results = rouge.compute(predictions=predictions, references=references)
82
+ >>> print(results)
83
+ {'rouge1': 1.0, 'rouge2': 1.0, 'rougeL': 1.0, 'rougeLsum': 1.0}
84
+ """
85
+
86
+
87
def tokenize_normalize_ru(
    row,
    normalizer_foo: Callable,
    russian_stopwords: List[str]
) -> List[str]:
    """Tokenize a text, drop stop words and punctuation, and normalize the rest.

    The text is lower-cased and split with NLTK's ``word_tokenize``. Tokens
    that are stop words or consist solely of punctuation are discarded; every
    remaining token is passed through ``normalizer_foo``.

    Args:
        row: The text to tokenize (must support ``.lower()``).
        normalizer_foo: Callable applied to each kept token, e.g. a stemmer's
            ``stem`` method or a lemmatizer.
        russian_stopwords: Collection of (lower-case) stop words to remove.

    Returns:
        The list of normalized tokens, in their original order.
    """
    # Local import keeps this block self-contained; the module is stdlib.
    import unicodedata

    # Build the lookup sets once per call so each membership test is O(1).
    stop_set = frozenset(russian_stopwords)
    ascii_punct = frozenset(punctuation)

    def _is_punctuation(token: str) -> bool:
        # A plain `token in punctuation` is a *substring* test against the
        # punctuation string, which lets multi-character tokens produced by
        # the tokenizer ("``", "''", "...", "--") slip through. Check every
        # character instead, and also treat Unicode punctuation (e.g. the
        # quotes and dashes common in Russian text) as punctuation.
        return all(
            ch in ascii_punct or unicodedata.category(ch).startswith("P")
            for ch in token
        )

    return [
        normalizer_foo(word)
        for word in word_tokenize(row.lower())
        if word not in stop_set and not _is_punctuation(word)
    ]
102
+
103
class Tokenizer:
    """Adapter exposing a tokenizing callable through the `tokenize` method expected by rouge-score.

    The wrapped callable is invoked as
    ``tokenizer_func(text, word_normalizer_foo, stopwords)``.
    """

    def __init__(self, tokenizer_func, word_normalizer_foo=None, language="russian"):
        # Fall back to the NLTK Snowball stemmer for `language` when the
        # caller does not supply a word normalizer of their own.
        if word_normalizer_foo is None:
            word_normalizer_foo = nltk.stem.SnowballStemmer(language).stem

        self.tokenizer_func = tokenizer_func
        self.word_normalizer_foo = word_normalizer_foo
        # NLTK stop-word list for `language`.
        self.stopwords = stopwords.words(language)

    def tokenize(self, text):
        """Return the tokens of `text` as produced by the wrapped callable."""
        normalizer, stop_words = self.word_normalizer_foo, self.stopwords
        return self.tokenizer_func(text, normalizer, stop_words)
119
+
120
+
121
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class Rouge(evaluate.Metric):
    # ROUGE metric built on `rouge_score`, with an optional custom tokenizer
    # that is wrapped by `Tokenizer` (Russian stemmer and stop words by default).

    def _info(self):
        """Describe the metric: citation, docs and the accepted input schemas."""
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # Two accepted layouts: several references per prediction, or one.
            features=[
                datasets.Features(
                    {
                        "predictions": datasets.Value("string", id="sequence"),
                        "references": datasets.Sequence(datasets.Value("string", id="sequence")),
                    }
                ),
                datasets.Features(
                    {
                        "predictions": datasets.Value("string", id="sequence"),
                        "references": datasets.Value("string", id="sequence"),
                    }
                ),
            ],
            codebase_urls=["https://github.com/google-research/google-research/tree/master/rouge"],
            reference_urls=[
                "https://en.wikipedia.org/wiki/ROUGE_(metric)",
                "https://github.com/google-research/google-research/tree/master/rouge",
            ],
        )

    def _compute(
        self, predictions, references, rouge_types=None, use_aggregator=True, use_stemmer=False, tokenizer=None
    ):
        """Score every (reference, prediction) pair and collect the results.

        Args:
            predictions: Predicted texts.
            references: Reference texts; each item is either a string or a
                list of strings (multiple references for one prediction).
            rouge_types: ROUGE variants to compute; defaults to
                ``["rouge1", "rouge2", "rougeL", "rougeLsum"]``.
            use_aggregator: If True, aggregate the per-pair scores with
                ``scoring.BootstrapAggregator`` and report the ``mid`` values;
                otherwise report one value per pair.
            use_stemmer: Forwarded to ``rouge_scorer.RougeScorer``.
            tokenizer: Optional callable taking ``(text, word_normalizer,
                stopwords)``; it is wrapped in ``Tokenizer`` before being
                handed to the scorer.

        Returns:
            A dict mapping each ROUGE type to a dict with the keys
            ``"recall"``, ``"precision"`` and ``"fmeasure"``. Values are
            single aggregated scores when ``use_aggregator`` is True, tuples
            with one entry per pair otherwise. Empty input yields ``{}``.
        """
        if rouge_types is None:
            rouge_types = ["rouge1", "rouge2", "rougeL", "rougeLsum"]

        # Guard the peek at the first reference so empty input does not raise
        # IndexError.
        multi_ref = len(references) > 0 and isinstance(references[0], list)

        # NOTE(review): with tokenizer=None the scorer falls back to
        # rouge_score's default tokenizer, which appears to keep only
        # [a-z0-9] characters and would therefore discard Cyrillic text.
        # Confirm whether `tokenize_normalize_ru` should be the default here.
        if tokenizer is not None:
            tokenizer = Tokenizer(tokenizer)

        scorer = rouge_scorer.RougeScorer(rouge_types=rouge_types, use_stemmer=use_stemmer, tokenizer=tokenizer)
        score_pair = scorer.score_multi if multi_ref else scorer.score
        scores = [score_pair(ref, pred) for ref, pred in zip(references, predictions)]

        # Nothing to score: return an empty result instead of failing on
        # `scores[0]` below.
        if not scores:
            return {}

        if use_aggregator:
            aggregator = scoring.BootstrapAggregator()
            for score in scores:
                aggregator.add_scores(score)
            return {
                key: {
                    "recall": aggregate.mid.recall,
                    "precision": aggregate.mid.precision,
                    "fmeasure": aggregate.mid.fmeasure,
                }
                for key, aggregate in aggregator.aggregate().items()
            }

        # Per-pair output: one tuple per statistic, aligned with the inputs.
        return {
            key: {
                "recall": tuple(score[key].recall for score in scores),
                "precision": tuple(score[key].precision for score in scores),
                "fmeasure": tuple(score[key].fmeasure for score in scores),
            }
            for key in scores[0]
        }