|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import evaluate |
|
import datasets |
|
from .tokenizer_13a import Tokenizer13a |
|
|
|
|
|
|
|
_CITATION = """\ |
|
@inproceedings{liu-etal-2022-rethinking, |
|
title = "Rethinking and Refining the Distinct Metric", |
|
author = "Liu, Siyang and |
|
Sabour, Sahand and |
|
Zheng, Yinhe and |
|
Ke, Pei and |
|
Zhu, Xiaoyan and |
|
Huang, Minlie", |
|
booktitle = "Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)", |
|
year = "2022", |
|
publisher = "Association for Computational Linguistics", |
|
url = "https://aclanthology.org/2022.acl-short.86", |
|
doi = "10.18653/v1/2022.acl-short.86", |
|
} |
|
|
|
@inproceedings{li-etal-2016-diversity, |
|
title = "A Diversity-Promoting Objective Function for Neural Conversation Models", |
|
author = "Li, Jiwei and |
|
Galley, Michel and |
|
Brockett, Chris and |
|
Gao, Jianfeng and |
|
Dolan, Bill", |
|
booktitle = "Proceedings of the 2016 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies", |
|
year = "2016", |
|
publisher = "Association for Computational Linguistics", |
|
url = "https://aclanthology.org/N16-1014", |
|
doi = "10.18653/v1/N16-1014", |
|
} |
|
""" |
|
|
|
|
|
_DESCRIPTION = """\ |
|
Distinct metric is to calculate corpus-level diversity of language. We provide two versions of distinct score. Expectation-Adjusted-Distinct (EAD) is the default one, which removes |
|
the biases of the original distinct score on lengthier sentences (see Figure below). Distinct is the original version. |
|
|
|
""" |
|
|
|
|
|
|
|
_KWARGS_DESCRIPTION = """ |
|
Calculates how good are predictions given some references, using certain scores |
|
Args: |
|
predictions: list of sentecnes. Each prediction should be a string. |
|
Returns: |
|
Expectation-Adjusted-Distinct |
|
Distinct-1 |
|
Distinct-2 |
|
Distinct-3 |
|
Examples: |
|
Examples should be written in doctest format, and should illustrate how |
|
to use the function. |
|
|
|
>>> my_new_module = evaluate.load("lsy641/distinct") |
|
>>> results = my_new_module.compute(references=["Hi.", "I'm sorry to hear that", "I don't know"], vocab_size=50257) |
|
>>> print(results) |
|
|
|
|
|
>>> dataset = ["This is my friend jack", "I'm sorry to hear that", "But you know I am the one who always support you", "Welcome to our family"] |
|
>>> results = my_new_module.compute(references=["Hi.", "I'm sorry to hear that", "I don't know"], dataForVocabCal = dataset) |
|
>>> print(results) |
|
|
|
|
|
>>> results = my_new_module.compute(references=["Hi.", "I'm sorry to hear that", "I don't know"], mode="Distinct") |
|
>>> print(results) |
|
|
|
""" |
|
|
|
|
|
# Placeholder URL for an external resource; no code visible in this file reads it.
BAD_WORDS_URL = "http://url/to/external/resource/bad_words.txt"
|
|
|
|
|
@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class distinct(evaluate.Measurement):
    """Corpus-level lexical diversity: Distinct-1/2/3 and Expectation-Adjusted-Distinct."""

    def _info(self):
        """Return the module metadata: documentation strings, input features and links."""
        return evaluate.MeasurementInfo(
            module_type="measurement",
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            # The only input is a flat list of sentences.
            features=datasets.Features({
                'predictions': datasets.Value('string')
            }),
            homepage="https://huggingface.co/spaces/lsy641/distinct",
            codebase_urls=["https://github.com/lsy641/Expectation-Adjusted-Distinct/tree/main"],
            reference_urls=["https://aclanthology.org/2022.acl-short.86/"]
        )

    def _download_and_prepare(self, dl_manager):
        """Optional: download external resources useful to compute the scores"""

    def _compute(self, predictions, dataForVocabCal=None, vocab_size=None, tokenizer=Tokenizer13a(), mode="Expectation-Adjusted-Distinct"):
        """Return Distinct-1/2/3 and, in the default mode, Expectation-Adjusted-Distinct.

        All predictions are pooled and scored as a single corpus.

        Args:
            predictions: iterable of sentences (strings).
            dataForVocabCal: optional non-empty list of strings. In the
                Expectation-Adjusted-Distinct mode the number of distinct tokens
                it contains is used as the vocabulary size, overriding
                ``vocab_size`` (a warning is emitted when both are given).
            vocab_size: vocabulary size for Expectation-Adjusted-Distinct when
                ``dataForVocabCal`` is not given.
            tokenizer: object exposing ``tokenize(sentence)``, or the string
                ``"white_space"`` to split sentences on whitespace. The default
                instance is created once, when the method is defined, and is
                shared by every call.
            mode: ``"Expectation-Adjusted-Distinct"`` (default) or ``"Distinct"``.

        Returns:
            dict with ``"Distinct-1"``, ``"Distinct-2"`` and ``"Distinct-3"``
            (distinct n-grams divided by total n-grams; 0.0 when there are no
            n-grams), plus ``"Expectation-Adjusted-Distinct"`` in the default mode.

        Raises:
            ValueError: if ``mode`` is unknown, if the default mode is used
                without ``vocab_size`` or ``dataForVocabCal``, or if the
                resulting vocabulary size is not positive.
            TypeError: if ``dataForVocabCal`` is not a non-empty list whose
                first element is a string.
        """
        import warnings

        from nltk.util import ngrams

        ead_mode = "Expectation-Adjusted-Distinct"
        if mode not in (ead_mode, "Distinct"):
            # Fail early instead of doing all the work and returning None.
            raise ValueError(
                "Unknown mode {!r}; expected 'Expectation-Adjusted-Distinct' or 'Distinct'.".format(mode)
            )

        if mode == ead_mode:
            if vocab_size is None and dataForVocabCal is None:
                raise ValueError("Either vocab_size or dataForVocabCal needs to be specified when using mode 'Expectation-Adjusted-Distinct'. See https://github.com/lsy641/Expectation-Adjusted-Distinct/blob/main/EAD.ipynb for vocab_size specification. \n Or use mode='Distinct' to get original version of distinct score.")
            if vocab_size is not None and dataForVocabCal is not None:
                # The message promises that computation continues, so warn
                # rather than raise.
                warnings.warn("We've detected that both vocab_size and dataForVocabCal are specified. We will use dataForVocabCal.")

        # Resolve the tokenizer to a single callable used everywhere below.
        if isinstance(tokenizer, str) and tokenizer == "white_space":
            tokenize = str.split
        else:
            tokenize = tokenizer.tokenize

        if mode == ead_mode and dataForVocabCal is not None:
            if not (isinstance(dataForVocabCal, list) and len(dataForVocabCal) > 0 and isinstance(dataForVocabCal[0], str)):
                raise TypeError("Argument dataForVocabCal should be a list of strings")
            vocab = set()
            for sentence in dataForVocabCal:
                vocab.update(tokenize(sentence))
            vocab_size = len(vocab)

        if mode == ead_mode and vocab_size <= 0:
            raise ValueError("The vocabulary size must be a positive number for mode 'Expectation-Adjusted-Distinct'.")

        distinct_1grams, distinct_2grams, distinct_3grams = set(), set(), set()
        total_1grams = total_2grams = total_3grams = 0

        for prediction in predictions:
            # Tokenize once and derive the higher-order n-grams from the same tokens.
            tokens = list(tokenize(prediction))
            tokens_2grams = list(ngrams(tokens, 2, pad_left=True, left_pad_symbol='<s>'))
            tokens_3grams = list(ngrams(tokens, 3, pad_left=True, left_pad_symbol='<s>'))

            distinct_1grams.update(tokens)
            distinct_2grams.update(tokens_2grams)
            distinct_3grams.update(tokens_3grams)
            total_1grams += len(tokens)
            total_2grams += len(tokens_2grams)
            total_3grams += len(tokens_3grams)

        def _ratio(n_distinct, n_total):
            # An empty corpus has no diversity to measure; avoid ZeroDivisionError.
            return n_distinct / n_total if n_total else 0.0

        scores = {
            "Distinct-1": _ratio(len(distinct_1grams), total_1grams),
            "Distinct-2": _ratio(len(distinct_2grams), total_2grams),
            "Distinct-3": _ratio(len(distinct_3grams), total_3grams),
        }

        if mode == "Distinct":
            return scores

        # Expected number of distinct tokens when drawing `total_1grams` tokens
        # uniformly from a vocabulary of `vocab_size` words.
        expected_distinct = vocab_size * (1 - ((vocab_size - 1) / vocab_size) ** total_1grams)
        result = {"Expectation-Adjusted-Distinct": _ratio(len(distinct_1grams), expected_distinct)}
        result.update(scores)
        return result