Spaces:
Sleeping
Sleeping
import ast | |
import statistics | |
import sacrebleu | |
from rouge_score import rouge_scorer | |
def sentence_bleu(references, predictions):
    """Return the mean sentence-level BLEU score over aligned pairs.

    Args:
        references: One entry per prediction. An entry that is not already a
            list is wrapped in a single-element list before scoring.
        predictions: Hypothesis entries, paired with ``references`` by position.

    Returns:
        The arithmetic mean of the per-pair ``.score`` values.

    Raises:
        statistics.StatisticsError: If there are no pairs to score.

    Pairs are formed with ``zip``, so surplus items in the longer input are
    ignored.
    """
    # Same scoring options for every pair.
    bleu_options = {
        "smooth_method": "exp",
        "smooth_value": 0.0,
        "lowercase": True,
        "tokenize": "intl",
    }
    per_pair = [
        sacrebleu.sentence_bleu(
            hypothesis,
            ref if isinstance(ref, list) else [ref],
            **bleu_options,
        ).score
        for ref, hypothesis in zip(references, predictions)
    ]
    return statistics.mean(per_pair)
def corpus_bleu(references, predictions):
    """Return the corpus-level BLEU score for ``predictions``.

    Args:
        references: Passed to ``sacrebleu.corpus_bleu`` unchanged as its
            second argument.
        predictions: Passed to ``sacrebleu.corpus_bleu`` unchanged as its
            first argument.

    Returns:
        The ``.score`` attribute of the sacrebleu result.

    NOTE(review): unlike ``sentence_bleu`` in this file, no per-item wrapping
    of references is done here -- confirm callers already supply references in
    the layout sacrebleu's corpus API expects.
    """
    bleu_options = {
        "smooth_method": "exp",
        "smooth_value": 0.0,
        "lowercase": True,
        "tokenize": "intl",
    }
    result = sacrebleu.corpus_bleu(predictions, references, **bleu_options)
    return result.score
def rouge(scope, references, predictions):
    """Return average ROUGE precision/recall/F1 using the best reference.

    For each prediction, every candidate reference is scored and the one with
    the highest F-measure is kept; that single best score is added to the
    running totals exactly once per prediction.

    Args:
        scope: Suffix appended to ``"rouge"`` to select the metric name given
            to ``RougeScorer`` (e.g. ``"1"`` or ``"L"``).
        references: One entry per prediction; an entry that is not a list is
            treated as a single reference.
        predictions: Hypothesis texts, paired with ``references`` by position.

    Returns:
        A dict with keys ``"precision"``, ``"recall"`` and ``"f1"``, each the
        summed best-reference value divided by ``len(references)``. All three
        are ``0.0`` when ``references`` is empty.

    Pairs are formed with ``zip``, so surplus items in the longer input are
    ignored; the divisor is still ``len(references)``.
    """
    pair_count = len(references)
    if pair_count == 0:
        # Nothing to score; avoid dividing by zero below.
        return {"precision": 0.0, "recall": 0.0, "f1": 0.0}

    # Named ``metric`` so the local does not shadow this function's own name.
    metric = f"rouge{scope}"
    scorer = rouge_scorer.RougeScorer([metric], use_stemmer=True)

    totals = {"precision": 0.0, "recall": 0.0, "f1": 0.0}
    for refs, prediction in zip(references, predictions):
        if not isinstance(refs, list):
            refs = [refs]

        # Keep the full score of the best reference, not just its F-measure,
        # so precision and recall come from the same reference as the F1.
        best = None
        for ref in refs:
            candidate = scorer.score(ref, prediction)[metric]
            if best is None or candidate.fmeasure > best.fmeasure:
                best = candidate

        if best is None:
            # Empty reference list: this pair contributes zero.
            continue

        totals["precision"] += best.precision
        totals["recall"] += best.recall
        totals["f1"] += best.fmeasure

    return {key: total / pair_count for key, total in totals.items()}
def parse_string(string):
    """Parse a Python literal from ``string`` and return it as a list.

    The text is evaluated with ``ast.literal_eval`` (literals only, no
    arbitrary code) and the result is passed to ``list``.

    Args:
        string: Text containing a Python literal, e.g. ``"[1, 2, 3]"``.

    Returns:
        A list built from the parsed literal, or ``None`` if the text is not a
        valid literal or the literal cannot be converted to a list.
    """
    try:
        return list(ast.literal_eval(string))
    # TypeError covers literals that parse but are not iterable (e.g. "42"),
    # which would otherwise escape the "return None on failure" contract.
    except (ValueError, SyntaxError, TypeError) as e:
        print(f"Error parsing string: {e}")
        return None