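"""Tests for sacrebleu_score from llm_studio's text causal language modeling metrics.

Each test builds a minimal ``results`` dict of predicted/target text pairs and
checks the per-sample scores returned as a numpy array, on the 0-100 BLEU
scale used by sacrebleu.
"""
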
from unittest.mock import MagicMock

import numpy as np
import pandas as pd
import pytest

from llm_studio.src.metrics.text_causal_language_modeling_metrics import sacrebleu_score


@pytest.fixture
def mock_val_df():
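    """Empty stand-in for the validation DataFrame; these tests do not rely on its contents."""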
    return pd.DataFrame()


def test_sacrebleu_score_perfect_match(mock_val_df):
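    """Identical predictions and targets score 100 for every sample."""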
    cfg = MagicMock()
    results = {
        "predicted_text": ["Hello world", "Python is great"],
        "target_text": ["Hello world", "Python is great"],
    }

    scores = sacrebleu_score(cfg, results, mock_val_df)

    assert np.allclose(scores, np.array([100.0, 100.0]))


def test_sacrebleu_score_partial_match(mock_val_df):
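    """Partially overlapping pairs score between 0 and 100.

    The expected values are regression targets for these specific sentence
    pairs under the underlying sacrebleu implementation.
    """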
    cfg = MagicMock()
    results = {
        "predicted_text": ["Hello universe", "Python is awesome"],
        "target_text": ["Hello world", "Python is great"],
    }

    scores = sacrebleu_score(cfg, results, mock_val_df)

    assert np.allclose(scores, np.array([50.0, 55.03212081]))


def test_sacrebleu_score_no_match(mock_val_df):
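    """Predictions sharing no n-grams with their targets score 0."""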
    cfg = MagicMock()
    results = {
        "predicted_text": ["Goodbye universe", "What a day"],
        "target_text": ["Hello world", "Python is great"],
    }

    scores = sacrebleu_score(cfg, results, mock_val_df)

    assert np.allclose(scores, np.array([0.0, 0.0]))


def test_sacrebleu_score_all_empty_target(mock_val_df):
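    """Empty target strings yield a score of 0 instead of raising an error."""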
    cfg = MagicMock()
    results = {
        "predicted_text": ["Hello world", "Python is great"],
        "target_text": ["", ""],
    }

    scores = sacrebleu_score(cfg, results, mock_val_df)

    assert np.allclose(scores, np.array([0.0, 0.0]))


def test_sacrebleu_score_one_empty_target(mock_val_df):
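    """Samples are scored independently: the empty target scores 0, the exact match 100."""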
    cfg = MagicMock()
    results = {
        "predicted_text": ["Hello world", "Python is great"],
        "target_text": ["", "Python is great"],
    }

    scores = sacrebleu_score(cfg, results, mock_val_df)

    assert np.allclose(scores, np.array([0.0, 100.0]))


def test_sacrebleu_score_invalid_input_empty(mock_val_df):
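    """Empty prediction/target lists are rejected with a ValueError."""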
    cfg = MagicMock()
    results = {"predicted_text": [], "target_text": []}

    with pytest.raises(ValueError):
        sacrebleu_score(cfg, results, mock_val_df)


def test_sacrebleu_score_invalid_input_different_lengths(mock_val_df):
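    """Mismatched prediction/target list lengths are rejected with a ValueError."""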
    cfg = MagicMock()
    results = {
        "predicted_text": ["Hello world", "Python", "is", "great"],
        "target_text": ["Hello universe", "Python is awesome"],
    }

    with pytest.raises(ValueError):
        sacrebleu_score(cfg, results, mock_val_df)