File size: 1,719 Bytes
eca6215
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
from src.dataset import formatting_prompts_func

def test_formatting_prompts_func():
    """Check ``formatting_prompts_func`` on a single row, empty strings, and a batch.

    Every scenario hands the function a dict of parallel lists keyed by
    "instruction", "input" and "output", plus a three-slot template and an
    EOS marker, and then inspects the list found under the "text" key of
    the returned mapping.
    """
    # Shared fixtures used by all three scenarios below.
    template = "Instruction: {}\nInput: {}\nOutput: {}"
    eos_token = "<EOS>"

    # --- Scenario 1: a single, ordinary row ---
    single_row = {
        "instruction": ["Test instruction"],
        "input": ["Test input"],
        "output": ["Test output"],
    }
    single_out = formatting_prompts_func(single_row, template, eos_token)

    # The result must expose a 'text' entry ...
    assert "text" in single_out
    single_texts = single_out["text"]

    # ... holding exactly one formatted string ...
    assert len(single_texts) == 1

    # ... equal to the filled-in template followed by the EOS marker.
    assert single_texts[0] == (
        "Instruction: Test instruction\nInput: Test input\nOutput: Test output<EOS>"
    )

    # --- Scenario 2: every field is the empty string (edge case) ---
    blank_row = {
        "instruction": [""],
        "input": [""],
        "output": [""],
    }
    blank_out = formatting_prompts_func(blank_row, template, eos_token)
    assert blank_out["text"][0] == "Instruction: \nInput: \nOutput: <EOS>"

    # --- Scenario 3: two rows, expected back in the same order ---
    two_rows = {
        "instruction": ["Test instruction 1", "Test instruction 2"],
        "input": ["Test input 1", "Test input 2"],
        "output": ["Test output 1", "Test output 2"],
    }
    batch_texts = formatting_prompts_func(two_rows, template, eos_token)["text"]
    assert len(batch_texts) == 2
    assert batch_texts[0] == (
        "Instruction: Test instruction 1\nInput: Test input 1\nOutput: Test output 1<EOS>"
    )
    assert batch_texts[1] == (
        "Instruction: Test instruction 2\nInput: Test input 2\nOutput: Test output 2<EOS>"
    )