File size: 1,890 Bytes
e331e72
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
import unittest

import pandas as pd
import pytest

from graphrag.index.verbs.text.split import text_split_df


class TestTextSplit(unittest.TestCase):
    """Tests for ``text_split_df`` called with ``","`` as the separator.

    Each test builds a small DataFrame whose only column is ``in``, asks
    ``text_split_df`` to write its result to an ``out`` column, and checks
    the resulting row records.
    """

    @staticmethod
    def _split_records(rows):
        """Run ``text_split_df`` over *rows* and return the rows as records.

        Args:
            rows: Row dicts, each holding the value under test in ``"in"``.

        Returns:
            ``to_dict(orient="records")`` of the frame that
            ``text_split_df`` returns.
        """
        # Named ``frame`` rather than ``input`` so the builtin is not shadowed.
        frame = pd.DataFrame(rows)
        return text_split_df(frame, "in", "out", ",").to_dict(orient="records")

    def test_empty_string(self):
        """An empty string produces an empty list in the output column."""
        records = self._split_records([{"in": ""}])

        assert len(records) == 1
        assert records[0]["out"] == []

    def test_string_without_seperator(self):
        """A string with no separator produces a one-element list."""
        records = self._split_records([{"in": "test_string_without_seperator"}])

        assert len(records) == 1
        assert records[0]["out"] == ["test_string_without_seperator"]

    def test_string_with_seperator(self):
        """A string containing the separator is split into its parts."""
        records = self._split_records([{"in": "test_1,test_2"}])

        assert len(records) == 1
        assert records[0]["out"] == ["test_1", "test_2"]

    def test_row_with_list_as_column(self):
        """A cell that already holds a list yields that list as output."""
        records = self._split_records([{"in": ["test_1", "test_2"]}])

        assert len(records) == 1
        assert records[0]["out"] == ["test_1", "test_2"]

    def test_non_string_column_throws_error(self):
        """A cell that is neither a string nor a list raises ``TypeError``."""
        with pytest.raises(TypeError):
            self._split_records([{"in": 5}])

    def test_more_than_one_row_returns_correctly(self):
        """Each row is split independently and row order is preserved."""
        records = self._split_records(
            [{"in": "row_1_1,row_1_2"}, {"in": "row_2_1,row_2_2"}]
        )

        assert len(records) == 2
        assert records[0]["out"] == ["row_1_1", "row_1_2"]
        assert records[1]["out"] == ["row_2_1", "row_2_2"]