Spaces:
Runtime error
Runtime error
File size: 1,890 Bytes
e331e72 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License
import unittest
import pandas as pd
import pytest
from graphrag.index.verbs.text.split import text_split_df
class TestTextSplit(unittest.TestCase):
    """Exercise ``text_split_df`` using "," as the separator.

    Each case builds a small DataFrame with an "in" column, asks
    ``text_split_df`` to write its result to an "out" column, and inspects
    the rows through ``to_dict(orient="records")``.
    """

    def test_empty_string(self):
        """An empty string is expected to produce an empty list."""
        frame = pd.DataFrame([{"in": ""}])
        result = text_split_df(frame, "in", "out", ",").to_dict(orient="records")

        assert len(result) == 1
        assert result[0]["out"] == []

    def test_string_without_seperator(self):
        """Text without the separator is expected to yield a one-item list."""
        frame = pd.DataFrame([{"in": "test_string_without_seperator"}])
        result = text_split_df(frame, "in", "out", ",").to_dict(orient="records")

        assert len(result) == 1
        assert result[0]["out"] == ["test_string_without_seperator"]

    def test_string_with_seperator(self):
        """Text containing the separator is expected to be split into parts."""
        frame = pd.DataFrame([{"in": "test_1,test_2"}])
        result = text_split_df(frame, "in", "out", ",").to_dict(orient="records")

        assert len(result) == 1
        assert result[0]["out"] == ["test_1", "test_2"]

    def test_row_with_list_as_column(self):
        """A cell that already holds a list is expected to keep the same items."""
        frame = pd.DataFrame([{"in": ["test_1", "test_2"]}])
        result = text_split_df(frame, "in", "out", ",").to_dict(orient="records")

        assert len(result) == 1
        assert result[0]["out"] == ["test_1", "test_2"]

    def test_non_string_column_throws_error(self):
        """An integer cell is expected to raise ``TypeError``."""
        frame = pd.DataFrame([{"in": 5}])

        with pytest.raises(TypeError):
            text_split_df(frame, "in", "out", ",").to_dict(orient="records")

    def test_more_than_one_row_returns_correctly(self):
        """Each row is expected to be split independently, preserving row order."""
        frame = pd.DataFrame([{"in": "row_1_1,row_1_2"}, {"in": "row_2_1,row_2_2"}])
        result = text_split_df(frame, "in", "out", ",").to_dict(orient="records")

        assert len(result) == 2
        assert result[0]["out"] == ["row_1_1", "row_1_2"]
        assert result[1]["out"] == ["row_2_1", "row_2_2"]
|