File size: 3,861 Bytes
ba2f5d6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
import os

import pytest
import pandas as pd
from toolz import pipe

from ..data import limit_rows, MaxRowsError, sample, to_values, to_json, to_csv


def _create_dataframe(N):
    data = pd.DataFrame({"x": range(N), "y": range(N)})
    return data


def _create_data_with_values(N):
    data = {"values": [{"x": i, "y": i + 1} for i in range(N)]}
    return data


def test_limit_rows():
    """Test the limit_rows data transformer."""
    data = _create_dataframe(10)
    result = limit_rows(data, max_rows=20)
    assert data is result
    with pytest.raises(MaxRowsError):
        pipe(data, limit_rows(max_rows=5))
    data = _create_data_with_values(10)
    result = pipe(data, limit_rows(max_rows=20))
    assert data is result
    with pytest.raises(MaxRowsError):
        limit_rows(data, max_rows=5)


def test_sample():
    """Test the sample data transformer."""
    data = _create_dataframe(20)
    result = pipe(data, sample(n=10))
    assert len(result) == 10
    assert isinstance(result, pd.DataFrame)
    data = _create_data_with_values(20)
    result = sample(data, n=10)
    assert isinstance(result, dict)
    assert "values" in result
    assert len(result["values"]) == 10
    data = _create_dataframe(20)
    result = pipe(data, sample(frac=0.5))
    assert len(result) == 10
    assert isinstance(result, pd.DataFrame)
    data = _create_data_with_values(20)
    result = sample(data, frac=0.5)
    assert isinstance(result, dict)
    assert "values" in result
    assert len(result["values"]) == 10


def test_to_values():
    """Test the to_values data transformer."""
    data = _create_dataframe(10)
    result = pipe(data, to_values)
    assert result == {"values": data.to_dict(orient="records")}


def test_type_error():
    """Ensure that TypeError is raised for types other than dict/DataFrame."""
    for f in (sample, limit_rows, to_values):
        with pytest.raises(TypeError):
            pipe(0, f)


def test_dataframe_to_json():
    """Test to_json
    - make certain the filename is deterministic
    - make certain the file contents match the data
    """
    data = _create_dataframe(10)
    try:
        result1 = pipe(data, to_json)
        result2 = pipe(data, to_json)
        filename = result1["url"]
        output = pd.read_json(filename)
    finally:
        os.remove(filename)

    assert result1 == result2
    assert output.equals(data)


def test_dict_to_json():
    """Test to_json
    - make certain the filename is deterministic
    - make certain the file contents match the data
    """
    data = _create_data_with_values(10)
    try:
        result1 = pipe(data, to_json)
        result2 = pipe(data, to_json)
        filename = result1["url"]
        output = pd.read_json(filename).to_dict(orient="records")
    finally:
        os.remove(filename)

    assert result1 == result2
    assert data == {"values": output}


def test_dataframe_to_csv():
    """Test to_csv with dataframe input
    - make certain the filename is deterministic
    - make certain the file contents match the data
    """
    data = _create_dataframe(10)
    try:
        result1 = pipe(data, to_csv)
        result2 = pipe(data, to_csv)
        filename = result1["url"]
        output = pd.read_csv(filename)
    finally:
        os.remove(filename)

    assert result1 == result2
    assert output.equals(data)


def test_dict_to_csv():
    """Test to_csv with dict input
    - make certain the filename is deterministic
    - make certain the file contents match the data
    """
    data = _create_data_with_values(10)
    try:
        result1 = pipe(data, to_csv)
        result2 = pipe(data, to_csv)
        filename = result1["url"]
        output = pd.read_csv(filename).to_dict(orient="records")
    finally:
        os.remove(filename)

    assert result1 == result2
    assert data == {"values": output}