Spaces:
Running
Running
import os | |
import pytest | |
import pandas as pd | |
from toolz import pipe | |
from ..data import limit_rows, MaxRowsError, sample, to_values, to_json, to_csv | |
def _create_dataframe(N): | |
data = pd.DataFrame({"x": range(N), "y": range(N)}) | |
return data | |
def _create_data_with_values(N): | |
data = {"values": [{"x": i, "y": i + 1} for i in range(N)]} | |
return data | |
def test_limit_rows():
    """Check that limit_rows returns small data unchanged and rejects large data."""
    # DataFrame input: direct call below the limit, piped call above it.
    frame = _create_dataframe(10)
    passed_through = limit_rows(frame, max_rows=20)
    assert frame is passed_through
    with pytest.raises(MaxRowsError):
        pipe(frame, limit_rows(max_rows=5))

    # Dict-of-values input: piped call below the limit, direct call above it.
    values = _create_data_with_values(10)
    passed_through = pipe(values, limit_rows(max_rows=20))
    assert values is passed_through
    with pytest.raises(MaxRowsError):
        limit_rows(values, max_rows=5)
def test_sample():
    """Check that sample keeps 10 of 20 rows for both n=10 and frac=0.5."""
    # Each keyword set is exercised on a DataFrame (through pipe) and then on
    # a dict of values (direct call); the container type must be preserved.
    for kwargs in ({"n": 10}, {"frac": 0.5}):
        frame = _create_dataframe(20)
        sampled_frame = pipe(frame, sample(**kwargs))
        assert len(sampled_frame) == 10
        assert isinstance(sampled_frame, pd.DataFrame)

        values = _create_data_with_values(20)
        sampled_values = sample(values, **kwargs)
        assert isinstance(sampled_values, dict)
        assert "values" in sampled_values
        assert len(sampled_values["values"]) == 10
def test_to_values():
    """Check that to_values wraps a DataFrame's records under a "values" key."""
    frame = _create_dataframe(10)
    actual = pipe(frame, to_values)
    expected = {"values": frame.to_dict(orient="records")}
    assert actual == expected
def test_type_error():
    """Check that each transformer raises TypeError when piped the integer 0."""
    transformers = (sample, limit_rows, to_values)
    for transformer in transformers:
        with pytest.raises(TypeError):
            pipe(0, transformer)
def test_dataframe_to_json():
    """Test to_json with dataframe input
    - make certain the filename is deterministic
    - make certain the file contents match the data
    """
    data = _create_dataframe(10)
    # Bound before the try block so the cleanup below never hits an unbound
    # name (which would mask the real failure) if to_json itself raises.
    filename = None
    try:
        result1 = pipe(data, to_json)
        # Record the path right away so the file is removed even if a later
        # step in this block fails.
        filename = result1["url"]
        result2 = pipe(data, to_json)
        output = pd.read_json(filename)
    finally:
        # Only remove a file that was actually written; a missing file must
        # not replace the original error with FileNotFoundError.
        if filename is not None and os.path.exists(filename):
            os.remove(filename)
    assert result1 == result2
    assert output.equals(data)
def test_dict_to_json():
    """Test to_json with dict input
    - make certain the filename is deterministic
    - make certain the file contents match the data
    """
    data = _create_data_with_values(10)
    # Bound before the try block so the cleanup below never hits an unbound
    # name (which would mask the real failure) if to_json itself raises.
    filename = None
    try:
        result1 = pipe(data, to_json)
        # Record the path right away so the file is removed even if a later
        # step in this block fails.
        filename = result1["url"]
        result2 = pipe(data, to_json)
        output = pd.read_json(filename).to_dict(orient="records")
    finally:
        # Only remove a file that was actually written; a missing file must
        # not replace the original error with FileNotFoundError.
        if filename is not None and os.path.exists(filename):
            os.remove(filename)
    assert result1 == result2
    assert data == {"values": output}
def test_dataframe_to_csv():
    """Test to_csv with dataframe input
    - make certain the filename is deterministic
    - make certain the file contents match the data
    """
    data = _create_dataframe(10)
    # Bound before the try block so the cleanup below never hits an unbound
    # name (which would mask the real failure) if to_csv itself raises.
    filename = None
    try:
        result1 = pipe(data, to_csv)
        # Record the path right away so the file is removed even if a later
        # step in this block fails.
        filename = result1["url"]
        result2 = pipe(data, to_csv)
        output = pd.read_csv(filename)
    finally:
        # Only remove a file that was actually written; a missing file must
        # not replace the original error with FileNotFoundError.
        if filename is not None and os.path.exists(filename):
            os.remove(filename)
    assert result1 == result2
    assert output.equals(data)
def test_dict_to_csv():
    """Test to_csv with dict input
    - make certain the filename is deterministic
    - make certain the file contents match the data
    """
    data = _create_data_with_values(10)
    # Bound before the try block so the cleanup below never hits an unbound
    # name (which would mask the real failure) if to_csv itself raises.
    filename = None
    try:
        result1 = pipe(data, to_csv)
        # Record the path right away so the file is removed even if a later
        # step in this block fails.
        filename = result1["url"]
        result2 = pipe(data, to_csv)
        output = pd.read_csv(filename).to_dict(orient="records")
    finally:
        # Only remove a file that was actually written; a missing file must
        # not replace the original error with FileNotFoundError.
        if filename is not None and os.path.exists(filename):
            os.remove(filename)
    assert result1 == result2
    assert data == {"values": output}