Sam Chaudry

Upload folder using huggingface_hub

7885a28 verified about 1 month ago

17.4 kB

	from copy import (
	copy,
	deepcopy,
	)

	import numpy as np
	import pytest

	from pandas.core.dtypes.common import is_scalar

	from pandas import (
	DataFrame,
	Index,
	Series,
	date_range,
	)
	import pandas._testing as tm

	# ----------------------------------------------------------------------
	# Generic types test cases


	def construct(box, shape, value=None, dtype=None, **kwargs):
	    """
	    Build a ``box`` instance (e.g. Series or DataFrame) with the given shape.

	    Parameters
	    ----------
	    box : type
	        Constructor that is called as ``box(arr, dtype=dtype, **kwargs)``.
	        ``box._AXIS_LEN`` is read when ``shape`` is an int and
	        ``box._info_axis_name`` is read when ``value == "empty"``.
	    shape : int or sequence of int
	        Target shape. An int is expanded to that length on every axis.
	    value : scalar or numpy.ndarray, optional
	        * ``None`` (default): data are standard-normal draws from
	          ``np.random.default_rng(2)``.
	        * the scalar ``"empty"``: no data is passed, ``dtype`` is forced to
	          ``np.float64`` and the info-axis keyword is dropped from ``kwargs``.
	        * any other scalar: an array of ``shape`` filled with that scalar.
	        * an array: it is flattened and every element is repeated the same
	          number of times so that the result can be reshaped to ``shape``.
	    dtype : optional
	        Passed to ``np.empty`` for scalar fills and to ``box``.
	    **kwargs
	        Forwarded to ``box`` (typically axis labels).

	    Returns
	    -------
	    object
	        Whatever ``box(...)`` returns.

	    Raises
	    ------
	    ValueError
	        If ``value`` is an array that is empty or whose size does not evenly
	        divide the number of elements implied by ``shape``.
	    """
	    if isinstance(shape, int):
	        shape = (shape,) * box._AXIS_LEN

	    if value is None:
	        arr = np.random.default_rng(2).standard_normal(shape)
	    elif not is_scalar(value):
	        flat = value.ravel()
	        total = int(np.prod(shape))
	        # Validate before dividing so the repeat count is always an exact int.
	        if flat.size == 0 or total % flat.size != 0:
	            raise ValueError("invalid value passed in construct")
	        arr = np.repeat(flat, total // flat.size).reshape(shape)
	    elif value == "empty":
	        arr = None
	        dtype = np.float64

	        # remove the info axis
	        kwargs.pop(box._info_axis_name, None)
	    else:
	        arr = np.empty(shape, dtype=dtype)
	        arr.fill(value)
	    return box(arr, dtype=dtype, **kwargs)


	class TestGeneric:
	    """Tests that run against both Series and DataFrame via ``frame_or_series``."""

	    @pytest.mark.parametrize(
	        "func",
	        [
	            str.lower,
	            {x: x.lower() for x in list("ABCD")},
	            Series({x: x.lower() for x in list("ABCD")}),
	        ],
	    )
	    def test_rename(self, frame_or_series, func):
	        """Renaming one axis with a callable, dict or Series lower-cases its labels."""
	        # single axis
	        idx = list("ABCD")

	        for axis in frame_or_series._AXIS_ORDERS:
	            kwargs = {axis: idx}
	            obj = construct(frame_or_series, 4, **kwargs)

	            # rename a single axis
	            result = obj.rename(**{axis: func})
	            expected = obj.copy()
	            setattr(expected, axis, list("abcd"))
	            tm.assert_equal(result, expected)

	    def test_get_numeric_data(self, frame_or_series):
	        """Check ``_get_numeric_data`` / ``_get_bool_data`` on float and bool data."""
	        n = 4
	        kwargs = {
	            frame_or_series._get_axis_name(i): list(range(n))
	            for i in range(frame_or_series._AXIS_LEN)
	        }

	        # get the numeric data
	        o = construct(frame_or_series, n, **kwargs)
	        result = o._get_numeric_data()
	        tm.assert_equal(result, o)

	        # non-inclusion
	        result = o._get_bool_data()
	        expected = construct(frame_or_series, n, value="empty", **kwargs)
	        if isinstance(o, DataFrame):
	            # preserve columns dtype
	            expected.columns = o.columns[:0]
	        # https://github.com/pandas-dev/pandas/issues/50862
	        tm.assert_equal(result.reset_index(drop=True), expected)

	        # get the bool data
	        arr = np.array([True, True, False, True])
	        o = construct(frame_or_series, n, value=arr, **kwargs)
	        result = o._get_numeric_data()
	        tm.assert_equal(result, o)

	    def test_nonzero(self, frame_or_series):
	        """Truth-testing an object (or a comparison result) raises ValueError."""
	        # GH 4633
	        # look at the boolean/nonzero behavior for objects
	        obj = construct(frame_or_series, shape=4)
	        msg = f"The truth value of a {frame_or_series.__name__} is ambiguous"
	        with pytest.raises(ValueError, match=msg):
	            bool(obj == 0)
	        with pytest.raises(ValueError, match=msg):
	            bool(obj == 1)
	        with pytest.raises(ValueError, match=msg):
	            bool(obj)

	        obj = construct(frame_or_series, shape=4, value=1)
	        with pytest.raises(ValueError, match=msg):
	            bool(obj == 0)
	        with pytest.raises(ValueError, match=msg):
	            bool(obj == 1)
	        with pytest.raises(ValueError, match=msg):
	            bool(obj)

	        obj = construct(frame_or_series, shape=4, value=np.nan)
	        with pytest.raises(ValueError, match=msg):
	            bool(obj == 0)
	        with pytest.raises(ValueError, match=msg):
	            bool(obj == 1)
	        with pytest.raises(ValueError, match=msg):
	            bool(obj)

	        # empty
	        obj = construct(frame_or_series, shape=0)
	        with pytest.raises(ValueError, match=msg):
	            bool(obj)

	        # invalid behaviors

	        obj1 = construct(frame_or_series, shape=4, value=1)
	        obj2 = construct(frame_or_series, shape=4, value=1)

	        with pytest.raises(ValueError, match=msg):
	            if obj1:
	                pass

	        with pytest.raises(ValueError, match=msg):
	            obj1 and obj2
	        with pytest.raises(ValueError, match=msg):
	            obj1 or obj2
	        with pytest.raises(ValueError, match=msg):
	            not obj1

	    def test_frame_or_series_compound_dtypes(self, frame_or_series):
	        """A compound dtype raises NotImplementedError; simple dtypes construct."""
	        # see gh-5191
	        # Compound dtypes should raise NotImplementedError.

	        def f(dtype):
	            return construct(frame_or_series, shape=3, value=1, dtype=dtype)

	        msg = (
	            "compound dtypes are not implemented "
	            f"in the {frame_or_series.__name__} constructor"
	        )

	        with pytest.raises(NotImplementedError, match=msg):
	            f([("A", "datetime64[h]"), ("B", "str"), ("C", "int32")])

	        # these work (though results may be unexpected)
	        f("int64")
	        f("float64")
	        f("M8[ns]")

	    def test_metadata_propagation(self, frame_or_series):
	        """Check which operations keep metadata equivalent to the left operand."""
	        # check that the metadata matches up on the resulting ops

	        o = construct(frame_or_series, shape=3)
	        o.name = "foo"
	        o2 = construct(frame_or_series, shape=3)
	        o2.name = "bar"

	        # ----------
	        # preserving
	        # ----------

	        # simple ops with scalars
	        for op in ["__add__", "__sub__", "__truediv__", "__mul__"]:
	            result = getattr(o, op)(1)
	            tm.assert_metadata_equivalent(o, result)

	        # ops with like
	        for op in ["__add__", "__sub__", "__truediv__", "__mul__"]:
	            result = getattr(o, op)(o)
	            tm.assert_metadata_equivalent(o, result)

	        # simple boolean
	        for op in ["__eq__", "__le__", "__ge__"]:
	            v1 = getattr(o, op)(o)
	            tm.assert_metadata_equivalent(o, v1)
	            tm.assert_metadata_equivalent(o, v1 & v1)
	            tm.assert_metadata_equivalent(o, v1 | v1)

	        # combine_first
	        result = o.combine_first(o2)
	        tm.assert_metadata_equivalent(o, result)

	        # ---------------------------
	        # non-preserving (by default)
	        # ---------------------------

	        # add non-like
	        result = o + o2
	        tm.assert_metadata_equivalent(result)

	        # simple boolean
	        for op in ["__eq__", "__le__", "__ge__"]:
	            # this is a name matching op
	            v1 = getattr(o, op)(o)
	            v2 = getattr(o, op)(o2)
	            tm.assert_metadata_equivalent(v2)
	            tm.assert_metadata_equivalent(v1 & v2)
	            tm.assert_metadata_equivalent(v1 | v2)

	    def test_size_compat(self, frame_or_series):
	        """``size`` equals the product of ``shape``."""
	        # GH8846
	        # size property should be defined

	        o = construct(frame_or_series, shape=10)
	        assert o.size == np.prod(o.shape)
	        assert o.size == 10 ** len(o.axes)

	    def test_split_compat(self, frame_or_series):
	        """``np.array_split`` yields the requested number of pieces (with a warning)."""
	        # xref GH8846
	        o = construct(frame_or_series, shape=10)
	        with tm.assert_produces_warning(
	            FutureWarning, match=".swapaxes' is deprecated", check_stacklevel=False
	        ):
	            assert len(np.array_split(o, 5)) == 5
	            assert len(np.array_split(o, 2)) == 2

	    # See gh-12301
	    def test_stat_unexpected_keyword(self, frame_or_series):
	        """Reductions reject an unknown keyword argument with TypeError."""
	        obj = construct(frame_or_series, 5)
	        starwars = "Star Wars"
	        errmsg = "unexpected keyword"

	        with pytest.raises(TypeError, match=errmsg):
	            obj.max(epic=starwars)  # stat_function
	        with pytest.raises(TypeError, match=errmsg):
	            obj.var(epic=starwars)  # stat_function_ddof
	        with pytest.raises(TypeError, match=errmsg):
	            obj.sum(epic=starwars)  # cum_function
	        with pytest.raises(TypeError, match=errmsg):
	            obj.any(epic=starwars)  # logical_function

	    @pytest.mark.parametrize("func", ["sum", "cumsum", "any", "var"])
	    def test_api_compat(self, func, frame_or_series):
	        """Bound reduction methods expose the expected ``__name__``/``__qualname__``."""
	        # GH 12021
	        # compat for __name__, __qualname__

	        obj = construct(frame_or_series, 5)
	        f = getattr(obj, func)
	        assert f.__name__ == func
	        assert f.__qualname__.endswith(func)

	    def test_stat_non_defaults_args(self, frame_or_series):
	        """Reductions reject the numpy-style ``out`` argument with ValueError."""
	        obj = construct(frame_or_series, 5)
	        out = np.array([0])
	        errmsg = "the 'out' parameter is not supported"

	        with pytest.raises(ValueError, match=errmsg):
	            obj.max(out=out)  # stat_function
	        with pytest.raises(ValueError, match=errmsg):
	            obj.var(out=out)  # stat_function_ddof
	        with pytest.raises(ValueError, match=errmsg):
	            obj.sum(out=out)  # cum_function
	        with pytest.raises(ValueError, match=errmsg):
	            obj.any(out=out)  # logical_function

	    def test_truncate_out_of_bounds(self, frame_or_series):
	        """``truncate`` with bounds enclosing the whole index returns an equal object."""
	        # GH11382

	        # small
	        shape = [2000] + ([1] * (frame_or_series._AXIS_LEN - 1))
	        small = construct(frame_or_series, shape, dtype="int8", value=1)
	        tm.assert_equal(small.truncate(), small)
	        tm.assert_equal(small.truncate(before=0, after=3e3), small)
	        tm.assert_equal(small.truncate(before=-1, after=2e3), small)

	        # big
	        shape = [2_000_000] + ([1] * (frame_or_series._AXIS_LEN - 1))
	        big = construct(frame_or_series, shape, dtype="int8", value=1)
	        tm.assert_equal(big.truncate(), big)
	        tm.assert_equal(big.truncate(before=0, after=3e6), big)
	        tm.assert_equal(big.truncate(before=-1, after=2e6), big)

	    @pytest.mark.parametrize(
	        "func",
	        [copy, deepcopy, lambda x: x.copy(deep=False), lambda x: x.copy(deep=True)],
	    )
	    @pytest.mark.parametrize("shape", [0, 1, 2])
	    def test_copy_and_deepcopy(self, frame_or_series, shape, func):
	        """Every copy flavour returns a distinct but equal object."""
	        # GH 15444
	        obj = construct(frame_or_series, shape)
	        obj_copy = func(obj)
	        assert obj_copy is not obj
	        tm.assert_equal(obj_copy, obj)

	    def test_data_deprecated(self, frame_or_series):
	        """Accessing ``_data`` warns and returns the same object as ``_mgr``."""
	        obj = frame_or_series()
	        # Regex alternation: the warning names either Series or DataFrame.
	        msg = "(Series|DataFrame)._data is deprecated"
	        with tm.assert_produces_warning(DeprecationWarning, match=msg):
	            mgr = obj._data
	        assert mgr is obj._mgr


	class TestNDFrame:
	# tests that don't fit elsewhere

	@pytest.mark.parametrize(
	"ser",
	[
	Series(range(10), dtype=np.float64),
	Series([str(i) for i in range(10)], dtype=object),
	],
	)
	def test_squeeze_series_noop(self, ser):
	# noop
	tm.assert_series_equal(ser.squeeze(), ser)

	def test_squeeze_frame_noop(self):
	# noop
	df = DataFrame(np.eye(2))
	tm.assert_frame_equal(df.squeeze(), df)

	def test_squeeze_frame_reindex(self):
	# squeezing
	df = DataFrame(
	np.random.default_rng(2).standard_normal((10, 4)),
	columns=Index(list("ABCD"), dtype=object),
	index=date_range("2000-01-01", periods=10, freq="B"),
	).reindex(columns=["A"])
	tm.assert_series_equal(df.squeeze(), df["A"])

	def test_squeeze_0_len_dim(self):
	# don't fail with 0 length dimensions GH11229 & GH8999
	empty_series = Series([], name="five", dtype=np.float64)
	empty_frame = DataFrame([empty_series])
	tm.assert_series_equal(empty_series, empty_series.squeeze())
	tm.assert_series_equal(empty_series, empty_frame.squeeze())

	def test_squeeze_axis(self):
	# axis argument
	df = DataFrame(
	np.random.default_rng(2).standard_normal((1, 4)),
	columns=Index(list("ABCD"), dtype=object),
	index=date_range("2000-01-01", periods=1, freq="B"),
	).iloc[:, :1]
	assert df.shape == (1, 1)
	tm.assert_series_equal(df.squeeze(axis=0), df.iloc[0])
	tm.assert_series_equal(df.squeeze(axis="index"), df.iloc[0])
	tm.assert_series_equal(df.squeeze(axis=1), df.iloc[:, 0])
	tm.assert_series_equal(df.squeeze(axis="columns"), df.iloc[:, 0])
	assert df.squeeze() == df.iloc[0, 0]
	msg = "No axis named 2 for object type DataFrame"
	with pytest.raises(ValueError, match=msg):
	df.squeeze(axis=2)
	msg = "No axis named x for object type DataFrame"
	with pytest.raises(ValueError, match=msg):
	df.squeeze(axis="x")

	def test_squeeze_axis_len_3(self):
	df = DataFrame(
	np.random.default_rng(2).standard_normal((3, 4)),
	columns=Index(list("ABCD"), dtype=object),
	index=date_range("2000-01-01", periods=3, freq="B"),
	)
	tm.assert_frame_equal(df.squeeze(axis=0), df)

	def test_numpy_squeeze(self):
	s = Series(range(2), dtype=np.float64)
	tm.assert_series_equal(np.squeeze(s), s)

	df = DataFrame(
	np.random.default_rng(2).standard_normal((10, 4)),
	columns=Index(list("ABCD"), dtype=object),
	index=date_range("2000-01-01", periods=10, freq="B"),
	).reindex(columns=["A"])
	tm.assert_series_equal(np.squeeze(df), df["A"])

	@pytest.mark.parametrize(
	"ser",
	[
	Series(range(10), dtype=np.float64),
	Series([str(i) for i in range(10)], dtype=object),
	],
	)
	def test_transpose_series(self, ser):
	# calls implementation in pandas/core/base.py
	tm.assert_series_equal(ser.transpose(), ser)

	def test_transpose_frame(self):
	df = DataFrame(
	np.random.default_rng(2).standard_normal((10, 4)),
	columns=Index(list("ABCD"), dtype=object),
	index=date_range("2000-01-01", periods=10, freq="B"),
	)
	tm.assert_frame_equal(df.transpose().transpose(), df)

	def test_numpy_transpose(self, frame_or_series):
	obj = DataFrame(
	np.random.default_rng(2).standard_normal((10, 4)),
	columns=Index(list("ABCD"), dtype=object),
	index=date_range("2000-01-01", periods=10, freq="B"),
	)
	obj = tm.get_obj(obj, frame_or_series)

	if frame_or_series is Series:
	# 1D -> np.transpose is no-op
	tm.assert_series_equal(np.transpose(obj), obj)

	# round-trip preserved
	tm.assert_equal(np.transpose(np.transpose(obj)), obj)

	msg = "the 'axes' parameter is not supported"
	with pytest.raises(ValueError, match=msg):
	np.transpose(obj, axes=1)

	@pytest.mark.parametrize(
	"ser",
	[
	Series(range(10), dtype=np.float64),
	Series([str(i) for i in range(10)], dtype=object),
	],
	)
	def test_take_series(self, ser):
	indices = [1, 5, -2, 6, 3, -1]
	out = ser.take(indices)
	expected = Series(
	data=ser.values.take(indices),
	index=ser.index.take(indices),
	dtype=ser.dtype,
	)
	tm.assert_series_equal(out, expected)

	def test_take_frame(self):
	indices = [1, 5, -2, 6, 3, -1]
	df = DataFrame(
	np.random.default_rng(2).standard_normal((10, 4)),
	columns=Index(list("ABCD"), dtype=object),
	index=date_range("2000-01-01", periods=10, freq="B"),
	)
	out = df.take(indices)
	expected = DataFrame(
	data=df.values.take(indices, axis=0),
	index=df.index.take(indices),
	columns=df.columns,
	)
	tm.assert_frame_equal(out, expected)

	def test_take_invalid_kwargs(self, frame_or_series):
	indices = [-3, 2, 0, 1]

	obj = DataFrame(range(5))
	obj = tm.get_obj(obj, frame_or_series)

	msg = r"take\(\) got an unexpected keyword argument 'foo'"
	with pytest.raises(TypeError, match=msg):
	obj.take(indices, foo=2)

	msg = "the 'out' parameter is not supported"
	with pytest.raises(ValueError, match=msg):
	obj.take(indices, out=indices)

	msg = "the 'mode' parameter is not supported"
	with pytest.raises(ValueError, match=msg):
	obj.take(indices, mode="clip")

	def test_axis_classmethods(self, frame_or_series):
	box = frame_or_series
	obj = box(dtype=object)
	values = box._AXIS_TO_AXIS_NUMBER.keys()
	for v in values:
	assert obj._get_axis_number(v) == box._get_axis_number(v)
	assert obj._get_axis_name(v) == box._get_axis_name(v)
	assert obj._get_block_manager_axis(v) == box._get_block_manager_axis(v)

	def test_flags_identity(self, frame_or_series):
	obj = Series([1, 2])
	if frame_or_series is DataFrame:
	obj = obj.to_frame()

	assert obj.flags is obj.flags
	obj2 = obj.copy()
	assert obj2.flags is not obj.flags

	def test_bool_dep(self) -> None:
	# GH-51749
	msg_warn = (
	"DataFrame.bool is now deprecated and will be removed "
	"in future version of pandas"
	)
	with tm.assert_produces_warning(FutureWarning, match=msg_warn):
	DataFrame({"col": [False]}).bool()