File size: 5,735 Bytes
7885a28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
""" Test cases for GroupBy.plot """


import numpy as np
import pytest

from pandas import (
    DataFrame,
    Index,
    Series,
)
from pandas.tests.plotting.common import (
    _check_axes_shape,
    _check_legend_labels,
)

pytest.importorskip("matplotlib")


class TestDataFrameGroupByPlots:
    def test_series_groupby_plotting_nominally_works(self):
        n = 10
        weight = Series(np.random.default_rng(2).normal(166, 20, size=n))
        gender = np.random.default_rng(2).choice(["male", "female"], size=n)

        weight.groupby(gender).plot()

    def test_series_groupby_plotting_nominally_works_hist(self):
        n = 10
        height = Series(np.random.default_rng(2).normal(60, 10, size=n))
        gender = np.random.default_rng(2).choice(["male", "female"], size=n)
        height.groupby(gender).hist()

    def test_series_groupby_plotting_nominally_works_alpha(self):
        n = 10
        height = Series(np.random.default_rng(2).normal(60, 10, size=n))
        gender = np.random.default_rng(2).choice(["male", "female"], size=n)
        # Regression test for GH8733
        height.groupby(gender).plot(alpha=0.5)

    def test_plotting_with_float_index_works(self):
        # GH 7025
        df = DataFrame(
            {
                "def": [1, 1, 1, 2, 2, 2, 3, 3, 3],
                "val": np.random.default_rng(2).standard_normal(9),
            },
            index=[1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0],
        )

        df.groupby("def")["val"].plot()

    def test_plotting_with_float_index_works_apply(self):
        # GH 7025
        df = DataFrame(
            {
                "def": [1, 1, 1, 2, 2, 2, 3, 3, 3],
                "val": np.random.default_rng(2).standard_normal(9),
            },
            index=[1.0, 2.0, 3.0, 1.0, 2.0, 3.0, 1.0, 2.0, 3.0],
        )
        df.groupby("def")["val"].apply(lambda x: x.plot())

    def test_hist_single_row(self):
        # GH10214
        bins = np.arange(80, 100 + 2, 1)
        df = DataFrame({"Name": ["AAA", "BBB"], "ByCol": [1, 2], "Mark": [85, 89]})
        df["Mark"].hist(by=df["ByCol"], bins=bins)

    def test_hist_single_row_single_bycol(self):
        # GH10214
        bins = np.arange(80, 100 + 2, 1)
        df = DataFrame({"Name": ["AAA"], "ByCol": [1], "Mark": [85]})
        df["Mark"].hist(by=df["ByCol"], bins=bins)

    def test_plot_submethod_works(self):
        df = DataFrame({"x": [1, 2, 3, 4, 5], "y": [1, 2, 3, 2, 1], "z": list("ababa")})
        df.groupby("z").plot.scatter("x", "y")

    def test_plot_submethod_works_line(self):
        df = DataFrame({"x": [1, 2, 3, 4, 5], "y": [1, 2, 3, 2, 1], "z": list("ababa")})
        df.groupby("z")["x"].plot.line()

    def test_plot_kwargs(self):
        df = DataFrame({"x": [1, 2, 3, 4, 5], "y": [1, 2, 3, 2, 1], "z": list("ababa")})

        res = df.groupby("z").plot(kind="scatter", x="x", y="y")
        # check that a scatter plot is effectively plotted: the axes should
        # contain a PathCollection from the scatter plot (GH11805)
        assert len(res["a"].collections) == 1

    def test_plot_kwargs_scatter(self):
        df = DataFrame({"x": [1, 2, 3, 4, 5], "y": [1, 2, 3, 2, 1], "z": list("ababa")})
        res = df.groupby("z").plot.scatter(x="x", y="y")
        assert len(res["a"].collections) == 1

    @pytest.mark.parametrize("column, expected_axes_num", [(None, 2), ("b", 1)])
    def test_groupby_hist_frame_with_legend(self, column, expected_axes_num):
        # GH 6279 - DataFrameGroupBy histogram can have a legend
        expected_layout = (1, expected_axes_num)
        expected_labels = column or [["a"], ["b"]]

        index = Index(15 * ["1"] + 15 * ["2"], name="c")
        df = DataFrame(
            np.random.default_rng(2).standard_normal((30, 2)),
            index=index,
            columns=["a", "b"],
        )
        g = df.groupby("c")

        for axes in g.hist(legend=True, column=column):
            _check_axes_shape(axes, axes_num=expected_axes_num, layout=expected_layout)
            for ax, expected_label in zip(axes[0], expected_labels):
                _check_legend_labels(ax, expected_label)

    @pytest.mark.parametrize("column", [None, "b"])
    def test_groupby_hist_frame_with_legend_raises(self, column):
        # GH 6279 - DataFrameGroupBy histogram with legend and label raises
        index = Index(15 * ["1"] + 15 * ["2"], name="c")
        df = DataFrame(
            np.random.default_rng(2).standard_normal((30, 2)),
            index=index,
            columns=["a", "b"],
        )
        g = df.groupby("c")

        with pytest.raises(ValueError, match="Cannot use both legend and label"):
            g.hist(legend=True, column=column, label="d")

    def test_groupby_hist_series_with_legend(self):
        # GH 6279 - SeriesGroupBy histogram can have a legend
        index = Index(15 * ["1"] + 15 * ["2"], name="c")
        df = DataFrame(
            np.random.default_rng(2).standard_normal((30, 2)),
            index=index,
            columns=["a", "b"],
        )
        g = df.groupby("c")

        for ax in g["a"].hist(legend=True):
            _check_axes_shape(ax, axes_num=1, layout=(1, 1))
            _check_legend_labels(ax, ["1", "2"])

    def test_groupby_hist_series_with_legend_raises(self):
        # GH 6279 - SeriesGroupBy histogram with legend and label raises
        index = Index(15 * ["1"] + 15 * ["2"], name="c")
        df = DataFrame(
            np.random.default_rng(2).standard_normal((30, 2)),
            index=index,
            columns=["a", "b"],
        )
        g = df.groupby("c")

        with pytest.raises(ValueError, match="Cannot use both legend and label"):
            g.hist(legend=True, label="d")