1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
| import numpy as np
| import pytest
|
| from pandas.compat import is_numpy_dev
|
| import pandas as pd
| import pandas._testing as tm
|
|
| def test_agg_relabel():
| # GH 26513
| df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})
|
| # simplest case with one column, one func
| result = df.agg(foo=("B", "sum"))
| expected = pd.DataFrame({"B": [10]}, index=pd.Index(["foo"]))
| tm.assert_frame_equal(result, expected)
|
| # test on same column with different methods
| result = df.agg(foo=("B", "sum"), bar=("B", "min"))
| expected = pd.DataFrame({"B": [10, 1]}, index=pd.Index(["foo", "bar"]))
|
| tm.assert_frame_equal(result, expected)
|
|
| def test_agg_relabel_multi_columns_multi_methods():
| # GH 26513, test on multiple columns with multiple methods
| df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})
| result = df.agg(
| foo=("A", "sum"),
| bar=("B", "mean"),
| cat=("A", "min"),
| dat=("B", "max"),
| f=("A", "max"),
| g=("C", "min"),
| )
| expected = pd.DataFrame(
| {
| "A": [6.0, np.nan, 1.0, np.nan, 2.0, np.nan],
| "B": [np.nan, 2.5, np.nan, 4.0, np.nan, np.nan],
| "C": [np.nan, np.nan, np.nan, np.nan, np.nan, 3.0],
| },
| index=pd.Index(["foo", "bar", "cat", "dat", "f", "g"]),
| )
| tm.assert_frame_equal(result, expected)
|
|
| @pytest.mark.xfail(is_numpy_dev, reason="name of min now equals name of np.min")
| def test_agg_relabel_partial_functions():
| # GH 26513, test on partial, functools or more complex cases
| df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]})
| result = df.agg(foo=("A", np.mean), bar=("A", "mean"), cat=("A", min))
| expected = pd.DataFrame(
| {"A": [1.5, 1.5, 1.0]}, index=pd.Index(["foo", "bar", "cat"])
| )
| tm.assert_frame_equal(result, expected)
|
| result = df.agg(
| foo=("A", min),
| bar=("A", np.min),
| cat=("B", max),
| dat=("C", "min"),
| f=("B", np.sum),
| kk=("B", lambda x: min(x)),
| )
| expected = pd.DataFrame(
| {
| "A": [1.0, 1.0, np.nan, np.nan, np.nan, np.nan],
| "B": [np.nan, np.nan, 4.0, np.nan, 10.0, 1.0],
| "C": [np.nan, np.nan, np.nan, 3.0, np.nan, np.nan],
| },
| index=pd.Index(["foo", "bar", "cat", "dat", "f", "kk"]),
| )
| tm.assert_frame_equal(result, expected)
|
|
| def test_agg_namedtuple():
| # GH 26513
| df = pd.DataFrame({"A": [0, 1], "B": [1, 2]})
| result = df.agg(
| foo=pd.NamedAgg("B", "sum"),
| bar=pd.NamedAgg("B", min),
| cat=pd.NamedAgg(column="B", aggfunc="count"),
| fft=pd.NamedAgg("B", aggfunc="max"),
| )
|
| expected = pd.DataFrame(
| {"B": [3, 1, 2, 2]}, index=pd.Index(["foo", "bar", "cat", "fft"])
| )
| tm.assert_frame_equal(result, expected)
|
| result = df.agg(
| foo=pd.NamedAgg("A", "min"),
| bar=pd.NamedAgg(column="B", aggfunc="max"),
| cat=pd.NamedAgg(column="A", aggfunc="max"),
| )
| expected = pd.DataFrame(
| {"A": [0.0, np.nan, 1.0], "B": [np.nan, 2.0, np.nan]},
| index=pd.Index(["foo", "bar", "cat"]),
| )
| tm.assert_frame_equal(result, expected)
|
|