1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import numpy as np
import pytest

from pandas import (
    DataFrame,
    NaT,
    Series,
    Timedelta,
    Timestamp,
)
import pandas._testing as tm
|
|
def test_group_shift_with_null_key():
    """Group-wise ``shift`` must tolerate rows whose group key is missing.

    Regression test designed to replicate the segfault in issue #13813.
    """
    n_rows = 1200

    # Generate a moderately large dataframe with occasional missing
    # values in column `B`, and then group by [`A`, `B`]. This should
    # force `-1` in `labels` array of `g.grouper.group_info` exactly
    # at those places, where the group-by key is partially missing.
    df = DataFrame(
        [(i % 12, i % 3 if i % 3 else np.nan, i) for i in range(n_rows)],
        dtype=float,
        columns=["A", "B", "Z"],
    )
    g = df.groupby(["A", "B"])

    # `B` is fully determined by `A` (3 divides 12), so each group holds the
    # rows that share `i % 12`; the next row of a group is 12 rows further
    # down. Rows with a missing key (`i % 3 == 0`) and the final 12 rows have
    # no successor and are expected to be NaN.
    expected = DataFrame(
        [(i + 12 if i % 3 and i < n_rows - 12 else np.nan) for i in range(n_rows)],
        dtype=float,
        columns=["Z"],
    )
    result = g.shift(-1)

    tm.assert_frame_equal(result, expected)
|
|
def test_group_shift_with_fill_value():
    """Group-wise ``shift`` must use ``fill_value`` for the vacated slots."""
    # GH #24128
    n_rows = 24
    df = DataFrame(
        [(i % 12, i % 3, i) for i in range(n_rows)],
        dtype=float,
        columns=["A", "B", "Z"],
    )
    g = df.groupby(["A", "B"])

    # Each group holds the two rows sharing `i % 12`: the first row receives
    # the value 12 rows further down, the second has no successor and gets
    # the fill value 0 instead of NaN.
    expected = DataFrame(
        [(i + 12 if i < n_rows - 12 else 0) for i in range(n_rows)],
        dtype=float,
        columns=["Z"],
    )
    result = g.shift(-1, fill_value=0)

    tm.assert_frame_equal(result, expected)
|
|
def test_group_shift_lose_timezone():
    """A zero-period group shift must keep a tz-aware column tz-aware."""
    # GH 30134
    # Timestamp.utcnow() is deprecated in recent pandas releases;
    # Timestamp.now("UTC") produces the same tz-aware current-time value.
    now_dt = Timestamp.now("UTC")
    df = DataFrame({"a": [1, 1], "date": now_dt})

    # Only the non-key column "date" survives the shift; take its first row.
    result = df.groupby("a").shift(0).iloc[0]
    expected = Series({"date": now_dt}, name=result.name)

    tm.assert_series_equal(result, expected)
|
|
| def test_group_diff_real_series(any_real_numpy_dtype):
| df = DataFrame(
| {"a": [1, 2, 3, 3, 2], "b": [1, 2, 3, 4, 5]},
| dtype=any_real_numpy_dtype,
| )
| result = df.groupby("a")["b"].diff()
| exp_dtype = "float"
| if any_real_numpy_dtype in ["int8", "int16", "float32"]:
| exp_dtype = "float32"
| expected = Series([np.nan, np.nan, np.nan, 1.0, 3.0], dtype=exp_dtype, name="b")
| tm.assert_series_equal(result, expected)
|
|
| def test_group_diff_real_frame(any_real_numpy_dtype):
| df = DataFrame(
| {
| "a": [1, 2, 3, 3, 2],
| "b": [1, 2, 3, 4, 5],
| "c": [1, 2, 3, 4, 6],
| },
| dtype=any_real_numpy_dtype,
| )
| result = df.groupby("a").diff()
| exp_dtype = "float"
| if any_real_numpy_dtype in ["int8", "int16", "float32"]:
| exp_dtype = "float32"
| expected = DataFrame(
| {
| "b": [np.nan, np.nan, np.nan, 1.0, 3.0],
| "c": [np.nan, np.nan, np.nan, 1.0, 4.0],
| },
| dtype=exp_dtype,
| )
| tm.assert_frame_equal(result, expected)
|
|
@pytest.mark.parametrize(
    "data",
    [
        [
            Timestamp("2013-01-01"),
            Timestamp("2013-01-02"),
            Timestamp("2013-01-03"),
        ],
        [Timedelta("5 days"), Timedelta("6 days"), Timedelta("7 days")],
    ],
)
def test_group_diff_datetimelike(data):
    """Group-wise ``diff`` of datetime and timedelta data yields timedeltas.

    Both parametrized inputs step by one day, so they share one expectation.
    """
    df = DataFrame({"a": [1, 2, 2], "b": data})
    result = df.groupby("a")["b"].diff()

    # Group 1 has a single row and group 2 starts at the second row, so only
    # the last row has a predecessor within its group.
    expected = Series([NaT, NaT, Timedelta("1 days")], name="b")
    tm.assert_series_equal(result, expected)
|
|
def test_group_diff_bool():
    """Group-wise ``diff`` on a boolean column yields NaN/False/True entries."""
    df = DataFrame({"a": [1, 2, 3, 3, 2], "b": [True, True, False, False, True]})
    result = df.groupby("a")["b"].diff()

    # The first row of each group has no predecessor (NaN); the later rows of
    # groups 2 and 3 repeat their predecessor's value, so the diff is False.
    expected = Series([np.nan, np.nan, np.nan, False, False], name="b")
    tm.assert_series_equal(result, expected)
|
|
| def test_group_diff_object_raises(object_dtype):
| df = DataFrame(
| {"a": ["foo", "bar", "bar"], "b": ["baz", "foo", "foo"]}, dtype=object_dtype
| )
| with pytest.raises(TypeError, match=r"unsupported operand type\(s\) for -"):
| df.groupby("a")["b"].diff()
|
|
def test_empty_shift_with_fill():
    """On an empty frame, ``shift`` must not depend on ``fill_value``."""
    # GH 41264, single-index check
    df = DataFrame(columns=["a", "b", "c"])
    grouped = df.groupby(["a"])

    shifted = grouped.shift(1)
    shifted_with_fill = grouped.shift(1, fill_value=0)

    tm.assert_frame_equal(shifted, shifted_with_fill)
    # Compare the index explicitly as well.
    tm.assert_index_equal(shifted.index, shifted_with_fill.index)
|
|
def test_multindex_empty_shift_with_fill():
    """Two-key variant: empty-frame ``shift`` must not depend on ``fill_value``."""
    # GH 41264, multi-index check
    df = DataFrame(columns=["a", "b", "c"])
    grouped = df.groupby(["a", "b"])

    shifted = grouped.shift(1)
    shifted_with_fill = grouped.shift(1, fill_value=0)

    tm.assert_frame_equal(shifted, shifted_with_fill)
    # Compare the index explicitly as well.
    tm.assert_index_equal(shifted.index, shifted_with_fill.index)
|
|