1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
| import numpy as np
| import pytest
|
| import pandas.util._test_decorators as td
|
| import pandas as pd
| from pandas import DataFrame
| import pandas._testing as tm
| from pandas.tests.copy_view.util import get_array
|
|
| @td.skip_array_manager_invalid_test
| def test_consolidate(using_copy_on_write):
| # create unconsolidated DataFrame
| df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
| df["c"] = [4, 5, 6]
|
| # take a viewing subset
| subset = df[:]
|
| # each block of subset references a block of df
| assert all(blk.refs.has_reference() for blk in subset._mgr.blocks)
|
| # consolidate the two int64 blocks
| subset._consolidate_inplace()
|
| # the float64 block still references the parent one because it still a view
| assert subset._mgr.blocks[0].refs.has_reference()
| # equivalent of assert np.shares_memory(df["b"].values, subset["b"].values)
| # but avoids caching df["b"]
| assert np.shares_memory(get_array(df, "b"), get_array(subset, "b"))
|
| # the new consolidated int64 block does not reference another
| assert not subset._mgr.blocks[1].refs.has_reference()
|
| # the parent dataframe now also only is linked for the float column
| assert not df._mgr.blocks[0].refs.has_reference()
| assert df._mgr.blocks[1].refs.has_reference()
| assert not df._mgr.blocks[2].refs.has_reference()
|
| # and modifying subset still doesn't modify parent
| if using_copy_on_write:
| subset.iloc[0, 1] = 0.0
| assert not df._mgr.blocks[1].refs.has_reference()
| assert df.loc[0, "b"] == 0.1
|
|
| @pytest.mark.single_cpu
| @td.skip_array_manager_invalid_test
| def test_switch_options():
| # ensure we can switch the value of the option within one session
| # (assuming data is constructed after switching)
|
| # using the option_context to ensure we set back to global option value
| # after running the test
| with pd.option_context("mode.copy_on_write", False):
| df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
| subset = df[:]
| subset.iloc[0, 0] = 0
| # df updated with CoW disabled
| assert df.iloc[0, 0] == 0
|
| pd.options.mode.copy_on_write = True
| df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
| subset = df[:]
| subset.iloc[0, 0] = 0
| # df not updated with CoW enabled
| assert df.iloc[0, 0] == 1
|
| pd.options.mode.copy_on_write = False
| df = DataFrame({"a": [1, 2, 3], "b": [0.1, 0.2, 0.3]})
| subset = df[:]
| subset.iloc[0, 0] = 0
| # df updated with CoW disabled
| assert df.iloc[0, 0] == 0
|
|
| @td.skip_array_manager_invalid_test
| @pytest.mark.parametrize("dtype", [np.intp, np.int8])
| @pytest.mark.parametrize(
| "locs, arr",
| [
| ([0], np.array([-1, -2, -3])),
| ([1], np.array([-1, -2, -3])),
| ([5], np.array([-1, -2, -3])),
| ([0, 1], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
| ([0, 2], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
| ([0, 1, 2], np.array([[-1, -2, -3], [-4, -5, -6], [-4, -5, -6]]).T),
| ([1, 2], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
| ([1, 3], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
| ([1, 3], np.array([[-1, -2, -3], [-4, -5, -6]]).T),
| ],
| )
| def test_iset_splits_blocks_inplace(using_copy_on_write, locs, arr, dtype):
| # Nothing currently calls iset with
| # more than 1 loc with inplace=True (only happens with inplace=False)
| # but ensure that it works
| df = DataFrame(
| {
| "a": [1, 2, 3],
| "b": [4, 5, 6],
| "c": [7, 8, 9],
| "d": [10, 11, 12],
| "e": [13, 14, 15],
| "f": ["a", "b", "c"],
| },
| )
| arr = arr.astype(dtype)
| df_orig = df.copy()
| df2 = df.copy(deep=None) # Trigger a CoW (if enabled, otherwise makes copy)
| df2._mgr.iset(locs, arr, inplace=True)
|
| tm.assert_frame_equal(df, df_orig)
|
| if using_copy_on_write:
| for i, col in enumerate(df.columns):
| if i not in locs:
| assert np.shares_memory(get_array(df, col), get_array(df2, col))
| else:
| for col in df.columns:
| assert not np.shares_memory(get_array(df, col), get_array(df2, col))
|
|