import numpy as np
|
import pytest
|
|
from pandas import DataFrame
|
import pandas._testing as tm
|
from pandas.tests.copy_view.util import get_array
|
|
|
def test_assigning_to_same_variable_removes_references(using_copy_on_write):
|
df = DataFrame({"a": [1, 2, 3]})
|
df = df.reset_index()
|
if using_copy_on_write:
|
assert df._mgr._has_no_reference(1)
|
arr = get_array(df, "a")
|
df.iloc[0, 1] = 100 # Write into a
|
|
assert np.shares_memory(arr, get_array(df, "a"))
|
|
|
def test_setitem_dont_track_unnecessary_references(using_copy_on_write):
|
df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1})
|
|
df["b"] = 100
|
arr = get_array(df, "a")
|
# We split the block in setitem, if we are not careful the new blocks will
|
# reference each other triggering a copy
|
df.iloc[0, 0] = 100
|
assert np.shares_memory(arr, get_array(df, "a"))
|
|
|
def test_setitem_with_view_copies(using_copy_on_write):
|
df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1})
|
view = df[:]
|
expected = df.copy()
|
|
df["b"] = 100
|
arr = get_array(df, "a")
|
df.iloc[0, 0] = 100 # Check that we correctly track reference
|
if using_copy_on_write:
|
assert not np.shares_memory(arr, get_array(df, "a"))
|
tm.assert_frame_equal(view, expected)
|
|
|
def test_setitem_with_view_invalidated_does_not_copy(using_copy_on_write, request):
|
df = DataFrame({"a": [1, 2, 3], "b": 1, "c": 1})
|
view = df[:]
|
|
df["b"] = 100
|
arr = get_array(df, "a")
|
view = None # noqa
|
df.iloc[0, 0] = 100
|
if using_copy_on_write:
|
# Setitem split the block. Since the old block shared data with view
|
# all the new blocks are referencing view and each other. When view
|
# goes out of scope, they don't share data with any other block,
|
# so we should not trigger a copy
|
mark = pytest.mark.xfail(
|
reason="blk.delete does not track references correctly"
|
)
|
request.node.add_marker(mark)
|
assert np.shares_memory(arr, get_array(df, "a"))
|
|
|
def test_out_of_scope(using_copy_on_write):
|
def func():
|
df = DataFrame({"a": [1, 2], "b": 1.5, "c": 1})
|
# create some subset
|
result = df[["a", "b"]]
|
return result
|
|
result = func()
|
if using_copy_on_write:
|
assert not result._mgr.blocks[0].refs.has_reference()
|
assert not result._mgr.blocks[1].refs.has_reference()
|
|
|
def test_delete(using_copy_on_write):
|
df = DataFrame(np.random.randn(4, 3), columns=["a", "b", "c"])
|
del df["b"]
|
if using_copy_on_write:
|
# TODO: This should not have references, delete makes a shallow copy
|
# but keeps the blocks alive
|
assert df._mgr.blocks[0].refs.has_reference()
|
assert df._mgr.blocks[1].refs.has_reference()
|
|
df = df[["a"]]
|
if using_copy_on_write:
|
assert not df._mgr.blocks[0].refs.has_reference()
|