import pytest
|
|
from pandas.core.dtypes.missing import array_equivalent
|
|
import pandas as pd
|
|
|
# Fixtures
|
# ========
|
@pytest.fixture
def df():
    """Three-row DataFrame with columns 'L1', 'L2', and 'L3'"""
    column_data = {
        "L1": [1, 2, 3],
        "L2": [11, 12, 13],
        "L3": ["A", "B", "C"],
    }
    return pd.DataFrame(column_data)
|
|
|
@pytest.fixture(params=[[], ["L1"], ["L1", "L2"], ["L1", "L2", "L3"]])
def df_levels(request, df):
    """
    DataFrame where 'L1', 'L2', and 'L3' are split between index levels
    and columns; the fixture parameter lists the names moved to the index.
    """
    index_names = request.param

    # An empty parameter keeps every name as a column.
    return df.set_index(index_names) if index_names else df
|
|
|
@pytest.fixture
def df_ambig(df):
    """DataFrame whose index levels are 'L1' and 'L2' and whose columns are 'L3' and 'L1'"""
    indexed = df.set_index(["L1", "L2"])

    # Re-adding 'L1' as a column makes that name both a level and a label.
    indexed["L1"] = indexed["L3"]
    return indexed
|
|
|
@pytest.fixture
def df_duplabels(df):
    """DataFrame indexed by 'L1' whose columns are 'L2', 'L3', and a second 'L2'"""
    indexed = df.set_index(["L1"])

    # Appending the existing 'L2' column again creates the duplicate label.
    repeated_column = indexed["L2"]
    return pd.concat([indexed, repeated_column], axis=1)
|
|
|
# Test is label/level reference
|
# =============================
|
def get_labels_levels(df_levels):
    """
    Return ``(labels, levels)`` for a frame: its column labels as a list,
    and the names of its index levels with unnamed (None) levels skipped.
    """
    index_names = df_levels.index.names
    named_levels = [name for name in index_names if name is not None]
    column_labels = list(df_levels.columns)
    return column_labels, named_levels
|
|
|
def assert_label_reference(frame, labels, axis):
    """
    Assert that each entry of ``labels`` is reported by ``frame`` as a label
    reference for ``axis``, is not reported as a level reference, and
    satisfies the combined label-or-level check.
    """
    for key in labels:
        is_label = frame._is_label_reference(key, axis=axis)
        assert is_label

        is_level = frame._is_level_reference(key, axis=axis)
        assert not is_level

        is_either = frame._is_label_or_level_reference(key, axis=axis)
        assert is_either
|
|
|
def assert_level_reference(frame, levels, axis):
    """
    Assert that each entry of ``levels`` is reported by ``frame`` as a level
    reference for ``axis``, is not reported as a label reference, and
    satisfies the combined label-or-level check.
    """
    for key in levels:
        is_level = frame._is_level_reference(key, axis=axis)
        assert is_level

        is_label = frame._is_label_reference(key, axis=axis)
        assert not is_label

        is_either = frame._is_label_or_level_reference(key, axis=axis)
        assert is_either
|
|
|
# DataFrame
|
# ---------
|
def test_is_level_or_label_reference_df_simple(df_levels, axis):
    """Names in the index are level references and column names are label references."""
    axis_num = df_levels._get_axis_number(axis)

    # Expectations are derived from the untransposed frame.
    expected_labels, expected_levels = get_labels_levels(df_levels)

    # For axis 1 the frame is transposed so levels sit on the columns.
    frame = df_levels.T if axis_num == 1 else df_levels

    assert_level_reference(frame, expected_levels, axis=axis_num)
    assert_label_reference(frame, expected_labels, axis=axis_num)
|
|
|
def test_is_level_reference_df_ambig(df_ambig, axis):
    """A name that is both a level and a label is treated as a label reference."""
    axis_num = df_ambig._get_axis_number(axis)

    # For axis 1 the frame is transposed so levels sit on the columns.
    frame = df_ambig.T if axis_num == 1 else df_ambig

    # 'L1' names both an on-axis level and an off-axis label, so it must
    # resolve to the label rather than the level.
    assert_label_reference(frame, ["L1"], axis=axis_num)

    # 'L2' names only an on-axis level, so it is a level reference.
    assert_level_reference(frame, ["L2"], axis=axis_num)

    # 'L3' names only an off-axis label, so it is not a level reference.
    assert_label_reference(frame, ["L3"], axis=axis_num)
|
|
|
# Series
|
# ------
|
def test_is_level_reference_series_simple_axis0(df):
    """Index level names of a Series are level references; its own name is not."""
    # Series 'L2' indexed by the single level 'L1'
    single_level = df.set_index("L1")["L2"]
    assert_level_reference(single_level, ["L1"], axis=0)
    assert not single_level._is_level_reference("L2")

    # Series 'L3' indexed by the two levels 'L1' and 'L2'
    two_levels = df.set_index(["L1", "L2"])["L3"]
    assert_level_reference(two_levels, ["L1", "L2"], axis=0)
    assert not two_levels._is_level_reference("L3")
|
|
|
def test_is_level_reference_series_axis1_error(df):
    """Asking a Series about level references on axis 1 raises ValueError."""
    # Series 'L2' indexed by 'L1'
    series = df.set_index("L1")["L2"]

    expected_msg = "No axis named 1"
    with pytest.raises(ValueError, match=expected_msg):
        series._is_level_reference("L1", axis=1)
|
|
|
# Test _check_label_or_level_ambiguity
# ====================================
|
|
|
# DataFrame
|
# ---------
|
def test_check_label_or_level_ambiguity_df(df_ambig, axis):
    """
    The ambiguity check raises for a name that is both a level and a label,
    and completes without raising for names that are only one of the two.
    """
    axis = df_ambig._get_axis_number(axis)

    # The frame is transposed for axis == 1, which swaps the index/column
    # roles named in the expected error message.
    if axis == 1:
        df_ambig = df_ambig.T
        msg = "'L1' is both a column level and an index label"
    else:
        msg = "'L1' is both an index level and a column label"

    # df_ambig has both an on-axis level and off-axis label named L1.
    # Therefore, L1 is ambiguous.
    with pytest.raises(ValueError, match=msg):
        df_ambig._check_label_or_level_ambiguity("L1", axis=axis)

    # df_ambig has an on-axis level named L2, and it is not ambiguous.
    df_ambig._check_label_or_level_ambiguity("L2", axis=axis)

    # df_ambig has an off-axis label named L3, and it is not ambiguous.
    # Called bare like the L2 check: passing means the call did not raise,
    # so there is no return value worth asserting on.
    df_ambig._check_label_or_level_ambiguity("L3", axis=axis)
|
|
|
# Series
|
# ------
|
def test_check_label_or_level_ambiguity_series(df):
    """
    The ambiguity check never raises for a Series: it has no columns, so a
    name cannot be both a level and a label.
    """
    # Series 'L2' indexed by the single level 'L1'
    single_level = df.set_index("L1")["L2"]
    for key in ("L1", "L2"):
        single_level._check_label_or_level_ambiguity(key, axis=0)

    # Series 'L3' indexed by the two levels 'L1' and 'L2'
    two_levels = df.set_index(["L1", "L2"])["L3"]
    for key in ("L1", "L2", "L3"):
        two_levels._check_label_or_level_ambiguity(key, axis=0)
|
|
|
def test_check_label_or_level_ambiguity_series_axis1_error(df):
    """Running the ambiguity check on a Series with axis 1 raises ValueError."""
    # Series 'L2' indexed by 'L1'
    series = df.set_index("L1")["L2"]

    expected_msg = "No axis named 1"
    with pytest.raises(ValueError, match=expected_msg):
        series._check_label_or_level_ambiguity("L1", axis=1)
|
|
|
# Test _get_label_or_level_values
|
# ===============================
|
def assert_label_values(frame, labels, axis):
    """
    Assert that ``frame._get_label_or_level_values`` returns, for each label,
    the same values as selecting that label directly: by column for axis 0,
    by row (``.loc``) for axis 1.
    """
    axis_num = frame._get_axis_number(axis)

    for key in labels:
        # Labels live on the opposite axis from the one being queried.
        selected = frame[key] if axis_num == 0 else frame.loc[key]
        expected = selected._values

        result = frame._get_label_or_level_values(key, axis=axis_num)
        assert array_equivalent(expected, result)
|
|
|
def assert_level_values(frame, levels, axis):
    """
    Assert that ``frame._get_label_or_level_values`` returns, for each level,
    the same values as ``get_level_values`` on the index (axis 0) or on the
    columns (axis 1).
    """
    axis_num = frame._get_axis_number(axis)

    # Levels live on the axis being queried.
    level_holder = frame.index if axis_num == 0 else frame.columns

    for key in levels:
        expected = level_holder.get_level_values(level=key)._values

        result = frame._get_label_or_level_values(key, axis=axis_num)
        assert array_equivalent(expected, result)
|
|
|
# DataFrame
|
# ---------
|
def test_get_label_or_level_values_df_simple(df_levels, axis):
    """Values can be fetched for every label and every level of the frame."""
    # Expectations are derived from the untransposed frame.
    expected_labels, expected_levels = get_labels_levels(df_levels)

    axis_num = df_levels._get_axis_number(axis)

    # For axis 1 the frame is transposed so levels sit on the columns.
    frame = df_levels.T if axis_num == 1 else df_levels

    assert_label_values(frame, expected_labels, axis=axis_num)
    assert_level_values(frame, expected_levels, axis=axis_num)
|
|
|
def test_get_label_or_level_values_df_ambig(df_ambig, axis):
    """Unambiguous names in a frame that also has an ambiguous name still resolve."""
    axis_num = df_ambig._get_axis_number(axis)

    # For axis 1 the frame is transposed so levels sit on the columns.
    frame = df_ambig.T if axis_num == 1 else df_ambig

    # 'L2' is only an on-axis level, so it is not ambiguous.
    assert_level_values(frame, ["L2"], axis=axis_num)

    # 'L3' is only an off-axis label, so it is not ambiguous.
    assert_label_values(frame, ["L3"], axis=axis_num)
|
|
|
def test_get_label_or_level_values_df_duplabels(df_duplabels, axis):
    """Fetching values for a duplicated label raises; unique names still work."""
    axis_num = df_duplabels._get_axis_number(axis)

    # For axis 1 the frame is transposed so levels sit on the columns.
    frame = df_duplabels.T if axis_num == 1 else df_duplabels

    # 'L1' is an unambiguous level.
    assert_level_values(frame, ["L1"], axis=axis_num)

    # 'L3' is a unique label.
    assert_label_values(frame, ["L3"], axis=axis_num)

    # 'L2' appears twice among the labels; the message names the axis that
    # holds the labels (columns for axis 0, index for axis 1).
    expected_msg = (
        "The column label 'L2' is not unique"
        if axis_num == 0
        else "The index label 'L2' is not unique"
    )

    with pytest.raises(ValueError, match=expected_msg):
        assert_label_values(frame, ["L2"], axis=axis_num)
|
|
|
# Series
|
# ------
|
def test_get_label_or_level_values_series_axis0(df):
    """Level values can be fetched from a Series with one or two index levels."""
    # Series 'L2' indexed by the single level 'L1'
    single_level = df.set_index("L1")["L2"]
    assert_level_values(single_level, ["L1"], axis=0)

    # Series 'L3' indexed by the two levels 'L1' and 'L2'
    two_levels = df.set_index(["L1", "L2"])["L3"]
    assert_level_values(two_levels, ["L1", "L2"], axis=0)
|
|
|
def test_get_label_or_level_values_series_axis1_error(df):
    """Fetching label-or-level values from a Series with axis 1 raises ValueError."""
    # Series 'L2' indexed by 'L1'
    series = df.set_index("L1")["L2"]

    expected_msg = "No axis named 1"
    with pytest.raises(ValueError, match=expected_msg):
        series._get_label_or_level_values("L1", axis=1)
|
|
|
# Test _drop_labels_or_levels
|
# ===========================
|
def assert_labels_dropped(frame, labels, axis):
    """
    Assert that dropping each label via ``frame._drop_labels_or_levels``
    removes it from the result while it is still present in ``frame``.

    Labels are looked up in the columns for axis 0 and in the index for
    axis 1.
    """
    axis_num = frame._get_axis_number(axis)

    for key in labels:
        dropped = frame._drop_labels_or_levels(key, axis=axis_num)

        if axis_num == 0:
            before, after = frame.columns, dropped.columns
        else:
            before, after = frame.index, dropped.index

        assert key in before
        assert key not in after
|
|
|
def assert_levels_dropped(frame, levels, axis):
    """
    Assert that dropping each level via ``frame._drop_labels_or_levels``
    removes its name from the result while it is still present in ``frame``.

    Level names are looked up on the index for axis 0 and on the columns
    for axis 1.
    """
    axis_num = frame._get_axis_number(axis)

    for key in levels:
        dropped = frame._drop_labels_or_levels(key, axis=axis_num)

        if axis_num == 0:
            before, after = frame.index.names, dropped.index.names
        else:
            before, after = frame.columns.names, dropped.columns.names

        assert key in before
        assert key not in after
|
|
|
# DataFrame
|
# ---------
|
def test_drop_labels_or_levels_df(df_levels, axis):
    """Every label and level of the frame can be dropped; unknown names raise."""
    # Expectations are derived from the untransposed frame.
    expected_labels, expected_levels = get_labels_levels(df_levels)

    axis_num = df_levels._get_axis_number(axis)

    # For axis 1 the frame is transposed so levels sit on the columns.
    frame = df_levels.T if axis_num == 1 else df_levels

    assert_labels_dropped(frame, expected_labels, axis=axis_num)
    assert_levels_dropped(frame, expected_levels, axis=axis_num)

    # 'L4' is neither a label nor a level of the frame.
    expected_msg = "not valid labels or levels"
    with pytest.raises(ValueError, match=expected_msg):
        frame._drop_labels_or_levels("L4", axis=axis_num)
|
|
|
# Series
|
# ------
|
def test_drop_labels_or_levels_series(df):
    """Index levels of a Series can be dropped; unknown names raise ValueError."""
    # (index keys, column taken as the Series, level names expected to drop)
    cases = [
        ("L1", "L2", ["L1"]),
        (["L1", "L2"], "L3", ["L1", "L2"]),
    ]

    for index_keys, value_column, level_names in cases:
        series = df.set_index(index_keys)[value_column]
        assert_levels_dropped(series, level_names, axis=0)

        # 'L4' is not a level of either Series.
        with pytest.raises(ValueError, match="not valid labels or levels"):
            series._drop_labels_or_levels("L4", axis=0)
|