import numpy as np
|
import pytest
|
|
from pandas import (
|
PeriodIndex,
|
Series,
|
date_range,
|
period_range,
|
timedelta_range,
|
)
|
import pandas._testing as tm
|
|
|
class DropDuplicates:
|
def test_drop_duplicates_metadata(self, idx):
|
# GH#10115
|
result = idx.drop_duplicates()
|
tm.assert_index_equal(idx, result)
|
assert idx.freq == result.freq
|
|
idx_dup = idx.append(idx)
|
result = idx_dup.drop_duplicates()
|
|
expected = idx
|
if not isinstance(idx, PeriodIndex):
|
# freq is reset except for PeriodIndex
|
assert idx_dup.freq is None
|
assert result.freq is None
|
expected = idx._with_freq(None)
|
else:
|
assert result.freq == expected.freq
|
|
tm.assert_index_equal(result, expected)
|
|
@pytest.mark.parametrize(
|
"keep, expected, index",
|
[
|
(
|
"first",
|
np.concatenate(([False] * 10, [True] * 5)),
|
np.arange(0, 10, dtype=np.int64),
|
),
|
(
|
"last",
|
np.concatenate(([True] * 5, [False] * 10)),
|
np.arange(5, 15, dtype=np.int64),
|
),
|
(
|
False,
|
np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
|
np.arange(5, 10, dtype=np.int64),
|
),
|
],
|
)
|
def test_drop_duplicates(self, keep, expected, index, idx):
|
# to check Index/Series compat
|
idx = idx.append(idx[:5])
|
|
tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
|
expected = idx[~expected]
|
|
result = idx.drop_duplicates(keep=keep)
|
tm.assert_index_equal(result, expected)
|
|
result = Series(idx).drop_duplicates(keep=keep)
|
expected = Series(expected, index=index)
|
tm.assert_series_equal(result, expected)
|
|
|
class TestDropDuplicatesPeriodIndex(DropDuplicates):
|
@pytest.fixture(params=["D", "3D", "H", "2H", "T", "2T", "S", "3S"])
|
def freq(self, request):
|
return request.param
|
|
@pytest.fixture
|
def idx(self, freq):
|
return period_range("2011-01-01", periods=10, freq=freq, name="idx")
|
|
|
class TestDropDuplicatesDatetimeIndex(DropDuplicates):
|
@pytest.fixture
|
def idx(self, freq_sample):
|
return date_range("2011-01-01", freq=freq_sample, periods=10, name="idx")
|
|
|
class TestDropDuplicatesTimedeltaIndex(DropDuplicates):
|
@pytest.fixture
|
def idx(self, freq_sample):
|
return timedelta_range("1 day", periods=10, freq=freq_sample, name="idx")
|