1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
from io import StringIO
from string import ascii_uppercase as uppercase
import textwrap
 
import numpy as np
import pytest
 
from pandas.compat import PYPY
 
from pandas import (
    CategoricalIndex,
    MultiIndex,
    Series,
    date_range,
)
 
 
def test_info_categorical_column_just_works():
    """Smoke test: ``Series.info`` runs on a categorical Series and a filtered copy.

    Nothing is asserted about the produced text; the test passes as long as
    none of the calls raises.
    """
    size = 2500
    letters = np.array(list("abcdefghij"))
    categorical = Series(letters.take(np.random.randint(0, 10, size=size))).astype(
        "category"
    )
    categorical.isna()

    # Whole series first ...
    categorical.info(buf=StringIO())

    # ... then only the rows selected by the ``== "d"`` mask, in a fresh buffer.
    only_d = categorical[categorical == "d"]
    only_d.info(buf=StringIO())
 
 
def test_info_categorical():
    """Smoke test: ``Series.info`` runs on a Series indexed by a ``CategoricalIndex``."""
    # GH14298
    ser = Series(np.zeros(2), index=CategoricalIndex(["a", "b"]))
    out = StringIO()
    ser.info(buf=out)
 
 
@pytest.mark.parametrize("verbose", [True, False])
def test_info_series(lexsorted_two_level_string_multiindex, verbose):
    """Compare ``Series.info`` output for a MultiIndex-ed Series with the exact text.

    The class/index header and the dtypes/memory footer are expected for both
    values of ``verbose``; the name / non-null count / dtype table is expected
    only when ``verbose`` is true.
    """
    mi = lexsorted_two_level_string_multiindex
    ser = Series(range(len(mi)), index=mi, name="sth")
    out = StringIO()
    ser.info(verbose=verbose, buf=out)
    actual = out.getvalue()

    header = textwrap.dedent(
        """\
        <class 'pandas.core.series.Series'>
        MultiIndex: 10 entries, ('foo', 'one') to ('qux', 'three')
        """
    )
    table = textwrap.dedent(
        """\
        Series name: sth
        Non-Null Count  Dtype
        --------------  -----
        10 non-null     int64
        """
    )
    footer = textwrap.dedent(
        f"""\
        dtypes: int64(1)
        memory usage: {ser.memory_usage()}.0+ bytes
        """
    )

    # The table sits between header and footer and is dropped when not verbose.
    sections = [header, table, footer] if verbose else [header, footer]
    assert actual == "".join(sections)
 
 
def test_info_memory():
    """Check the complete ``Series.info`` text for a two-element int64 Series.

    The number on the memory line is taken from ``Series.memory_usage()``
    converted to ``float``, so the expected text follows whatever size the
    running pandas reports.
    """
    ser = Series([1, 2], dtype="i8")
    out = StringIO()
    ser.info(buf=out)
    actual = out.getvalue()

    usage = float(ser.memory_usage())
    wanted = textwrap.dedent(
        f"""\
    <class 'pandas.core.series.Series'>
    RangeIndex: 2 entries, 0 to 1
    Series name: None
    Non-Null Count  Dtype
    --------------  -----
    2 non-null      int64
    dtypes: int64(1)
    memory usage: {usage} bytes
    """
    )
    assert actual == wanted
 
 
def test_info_wide():
    """``Series.info`` must raise ``ValueError`` when ``max_cols`` is passed."""
    expected_msg = (
        "Argument `max_cols` can only be passed in DataFrame.info, not Series.info"
    )
    ser = Series(np.random.randn(101))
    with pytest.raises(ValueError, match=expected_msg):
        ser.info(max_cols=1)
 
 
def test_info_shows_dtypes():
    """For every listed dtype, ``Series.info`` must print the count/dtype row.

    Each Series holds ``n`` random 0/1 integers cast to the dtype under test,
    and the output is searched for ``"<n> non-null     <dtype>"``.
    """
    n = 10
    for dtype in (
        "int64",
        "float64",
        "datetime64[ns]",
        "timedelta64[ns]",
        "complex128",
        "object",
        "bool",
    ):
        ser = Series(np.random.randint(2, size=n).astype(dtype))
        out = StringIO()
        ser.info(buf=out)
        expected_row = f"{n:d} non-null     {dtype}"
        assert expected_row in out.getvalue()
 
 
# skipif rather than xfail: this is a platform gate, not a known bug. With a
# non-strict xfail the body would still run on PyPy and its outcome be discarded.
@pytest.mark.skipif(PYPY, reason="on PyPy deep=True doesn't change result")
def test_info_memory_usage_deep_not_pypy():
    """On non-PyPy interpreters ``deep=True`` must report more memory than the default.

    Checked twice: once with the index included in the total for a Series
    labelled with a string, and once with default arguments for a Series
    built from a dict whose value is a list of strings.
    """
    s_with_object_index = Series({"a": [1]}, index=["foo"])
    assert s_with_object_index.memory_usage(
        index=True, deep=True
    ) > s_with_object_index.memory_usage(index=True)

    s_object = Series({"a": ["a"]})
    assert s_object.memory_usage(deep=True) > s_object.memory_usage()
 
 
# skipif rather than xfail: the equality below is only expected on PyPy. Marking
# every other interpreter as an expected failure would run the body there and
# silently accept an unexpected pass.
@pytest.mark.skipif(not PYPY, reason="on PyPy deep=True does not change result")
def test_info_memory_usage_deep_pypy():
    """On PyPy ``deep=True`` must report the same memory usage as the default.

    Mirrors the non-PyPy variant with ``==`` in place of ``>``: once with the
    index included for a Series labelled with a string, and once with default
    arguments for a Series built from a dict whose value is a list of strings.
    """
    s_with_object_index = Series({"a": [1]}, index=["foo"])
    assert s_with_object_index.memory_usage(
        index=True, deep=True
    ) == s_with_object_index.memory_usage(index=True)

    s_object = Series({"a": ["a"]})
    assert s_object.memory_usage(deep=True) == s_object.memory_usage()
 
 
@pytest.mark.parametrize(
    "series, plus",
    [
        (Series(1, index=[1, 2, 3]), False),
        (Series(1, index=list("ABC")), True),
        (Series(1, index=MultiIndex.from_product([range(3), range(3)])), False),
        (
            Series(1, index=MultiIndex.from_product([range(3), ["foo", "bar"]])),
            True,
        ),
    ],
)
def test_info_memory_usage_qualified(series, plus):
    """Assert that "+" appears in the ``Series.info`` text exactly when ``plus`` is true.

    The parametrized cases pair integer-only indexes with ``False`` and indexes
    containing strings with ``True``.
    """
    out = StringIO()
    series.info(buf=out)
    has_plus = "+" in out.getvalue()
    assert has_plus if plus else not has_plus
 
 
def test_info_memory_usage_bug_on_multiindex():
    """Compare deep memory usage of a MultiIndex-ed Series with its unstacked form.

    The Series is indexed by the product of the uppercase letters ("id") and
    100 consecutive dates ("date"); unstacking moves "id" out of the index.
    """
    # GH 14308
    # memory usage introspection should not materialize .values
    n_dates = 100
    ids = list(uppercase)
    index = MultiIndex.from_product(
        [ids, date_range("20160101", periods=n_dates)],
        names=["id", "date"],
    )
    s = Series(np.random.randn(n_dates * len(ids)), index=index)
    unstacked = s.unstack("id")

    # Same number of value bytes before and after unstacking.
    assert s.values.nbytes == unstacked.values.nbytes

    stacked_usage = s.memory_usage(deep=True)
    unstacked_usage = unstacked.memory_usage(deep=True).sum()
    assert stacked_usage > unstacked_usage

    # high upper bound
    # NOTE(review): the assertion above already makes this difference negative,
    # so the bound below holds whenever that assertion does.
    assert unstacked_usage - stacked_usage < 2000