zmc
2023-08-08 e792e9a60d958b93aef96050644f369feb25d61b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
"""
Tests that work on both the Python and C engines but do not have a
specific classification into the other test modules.
"""
from io import StringIO
 
import pytest
 
from pandas import (
    DataFrame,
    concat,
)
import pandas._testing as tm
 
# Module-wide mark: requests the ``pyarrow_skip`` fixture for every test in
# this module. The fixture itself is defined outside this file; presumably it
# skips the pyarrow engine -- TODO confirm against the conftest.
pytestmark = pytest.mark.usefixtures("pyarrow_skip")
 
 
def test_iterator(all_parsers):
    """Pull a CSV in two pieces from an iterator-mode reader (gh-6607).

    The same text is parsed once in a single call to obtain the reference
    frame, then again with ``iterator=True``. The first three rows and the
    remainder are fetched with ``reader.read`` and each piece is compared
    against the matching slice of the reference frame.
    """
    csv_text = (
        "index,A,B,C,D\n"
        "foo,2,3,4,5\n"
        "bar,7,8,9,10\n"
        "baz,12,13,14,15\n"
        "qux,12,13,14,15\n"
        "foo2,12,13,14,15\n"
        "bar2,12,13,14,15\n"
    )
    read_opts = {"index_col": 0}

    whole = all_parsers.read_csv(StringIO(csv_text), **read_opts)

    with all_parsers.read_csv(
        StringIO(csv_text), iterator=True, **read_opts
    ) as chunker:
        head = chunker.read(3)
        tm.assert_frame_equal(head, whole[:3])

        # Five rows are requested although only three remain after the head.
        tail = chunker.read(5)

    tm.assert_frame_equal(tail, whole[3:])
 
 
def test_iterator2(all_parsers):
    """Check the first item produced by iterating an iterator-mode reader.

    The data rows carry one more field than the header, and the expected
    frame uses that leading field as the index.
    """
    csv_text = (
        "A,B,C\n"
        "foo,1,2,3\n"
        "bar,4,5,6\n"
        "baz,7,8,9\n"
    )
    wanted = DataFrame(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        index=["foo", "bar", "baz"],
        columns=["A", "B", "C"],
    )

    with all_parsers.read_csv(StringIO(csv_text), iterator=True) as chunker:
        # Drain the reader completely before leaving the context manager.
        frames = list(chunker)

    first_frame = frames[0]
    tm.assert_frame_equal(first_frame, wanted)
 
 
def test_iterator_stop_on_chunksize(all_parsers):
    """Iteration must terminate when ``chunksize`` is given (gh-3967).

    With ``chunksize=1`` and three data rows the reader is expected to yield
    exactly three chunks, which concatenate back into the full frame.
    """
    csv_text = (
        "A,B,C\n"
        "foo,1,2,3\n"
        "bar,4,5,6\n"
        "baz,7,8,9\n"
    )

    with all_parsers.read_csv(StringIO(csv_text), chunksize=1) as chunker:
        pieces = list(chunker)

    assert len(pieces) == 3

    wanted = DataFrame(
        [[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        index=["foo", "bar", "baz"],
        columns=["A", "B", "C"],
    )
    combined = concat(pieces)
    tm.assert_frame_equal(combined, wanted)
 
 
@pytest.mark.parametrize(
    "kwargs",
    [
        {"iterator": True, "chunksize": 1},
        {"iterator": True},
        {"chunksize": 1},
    ],
)
def test_iterator_skipfooter_errors(all_parsers, kwargs):
    """``skipfooter`` combined with any iteration option raises ValueError.

    Each parametrized ``kwargs`` turns on ``iterator``, ``chunksize`` or both.
    """
    source = StringIO("a\n1\n2")

    with pytest.raises(ValueError, match="'skipfooter' not supported for iteration"):
        # The error is expected from this call; the body never needs to run.
        with all_parsers.read_csv(source, skipfooter=1, **kwargs):
            pass
 
 
def test_iteration_open_handle(all_parsers):
    """Parse from a file handle that has already been partly iterated.

    Seven single-field lines are written to a temporary file. The file is
    reopened, lines are consumed by iterating the handle until the one
    containing "CCC", and the same handle is then given to ``read_csv``.
    The parsed frame is expected to hold only the lines after "CCC".
    """
    wanted = DataFrame({0: ["DDD", "EEE", "FFF", "GGG"]})

    with tm.ensure_clean() as path:
        with open(path, "w") as sink:
            sink.write("AAA\nBBB\nCCC\nDDD\nEEE\nFFF\nGGG")

        with open(path) as handle:
            # Advance the handle by iteration, stopping right after "CCC".
            for text in handle:
                if "CCC" not in text:
                    continue
                break

            parsed = all_parsers.read_csv(handle, header=None)
            tm.assert_frame_equal(parsed, wanted)