1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
"""
Tests the usecols functionality during parsing
for all of the parsers defined in parsers.py
"""
| from io import StringIO
|
| import pytest
|
| from pandas import DataFrame
| import pandas._testing as tm
|
# Expected error text when ``usecols`` fails validation. The tests in this
# module pass it to ``pytest.raises(..., match=...)``, so it is interpreted
# as a regular-expression pattern.
_msg_validate_usecols_arg = (
    "'usecols' must either be list-like "
    "of all strings, all unicode, all "
    "integers or a callable."
)
# Expected error text when requested columns are absent from the data.
# ``{0}`` is a ``str.format`` placeholder -- presumably filled in with the
# missing column names by the caller (not exercised in this part of the file).
_msg_validate_usecols_names = (
    "Usecols do not match columns, columns expected but not found: {0}"
)
|
|
def test_usecols_with_unicode_strings(all_parsers):
    """Check that ``usecols`` given as string names keeps only those columns.

    Reads a four-column CSV and requests ``AAA`` and ``BBB``; the result
    must contain exactly those two columns with their parsed values.
    """
    # see gh-13219
    data = """AAA,BBB,CCC,DDD
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
    parser = all_parsers

    # Expected frame as {column: {row_label: value}}.
    exp_data = {
        "AAA": {
            0: 0.056674972999999997,
            1: 2.6132309819999997,
            2: 3.5689350380000002,
        },
        "BBB": {0: 8, 1: 2, 2: 7},
    }
    expected = DataFrame(exp_data)

    result = parser.read_csv(StringIO(data), usecols=["AAA", "BBB"])
    tm.assert_frame_equal(result, expected)
|
|
def test_usecols_with_single_byte_unicode_strings(all_parsers):
    """Check that ``usecols`` works with single-character column names.

    Reads a four-column CSV and requests ``A`` and ``B``; the result must
    contain exactly those two columns with their parsed values.
    """
    # see gh-13219
    data = """A,B,C,D
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
    parser = all_parsers

    # Expected frame as {column: {row_label: value}}.
    exp_data = {
        "A": {
            0: 0.056674972999999997,
            1: 2.6132309819999997,
            2: 3.5689350380000002,
        },
        "B": {0: 8, 1: 2, 2: 7},
    }
    expected = DataFrame(exp_data)

    result = parser.read_csv(StringIO(data), usecols=["A", "B"])
    tm.assert_frame_equal(result, expected)
|
|
@pytest.mark.parametrize("usecols", [["AAA", b"BBB"], [b"AAA", "BBB"]])
def test_usecols_with_mixed_encoding_strings(all_parsers, usecols):
    """Check that mixing ``str`` and ``bytes`` names in ``usecols`` is rejected.

    Each parametrized ``usecols`` holds one ``str`` and one ``bytes`` entry;
    ``read_csv`` must raise ``ValueError`` matching the usecols validation
    message.
    """
    data = """AAA,BBB,CCC,DDD
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
    parser = all_parsers

    with pytest.raises(ValueError, match=_msg_validate_usecols_arg):
        parser.read_csv(StringIO(data), usecols=usecols)
|
|
# A single case is enough: listing the same names twice would only run the
# identical test a second time.
@pytest.mark.parametrize("usecols", [["あああ", "いい"]])
def test_usecols_with_multi_byte_characters(all_parsers, usecols):
    """Check that ``usecols`` selects columns whose names are non-ASCII strings.

    Reads a CSV whose header uses multi-byte characters and requests two of
    the four columns; the result must contain exactly those two columns.
    """
    data = """あああ,いい,ううう,ええええ
0.056674973,8,True,a
2.613230982,2,False,b
3.568935038,7,False,a"""
    parser = all_parsers

    # Expected frame as {column: {row_label: value}}.
    exp_data = {
        "あああ": {
            0: 0.056674972999999997,
            1: 2.6132309819999997,
            2: 3.5689350380000002,
        },
        "いい": {0: 8, 1: 2, 2: 7},
    }
    expected = DataFrame(exp_data)

    result = parser.read_csv(StringIO(data), usecols=usecols)
    tm.assert_frame_equal(result, expected)
|
|