Skip to content

Commit ce3298b

Browse files
authored
BUG: concat([ints, bools]) incorrect casting (pandas-dev#62889)
1 parent 1e09c37 commit ce3298b

File tree

4 files changed

+39
-0
lines changed

4 files changed

+39
-0
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1176,6 +1176,7 @@ Groupby/resample/rolling
11761176

11771177
Reshaping
11781178
^^^^^^^^^
1179+
- Bug in :func:`concat` with mixed integer and bool dtypes incorrectly casting the bools to integers (:issue:`45101`)
11791180
- Bug in :func:`qcut` where values at the quantile boundaries could be incorrectly assigned (:issue:`59355`)
11801181
- Bug in :meth:`DataFrame.combine_first` not preserving the column order (:issue:`60427`)
11811182
- Bug in :meth:`DataFrame.explode` producing incorrect result for :class:`pyarrow.large_list` type (:issue:`61091`)

pandas/core/dtypes/concat.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,10 @@ def _get_result_dtype(
161161
# coerce to object
162162
target_dtype = np.dtype(object)
163163
kinds = {"o"}
164+
elif "b" in kinds and len(kinds) > 1:
165+
# GH#21108, GH#45101
166+
target_dtype = np.dtype(object)
167+
kinds = {"o"}
164168
else:
165169
# error: Argument 1 to "np_find_common_type" has incompatible type
166170
# "*Set[Union[ExtensionDtype, Any]]"; expected "dtype[Any]"

pandas/tests/reshape/concat/test_dataframe.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,28 @@
1212

1313

1414
class TestDataFrameConcat:
15+
@pytest.mark.xfail(reason="GH#62888 the `mi[2][1] is 1` check fails")
def test_concat_multiindex_level_bool_and_numeric(self):
    """
    Concatenate a bool-indexed frame with an int-indexed frame under dict keys.

    The inner level of the resulting MultiIndex is expected to hold the
    original labels unchanged: ``True``/``False`` stay bools and the integer
    ``1`` stays an ``int`` rather than collapsing into ``True``.
    """
    # GH#21108, GH#45101
    bool_indexed = DataFrame([123, 456], columns=["data"], index=[True, False])
    int_indexed = DataFrame(
        [55, 983, 69, 112, 0], columns=["data"], index=[1, 2, 3, 4, 99]
    )
    result = concat({"One": bool_indexed, "Two": int_indexed})

    # Build the expected index from explicit level arrays; object dtype keeps
    # the bool and int labels as distinct Python objects.
    outer_labels = ["One"] * 2 + ["Two"] * 5
    inner_labels = np.array([True, False, 1, 2, 3, 4, 99], dtype=object)
    mi = pd.MultiIndex.from_arrays([outer_labels, inner_labels])

    # in particular, the first two entries should not be cast to ints, and
    # the integer 1 that follows should not be cast to True
    assert mi[0][1] is True
    assert type(mi[2][1]) is int

    expected = DataFrame({"data": [123, 456, 55, 983, 69, 112, 0]}, index=mi)
    tm.assert_frame_equal(result, expected)
36+
1537
def test_concat_multiple_frames_dtypes(self):
1638
# GH#2759
1739
df1 = DataFrame(data=np.ones((10, 2)), columns=["foo", "bar"], dtype=np.float64)

pandas/tests/reshape/concat/test_series.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,18 @@
1414

1515

1616
class TestSeriesConcat:
17+
@pytest.mark.parametrize("bool_dtype", [bool, "boolean"])
@pytest.mark.parametrize("dtype", [np.int64, np.float64, "Int64", "Float64"])
def test_concat_bool_and_numeric(self, bool_dtype, dtype):
    """
    Concatenating a bool Series with a numeric Series gives object dtype.

    The bool entries must remain ``True``/``False`` and the numeric entries
    must remain ``int``/``float`` instead of being cast to a common type.
    """
    # GH#21108, GH#45101
    bools = Series([True, False], dtype=bool_dtype)
    numbers = Series([1, 2], dtype=dtype)

    result = concat([bools, numbers], ignore_index=True)
    expected = Series([True, False, 1, 2], dtype=object)

    # The leading element is still the bool singleton ...
    assert result.iloc[0] is True
    # ... and the first numeric element has not been turned into a bool.
    assert type(result.iloc[2]) in [int, float]  # i.e. not bool
    tm.assert_series_equal(result, expected)
28+
1729
def test_concat_series(self):
1830
ts = Series(
1931
np.arange(20, dtype=np.float64),

0 commit comments

Comments
 (0)