Skip to content

Commit dd00e0f

Browse files
BUG: Fix precision loss in DataFrame.combine_first (pandas-dev#62814)
1 parent 9991295 commit dd00e0f

File tree

3 files changed

+18
-12
lines changed

3 files changed

+18
-12
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1014,6 +1014,7 @@ Numeric
10141014
^^^^^^^
10151015
- Bug in :func:`api.types.infer_dtype` returning "mixed" for complex and ``pd.NA`` mix (:issue:`61976`)
10161016
- Bug in :func:`api.types.infer_dtype` returning "mixed-integer-float" for float and ``pd.NA`` mix (:issue:`61621`)
1017+
- Bug in :meth:`DataFrame.combine_first` where Int64 and UInt64 integers with absolute value greater than ``2**53`` would lose precision after the operation (:issue:`60128`)
10171018
- Bug in :meth:`DataFrame.corr` where numerical precision errors resulted in correlations above ``1.0`` (:issue:`61120`)
10181019
- Bug in :meth:`DataFrame.cov` where potentially incorrect results or other errors were produced; it now raises a ``TypeError`` instead (:issue:`53115`)
10191020
- Bug in :meth:`DataFrame.quantile` where the column type was not preserved when ``numeric_only=True`` with a list-like ``q`` produced an empty result (:issue:`59035`)

pandas/core/frame.py

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9165,20 +9165,10 @@ def combine_first(self, other: DataFrame) -> DataFrame:
91659165
1 0.0 3.0 1.0
91669166
2 NaN 3.0 1.0
91679167
"""
9168-
from pandas.core.computation import expressions
91699168

91709169
def combiner(x: Series, y: Series):
9171-
mask = x.isna()._values
9172-
9173-
x_values = x._values
9174-
y_values = y._values
9175-
9176-
# If the column y in other DataFrame is not in first DataFrame,
9177-
# just return y_values.
9178-
if y.name not in self.columns:
9179-
return y_values
9180-
9181-
return expressions.where(mask, y_values, x_values)
9170+
# GH#60128: combine with Series.where so that extension-array (EA) dtypes are preserved.
9171+
return y if y.name not in self.columns else y.where(x.isna(), x)
91829172

91839173
if len(other) == 0:
91849174
combined = self.reindex(

pandas/tests/frame/methods/test_combine_first.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,21 @@ def test_combine_first_string_dtype_only_na(self, nullable_string_dtype):
398398
).set_index(["a", "b"])
399399
tm.assert_frame_equal(result, expected)
400400

401+
@pytest.mark.parametrize(
402+
"wide_val, dtype",
403+
(
404+
(1666880195890293744, "UInt64"),
405+
(-1666880195890293744, "Int64"),
406+
),
407+
)
408+
def test_combine_first_preserve_EA_precision(self, wide_val, dtype):
409+
# GH#60128
410+
df1 = DataFrame({"A": [wide_val, 5]}, dtype=dtype)
411+
df2 = DataFrame({"A": [6, 7, wide_val]}, dtype=dtype)
412+
result = df1.combine_first(df2)
413+
expected = DataFrame({"A": [wide_val, 5, wide_val]}, dtype=dtype)
414+
tm.assert_frame_equal(result, expected)
415+
401416

402417
@pytest.mark.parametrize(
403418
"scalar1, scalar2",

0 commit comments

Comments
 (0)