-
-
Notifications
You must be signed in to change notification settings - Fork 19.2k
WIP: Fix precision of DataFrame.combine and DataFrame.combine_first #62691
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c8ba339
f786763
ae99b3d
6e77fc9
ea7d2ba
301d85f
a6b461c
e15bde9
451621b
4fdc459
ef662a0
fefadcb
f80917d
bf69fad
016c64e
2928cee
1a53d48
7e6837f
444deaa
49ff1a5
747e8bc
c527bc0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -144,6 +144,10 @@ | |
| PeriodArray, | ||
| TimedeltaArray, | ||
| ) | ||
| from pandas.core.arrays.integer import ( | ||
| Int64Dtype, | ||
| UInt64Dtype, | ||
| ) | ||
| from pandas.core.arrays.sparse import SparseFrameAccessor | ||
| from pandas.core.arrays.string_ import StringDtype | ||
| from pandas.core.construction import ( | ||
|
|
@@ -9025,6 +9029,67 @@ def combine( | |
| 1 0.0 3.0 1.0 | ||
| 2 NaN 3.0 1.0 | ||
| """ | ||
|
|
||
| # GH#60128 Integers n where |n| > 2**53 would lose precision after align | ||
| # upcasts them to float. Avoid lossy conversion by preemptively promoting | ||
| # int64 and uint64 to their nullable ExtensionDtypes, Int64 and UInt64. | ||
| def _ensure_nullable_int64_dtypes(df: DataFrame) -> DataFrame: | ||
| """Promote int64/uint64 DataFrame columns to Int64/UInt64.""" | ||
| cast_map: dict[IndexLabel, DtypeObj] = {} | ||
| for col, dt in df.dtypes.items(): | ||
| if dt == np.int64: | ||
| cast_map[col] = Int64Dtype() | ||
| elif dt == np.uint64: | ||
| cast_map[col] = UInt64Dtype() | ||
|
|
||
| if cast_map: | ||
| df = df.astype(cast_map) | ||
| return df | ||
|
|
||
| # To maintain backwards compatibility, downcast the pre-promoted int64 | ||
| # columns of the combined DataFrame back to how they would have resolved. | ||
| # Consider just embracing nullable ExtensionDtypes instead, though. | ||
| def _revert_int64_dtype_promotion( | ||
| self_orig: DataFrame, other_orig: DataFrame, combined_df: DataFrame | ||
| ) -> DataFrame: | ||
| """Resolve the combined dtypes according to the original dtypes.""" | ||
| cast_map: dict[IndexLabel, DtypeObj] = {} | ||
| for col in combined_df.columns: | ||
| ser = combined_df[col] | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this is not going to be robust to non-unique columns. can you use |
||
| orig_dt_self = self_orig.dtypes.get(col) | ||
| orig_dt_other = other_orig.dtypes.get(col) | ||
|
|
||
| was_promoted = (orig_dt_self in [np.int64, np.uint64]) or ( | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. no risk for smaller int widths? |
||
| orig_dt_other in [np.int64, np.uint64] | ||
| ) | ||
|
|
||
| if was_promoted: | ||
| dtypes_to_resolve = [ | ||
| dt for dt in (orig_dt_self, orig_dt_other) if dt is not None | ||
| ] | ||
| if dtypes_to_resolve: | ||
| if isna(ser).any(): | ||
| # If there are NAs, we can't safely downcast back | ||
| # to int. Previously, we left the data as float64. | ||
| # However, converting large integers to float can | ||
| # lose precision, even if it's not immediately | ||
| # obvious (since we don't cast back). Consider | ||
| # embracing nullable ExtensionDtypes instead | ||
| # and dropping this whole restoration step. | ||
| dtypes_to_resolve.append(np.dtype(np.float64)) | ||
| target_type = find_common_type(dtypes_to_resolve) | ||
| cast_map[col] = target_type | ||
|
|
||
| if cast_map: | ||
| combined_df = combined_df.astype(cast_map) | ||
| return combined_df | ||
|
|
||
| # store originals and prepare for align | ||
| self_orig = self | ||
| other_orig = other | ||
| self = _ensure_nullable_int64_dtypes(self) | ||
| other = _ensure_nullable_int64_dtypes(other) | ||
|
|
||
| other_idxlen = len(other.index) # save for compare | ||
| other_columns = other.columns | ||
|
|
||
|
|
@@ -9092,6 +9157,9 @@ def combine( | |
|
|
||
| # convert_objects just in case | ||
| frame_result = self._constructor(result, index=new_index, columns=new_columns) | ||
| frame_result = _revert_int64_dtype_promotion( | ||
| self_orig, other_orig, frame_result | ||
| ) | ||
| return frame_result.__finalize__(self, method="combine") | ||
|
|
||
| def combine_first(self, other: DataFrame) -> DataFrame: | ||
|
|
@@ -9141,20 +9209,10 @@ def combine_first(self, other: DataFrame) -> DataFrame: | |
| 1 0.0 3.0 1.0 | ||
| 2 NaN 3.0 1.0 | ||
| """ | ||
| from pandas.core.computation import expressions | ||
|
|
||
| def combiner(x: Series, y: Series): | ||
| mask = x.isna()._values | ||
|
|
||
| x_values = x._values | ||
| y_values = y._values | ||
|
|
||
| # If the column y in other DataFrame is not in first DataFrame, | ||
| # just return y_values. | ||
| if y.name not in self.columns: | ||
| return y_values | ||
|
|
||
| return expressions.where(mask, y_values, x_values) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. does not using expressions.where have a perf impact? |
||
| # GH#60128 The combiner is supposed to preserve EA Dtypes. | ||
| return y if y.name not in self.columns else y.where(x.isna(), x) | ||
|
|
||
| if len(other) == 0: | ||
| combined = self.reindex( | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -398,6 +398,21 @@ def test_combine_first_string_dtype_only_na(self, nullable_string_dtype): | |
| ).set_index(["a", "b"]) | ||
| tm.assert_frame_equal(result, expected) | ||
|
|
||
| @pytest.mark.parametrize( | ||
| "wide_val, dtype", | ||
| ( | ||
| (1666880195890293744, "uint64"), | ||
| (-1666880195890293744, "int64"), | ||
| ), | ||
| ) | ||
| def test_combine_first_preserve_precision(self, wide_val, dtype): | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. looks like you made changes to both combine and combine_first, but only a test for combine_first. can you test the fixed bug in combine? |
||
| # GH#60128 | ||
| df1 = DataFrame({"A": [wide_val, 5]}, dtype=dtype) | ||
| df2 = DataFrame({"A": [6, 7, wide_val]}, dtype=dtype) | ||
| result = df1.combine_first(df2) | ||
| expected = DataFrame({"A": [wide_val, 5, wide_val]}, dtype=dtype) | ||
| tm.assert_frame_equal(result, expected) | ||
|
|
||
|
|
||
| @pytest.mark.parametrize( | ||
| "scalar1, scalar2", | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
align only upcasts to float when the frames have different indexes right? would it be simpler to just warn users and tell them to do alignment before calling .combine?