Skip to content

Commit 98eb09e

Browse files
Revert scikit-learn#10558 Deprecate axis parameter in imputer (scikit-learn#10635)
1 parent 34f12da commit 98eb09e

File tree

5 files changed: 21 additions and 60 deletions.

doc/modules/preprocessing.rst

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -614,10 +614,9 @@ that contain the missing values::
614614

615615
>>> import numpy as np
616616
>>> from sklearn.preprocessing import Imputer
617-
>>> imp = Imputer(missing_values='NaN', strategy='mean')
617+
>>> imp = Imputer(missing_values='NaN', strategy='mean', axis=0)
618618
>>> imp.fit([[1, 2], [np.nan, 3], [7, 6]])
619-
Imputer(axis=None, copy=True, missing_values='NaN', strategy='mean',
620-
verbose=0)
619+
Imputer(axis=0, copy=True, missing_values='NaN', strategy='mean', verbose=0)
621620
>>> X = [[np.nan, 2], [6, np.nan], [7, 6]]
622621
>>> print(imp.transform(X)) # doctest: +ELLIPSIS
623622
[[ 4. 2. ]
@@ -628,9 +627,9 @@ The :class:`Imputer` class also supports sparse matrices::
628627

629628
>>> import scipy.sparse as sp
630629
>>> X = sp.csc_matrix([[1, 2], [0, 3], [7, 6]])
631-
>>> imp = Imputer(missing_values=0, strategy='mean')
630+
>>> imp = Imputer(missing_values=0, strategy='mean', axis=0)
632631
>>> imp.fit(X)
633-
Imputer(axis=None, copy=True, missing_values=0, strategy='mean', verbose=0)
632+
Imputer(axis=0, copy=True, missing_values=0, strategy='mean', verbose=0)
634633
>>> X_test = sp.csc_matrix([[0, 2], [6, 0], [7, 6]])
635634
>>> print(imp.transform(X_test)) # doctest: +ELLIPSIS
636635
[[ 4. 2. ]

doc/whats_new/v0.20.rst

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -389,12 +389,6 @@ Outlier Detection models
389389
``raw_values`` parameter is deprecated as the shifted Mahalanobis distance
390390
will be always returned in 0.22. :issue:`9015` by `Nicolas Goix`_.
391391

392-
Preprocessing
393-
394-
- Deprecate ``axis`` parameter in :func:`preprocessing.Imputer`.
395-
:issue:`10558` by :user:`Baze Petrushev <petrushev>` and
396-
:user:`Hanmin Qin <qinhanmin2014>`.
397-
398392
Misc
399393

400394
- Changed warning type from UserWarning to ConvergenceWarning for failing

examples/plot_missing_values.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,8 @@
6565
X_missing[np.where(missing_samples)[0], missing_features] = 0
6666
y_missing = y_full.copy()
6767
estimator = Pipeline([("imputer", Imputer(missing_values=0,
68-
strategy="mean")),
68+
strategy="mean",
69+
axis=0)),
6970
("forest", RandomForestRegressor(random_state=0,
7071
n_estimators=100))])
7172
score = cross_val_score(estimator, X_missing, y_missing).mean()

sklearn/preprocessing/imputation.py

Lines changed: 15 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -82,19 +82,12 @@ class Imputer(BaseEstimator, TransformerMixin):
8282
- If "most_frequent", then replace missing using the most frequent
8383
value along the axis.
8484
85-
axis : integer, optional (default=None)
85+
axis : integer, optional (default=0)
8686
The axis along which to impute.
8787
8888
- If `axis=0`, then impute along columns.
8989
- If `axis=1`, then impute along rows.
9090
91-
.. deprecated:: 0.20
92-
Parameter ``axis`` has been deprecated in 0.20 and will be removed
93-
in 0.22. Future (and default) behavior is equivalent to ``axis=0``
94-
(impute along columns). Row-wise imputation can be performed with
95-
FunctionTransformer (e.g.,
96-
``FunctionTransformer(lambda X: Imputer().fit_transform(X.T).T)``).
97-
9891
verbose : integer, optional (default=0)
9992
Controls the verbosity of the imputer.
10093
@@ -122,7 +115,7 @@ class Imputer(BaseEstimator, TransformerMixin):
122115
contain missing values).
123116
"""
124117
def __init__(self, missing_values="NaN", strategy="mean",
125-
axis=None, verbose=0, copy=True):
118+
axis=0, verbose=0, copy=True):
126119
self.missing_values = missing_values
127120
self.strategy = strategy
128121
self.axis = axis
@@ -149,37 +142,27 @@ def fit(self, X, y=None):
149142
" got strategy={1}".format(allowed_strategies,
150143
self.strategy))
151144

152-
if self.axis is None:
153-
self._axis = 0
154-
else:
155-
warnings.warn("Parameter 'axis' has been deprecated in 0.20 and "
156-
"will be removed in 0.22. Future (and default) "
157-
"behavior is equivalent to 'axis=0' (impute along "
158-
"columns). Row-wise imputation can be performed "
159-
"with FunctionTransformer.", DeprecationWarning)
160-
self._axis = self.axis
161-
162-
if self._axis not in [0, 1]:
145+
if self.axis not in [0, 1]:
163146
raise ValueError("Can only impute missing values on axis 0 and 1, "
164-
" got axis={0}".format(self._axis))
147+
" got axis={0}".format(self.axis))
165148

166149
# Since two different arrays can be provided in fit(X) and
167150
# transform(X), the imputation data will be computed in transform()
168151
# when the imputation is done per sample (i.e., when axis=1).
169-
if self._axis == 0:
152+
if self.axis == 0:
170153
X = check_array(X, accept_sparse='csc', dtype=np.float64,
171154
force_all_finite=False)
172155

173156
if sparse.issparse(X):
174157
self.statistics_ = self._sparse_fit(X,
175158
self.strategy,
176159
self.missing_values,
177-
self._axis)
160+
self.axis)
178161
else:
179162
self.statistics_ = self._dense_fit(X,
180163
self.strategy,
181164
self.missing_values,
182-
self._axis)
165+
self.axis)
183166

184167
return self
185168

@@ -322,7 +305,7 @@ def transform(self, X):
322305
X : {array-like, sparse matrix}, shape = [n_samples, n_features]
323306
The input data to complete.
324307
"""
325-
if self._axis == 0:
308+
if self.axis == 0:
326309
check_is_fitted(self, 'statistics_')
327310
X = check_array(X, accept_sparse='csc', dtype=FLOAT_DTYPES,
328311
force_all_finite=False, copy=self.copy)
@@ -342,27 +325,27 @@ def transform(self, X):
342325
statistics = self._sparse_fit(X,
343326
self.strategy,
344327
self.missing_values,
345-
self._axis)
328+
self.axis)
346329

347330
else:
348331
statistics = self._dense_fit(X,
349332
self.strategy,
350333
self.missing_values,
351-
self._axis)
334+
self.axis)
352335

353336
# Delete the invalid rows/columns
354337
invalid_mask = np.isnan(statistics)
355338
valid_mask = np.logical_not(invalid_mask)
356339
valid_statistics = statistics[valid_mask]
357340
valid_statistics_indexes = np.where(valid_mask)[0]
358-
missing = np.arange(X.shape[not self._axis])[invalid_mask]
341+
missing = np.arange(X.shape[not self.axis])[invalid_mask]
359342

360-
if self._axis == 0 and invalid_mask.any():
343+
if self.axis == 0 and invalid_mask.any():
361344
if self.verbose:
362345
warnings.warn("Deleting features without "
363346
"observed values: %s" % missing)
364347
X = X[:, valid_statistics_indexes]
365-
elif self._axis == 1 and invalid_mask.any():
348+
elif self.axis == 1 and invalid_mask.any():
366349
raise ValueError("Some rows only contain "
367350
"missing values: %s" % missing)
368351

@@ -379,10 +362,10 @@ def transform(self, X):
379362
X = X.toarray()
380363

381364
mask = _get_mask(X, self.missing_values)
382-
n_missing = np.sum(mask, axis=self._axis)
365+
n_missing = np.sum(mask, axis=self.axis)
383366
values = np.repeat(valid_statistics, n_missing)
384367

385-
if self._axis == 0:
368+
if self.axis == 0:
386369
coordinates = np.where(mask.transpose())[::-1]
387370
else:
388371
coordinates = mask

sklearn/preprocessing/tests/test_imputation.py

Lines changed: 0 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@
77
from sklearn.utils.testing import assert_array_almost_equal
88
from sklearn.utils.testing import assert_raises
99
from sklearn.utils.testing import assert_false
10-
from sklearn.utils.testing import assert_warns_message
11-
from sklearn.utils.testing import ignore_warnings
1210

1311
from sklearn.preprocessing.imputation import Imputer
1412
from sklearn.pipeline import Pipeline
@@ -17,7 +15,6 @@
1715
from sklearn.random_projection import sparse_random_matrix
1816

1917

20-
@ignore_warnings(category=DeprecationWarning) # To be removed in 0.22
2118
def _check_statistics(X, X_true,
2219
strategy, statistics, missing_values):
2320
"""Utility function for testing imputation for a given strategy.
@@ -301,7 +298,6 @@ def test_imputation_pickle():
301298
)
302299

303300

304-
@ignore_warnings(category=DeprecationWarning) # To be removed in 0.22
305301
def test_imputation_copy():
306302
# Test imputation with copy
307303
X_orig = sparse_random_matrix(5, 5, density=0.75, random_state=0)
@@ -368,15 +364,3 @@ def test_imputation_copy():
368364

369365
# Note: If X is sparse and if missing_values=0, then a (dense) copy of X is
370366
# made, even if copy=False.
371-
372-
373-
def test_deprecated_imputer_axis():
374-
depr_message = ("Parameter 'axis' has been deprecated in 0.20 and will "
375-
"be removed in 0.22. Future (and default) behavior is "
376-
"equivalent to 'axis=0' (impute along columns). Row-wise "
377-
"imputation can be performed with FunctionTransformer.")
378-
X = sparse_random_matrix(5, 5, density=0.75, random_state=0)
379-
imputer = Imputer(missing_values=0, axis=0)
380-
assert_warns_message(DeprecationWarning, depr_message, imputer.fit, X)
381-
imputer = Imputer(missing_values=0, axis=1)
382-
assert_warns_message(DeprecationWarning, depr_message, imputer.fit, X)

0 commit comments

Comments
 (0)