1919
2020
2121class KBinsDiscretizer (TransformerMixin , BaseEstimator ):
22- """Bin continuous data into intervals.
22+ """
23+ Bin continuous data into intervals.
2324
2425 Read more in the :ref:`User Guide <preprocessing_discretization>`.
2526
@@ -63,6 +64,27 @@ class KBinsDiscretizer(TransformerMixin, BaseEstimator):
6364 The edges of each bin. Contain arrays of varying shapes ``(n_bins_, )``.
6465 Ignored features will have empty arrays.
6566
67+ See Also
68+ --------
69+ sklearn.preprocessing.Binarizer : Class used to bin values as ``0`` or
70+ ``1`` based on a parameter ``threshold``.
71+
72+ Notes
73+ -----
74+ In bin edges for feature ``i``, the first and last values are used only for
75+ ``inverse_transform``. During transform, bin edges are extended to::
76+
77+ np.concatenate([[-np.inf], bin_edges_[i][1:-1], [np.inf]])
78+
79+ You can combine ``KBinsDiscretizer`` with
80+ :class:`sklearn.compose.ColumnTransformer` if you only want to preprocess
81+ part of the features.
82+
83+ ``KBinsDiscretizer`` might produce constant features (e.g., when
84+ ``encode = 'onehot'`` and certain bins do not contain any data).
85+ These features can be removed with feature selection algorithms
86+ (e.g., :class:`sklearn.feature_selection.VarianceThreshold`).
87+
6688 Examples
6789 --------
6890 >>> X = [[-2, 1, -4, -1],
@@ -91,27 +113,6 @@ class KBinsDiscretizer(TransformerMixin, BaseEstimator):
91113 [-0.5, 2.5, -2.5, -0.5],
92114 [ 0.5, 3.5, -1.5, 0.5],
93115 [ 0.5, 3.5, -1.5, 1.5]])
94-
95- Notes
96- -----
97- In bin edges for feature ``i``, the first and last values are used only for
98- ``inverse_transform``. During transform, bin edges are extended to::
99-
100- np.concatenate([-np.inf, bin_edges_[i][1:-1], np.inf])
101-
102- You can combine ``KBinsDiscretizer`` with
103- :class:`sklearn.compose.ColumnTransformer` if you only want to preprocess
104- part of the features.
105-
106- ``KBinsDiscretizer`` might produce constant features (e.g., when
107- ``encode = 'onehot'`` and certain bins do not contain any data).
108- These features can be removed with feature selection algorithms
109- (e.g., :class:`sklearn.feature_selection.VarianceThreshold`).
110-
111- See also
112- --------
113- sklearn.preprocessing.Binarizer : class used to bin values as ``0`` or
114- ``1`` based on a parameter ``threshold``.
115116 """
116117
117118 def __init__ (self , n_bins = 5 , encode = 'onehot' , strategy = 'quantile' ):
@@ -120,14 +121,17 @@ def __init__(self, n_bins=5, encode='onehot', strategy='quantile'):
120121 self .strategy = strategy
121122
122123 def fit (self , X , y = None ):
123- """Fits the estimator.
124+ """
125+ Fit the estimator.
124126
125127 Parameters
126128 ----------
127129 X : numeric array-like, shape (n_samples, n_features)
128130 Data to be discretized.
129131
130- y : ignored
132+ y : None
133+ Ignored. This parameter exists only for compatibility with
134+ :class:`sklearn.pipeline.Pipeline`.
131135
132136 Returns
133137 -------
@@ -241,7 +245,8 @@ def _validate_n_bins(self, n_features):
241245 return n_bins
242246
243247 def transform (self , X ):
244- """Discretizes the data.
248+ """
249+ Discretize the data.
245250
246251 Parameters
247252 ----------
@@ -279,7 +284,8 @@ def transform(self, X):
279284 return self ._encoder .transform (Xt )
280285
281286 def inverse_transform (self , Xt ):
282- """Transforms discretized data back to original feature space.
287+ """
288+ Transform discretized data back to original feature space.
283289
284290 Note that this function does not regenerate the original data
285291 due to discretization rounding.
0 commit comments