@@ -60,12 +60,12 @@ class RFE(BaseEstimator, MetaEstimatorMixin, SelectorMixin):
6060 are selected.
6161
6262 step : int or float, optional (default=1)
63- If greater than or equal to 1, then `step` corresponds to the (integer)
64- number of features to remove at each iteration.
65- If within (0.0, 1.0), then `step` corresponds to the percentage
63+ If greater than or equal to 1, then ``step`` corresponds to the
64+ (integer) number of features to remove at each iteration.
65+ If within (0.0, 1.0), then ``step`` corresponds to the percentage
6666 (rounded down) of features to remove at each iteration.
6767
68- verbose : int, default=0
68+ verbose : int, (default=0)
6969 Controls verbosity of output.
7070
7171 Attributes
@@ -335,10 +335,18 @@ class RFECV(RFE, MetaEstimatorMixin):
335335 attribute or through a ``feature_importances_`` attribute.
336336
337337 step : int or float, optional (default=1)
338- If greater than or equal to 1, then `step` corresponds to the (integer)
339- number of features to remove at each iteration.
340- If within (0.0, 1.0), then `step` corresponds to the percentage
338+ If greater than or equal to 1, then ``step`` corresponds to the
339+ (integer) number of features to remove at each iteration.
340+ If within (0.0, 1.0), then ``step`` corresponds to the percentage
341341 (rounded down) of features to remove at each iteration.
342+ Note that the last iteration may remove fewer than ``step`` features in
343+ order to reach ``min_features_to_select``.
344+
345+ min_features_to_select : int, (default=1)
346+ The minimum number of features to be selected. This number of features
347+ will always be scored, even if the difference between the original
348+ feature count and ``min_features_to_select`` isn't divisible by
349+ ``step``.
342350
343351 cv : int, cross-validation generator or an iterable, optional
344352 Determines the cross-validation splitting strategy.
@@ -358,20 +366,20 @@ class RFECV(RFE, MetaEstimatorMixin):
358366 cross-validation strategies that can be used here.
359367
360368 .. versionchanged:: 0.20
361- ``cv`` default value if None will change from 3-fold to 5-fold
369+ ``cv`` default value of None will change from 3-fold to 5-fold
362370 in v0.22.
363371
364- scoring : string, callable or None, optional, default: None
372+ scoring : string, callable or None, optional, (default=None)
365373 A string (see model evaluation documentation) or
366374 a scorer callable object / function with signature
367375 ``scorer(estimator, X, y)``.
368376
369- verbose : int, default=0
377+ verbose : int, (default=0)
370378 Controls verbosity of output.
371379
372- n_jobs : int, default 1
380+ n_jobs : int, (default=1)
373381 Number of cores to run in parallel while fitting across folds.
374- Defaults to 1 core. If `n_jobs=-1`, then number of jobs is set
382+ Defaults to 1 core. If ``n_jobs=-1``, then number of jobs is set
375383 to number of cores.
376384
377385 Attributes
@@ -399,7 +407,8 @@ class RFECV(RFE, MetaEstimatorMixin):
399407
400408 Notes
401409 -----
402- The size of ``grid_scores_`` is equal to ceil((n_features - 1) / step) + 1,
410+ The size of ``grid_scores_`` is equal to
411+ ``ceil((n_features - min_features_to_select) / step) + 1``,
403412 where step is the number of features removed at each iteration.
404413
405414 Examples
@@ -431,14 +440,15 @@ class RFECV(RFE, MetaEstimatorMixin):
431440 for cancer classification using support vector machines",
432441 Mach. Learn., 46(1-3), 389--422, 2002.
433442 """
434- def __init__ (self , estimator , step = 1 , cv = 'warn' , scoring = None , verbose = 0 ,
435- n_jobs = None ):
443+ def __init__ (self , estimator , step = 1 , min_features_to_select = 1 , cv = 'warn' ,
444+ scoring = None , verbose = 0 , n_jobs = None ):
436445 self .estimator = estimator
437446 self .step = step
438447 self .cv = cv
439448 self .scoring = scoring
440449 self .verbose = verbose
441450 self .n_jobs = n_jobs
451+ self .min_features_to_select = min_features_to_select
442452
443453 def fit (self , X , y , groups = None ):
444454 """Fit the RFE model and automatically tune the number of selected
@@ -464,7 +474,6 @@ def fit(self, X, y, groups=None):
464474 cv = check_cv (self .cv , y , is_classifier (self .estimator ))
465475 scorer = check_scoring (self .estimator , scoring = self .scoring )
466476 n_features = X .shape [1 ]
467- n_features_to_select = 1
468477
469478 if 0.0 < self .step < 1.0 :
470479 step = int (max (1 , self .step * n_features ))
@@ -473,8 +482,10 @@ def fit(self, X, y, groups=None):
473482 if step <= 0 :
474483 raise ValueError ("Step must be >0" )
475484
485+ # Build an RFE object, which will evaluate and score each possible
486+ # feature count, down to self.min_features_to_select
476487 rfe = RFE (estimator = self .estimator ,
477- n_features_to_select = n_features_to_select ,
488+ n_features_to_select = self . min_features_to_select ,
478489 step = self .step , verbose = self .verbose )
479490
480491 # Determine the number of subsets of features by fitting across
@@ -504,7 +515,7 @@ def fit(self, X, y, groups=None):
504515 argmax_idx = len (scores ) - np .argmax (scores_rev ) - 1
505516 n_features_to_select = max (
506517 n_features - (argmax_idx * step ),
507- n_features_to_select )
518+ self . min_features_to_select )
508519
509520 # Re-execute an elimination with best_k over the whole set
510521 rfe = RFE (estimator = self .estimator ,
0 commit comments