1010# License: BSD
1111
1212from collections import defaultdict
13+ from itertools import islice
1314
1415import numpy as np
1516from scipy import sparse
@@ -41,7 +42,7 @@ class Pipeline(_BaseComposition):
4142 names and the parameter name separated by a '__', as in the example below.
4243 A step's estimator may be replaced entirely by setting the parameter
4344 with its name to another estimator, or a transformer removed by setting
44- to None.
45+ it to ``'passthrough'`` or ``None``.
4546
4647 Read more in the :ref:`User Guide <pipeline>`.
4748
@@ -158,19 +159,34 @@ def _validate_steps(self):
158159 estimator = estimators [- 1 ]
159160
160161 for t in transformers :
161- if t is None :
162+ if t is None or t == 'passthrough' :
162163 continue
163164 if (not (hasattr (t , "fit" ) or hasattr (t , "fit_transform" )) or not
164165 hasattr (t , "transform" )):
165166 raise TypeError ("All intermediate steps should be "
166- "transformers and implement fit and transform."
167- " '%s' (type %s) doesn't" % (t , type (t )))
167+ "transformers and implement fit and transform "
168+ "or be the string 'passthrough' "
169+ "'%s' (type %s) doesn't" % (t , type (t )))
168170
169171 # We allow last estimator to be None as an identity transformation
170- if estimator is not None and not hasattr (estimator , "fit" ):
171- raise TypeError ("Last step of Pipeline should implement fit. "
172- "'%s' (type %s) doesn't"
173- % (estimator , type (estimator )))
172+ if (estimator is not None and estimator != 'passthrough'
173+ and not hasattr (estimator , "fit" )):
174+ raise TypeError (
175+ "Last step of Pipeline should implement fit "
176+ "or be the string 'passthrough'. "
177+ "'%s' (type %s) doesn't" % (estimator , type (estimator )))
178+
def _iter(self, with_final=True):
    """Yield the pipeline's ``(name, transformer)`` pairs.

    Steps whose transformer is ``None`` or the string ``'passthrough'``
    are silently skipped.

    Parameters
    ----------
    with_final : bool, default=True
        When False, the final step of the pipeline is excluded.
    """
    # islice bounds the iteration without copying self.steps.
    stop = len(self.steps) if with_final else len(self.steps) - 1
    for name, trans in islice(self.steps, 0, stop):
        if not (trans is None or trans == 'passthrough'):
            yield name, trans
174190
175191 @property
176192 def _estimator_type (self ):
@@ -183,7 +199,8 @@ def named_steps(self):
183199
@property
def _final_estimator(self):
    """Last step of the pipeline, with ``None`` normalized to 'passthrough'."""
    last = self.steps[-1][1]
    if last is None:
        return 'passthrough'
    return last
187204
188205 # Estimator interface
189206
@@ -202,37 +219,35 @@ def _fit(self, X, y=None, **fit_params):
202219 step , param = pname .split ('__' , 1 )
203220 fit_params_steps [step ][param ] = pval
204221 Xt = X
205- for step_idx , (name , transformer ) in enumerate (self .steps [:- 1 ]):
206- if transformer is None :
207- pass
208- else :
209- if hasattr (memory , 'location' ):
210- # joblib >= 0.12
211- if memory .location is None :
212- # we do not clone when caching is disabled to
213- # preserve backward compatibility
214- cloned_transformer = transformer
215- else :
216- cloned_transformer = clone (transformer )
217- elif hasattr (memory , 'cachedir' ):
218- # joblib < 0.11
219- if memory .cachedir is None :
220- # we do not clone when caching is disabled to
221- # preserve backward compatibility
222- cloned_transformer = transformer
223- else :
224- cloned_transformer = clone (transformer )
222+ for step_idx , (name , transformer ) in enumerate (
223+ self ._iter (with_final = False )):
224+ if hasattr (memory , 'location' ):
225+ # joblib >= 0.12
226+ if memory .location is None :
227+ # we do not clone when caching is disabled to
228+ # preserve backward compatibility
229+ cloned_transformer = transformer
225230 else :
226231 cloned_transformer = clone (transformer )
227- # Fit or load from cache the current transfomer
228- Xt , fitted_transformer = fit_transform_one_cached (
229- cloned_transformer , Xt , y , None ,
230- ** fit_params_steps [name ])
231- # Replace the transformer of the step with the fitted
232- # transformer. This is necessary when loading the transformer
233- # from the cache.
234- self .steps [step_idx ] = (name , fitted_transformer )
235- if self ._final_estimator is None :
232+ elif hasattr (memory , 'cachedir' ):
233+ # joblib < 0.11
234+ if memory .cachedir is None :
235+ # we do not clone when caching is disabled to
236+ # preserve backward compatibility
237+ cloned_transformer = transformer
238+ else :
239+ cloned_transformer = clone (transformer )
240+ else :
241+ cloned_transformer = clone (transformer )
242+ # Fit or load from cache the current transfomer
243+ Xt , fitted_transformer = fit_transform_one_cached (
244+ cloned_transformer , Xt , y , None ,
245+ ** fit_params_steps [name ])
246+ # Replace the transformer of the step with the fitted
247+ # transformer. This is necessary when loading the transformer
248+ # from the cache.
249+ self .steps [step_idx ] = (name , fitted_transformer )
250+ if self ._final_estimator == 'passthrough' :
236251 return Xt , {}
237252 return Xt , fit_params_steps [self .steps [- 1 ][0 ]]
238253
@@ -263,7 +278,7 @@ def fit(self, X, y=None, **fit_params):
263278 This estimator
264279 """
265280 Xt , fit_params = self ._fit (X , y , ** fit_params )
266- if self ._final_estimator is not None :
281+ if self ._final_estimator != 'passthrough' :
267282 self ._final_estimator .fit (Xt , y , ** fit_params )
268283 return self
269284
@@ -298,7 +313,7 @@ def fit_transform(self, X, y=None, **fit_params):
298313 Xt , fit_params = self ._fit (X , y , ** fit_params )
299314 if hasattr (last_step , 'fit_transform' ):
300315 return last_step .fit_transform (Xt , y , ** fit_params )
301- elif last_step is None :
316+ elif last_step == 'passthrough' :
302317 return Xt
303318 else :
304319 return last_step .fit (Xt , y , ** fit_params ).transform (Xt )
@@ -326,9 +341,8 @@ def predict(self, X, **predict_params):
326341 y_pred : array-like
327342 """
328343 Xt = X
329- for name , transform in self .steps [:- 1 ]:
330- if transform is not None :
331- Xt = transform .transform (Xt )
344+ for name , transform in self ._iter (with_final = False ):
345+ Xt = transform .transform (Xt )
332346 return self .steps [- 1 ][- 1 ].predict (Xt , ** predict_params )
333347
334348 @if_delegate_has_method (delegate = '_final_estimator' )
@@ -376,9 +390,8 @@ def predict_proba(self, X):
376390 y_proba : array-like, shape = [n_samples, n_classes]
377391 """
378392 Xt = X
379- for name , transform in self .steps [:- 1 ]:
380- if transform is not None :
381- Xt = transform .transform (Xt )
393+ for name , transform in self ._iter (with_final = False ):
394+ Xt = transform .transform (Xt )
382395 return self .steps [- 1 ][- 1 ].predict_proba (Xt )
383396
384397 @if_delegate_has_method (delegate = '_final_estimator' )
@@ -396,9 +409,8 @@ def decision_function(self, X):
396409 y_score : array-like, shape = [n_samples, n_classes]
397410 """
398411 Xt = X
399- for name , transform in self .steps [:- 1 ]:
400- if transform is not None :
401- Xt = transform .transform (Xt )
412+ for name , transform in self ._iter (with_final = False ):
413+ Xt = transform .transform (Xt )
402414 return self .steps [- 1 ][- 1 ].decision_function (Xt )
403415
404416 @if_delegate_has_method (delegate = '_final_estimator' )
@@ -416,9 +428,8 @@ def predict_log_proba(self, X):
416428 y_score : array-like, shape = [n_samples, n_classes]
417429 """
418430 Xt = X
419- for name , transform in self .steps [:- 1 ]:
420- if transform is not None :
421- Xt = transform .transform (Xt )
431+ for name , transform in self ._iter (with_final = False ):
432+ Xt = transform .transform (Xt )
422433 return self .steps [- 1 ][- 1 ].predict_log_proba (Xt )
423434
424435 @property
@@ -440,15 +451,14 @@ def transform(self):
440451 """
441452 # _final_estimator is None or has transform, otherwise attribute error
442453 # XXX: Handling the None case means we can't use if_delegate_has_method
443- if self ._final_estimator is not None :
454+ if self ._final_estimator != 'passthrough' :
444455 self ._final_estimator .transform
445456 return self ._transform
446457
def _transform(self, X):
    """Apply each step's ``transform`` to X in order, skipping passthrough steps."""
    result = X
    for _, step in self._iter():
        result = step.transform(result)
    return result
453463
454464 @property
@@ -471,16 +481,15 @@ def inverse_transform(self):
471481 """
472482 # raise AttributeError if necessary for hasattr behaviour
473483 # XXX: Handling the None case means we can't use if_delegate_has_method
474- for name , transform in self .steps :
475- if transform is not None :
476- transform .inverse_transform
484+ for _ , transform in self ._iter ():
485+ transform .inverse_transform
477486 return self ._inverse_transform
478487
def _inverse_transform(self, X):
    """Apply each step's ``inverse_transform`` to X in reverse order.

    Passthrough steps are skipped; the list is materialized so the
    iteration can be reversed.
    """
    result = X
    for _, step in reversed(list(self._iter())):
        result = step.inverse_transform(result)
    return result
485494
486495 @if_delegate_has_method (delegate = '_final_estimator' )
@@ -506,9 +515,8 @@ def score(self, X, y=None, sample_weight=None):
506515 score : float
507516 """
508517 Xt = X
509- for name , transform in self .steps [:- 1 ]:
510- if transform is not None :
511- Xt = transform .transform (Xt )
518+ for name , transform in self ._iter (with_final = False ):
519+ Xt = transform .transform (Xt )
512520 score_params = {}
513521 if sample_weight is not None :
514522 score_params ['sample_weight' ] = sample_weight
@@ -527,7 +535,11 @@ def _pairwise(self):
527535def _name_estimators (estimators ):
528536 """Generate names for estimators."""
529537
530- names = [type (estimator ).__name__ .lower () for estimator in estimators ]
538+ names = [
539+ estimator
540+ if isinstance (estimator , str ) else type (estimator ).__name__ .lower ()
541+ for estimator in estimators
542+ ]
531543 namecount = defaultdict (int )
532544 for est , name in zip (estimators , names ):
533545 namecount [name ] += 1
0 commit comments