@@ -117,7 +117,7 @@ def f_oneway(*args):
117117
118118
119119def f_classif (X , y ):
120- """Compute the Anova F-value for the provided sample
120+ """Compute the ANOVA F-value for the provided sample.
121121
122122 Parameters
123123 ----------
@@ -134,6 +134,11 @@ def f_classif(X, y):
134134
135135 pval : array, shape = [n_features,]
136136 The set of p-values.
137+
138+ See also
139+ --------
140+ chi2: Chi-squared stats of non-negative features for classification tasks.
141+ f_regression: F-value between label/feature for regression tasks.
137142 """
138143 X , y = check_X_y (X , y , ['csr' , 'csc' , 'coo' ])
139144 args = [X [safe_mask (X , y == k )] for k in np .unique (y )]
@@ -159,12 +164,12 @@ def _chisquare(f_obs, f_exp):
159164
160165
161166def chi2 (X , y ):
162- """Compute chi-squared statistic for each class/ feature combination .
167+ """Compute chi-squared stats between each non-negative feature and class.
163168
164169 This score can be used to select the n_features features with the
165170 highest values for the test chi-squared statistic from X, which must
166- contain booleans or frequencies (e.g., term counts in document
167- classification), relative to the classes.
171+ contain only non-negative features such as booleans or frequencies
172+ (e.g., term counts in document classification), relative to the classes.
168173
169174 Recall that the chi-square test measures dependence between stochastic
170175 variables, so using this function "weeds out" the features that are the
@@ -189,6 +194,11 @@ def chi2(X, y):
189194 Notes
190195 -----
191196 Complexity of this algorithm is O(n_classes * n_features).
197+
198+ See also
199+ --------
200+ f_classif: ANOVA F-value between label/feature for classification tasks.
201+ f_regression: F-value between label/feature for regression tasks.
192202 """
193203
194204 # XXX: we might want to do some of the following in logspace instead for
@@ -211,16 +221,17 @@ def chi2(X, y):
211221
212222
213223def f_regression (X , y , center = True ):
214- """Univariate linear regression tests
224+ """Univariate linear regression tests.
215225
216226 Quick linear model for testing the effect of a single regressor,
217227 sequentially for many regressors.
218228
219229 This is done in 3 steps:
220- 1. the regressor of interest and the data are orthogonalized
221- wrt constant regressors
222- 2. the cross correlation between data and regressors is computed
223- 3. it is converted to an F score then to a p-value
230+
231+ 1. The regressor of interest and the data are orthogonalized
232+ wrt constant regressors.
233+ 2. The cross correlation between data and regressors is computed.
234+ 3. It is converted to an F score then to a p-value.
224235
225236 Parameters
226237 ----------
@@ -240,6 +251,11 @@ def f_regression(X, y, center=True):
240251
241252 pval : array, shape=(n_features,)
242253 p-values of F-scores.
254+
255+ See also
256+ --------
257+ f_classif: ANOVA F-value between label/feature for classification tasks.
258+ chi2: Chi-squared stats of non-negative features for classification tasks.
243259 """
244260 if issparse (X ) and center :
245261 raise ValueError ("center=True only allowed for dense data" )
@@ -341,6 +357,16 @@ class SelectPercentile(_BaseFilter):
341357 Ties between features with equal scores will be broken in an unspecified
342358 way.
343359
360+ See also
361+ --------
362+ f_classif: ANOVA F-value between label/feature for classification tasks.
363+ chi2: Chi-squared stats of non-negative features for classification tasks.
364+ f_regression: F-value between label/feature for regression tasks.
365+ SelectKBest: Select features based on the k highest scores.
366+ SelectFpr: Select features based on a false positive rate test.
367+ SelectFdr: Select features based on an estimated false discovery rate.
368+ SelectFwe: Select features based on family-wise error rate.
369+ GenericUnivariateSelect: Univariate feature selector with configurable mode.
344370 """
345371
346372 def __init__ (self , score_func = f_classif , percentile = 10 ):
@@ -399,6 +425,16 @@ class SelectKBest(_BaseFilter):
399425 Ties between features with equal scores will be broken in an unspecified
400426 way.
401427
428+ See also
429+ --------
430+ f_classif: ANOVA F-value between label/feature for classification tasks.
431+ chi2: Chi-squared stats of non-negative features for classification tasks.
432+ f_regression: F-value between label/feature for regression tasks.
433+ SelectPercentile: Select features based on percentile of the highest scores.
434+ SelectFpr: Select features based on a false positive rate test.
435+ SelectFdr: Select features based on an estimated false discovery rate.
436+ SelectFwe: Select features based on family-wise error rate.
437+ GenericUnivariateSelect: Univariate feature selector with configurable mode.
402438 """
403439
404440 def __init__ (self , score_func = f_classif , k = 10 ):
@@ -450,6 +486,17 @@ class SelectFpr(_BaseFilter):
450486
451487 pvalues_ : array-like, shape=(n_features,)
452488 p-values of feature scores.
489+
490+ See also
491+ --------
492+ f_classif: ANOVA F-value between label/feature for classification tasks.
493+ chi2: Chi-squared stats of non-negative features for classification tasks.
494+ f_regression: F-value between label/feature for regression tasks.
495+ SelectPercentile: Select features based on percentile of the highest scores.
496+ SelectKBest: Select features based on the k highest scores.
497+ SelectFdr: Select features based on an estimated false discovery rate.
498+ SelectFwe: Select features based on family-wise error rate.
499+ GenericUnivariateSelect: Univariate feature selector with configurable mode.
453500 """
454501
455502 def __init__ (self , score_func = f_classif , alpha = 5e-2 ):
@@ -490,6 +537,16 @@ class SelectFdr(_BaseFilter):
490537 ----------
491538 http://en.wikipedia.org/wiki/False_discovery_rate
492539
540+ See also
541+ --------
542+ f_classif: ANOVA F-value between label/feature for classification tasks.
543+ chi2: Chi-squared stats of non-negative features for classification tasks.
544+ f_regression: F-value between label/feature for regression tasks.
545+ SelectPercentile: Select features based on percentile of the highest scores.
546+ SelectKBest: Select features based on the k highest scores.
547+ SelectFpr: Select features based on a false positive rate test.
548+ SelectFwe: Select features based on family-wise error rate.
549+ GenericUnivariateSelect: Univariate feature selector with configurable mode.
493550 """
494551
495552 def __init__ (self , score_func = f_classif , alpha = 5e-2 ):
@@ -527,6 +584,17 @@ class SelectFwe(_BaseFilter):
527584
528585 pvalues_ : array-like, shape=(n_features,)
529586 p-values of feature scores.
587+
588+ See also
589+ --------
590+ f_classif: ANOVA F-value between label/feature for classification tasks.
591+ chi2: Chi-squared stats of non-negative features for classification tasks.
592+ f_regression: F-value between label/feature for regression tasks.
593+ SelectPercentile: Select features based on percentile of the highest scores.
594+ SelectKBest: Select features based on the k highest scores.
595+ SelectFpr: Select features based on a false positive rate test.
596+ SelectFdr: Select features based on an estimated false discovery rate.
597+ GenericUnivariateSelect: Univariate feature selector with configurable mode.
530598 """
531599
532600 def __init__ (self , score_func = f_classif , alpha = 5e-2 ):
@@ -567,6 +635,17 @@ class GenericUnivariateSelect(_BaseFilter):
567635
568636 pvalues_ : array-like, shape=(n_features,)
569637 p-values of feature scores.
638+
639+ See also
640+ --------
641+ f_classif: ANOVA F-value between label/feature for classification tasks.
642+ chi2: Chi-squared stats of non-negative features for classification tasks.
643+ f_regression: F-value between label/feature for regression tasks.
644+ SelectPercentile: Select features based on percentile of the highest scores.
645+ SelectKBest: Select features based on the k highest scores.
646+ SelectFpr: Select features based on a false positive rate test.
647+ SelectFdr: Select features based on an estimated false discovery rate.
648+ SelectFwe: Select features based on family-wise error rate.
570649 """
571650
572651 _selection_modes = {'percentile' : SelectPercentile ,
0 commit comments