|
13 | 13 |
|
14 | 14 | In the total set of features, only the first 4 are significant. We
15 | 15 | can see that they have the highest score with univariate feature |
16 | | -selection. The SVM attributes small weights to these features, but these |
17 | | -weight are non zero. Applying univariate feature selection before the SVM |
| 16 | +selection. The SVM assigns a large weight to one of these features, but also |
| 17 | +selects many of the non-informative features.
| 18 | +Applying univariate feature selection before the SVM |
18 | 19 | increases the SVM weights attributed to the significant features, and will
19 | 20 | thus improve classification. |
20 | 21 | """ |
|
29 | 30 | ############################################################################### |
30 | 31 | # import some data to play with |
31 | 32 |
|
32 | | -# The IRIS dataset |
| 33 | +# The iris dataset |
33 | 34 | iris = datasets.load_iris() |
34 | 35 |
|
35 | 36 | # Some noisy data, not correlated with the target
36 | | -E = np.random.normal(size=(len(iris.data), 35)) |
| 37 | +E = np.random.uniform(0, 0.1, size=(len(iris.data), 20)) |
37 | 38 |
|
38 | 39 | # Add the noisy data to the informative features |
39 | | -x = np.hstack((iris.data, E)) |
| 40 | +X = np.hstack((iris.data, E)) |
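| | +# (X: 150 samples by 24 features, the 4 iris features plus the 20 noise columns)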
40 | 41 | y = iris.target |
41 | 42 |
|
42 | 43 | ############################################################################### |
43 | 44 | pl.figure(1) |
44 | 45 | pl.clf() |
45 | 46 |
|
46 | | -x_indices = np.arange(x.shape[-1]) |
| 47 | +X_indices = np.arange(X.shape[-1]) |
47 | 48 |
|
48 | 49 | ############################################################################### |
49 | 50 | # Univariate feature selection with F-test for feature scoring |
50 | 51 | # We use the default selection function: the 10% most significant features |
51 | 52 | selector = SelectPercentile(f_classif, percentile=10) |
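| | +# (f_classif scores each feature independently with an ANOVA F-test)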
52 | | -selector.fit(x, y) |
53 | | -scores = -np.log10(selector.scores_) |
| 53 | +selector.fit(X, y) |
| 54 | +scores = -np.log10(selector.pvalues_) |
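| | +# (take -log10 of the p-values: the smaller the p-value, the larger the score)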
54 | 55 | scores /= scores.max() |
55 | | -pl.bar(x_indices - .45, scores, width=.3, |
| 56 | +pl.bar(X_indices - .45, scores, width=.2, |
56 | 57 | label=r'Univariate score ($-Log(p_{value})$)', |
57 | 58 | color='g') |
58 | 59 |
|
59 | 60 | ############################################################################### |
60 | 61 | # Compare to the weights of an SVM |
61 | 62 | clf = svm.SVC(kernel='linear') |
62 | | -clf.fit(x, y) |
| 63 | +clf.fit(X, y) |
63 | 64 |
|
64 | 65 | svm_weights = (clf.coef_ ** 2).sum(axis=0) |
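| | +# (sum the squared weights over the classifiers: one importance score per feature)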
65 | 66 | svm_weights /= svm_weights.max() |
66 | | -pl.bar(x_indices - .15, svm_weights, width=.3, label='SVM weight', |
| 67 | + |
| 68 | +pl.bar(X_indices - .25, svm_weights, width=.2, label='SVM weight', |
67 | 69 | color='r') |
68 | 70 |
|
| 71 | +clf_selected = svm.SVC(kernel='linear') |
| 72 | +clf_selected.fit(selector.transform(X), y) |
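| | +# (selector.transform(X) keeps only the features chosen by SelectPercentile)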
| 73 | + |
| 74 | +svm_weights_selected = (clf_selected.coef_ ** 2).sum(axis=0) |
| 75 | +svm_weights_selected /= svm_weights_selected.max() |
| 76 | + |
| 77 | +pl.bar(X_indices[selector.get_support()] - .05, svm_weights_selected, width=.2, |
| 78 | + label='SVM weights after selection', color='b') |
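| | +# (get_support() returns a boolean mask over the original feature indices)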
| 79 | + |
| 80 | + |
69 | 81 | pl.title("Comparing feature selection") |
70 | 82 | pl.xlabel('Feature number') |
71 | 83 | pl.yticks(()) |
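
The same two-step flow, univariate selection followed by a linear SVM, can also be written as a single estimator. A minimal sketch, assuming sklearn.pipeline.Pipeline; the name anova_svm is our own:

import numpy as np
from sklearn import datasets, svm
from sklearn.feature_selection import SelectPercentile, f_classif
from sklearn.pipeline import Pipeline

# Rebuild the noisy design matrix from the example: the 4 informative iris
# features plus 20 uniform-noise columns (mirroring the diff above).
iris = datasets.load_iris()
E = np.random.uniform(0, 0.1, size=(len(iris.data), 20))
X = np.hstack((iris.data, E))

# Chain the ANOVA-based selector and the linear SVM: fitting the pipeline
# fits the selector, transforms X, then fits the SVM on the selected columns.
anova_svm = Pipeline([('anova', SelectPercentile(f_classif, percentile=10)),
                      ('svc', svm.SVC(kernel='linear'))])
anova_svm.fit(X, iris.target)

With 24 features and percentile=10, the selector keeps only a couple of columns, so the SVM is trained on a much smaller design matrix.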
|