btabibian
diff --git a/doc/whats_new.rst b/doc/whats_new.rst
Lines changed: 3 additions & 0 deletions
diff --git a/sklearn/linear_model/least_angle.py b/sklearn/linear_model/least_angle.py
Lines changed: 8 additions & 4 deletions
diff --git a/sklearn/linear_model/tests/test_least_angle.py b/sklearn/linear_model/tests/test_least_angle.py
Lines changed: 2 additions & 11 deletions
diff --git a/sklearn/linear_model/tests/test_randomized_l1.py b/sklearn/linear_model/tests/test_randomized_l1.py
Lines changed: 2 additions & 1 deletion
@@ -372,6 +372,9 @@ Bug fixes
    - Add ``shuffle`` parameter to :func:`model_selection.train_test_split`.
      :issue:`#8845` by  :user:`themrmax <themrmax>`
 
+   - Fix AIC/BIC criterion computation in :class:`linear_model.LassoLarsIC`
+     by `Alexandre Gramfort`_ and :user:`Mehmet Basbug <mehmetbasbug>`.
+
 API changes summary
 -------------------
 
 
@@ -1400,8 +1400,10 @@ class LassoLarsIC(LassoLars):
 
     criterion_ : array, shape (n_alphas,)
         The value of the information criteria ('aic', 'bic') across all
-        alphas. The alpha which has the smallest information criteria
-        is chosen.
+        alphas. The alpha with the smallest information criterion is chosen.
+        This value is larger by a factor of ``n_samples`` than Eqns. 2.15
+        and 2.16 in (Zou et al., 2007).
+
 
     Examples
     --------
@@ -1487,6 +1489,7 @@ def fit(self, X, y, copy_X=True):
 
         R = y[:, np.newaxis] - np.dot(X, coef_path_)  # residuals
         mean_squared_error = np.mean(R ** 2, axis=0)
+        sigma2 = np.var(y)
 
         df = np.zeros(coef_path_.shape[1], dtype=np.int)  # Degrees of freedom
         for k, coef in enumerate(coef_path_.T):
@@ -1499,8 +1502,9 @@ def fit(self, X, y, copy_X=True):
             df[k] = np.sum(mask)
 
         self.alphas_ = alphas_
-        with np.errstate(divide='ignore'):
-            self.criterion_ = n_samples * np.log(mean_squared_error) + K * df
+        eps64 = np.finfo('float64').eps
+        self.criterion_ = (n_samples * mean_squared_error / (sigma2 + eps64) +
+                           K * df)  # Eqns. 2.15--16 in (Zou et al, 2007)
         n_best = np.argmin(self.criterion_)
 
         self.alpha_ = alphas_[n_best]
 
@@ -12,7 +12,7 @@
 from sklearn.utils.testing import assert_greater
 from sklearn.utils.testing import assert_raises
 from sklearn.utils.testing import ignore_warnings
-from sklearn.utils.testing import assert_no_warnings, assert_warns
+from sklearn.utils.testing import assert_warns
 from sklearn.utils.testing import TempMemmap
 from sklearn.exceptions import ConvergenceWarning
 from sklearn import linear_model, datasets
@@ -430,7 +430,7 @@ def test_lasso_lars_ic():
     rng = np.random.RandomState(42)
     X = diabetes.data
     y = diabetes.target
-    X = np.c_[X, rng.randn(X.shape[0], 4)]  # add 4 bad features
+    X = np.c_[X, rng.randn(X.shape[0], 5)]  # add 5 bad features
     lars_bic.fit(X, y)
     lars_aic.fit(X, y)
     nonzero_bic = np.where(lars_bic.coef_)[0]
@@ -444,15 +444,6 @@ def test_lasso_lars_ic():
     assert_raises(ValueError, lars_broken.fit, X, y)
 
 
-def test_no_warning_for_zero_mse():
-    # LassoLarsIC should not warn for log of zero MSE.
-    y = np.arange(10, dtype=float)
-    X = y.reshape(-1, 1)
-    lars = linear_model.LassoLarsIC(normalize=False)
-    assert_no_warnings(lars.fit, X, y)
-    assert_true(np.any(np.isinf(lars.criterion_)))
-
-
 def test_lars_path_readonly_data():
     # When using automated memory mapping on large input, the
     # fold data is in read-only mode
 
@@ -10,6 +10,7 @@
 from sklearn.utils.testing import assert_array_equal
 from sklearn.utils.testing import assert_raises
 from sklearn.utils.testing import assert_raises_regex
+from sklearn.utils.testing import assert_allclose
 
 from sklearn.linear_model.randomized_l1 import (lasso_stability_path,
                                                 RandomizedLasso,
@@ -94,7 +95,7 @@ def test_randomized_lasso():
     clf = RandomizedLasso(verbose=False, alpha='aic', random_state=42,
                           scaling=scaling)
     feature_scores = clf.fit(X, y).scores_
-    assert_array_equal(feature_scores, X.shape[1] * [1.])
+    assert_allclose(feature_scores, [1., 1., 1., 0.225, 1.], rtol=0.2)
 
     clf = RandomizedLasso(verbose=False, scaling=-0.1)
     assert_raises(ValueError, clf.fit, X, y)