Skip to content

Commit 6a35622

Browse files
agramfort authored and vene committed
[FIX] BIC/AIC for Lasso (scikit-learn#9022)
* correcting information criterion calculation in least_angle.py

  The information criterion calculation is not compatible with the original paper: Zou, Hui, Trevor Hastie, and Robert Tibshirani. "On the “degrees of freedom” of the lasso." The Annals of Statistics 35.5 (2007): 2173-2192.
* FIX : fix AIC/BIC computation in LassoLarsIC
* update what's new
* fix test
* fix test
* address comments
* DOC comments and docstring on criterion computation
1 parent 689f412 commit 6a35622

File tree

4 files changed

+15
-16
lines changed

4 files changed

+15
-16
lines changed

doc/whats_new.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,6 +372,9 @@ Bug fixes
372372
- Add ``shuffle`` parameter to :func:`model_selection.train_test_split`.
373373
:issue:`#8845` by :user:`themrmax <themrmax>`
374374

375+
- Fix AIC/BIC criterion computation in :class:`linear_model.LassoLarsIC`
376+
by `Alexandre Gramfort`_ and :user:`Mehmet Basbug <mehmetbasbug>`.
377+
375378
API changes summary
376379
-------------------
377380

sklearn/linear_model/least_angle.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1400,8 +1400,10 @@ class LassoLarsIC(LassoLars):
14001400
14011401
criterion_ : array, shape (n_alphas,)
14021402
The value of the information criteria ('aic', 'bic') across all
1403-
alphas. The alpha which has the smallest information criteria
1404-
is chosen.
1403+
alphas. The alpha which has the smallest information criteria is chosen.
1404+
This value is larger by a factor of ``n_samples`` compared to Eqns. 2.15
1405+
and 2.16 in (Zou et al, 2007).
1406+
14051407
14061408
Examples
14071409
--------
@@ -1487,6 +1489,7 @@ def fit(self, X, y, copy_X=True):
14871489

14881490
R = y[:, np.newaxis] - np.dot(X, coef_path_) # residuals
14891491
mean_squared_error = np.mean(R ** 2, axis=0)
1492+
sigma2 = np.var(y)
14901493

14911494
df = np.zeros(coef_path_.shape[1], dtype=np.int) # Degrees of freedom
14921495
for k, coef in enumerate(coef_path_.T):
@@ -1499,8 +1502,9 @@ def fit(self, X, y, copy_X=True):
14991502
df[k] = np.sum(mask)
15001503

15011504
self.alphas_ = alphas_
1502-
with np.errstate(divide='ignore'):
1503-
self.criterion_ = n_samples * np.log(mean_squared_error) + K * df
1505+
eps64 = np.finfo('float64').eps
1506+
self.criterion_ = (n_samples * mean_squared_error / (sigma2 + eps64) +
1507+
K * df) # Eqns. 2.15--16 in (Zou et al, 2007)
15041508
n_best = np.argmin(self.criterion_)
15051509

15061510
self.alpha_ = alphas_[n_best]

sklearn/linear_model/tests/test_least_angle.py

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from sklearn.utils.testing import assert_greater
1313
from sklearn.utils.testing import assert_raises
1414
from sklearn.utils.testing import ignore_warnings
15-
from sklearn.utils.testing import assert_no_warnings, assert_warns
15+
from sklearn.utils.testing import assert_warns
1616
from sklearn.utils.testing import TempMemmap
1717
from sklearn.exceptions import ConvergenceWarning
1818
from sklearn import linear_model, datasets
@@ -430,7 +430,7 @@ def test_lasso_lars_ic():
430430
rng = np.random.RandomState(42)
431431
X = diabetes.data
432432
y = diabetes.target
433-
X = np.c_[X, rng.randn(X.shape[0], 4)] # add 4 bad features
433+
X = np.c_[X, rng.randn(X.shape[0], 5)] # add 5 bad features
434434
lars_bic.fit(X, y)
435435
lars_aic.fit(X, y)
436436
nonzero_bic = np.where(lars_bic.coef_)[0]
@@ -444,15 +444,6 @@ def test_lasso_lars_ic():
444444
assert_raises(ValueError, lars_broken.fit, X, y)
445445

446446

447-
def test_no_warning_for_zero_mse():
448-
# LassoLarsIC should not warn for log of zero MSE.
449-
y = np.arange(10, dtype=float)
450-
X = y.reshape(-1, 1)
451-
lars = linear_model.LassoLarsIC(normalize=False)
452-
assert_no_warnings(lars.fit, X, y)
453-
assert_true(np.any(np.isinf(lars.criterion_)))
454-
455-
456447
def test_lars_path_readonly_data():
457448
# When using automated memory mapping on large input, the
458449
# fold data is in read-only mode

sklearn/linear_model/tests/test_randomized_l1.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from sklearn.utils.testing import assert_array_equal
1111
from sklearn.utils.testing import assert_raises
1212
from sklearn.utils.testing import assert_raises_regex
13+
from sklearn.utils.testing import assert_allclose
1314

1415
from sklearn.linear_model.randomized_l1 import (lasso_stability_path,
1516
RandomizedLasso,
@@ -94,7 +95,7 @@ def test_randomized_lasso():
9495
clf = RandomizedLasso(verbose=False, alpha='aic', random_state=42,
9596
scaling=scaling)
9697
feature_scores = clf.fit(X, y).scores_
97-
assert_array_equal(feature_scores, X.shape[1] * [1.])
98+
assert_allclose(feature_scores, [1., 1., 1., 0.225, 1.], rtol=0.2)
9899

99100
clf = RandomizedLasso(verbose=False, scaling=-0.1)
100101
assert_raises(ValueError, clf.fit, X, y)

0 commit comments

Comments
 (0)