
Commit 33efa17

Fabian Pedregosa authored and larsmans committed
ENH Improve Ridge's conjugate gradient descent
Reorder terms in the algorithm so that X'X is never explicitly computed, and accept LinearOperator as an input type. This improves on the previous code in two ways:

* Speed: I get speedups ranging from 2.5x to more than 10x. The benchmark suite, which prints the time spent for various sizes and densities, is at https://gist.github.com/1322711.

* Interface: by allowing a LinearOperator type, we can use the conjugate gradient method almost for free on dense matrices with special structure, pytables, and in general anything that implements a fast matrix-vector product.
1 parent: 35704c8 · commit: 33efa17
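To make the reordering concrete, here is a minimal, self-contained sketch of the trick (shapes, data, and variable names are illustrative, not taken from the patch). CG only ever needs the action of X'X + alpha*I on a vector, and that action can be computed as two matrix-vector products per iteration, so the Gram matrix X'X is never formed:

import numpy as np
from scipy import sparse
from scipy.sparse import linalg as sp_linalg

# Illustrative data: solve (X'X + alpha*I) w = X'y without forming X'X.
X = sparse.rand(5000, 200, density=0.01, format='csr', random_state=0)
y = np.random.RandomState(0).randn(5000)
alpha = 1.0

X_op = sp_linalg.aslinearoperator(X)

def normal_eq_matvec(v):
    # X'(Xv) + alpha*v: two matrix-vector products per CG iteration,
    # never the (n_features x n_features) Gram matrix itself.
    return X_op.rmatvec(X_op.matvec(v)) + alpha * v

A = sp_linalg.LinearOperator((200, 200), matvec=normal_eq_matvec,
                             dtype=X.dtype)
w, info = sp_linalg.cg(A, X_op.rmatvec(y), tol=1e-6)
assert info == 0  # 0 means CG converged

Anything exposing matvec/rmatvec can stand in for the sparse matrix here, which is what the interface point above is about.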

1 file changed: +54 / -55 lines
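Some context for the two CG branches in the patch below: when n_features > n_samples, it is cheaper to solve the small n_samples x n_samples dual system and map the result back through X', using the standard identity (X'X + alpha*I)^-1 X'y = X'(XX' + alpha*I)^-1 y; otherwise the primal n_features x n_features system is solved directly. A quick numerical check of that identity on made-up dense data:

import numpy as np

rng = np.random.RandomState(42)
X = rng.randn(10, 50)   # n_samples < n_features: the "kernel ridge" regime
y = rng.randn(10)
alpha = 0.1

# primal form: w = inv(X'X + alpha*I) X'y  (50 x 50 system)
w_primal = np.linalg.solve(np.dot(X.T, X) + alpha * np.eye(50),
                           np.dot(X.T, y))
# dual form: w = X' inv(XX' + alpha*I) y   (10 x 10 system)
w_dual = np.dot(X.T, np.linalg.solve(np.dot(X, X.T) + alpha * np.eye(10), y))

assert np.allclose(w_primal, w_dual)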

sklearn/linear_model/ridge.py (54 additions, 55 deletions)

@@ -2,14 +2,19 @@
 Ridge regression
 """
 
-# Author: Mathieu Blondel <[email protected]>
-#         Reuben Fletcher-Costin <[email protected]>
+# Author: Mathieu Blondel <[email protected]>
+#         Reuben Fletcher-Costin <[email protected]>
+#         Fabian Pedregosa <[email protected]>
 # License: Simplified BSD
 
 
 from abc import ABCMeta, abstractmethod
 import warnings
+
 import numpy as np
+from scipy import linalg
+from scipy import sparse
+from scipy.sparse import linalg as sp_linalg
 
 from .base import LinearClassifierMixin, LinearModel
 from ..base import RegressorMixin
@@ -19,44 +24,12 @@
 from ..grid_search import GridSearchCV
 
 
-def _solve(A, b, solver, tol):
-    # helper method for ridge_regression, A is symmetric positive
-
-    if solver == 'auto':
-        if hasattr(A, 'todense'):
-            solver = 'sparse_cg'
-        else:
-            solver = 'dense_cholesky'
-
-    if solver == 'sparse_cg':
-        if b.ndim < 2:
-            from scipy.sparse import linalg as sp_linalg
-            sol, error = sp_linalg.cg(A, b, tol=tol)
-            if error:
-                raise ValueError("Failed with error code %d" % error)
-            return sol
-        else:
-            # sparse_cg cannot handle a 2-d b.
-            sol = []
-            for j in range(b.shape[1]):
-                sol.append(_solve(A, b[:, j], solver="sparse_cg", tol=tol))
-            return np.array(sol).T
-
-    elif solver == 'dense_cholesky':
-        from scipy import linalg
-        if hasattr(A, 'todense'):
-            A = A.todense()
-        return linalg.solve(A, b, sym_pos=True, overwrite_a=True)
-    else:
-        raise NotImplementedError('Solver %s not implemented' % solver)
-
-
 def ridge_regression(X, y, alpha, sample_weight=1.0, solver='auto', tol=1e-3):
     """Solve the ridge equation by the method of normal equations.
 
     Parameters
     ----------
-    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
+    X : {array-like, sparse matrix, LinearOperator}, shape = [n_samples, n_features]
         Training data
 
     y : array-like, shape = [n_samples] or [n_samples, n_responses]
@@ -86,26 +59,54 @@ def ridge_regression(X, y, alpha, sample_weight=1.0, solver='auto', tol=1e-3):
     """
 
     n_samples, n_features = X.shape
-    is_sparse = False
-
-    if hasattr(X, 'todense'):  # lazy import of scipy.sparse
-        from scipy import sparse
-        is_sparse = sparse.issparse(X)
 
-    if is_sparse:
-        if n_features > n_samples or \
-           isinstance(sample_weight, np.ndarray) or \
-           sample_weight != 1.0:
+    if solver == 'auto':
+        # cholesky if it's a dense array and cg in
+        # any other case
+        if hasattr(X, '__array__'):
+            solver = 'dense_cholesky'
+        else:
+            solver = 'sparse_cg'
 
-            I = sparse.lil_matrix((n_samples, n_samples))
-            I.setdiag(np.ones(n_samples) * alpha * sample_weight)
-            c = _solve(X * X.T + I, y, solver, tol)
-            coef = X.T * c
+    if solver == 'sparse_cg':
+        # gradient descent
+        X1 = sp_linalg.aslinearoperator(X)
+        if y.ndim == 1:
+            y1 = np.reshape(y, (-1, 1))
         else:
-            I = sparse.lil_matrix((n_features, n_features))
-            I.setdiag(np.ones(n_features) * alpha)
-            coef = _solve(X.T * X + I, X.T * y, solver, tol)
+            y1 = y
+        coefs = np.empty((y1.shape[1], n_features))
+
+        for i in range(y1.shape[1]):
+            y_column = y1[:, i]
+            if n_features > n_samples:
+                # kernel ridge
+                # w = X.T * inv(X X^t + alpha*Id) y
+                def mv(x):
+                    return X1.matvec(X1.rmatvec(x)) + alpha * x
+                C = sp_linalg.LinearOperator(
+                    (n_samples, n_samples), matvec=mv, dtype=X.dtype)
+                coef, info = sp_linalg.cg(C, y_column, tol=tol)
+                coefs[i] = X1.rmatvec(coef)
+            else:
+                # ridge
+                # w = inv(X^t X + alpha*Id) * X.T y
+                def mv(x):
+                    return X1.rmatvec(X1.matvec(x)) + alpha * x
+                y_column = X1.rmatvec(y_column)
+                C = sp_linalg.LinearOperator(
+                    (n_features, n_features), matvec=mv, dtype=X.dtype)
+                coefs[i], info = sp_linalg.cg(C, y_column, tol=tol)
+            if info != 0:
+                raise ValueError("Failed with error code %d" % info)
+
+        if y.ndim == 1:
+            return np.ravel(coefs)
+        return coefs
     else:
+        # normal equations (cholesky) method
+        if hasattr(X, 'todense'):
+            X = X.todense()
         if n_features > n_samples or \
            isinstance(sample_weight, np.ndarray) or \
           sample_weight != 1.0:
@@ -114,13 +115,13 @@ def ridge_regression(X, y, alpha, sample_weight=1.0, solver='auto', tol=1e-3):
             # w = X.T * inv(X X^t + alpha*Id) y
             A = np.dot(X, X.T)
             A.flat[::n_samples + 1] += alpha * sample_weight
-            coef = np.dot(X.T, _solve(A, y, solver, tol))
+            coef = np.dot(X.T, linalg.solve(A, y, sym_pos=True, overwrite_a=True))
         else:
             # ridge
             # w = inv(X^t X + alpha*Id) * X.T y
             A = np.dot(X.T, X)
             A.flat[::n_features + 1] += alpha
-            coef = _solve(A, np.dot(X.T, y), solver, tol)
+            coef = linalg.solve(A, np.dot(X.T, y), sym_pos=True, overwrite_a=True)
 
     return coef.T
 
@@ -381,7 +382,6 @@ def __init__(self, alphas=[0.1, 1.0, 10.0], fit_intercept=True,
     def _pre_compute(self, X, y):
         # even if X is very sparse, K is usually very dense
         K = safe_sparse_dot(X, X.T, dense_output=True)
-        from scipy import linalg
         v, Q = linalg.eigh(K)
         QT_y = np.dot(Q.T, y)
         return v, Q, QT_y
@@ -418,7 +418,6 @@ def _values(self, alpha, y, v, Q, QT_y):
         return y - (c / G_diag), c
 
     def _pre_compute_svd(self, X, y):
-        from scipy import sparse
        if sparse.issparse(X) and hasattr(X, 'toarray'):
            X = X.toarray()
        U, s, _ = np.linalg.svd(X, full_matrices=0)
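Finally, a hedged usage sketch of the new interface (it assumes the post-patch ridge_regression shown above, this era's sklearn.linear_model.ridge module path, and made-up data). A plain ndarray takes the Cholesky path under solver='auto', while the same data wrapped in a LinearOperator lacks __array__ and therefore takes the conjugate gradient path; the two results should agree:

import numpy as np
from scipy.sparse import linalg as sp_linalg
from sklearn.linear_model.ridge import ridge_regression

rng = np.random.RandomState(0)
X = rng.randn(100, 20)
y = rng.randn(100)

# Dense ndarray: 'auto' selects the Cholesky (normal equations) path.
w_dense = ridge_regression(X, y, alpha=1.0)

# The same data behind a LinearOperator: only matvec/rmatvec are
# available, so 'auto' selects the conjugate gradient path.
w_cg = ridge_regression(sp_linalg.aslinearoperator(X), y, alpha=1.0,
                        tol=1e-8)

assert np.allclose(w_dense, w_cg, atol=1e-4)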
