Skip to content

Commit fb795eb

Browse files
committed
ENH factor out squared norm helper
1 parent d954846 commit fb795eb

File tree

5 files changed

+38
-13
lines changed

5 files changed

+38
-13
lines changed

sklearn/decomposition/factor_analysis.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
from ..base import BaseEstimator, TransformerMixin
2929
from ..externals.six.moves import xrange
3030
from ..utils import array2d, check_arrays, check_random_state
31-
from ..utils.extmath import fast_logdet, fast_dot, randomized_svd
31+
from ..utils.extmath import fast_logdet, fast_dot, randomized_svd, squared_norm
3232
from ..utils import ConvergenceWarning
3333

3434

@@ -188,15 +188,15 @@ def fit(self, X, y=None):
188188
def my_svd(X):
189189
_, s, V = linalg.svd(X, full_matrices=False)
190190
return (s[:n_components], V[:n_components],
191-
np.dot(s[n_components:].flat, s[n_components:].flat))
191+
squared_norm(s[n_components:]))
192192
elif self.svd_method == 'randomized':
193193
random_state = check_random_state(self.random_state)
194194

195195
def my_svd(X):
196196
_, s, V = randomized_svd(X, n_components,
197197
random_state=random_state,
198198
n_iter=self.iterated_power)
199-
return s, V, np.dot(X.flat, X.flat) - np.dot(s, s)
199+
return s, V, squared_norm(X) - squared_norm(s)
200200
else:
201201
raise ValueError('SVD method %s is not supported. Please consider'
202202
' the documentation' % self.svd_method)

sklearn/decomposition/nmf.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
from ..base import BaseEstimator, TransformerMixin
2222
from ..utils import atleast2d_or_csr, check_random_state, check_arrays
23-
from ..utils.extmath import randomized_svd, safe_sparse_dot
23+
from ..utils.extmath import randomized_svd, safe_sparse_dot, squared_norm
2424

2525

2626
def safe_vstack(Xs):
@@ -35,8 +35,7 @@ def norm(x):
3535
3636
See: http://fseoane.net/blog/2011/computing-the-vector-norm/
3737
"""
38-
x = x.ravel()
39-
return np.sqrt(np.dot(x, x))
38+
return sqrt(squared_norm(x))
4039

4140

4241
def trace_dot(X, Y):

sklearn/mixture/dpgmm.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,11 @@
1717

1818
from ..externals.six.moves import xrange
1919
from ..utils import check_random_state, deprecated
20-
from ..utils.extmath import norm, logsumexp, pinvh
20+
from ..utils.extmath import logsumexp, pinvh, squared_norm
2121
from .. import cluster
2222
from .gmm import GMM
2323

2424

25-
def sqnorm(v):
26-
return norm(v) ** 2
27-
28-
2925
def digamma(x):
3026
return _digamma(x + np.finfo(np.float32).eps)
3127

@@ -412,7 +408,7 @@ def _bound_concentration(self):
412408
def _bound_means(self):
413409
"The variational lower bound for the mean parameters"
414410
logprior = 0.
415-
logprior -= 0.5 * sqnorm(self.means_)
411+
logprior -= 0.5 * squared_norm(self.means_)
416412
logprior -= 0.5 * self.means_.shape[1] * self.n_components
417413
return logprior
418414

sklearn/utils/extmath.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
# Lars Buitinck
99
# License: BSD 3 clause
1010

11+
from functools import partial
1112
import warnings
13+
1214
import numpy as np
1315
from scipy import linalg
1416
from scipy.sparse import issparse
@@ -25,13 +27,30 @@ def norm(x):
2527
"""Compute the Euclidean or Frobenius norm of x.
2628
2729
Returns the Euclidean norm when x is a vector, the Frobenius norm when x
28-
is a matrix (2-d array).
30+
is a matrix (2-d array). More precise than sqrt(squared_norm(x)).
2931
"""
3032
x = np.asarray(x)
3133
nrm2, = linalg.get_blas_funcs(['nrm2'], [x])
3234
return nrm2(x)
3335

3436

37+
# Newer NumPy has a ravel that needs less copying.
38+
if np_version < (1, 7, 1):
39+
_ravel = np.ravel
40+
else:
41+
_ravel = partial(np.ravel, order='K')
42+
43+
44+
def squared_norm(x):
45+
"""Squared Euclidean or Frobenius norm of x.
46+
47+
Returns the squared Euclidean norm when x is a vector, the squared
48+
Frobenius norm when x is a matrix (2-d array). Faster than norm(x) ** 2.
49+
"""
50+
x = _ravel(x)
51+
return np.dot(x, x)
52+
53+
3554
def row_norms(X, squared=False):
3655
"""Row-wise (squared) Euclidean norm of X.
3756

sklearn/utils/tests/test_extmath.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
from sklearn.utils.extmath import density
2222
from sklearn.utils.extmath import logsumexp
23+
from sklearn.utils.extmath import norm, squared_norm
2324
from sklearn.utils.extmath import randomized_svd
2425
from sklearn.utils.extmath import row_norms
2526
from sklearn.utils.extmath import weighted_mode
@@ -125,6 +126,16 @@ def test_randomized_svd_low_rank():
125126
assert_almost_equal(s[:rank], sa[:rank])
126127

127128

129+
def test_norm_squared_norm():
130+
X = np.random.RandomState(42).randn(50, 63)
131+
X *= 100 # check stability
132+
X += 200
133+
134+
assert_almost_equal(np.linalg.norm(X.ravel()), norm(X))
135+
assert_almost_equal(norm(X) ** 2, squared_norm(X), decimal=6)
136+
assert_almost_equal(np.linalg.norm(X), np.sqrt(squared_norm(X)), decimal=6)
137+
138+
128139
def test_row_norms():
129140
X = np.random.RandomState(42).randn(100, 100)
130141
sq_norm = (X ** 2).sum(axis=1)

0 commit comments

Comments
 (0)