ENH factor out squared norm helper

larsmans · larsmans · commit fb795ebef0e5 · 2014-04-08T23:02:54.000+02:00
diff --git a/sklearn/decomposition/factor_analysis.py b/sklearn/decomposition/factor_analysis.py
@@ -28,7 +28,7 @@
 from ..base import BaseEstimator, TransformerMixin
 from ..externals.six.moves import xrange
 from ..utils import array2d, check_arrays, check_random_state
-from ..utils.extmath import fast_logdet, fast_dot, randomized_svd
+from ..utils.extmath import fast_logdet, fast_dot, randomized_svd, squared_norm
 from ..utils import ConvergenceWarning
 
 
@@ -188,15 +188,15 @@ def fit(self, X, y=None):
             def my_svd(X):
                 _, s, V = linalg.svd(X, full_matrices=False)
                 return (s[:n_components], V[:n_components],
-                        np.dot(s[n_components:].flat, s[n_components:].flat))
+                        squared_norm(s[n_components:]))
         elif self.svd_method == 'randomized':
             random_state = check_random_state(self.random_state)
 
             def my_svd(X):
                 _, s, V = randomized_svd(X, n_components,
                                          random_state=random_state,
                                          n_iter=self.iterated_power)
-                return s, V, np.dot(X.flat, X.flat) - np.dot(s, s)
+                return s, V, squared_norm(X) - squared_norm(s)
         else:
             raise ValueError('SVD method %s is not supported. Please consider'
                              ' the documentation' % self.svd_method)
diff --git a/sklearn/decomposition/nmf.py b/sklearn/decomposition/nmf.py
@@ -20,7 +20,7 @@
 
 from ..base import BaseEstimator, TransformerMixin
 from ..utils import atleast2d_or_csr, check_random_state, check_arrays
-from ..utils.extmath import randomized_svd, safe_sparse_dot
+from ..utils.extmath import randomized_svd, safe_sparse_dot, squared_norm
 
 
 def safe_vstack(Xs):
@@ -35,8 +35,7 @@ def norm(x):
 
     See: http://fseoane.net/blog/2011/computing-the-vector-norm/
     """
-    x = x.ravel()
-    return np.sqrt(np.dot(x, x))
+    return sqrt(squared_norm(x))
 
 
 def trace_dot(X, Y):
diff --git a/sklearn/mixture/dpgmm.py b/sklearn/mixture/dpgmm.py
@@ -17,15 +17,11 @@
 
 from ..externals.six.moves import xrange
 from ..utils import check_random_state, deprecated
-from ..utils.extmath import norm, logsumexp, pinvh
+from ..utils.extmath import logsumexp, pinvh, squared_norm
 from .. import cluster
 from .gmm import GMM
 
 
-def sqnorm(v):
-    return norm(v) ** 2
-
-
 def digamma(x):
     return _digamma(x + np.finfo(np.float32).eps)
 
@@ -412,7 +408,7 @@ def _bound_concentration(self):
     def _bound_means(self):
         "The variational lower bound for the mean parameters"
         logprior = 0.
-        logprior -= 0.5 * sqnorm(self.means_)
+        logprior -= 0.5 * squared_norm(self.means_)
         logprior -= 0.5 * self.means_.shape[1] * self.n_components
         return logprior
 
diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py
@@ -8,7 +8,9 @@
 #          Lars Buitinck
 # License: BSD 3 clause
 
+from functools import partial
 import warnings
+
 import numpy as np
 from scipy import linalg
 from scipy.sparse import issparse
@@ -25,13 +27,30 @@ def norm(x):
     """Compute the Euclidean or Frobenius norm of x.
 
     Returns the Euclidean norm when x is a vector, the Frobenius norm when x
-    is a matrix (2-d array).
+    is a matrix (2-d array). More precise than sqrt(squared_norm(x)).
     """
     x = np.asarray(x)
     nrm2, = linalg.get_blas_funcs(['nrm2'], [x])
     return nrm2(x)
 
 
+# NumPy >= 1.7.1: ravel(order='K') flattens in memory order, so copies less.
+if np_version < (1, 7, 1):
+    _ravel = np.ravel
+else:
+    _ravel = partial(np.ravel, order='K')
+
+
+def squared_norm(x):
+    """Squared Euclidean or Frobenius norm of x.
+
+    Returns the squared Euclidean norm when x is a vector, the squared
+    Frobenius norm when x is a matrix (2-d array). Faster than norm(x) ** 2.
+    """
+    x = _ravel(x)
+    return np.dot(x, x)
+
+
 def row_norms(X, squared=False):
     """Row-wise (squared) Euclidean norm of X.
 
diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py
@@ -20,6 +20,7 @@
 
 from sklearn.utils.extmath import density
 from sklearn.utils.extmath import logsumexp
+from sklearn.utils.extmath import norm, squared_norm
 from sklearn.utils.extmath import randomized_svd
 from sklearn.utils.extmath import row_norms
 from sklearn.utils.extmath import weighted_mode
@@ -125,6 +126,16 @@ def test_randomized_svd_low_rank():
     assert_almost_equal(s[:rank], sa[:rank])
 
 
+def test_norm_squared_norm():
+    X = np.random.RandomState(42).randn(50, 63)
+    X *= 100        # check stability
+    X += 200
+
+    assert_almost_equal(np.linalg.norm(X.ravel()), norm(X))
+    assert_almost_equal(norm(X) ** 2, squared_norm(X), decimal=6)
+    assert_almost_equal(np.linalg.norm(X), np.sqrt(squared_norm(X)), decimal=6)
+
+
 def test_row_norms():
     X = np.random.RandomState(42).randn(100, 100)
     sq_norm = (X ** 2).sum(axis=1)