
Commit 36ad48e

ENH : add score samples to PCA
1 parent c1ad0c7 commit 36ad48e

3 files changed: +33 -14 lines changed


doc/whats_new.rst

Lines changed: 5 additions & 4 deletions
@@ -32,10 +32,11 @@ Changelog
 API changes summary
 -------------------

-   - Add score method to PCA following the model of probabilistic PCA and
-     deprecate ProbabilisticPCA model whose score implementation is not
-     correct. The computation now also exploits the matrix inversion
-     lemma for faster computation. By `Alexandre Gramfort`_.
+   - Add score method to :class:`PCA <decomposition.PCA>` following the model of
+     probabilistic PCA and deprecate
+     :class:`ProbabilisticPCA <decomposition.ProbabilisticPCA>` model whose
+     score implementation is not correct. The computation now also exploits the
+     matrix inversion lemma for faster computation. By `Alexandre Gramfort`_.


 .. _changes_0_14:
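The entry above mentions the matrix inversion lemma. As a rough illustration of why it speeds things up, here is a minimal NumPy sketch of inverting a probabilistic-PCA covariance of the form W @ W.T + sigma2 * I via that lemma; the function and variable names are illustrative only, not scikit-learn internals.

import numpy as np

def ppca_precision(W, sigma2):
    """Invert C = W @ W.T + sigma2 * I using the matrix inversion lemma.

    Only a k x k system is solved (k = n_components), instead of inverting
    the full n_features x n_features covariance directly.
    """
    n_features, k = W.shape
    M = sigma2 * np.eye(k) + W.T @ W      # k x k matrix
    inner = np.linalg.solve(M, W.T)       # M^{-1} @ W.T
    return (np.eye(n_features) - W @ inner) / sigma2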

sklearn/decomposition/pca.py

Lines changed: 24 additions & 6 deletions
@@ -398,21 +398,21 @@ def inverse_transform(self, X):
         """
         return fast_dot(X, self.components_) + self.mean_

-    def score(self, X, y=None):
-        """Return a score associated to new data
+    def score_samples(self, X):
+        """Return the log-likelihood of each sample

         See. "Pattern Recognition and Machine Learning"
         by C. Bishop, 12.2.1 p. 574

         Parameters
         ----------
-        X: array of shape(n_samples, n_features)
-            The data to test
+        X: array, shape(n_samples, n_features)
+            The data.

         Returns
         -------
-        ll: array of shape (n_samples),
-            log-likelihood of each row of X under the current model
+        ll: array, shape (n_samples,)
+            Log-likelihood of each sample under the current model
         """
         Xr = X - self.mean_
         n_features = X.shape[1]
@@ -423,6 +423,24 @@ def score(self, X, y=None):
                           - fast_logdet(precision))
         return log_like

+    def score(self, X, y=None):
+        """Return the average log-likelihood of all samples
+
+        See. "Pattern Recognition and Machine Learning"
+        by C. Bishop, 12.2.1 p. 574
+
+        Parameters
+        ----------
+        X: array, shape(n_samples, n_features)
+            The data.
+
+        Returns
+        -------
+        ll: float
+            Average log-likelihood of the samples under the current model
+        """
+        return np.mean(self.score_samples(X))
+

 @deprecated("ProbabilisticPCA will be removed in 0.16. WARNING: The covariance"
             " estimation is NOT correct and is now moved to and corrected in"

sklearn/decomposition/tests/test_pca.py

Lines changed: 4 additions & 4 deletions
@@ -340,7 +340,7 @@ def test_pca_score():
     pca.fit(X)
     ll1 = pca.score(X)
     h = -0.5 * np.log(2 * np.pi * np.exp(1) * 0.1 ** 2) * p
-    np.testing.assert_almost_equal(ll1.mean() / h, 1, 0)
+    np.testing.assert_almost_equal(ll1 / h, 1, 0)


 def test_pca_score2():
@@ -352,13 +352,13 @@ def test_pca_score2():
     pca.fit(X)
     ll1 = pca.score(X)
     ll2 = pca.score(rng.randn(n, p) * .2 + np.array([3, 4, 5]))
-    assert_greater(ll1.mean(), ll2.mean())
+    assert_greater(ll1, ll2)

     # Test that it gives the same scores if whiten=True
     pca = PCA(n_components=2, whiten=True)
     pca.fit(X)
     ll2 = pca.score(X)
-    assert_array_almost_equal(ll1, ll2)
+    assert_almost_equal(ll1, ll2)


 def test_pca_score3():
@@ -373,7 +373,7 @@ def test_pca_score3():
     for k in range(p):
         pca = PCA(n_components=k)
         pca.fit(Xl)
-        ll[k] = pca.score(Xt).mean()
+        ll[k] = pca.score(Xt)

     assert_true(ll.argmax() == 1)
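Taken together with the test changes, usage after this commit looks roughly like the following: score_samples returns one log-likelihood per sample, while score returns their mean as a single float. This mirrors the behaviour shown in the diff and adds nothing beyond it.

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
X = rng.randn(100, 5)

pca = PCA(n_components=2).fit(X)
per_sample = pca.score_samples(X)   # array of shape (n_samples,)
average = pca.score(X)              # a single float
assert np.isclose(average, per_sample.mean())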
