
Commit 36ad48e

ENH : add score samples to PCA
1 parent c1ad0c7 commit 36ad48e

3 files changed: +33 -14 lines changed


doc/whats_new.rst

Lines changed: 5 additions & 4 deletions
@@ -32,10 +32,11 @@ Changelog
 API changes summary
 -------------------

-   - Add score method to PCA following the model of probabilistic PCA and
-     deprecate ProbabilisticPCA model whose score implementation is not
-     correct. The computation now also exploits the matrix inversion
-     lemma for faster computation. By `Alexandre Gramfort`_.
+   - Add score method to :class:`PCA <decomposition.PCA>` following the model of
+     probabilistic PCA and deprecate
+     :class:`ProbabilisticPCA <decomposition.ProbabilisticPCA>` model whose
+     score implementation is not correct. The computation now also exploits the
+     matrix inversion lemma for faster computation. By `Alexandre Gramfort`_.


 .. _changes_0_14:
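The entry above mentions the matrix inversion lemma. As a rough illustration of why it speeds things up, here is a minimal NumPy sketch of inverting a probabilistic-PCA covariance of the form W @ W.T + sigma2 * I via that lemma; the function and variable names are illustrative only, not scikit-learn internals.

import numpy as np

def ppca_precision(W, sigma2):
    """Invert C = W @ W.T + sigma2 * I using the matrix inversion lemma.

    Only a k x k system is solved (k = n_components), instead of inverting
    the full n_features x n_features covariance directly.
    """
    n_features, k = W.shape
    M = sigma2 * np.eye(k) + W.T @ W      # k x k matrix
    inner = np.linalg.solve(M, W.T)       # M^{-1} @ W.T
    return (np.eye(n_features) - W @ inner) / sigma2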

sklearn/decomposition/pca.py

Lines changed: 24 additions & 6 deletions
@@ -398,21 +398,21 @@ def inverse_transform(self, X):
         """
         return fast_dot(X, self.components_) + self.mean_

-    def score(self, X, y=None):
-        """Return a score associated to new data
+    def score_samples(self, X):
+        """Return the log-likelihood of each sample

         See. "Pattern Recognition and Machine Learning"
         by C. Bishop, 12.2.1 p. 574

         Parameters
         ----------
-        X: array of shape(n_samples, n_features)
-            The data to test
+        X: array, shape(n_samples, n_features)
+            The data.

         Returns
         -------
-        ll: array of shape (n_samples),
-            log-likelihood of each row of X under the current model
+        ll: array, shape (n_samples,)
+            Log-likelihood of each sample under the current model
         """
         Xr = X - self.mean_
         n_features = X.shape[1]
@@ -423,6 +423,24 @@ def score(self, X, y=None):
                           - fast_logdet(precision))
         return log_like

+    def score(self, X, y=None):
+        """Return the average log-likelihood of all samples
+
+        See. "Pattern Recognition and Machine Learning"
+        by C. Bishop, 12.2.1 p. 574
+
+        Parameters
+        ----------
+        X: array, shape(n_samples, n_features)
+            The data.
+
+        Returns
+        -------
+        ll: float
+            Average log-likelihood of the samples under the current model
+        """
+        return np.mean(self.score_samples(X))
+

 @deprecated("ProbabilisticPCA will be removed in 0.16. WARNING: The covariance"
             " estimation is NOT correct and is now moved to and corrected in"

sklearn/decomposition/tests/test_pca.py

Lines changed: 4 additions & 4 deletions
@@ -340,7 +340,7 @@ def test_pca_score():
     pca.fit(X)
     ll1 = pca.score(X)
     h = -0.5 * np.log(2 * np.pi * np.exp(1) * 0.1 ** 2) * p
-    np.testing.assert_almost_equal(ll1.mean() / h, 1, 0)
+    np.testing.assert_almost_equal(ll1 / h, 1, 0)


 def test_pca_score2():
@@ -352,13 +352,13 @@ def test_pca_score2():
     pca.fit(X)
     ll1 = pca.score(X)
     ll2 = pca.score(rng.randn(n, p) * .2 + np.array([3, 4, 5]))
-    assert_greater(ll1.mean(), ll2.mean())
+    assert_greater(ll1, ll2)

     # Test that it gives the same scores if whiten=True
     pca = PCA(n_components=2, whiten=True)
     pca.fit(X)
     ll2 = pca.score(X)
-    assert_array_almost_equal(ll1, ll2)
+    assert_almost_equal(ll1, ll2)


 def test_pca_score3():
@@ -373,7 +373,7 @@ def test_pca_score3():
     for k in range(p):
         pca = PCA(n_components=k)
         pca.fit(Xl)
-        ll[k] = pca.score(Xt).mean()
+        ll[k] = pca.score(Xt)

     assert_true(ll.argmax() == 1)
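Taken together with the test changes, usage after this commit looks roughly like the following: score_samples returns one log-likelihood per sample, while score returns their mean as a single float. This mirrors the behaviour shown in the diff and adds nothing beyond it.

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
X = rng.randn(100, 5)

pca = PCA(n_components=2).fit(X)
per_sample = pca.score_samples(X)   # array of shape (n_samples,)
average = pca.score(X)              # a single float
assert np.isclose(average, per_sample.mean())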
