Skip to content

Commit 011c0b3

Browse files
TobiasMadsen authored and glemaitre committed
[MRG+1] Allow vector pseudocounts Multinomial NB (scikit-learn#10346) (scikit-learn#10350)
1 parent 0390b28 commit 011c0b3

File tree

3 files changed

+54
-4
lines changed

3 files changed

+54
-4
lines changed

doc/whats_new/v0.20.rst

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -221,6 +221,10 @@ Classifiers and regressors
221221
callable and b) the input to the NearestNeighbors model is sparse.
222222
:issue:`9579` by :user:`Thomas Kober <tttthomasssss>`.
223223

224+
- Fixed a bug in :class:`naive_bayes.MultinomialNB` which did not accept vector
225+
valued pseudocounts (alpha).
226+
:issue:`10346` by :user:`Tobias Madsen <TobiasMadsen>`
227+
224228
- Fixed a bug in :class:`svm.SVC` where when the argument ``kernel`` is
225229
unicode in Python2, the ``predict_proba`` method was raising an
226230
unexpected TypeError given dense inputs.

sklearn/naive_bayes.py

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -468,13 +468,17 @@ def _update_class_log_prior(self, class_prior=None):
468468
self.class_log_prior_ = np.zeros(n_classes) - np.log(n_classes)
469469

470470
def _check_alpha(self):
471-
if self.alpha < 0:
471+
if np.min(self.alpha) < 0:
472472
raise ValueError('Smoothing parameter alpha = %.1e. '
473-
'alpha should be > 0.' % self.alpha)
474-
if self.alpha < _ALPHA_MIN:
473+
'alpha should be > 0.' % np.min(self.alpha))
474+
if isinstance(self.alpha, np.ndarray):
475+
if not self.alpha.shape[0] == self.feature_count_.shape[1]:
476+
raise ValueError("alpha should be a scalar or a numpy array "
477+
"with shape [n_features]")
478+
if np.min(self.alpha) < _ALPHA_MIN:
475479
warnings.warn('alpha too small will result in numeric errors, '
476480
'setting alpha = %.1e' % _ALPHA_MIN)
477-
return _ALPHA_MIN
481+
return np.maximum(self.alpha, _ALPHA_MIN)
478482
return self.alpha
479483

480484
def partial_fit(self, X, y, classes=None, sample_weight=None):

sklearn/tests/test_naive_bayes.py

Lines changed: 42 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -668,3 +668,45 @@ def test_alpha():
668668
X, y, classes=[0, 1])
669669
assert_raise_message(ValueError, expected_msg, m_nb.partial_fit,
670670
X, y, classes=[0, 1])
671+
672+
673+
def test_alpha_vector():
674+
X = np.array([[1, 0], [1, 1]])
675+
y = np.array([0, 1])
676+
677+
# Setting alpha=np.array with same length
678+
# as number of features should be fine
679+
alpha = np.array([1, 2])
680+
nb = MultinomialNB(alpha=alpha)
681+
nb.partial_fit(X, y, classes=[0, 1])
682+
683+
# Test feature probabilities uses pseudo-counts (alpha)
684+
feature_prob = np.array([[1 / 2, 1 / 2], [2 / 5, 3 / 5]])
685+
assert_array_almost_equal(nb.feature_log_prob_, np.log(feature_prob))
686+
687+
# Test predictions
688+
prob = np.array([[5 / 9, 4 / 9], [25 / 49, 24 / 49]])
689+
assert_array_almost_equal(nb.predict_proba(X), prob)
690+
691+
# Test alpha non-negative
692+
alpha = np.array([1., -0.1])
693+
expected_msg = ('Smoothing parameter alpha = -1.0e-01. '
694+
'alpha should be > 0.')
695+
m_nb = MultinomialNB(alpha=alpha)
696+
assert_raise_message(ValueError, expected_msg, m_nb.fit, X, y)
697+
698+
# Test that too small pseudo-counts are replaced
699+
ALPHA_MIN = 1e-10
700+
alpha = np.array([ALPHA_MIN / 2, 0.5])
701+
m_nb = MultinomialNB(alpha=alpha)
702+
m_nb.partial_fit(X, y, classes=[0, 1])
703+
assert_array_almost_equal(m_nb._check_alpha(),
704+
[ALPHA_MIN, 0.5],
705+
decimal=12)
706+
707+
# Test correct dimensions
708+
alpha = np.array([1., 2., 3.])
709+
m_nb = MultinomialNB(alpha=alpha)
710+
expected_msg = ('alpha should be a scalar or a numpy array '
711+
'with shape [n_features]')
712+
assert_raise_message(ValueError, expected_msg, m_nb.fit, X, y)

0 commit comments

Comments
 (0)