
Commit a7047ca

liblinear bias/intercept handling

Authored by paolo-losi and committed by Fabian Pedregosa
1 parent 4784e58

7 files changed (+100, -17 lines)

scikits/learn/linear_model/logistic.py
Lines changed: 13 additions & 2 deletions

@@ -28,6 +28,17 @@ class LogisticRegression(BaseLibLinear, ClassifierMixin):
         Specifies if a constant (a.k.a. bias or intercept) should be
         added to the decision function

+    intercept_scaling : float, default: 1
+        When self.fit_intercept is True, the instance vector x becomes
+        [x, self.intercept_scaling], i.e. a "synthetic" feature with
+        constant value equal to intercept_scaling is appended to the
+        instance vector.
+        The intercept becomes intercept_scaling * synthetic feature weight.
+        Note: the synthetic feature weight is subject to l1/l2
+        regularization, like all other features.
+        To lessen the effect of regularization on the synthetic feature
+        weight (and therefore on the intercept), intercept_scaling has
+        to be increased.

     Attributes
     ----------

@@ -56,11 +67,11 @@ class LogisticRegression(BaseLibLinear, ClassifierMixin):
     """

     def __init__(self, penalty='l2', dual=False, eps=1e-4, C=1.0,
-                 fit_intercept=True):
+                 fit_intercept=True, intercept_scaling=1):

         super(LogisticRegression, self).__init__(penalty=penalty,
             dual=dual, loss='lr', eps=eps, C=C,
-            fit_intercept=fit_intercept)
+            fit_intercept=fit_intercept, intercept_scaling=intercept_scaling)

     def predict_proba(self, X):
         """

scikits/learn/linear_model/sparse/logistic.py
Lines changed: 13 additions & 2 deletions

@@ -35,6 +35,17 @@ class LogisticRegression(SparseBaseLibLinear, ClassifierMixin):
         Specifies if a constant (a.k.a. bias or intercept) should be
         added to the decision function

+    intercept_scaling : float, default: 1
+        When self.fit_intercept is True, the instance vector x becomes
+        [x, self.intercept_scaling], i.e. a "synthetic" feature with
+        constant value equal to intercept_scaling is appended to the
+        instance vector.
+        The intercept becomes intercept_scaling * synthetic feature weight.
+        Note: the synthetic feature weight is subject to l1/l2
+        regularization, like all other features.
+        To lessen the effect of regularization on the synthetic feature
+        weight (and therefore on the intercept), intercept_scaling has
+        to be increased.

     Attributes
     ----------

@@ -63,11 +74,11 @@ class LogisticRegression(SparseBaseLibLinear, ClassifierMixin):
     """

     def __init__(self, penalty='l2', dual=False, eps=1e-4, C=1.0,
-                 fit_intercept=True):
+                 fit_intercept=True, intercept_scaling=1):

         super(LogisticRegression, self).__init__(penalty=penalty,
             dual=dual, loss='lr', eps=eps, C=C,
-            fit_intercept=fit_intercept)
+            fit_intercept=fit_intercept, intercept_scaling=intercept_scaling)

     def predict_proba(self, X):
         """

scikits/learn/svm/base.py
Lines changed: 10 additions & 12 deletions

@@ -309,13 +309,14 @@ class BaseLibLinear(BaseEstimator):
     }

     def __init__(self, penalty='l2', loss='l2', dual=True, eps=1e-4, C=1.0,
-                 multi_class=False, fit_intercept=True):
+                 multi_class=False, fit_intercept=True, intercept_scaling=1):
         self.penalty = penalty
         self.loss = loss
         self.dual = dual
         self.eps = eps
         self.C = C
         self.fit_intercept = fit_intercept
+        self.intercept_scaling = intercept_scaling
         self.multi_class = multi_class

         # Check that the arguments given are valid:

@@ -395,19 +396,19 @@ def predict(self, X):

     def _check_n_features(self, X):
         n_features = self.raw_coef_.shape[1]
-        if self.fit_intercept > 0: n_features -= 1
+        if self.fit_intercept: n_features -= 1
         if X.shape[1] != n_features:
             raise ValueError("X.shape[1] should be %d, not %d." % (n_features,
                              X.shape[1]))
     @property
     def intercept_(self):
-        if self.fit_intercept > 0:
-            return self.raw_coef_[:,-1]
+        if self.fit_intercept:
+            return self.intercept_scaling * self.raw_coef_[:,-1]
         return 0.0

     @property
     def coef_(self):
-        if self.fit_intercept > 0:
+        if self.fit_intercept:
             return self.raw_coef_[:,:-1]
         return self.raw_coef_

@@ -416,14 +417,11 @@ def predict_proba(self, T):
         raise NotImplementedError(
             'liblinear does not provide this functionality')

-
     def _get_bias(self):
-        """
-        Due to some peculiarities in liblinear, the bias parameter must be
-        a double indicating if the intercept should be computed:
-        positive for true, negative for false.
-        """
-        return int(self.fit_intercept) - .5
+        if self.fit_intercept:
+            return self.intercept_scaling
+        else:
+            return -1.0


 set_verbosity_wrap(0)
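
The _get_bias change is the heart of the commit: liblinear treats a positive bias as the value of the appended constant feature and a non-positive bias as "fit no intercept", so forwarding intercept_scaling (instead of the old +0.5/-0.5 flag encoding) is what makes the parameter effective. It also explains why a larger intercept_scaling shields the intercept from regularization: the penalty is paid on the synthetic weight w_b, while the reported intercept is intercept_scaling * w_b. A back-of-envelope check with hypothetical numbers:

    # l2 penalty paid for representing the same intercept b = 2.0
    b = 2.0
    for scaling in (1.0, 10.0, 100.0):
        w_b = b / scaling         # synthetic weight liblinear must learn
        print(scaling, w_b ** 2)  # penalty term: 4.0, then 0.04, then 0.0004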

scikits/learn/svm/liblinear.py
Lines changed: 11 additions & 0 deletions

@@ -34,6 +34,17 @@ class LinearSVC(BaseLibLinear, ClassifierMixin):
         perform multi-class SVM by Crammer and Singer. If active,
         options loss, penalty and dual will be ignored.

+    intercept_scaling : float, default: 1
+        When self.fit_intercept is True, the instance vector x becomes
+        [x, self.intercept_scaling], i.e. a "synthetic" feature with
+        constant value equal to intercept_scaling is appended to the
+        instance vector.
+        The intercept becomes intercept_scaling * synthetic feature weight.
+        Note: the synthetic feature weight is subject to l1/l2
+        regularization, like all other features.
+        To lessen the effect of regularization on the synthetic feature
+        weight (and therefore on the intercept), intercept_scaling has
+        to be increased.

     Attributes
     ----------
     `coef_` : array, shape = [n_features] if n_classes == 2 else [n_classes, n_features]

scikits/learn/svm/sparse/liblinear.py
Lines changed: 11 additions & 0 deletions

@@ -32,6 +32,17 @@ class LinearSVC(SparseBaseLibLinear, ClassifierMixin):
         Select the algorithm to either solve the dual or primal
         optimization problem.

+    intercept_scaling : float, default: 1
+        When self.fit_intercept is True, the instance vector x becomes
+        [x, self.intercept_scaling], i.e. a "synthetic" feature with
+        constant value equal to intercept_scaling is appended to the
+        instance vector.
+        The intercept becomes intercept_scaling * synthetic feature weight.
+        Note: the synthetic feature weight is subject to l1/l2
+        regularization, like all other features.
+        To lessen the effect of regularization on the synthetic feature
+        weight (and therefore on the intercept), intercept_scaling has
+        to be increased.

     Attributes
     ----------
     `coef_` : array, shape = [n_features] if n_classes == 2 else [n_classes, n_features]

scikits/learn/svm/tests/test_sparse.py
Lines changed: 7 additions & 0 deletions

@@ -6,6 +6,7 @@

 from nose.tools import assert_raises
 from scikits.learn.datasets.samples_generator import test_dataset_classif
+from . import test_svm

 # test sample 1
 X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]])

@@ -143,6 +144,12 @@ def test_sample_weights():
     clf.fit(X, Y, sample_weight=sample_weight)
     assert_array_equal(clf.predict(X[2]), [2.])

+def test_sparse_liblinear_intercept_handling():
+    """
+    Test that sparse liblinear honours the intercept_scaling param.
+    """
+    test_svm.test_dense_liblinear_intercept_handling(svm.sparse.LinearSVC)
+

 if __name__ == '__main__':
     import nose

scikits/learn/svm/tests/test_svm.py
Lines changed: 35 additions & 1 deletion

@@ -346,7 +346,7 @@ def test_LinearSVC():
     assert clf.fit_intercept

     assert_array_equal(clf.predict(T), true_result)
-    assert_array_almost_equal(clf.intercept_, [0], decimal=5)
+    assert_array_almost_equal(clf.intercept_, [0], decimal=3)

     # the same with l1 penalty
     clf = svm.LinearSVC(penalty='l1', dual=False).fit(X, Y)

@@ -368,6 +368,40 @@ def test_LinearSVC_iris():
     clf = svm.LinearSVC().fit(iris.data, iris.target)
     assert np.mean(clf.predict(iris.data) == iris.target) > 0.95

+def test_dense_liblinear_intercept_handling(classifier=svm.LinearSVC):
+    """
+    Test that dense liblinear honours the intercept_scaling param.
+    """
+    X = [[2, 1],
+         [3, 1],
+         [1, 3],
+         [2, 3]]
+    y = [0, 0, 1, 1]
+    clf = classifier(fit_intercept=True, penalty='l1', loss='l2',
+                     dual=False, C=1, eps=1e-7)
+    assert clf.intercept_scaling == 1, clf.intercept_scaling
+    assert clf.fit_intercept
+
+    # when intercept_scaling is low, the intercept value is heavily
+    # "penalized" by regularization
+    clf.intercept_scaling = 1
+    clf.fit(X, y)
+    assert_almost_equal(clf.intercept_, 0, decimal=5)
+
+    # when intercept_scaling is sufficiently high, the intercept value
+    # is not affected by regularization
+    clf.intercept_scaling = 100
+    clf.fit(X, y)
+    intercept1 = clf.intercept_
+    assert intercept1 > 1
+
+    # when intercept_scaling is sufficiently high, the intercept value
+    # doesn't depend on the intercept_scaling value
+    clf.intercept_scaling = 1000
+    clf.fit(X, y)
+    intercept2 = clf.intercept_
+    assert_array_almost_equal(intercept1, intercept2, decimal=2)
+

 if __name__ == '__main__':
     import nose
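
For readers wanting to try the new parameter, a short usage sketch mirroring the test above (assuming a scikits.learn checkout that includes this commit):

    from scikits.learn import svm

    X = [[2, 1], [3, 1], [1, 3], [2, 3]]
    y = [0, 0, 1, 1]

    clf = svm.LinearSVC(penalty='l1', loss='l2', dual=False, C=1,
                        fit_intercept=True, intercept_scaling=1)
    clf.fit(X, y)
    print(clf.intercept_)   # ~0: l1 regularization penalizes the intercept away

    clf.intercept_scaling = 100
    clf.fit(X, y)
    print(clf.intercept_)   # > 1: large scaling lets the intercept survive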
