
Commit a7047ca

liblinear bias/intercept handling

Authored by paolo-losi and committed by Fabian Pedregosa
1 parent 4784e58

7 files changed (+100, -17 lines)

scikits/learn/linear_model/logistic.py
Lines changed: 13 additions & 2 deletions

@@ -28,6 +28,17 @@ class LogisticRegression(BaseLibLinear, ClassifierMixin):
         Specifies if a constant (a.k.a. bias or intercept) should be
         added to the decision function

+    intercept_scaling : float, default: 1
+        When self.fit_intercept is True, the instance vector x becomes
+        [x, self.intercept_scaling], i.e. a "synthetic" feature with
+        constant value equal to intercept_scaling is appended to the
+        instance vector.
+        The intercept becomes intercept_scaling * synthetic feature weight.
+        Note: the synthetic feature weight is subject to l1/l2
+        regularization, like all other features.
+        To lessen the effect of regularization on the synthetic feature
+        weight (and therefore on the intercept), intercept_scaling has
+        to be increased.

     Attributes
     ----------

@@ -56,11 +67,11 @@ class LogisticRegression(BaseLibLinear, ClassifierMixin):
     """

     def __init__(self, penalty='l2', dual=False, eps=1e-4, C=1.0,
-                 fit_intercept=True):
+                 fit_intercept=True, intercept_scaling=1):

         super(LogisticRegression, self).__init__(penalty=penalty,
             dual=dual, loss='lr', eps=eps, C=C,
-            fit_intercept=fit_intercept)
+            fit_intercept=fit_intercept, intercept_scaling=intercept_scaling)

     def predict_proba(self, X):
         """

scikits/learn/linear_model/sparse/logistic.py
Lines changed: 13 additions & 2 deletions

@@ -35,6 +35,17 @@ class LogisticRegression(SparseBaseLibLinear, ClassifierMixin):
         Specifies if a constant (a.k.a. bias or intercept) should be
         added to the decision function

+    intercept_scaling : float, default: 1
+        When self.fit_intercept is True, the instance vector x becomes
+        [x, self.intercept_scaling], i.e. a "synthetic" feature with
+        constant value equal to intercept_scaling is appended to the
+        instance vector.
+        The intercept becomes intercept_scaling * synthetic feature weight.
+        Note: the synthetic feature weight is subject to l1/l2
+        regularization, like all other features.
+        To lessen the effect of regularization on the synthetic feature
+        weight (and therefore on the intercept), intercept_scaling has
+        to be increased.

     Attributes
     ----------

@@ -63,11 +74,11 @@ class LogisticRegression(SparseBaseLibLinear, ClassifierMixin):
     """

     def __init__(self, penalty='l2', dual=False, eps=1e-4, C=1.0,
-                 fit_intercept=True):
+                 fit_intercept=True, intercept_scaling=1):

         super(LogisticRegression, self).__init__(penalty=penalty,
             dual=dual, loss='lr', eps=eps, C=C,
-            fit_intercept=fit_intercept)
+            fit_intercept=fit_intercept, intercept_scaling=intercept_scaling)

     def predict_proba(self, X):
         """

scikits/learn/svm/base.py
Lines changed: 10 additions & 12 deletions

@@ -309,13 +309,14 @@ class BaseLibLinear(BaseEstimator):
     }

     def __init__(self, penalty='l2', loss='l2', dual=True, eps=1e-4, C=1.0,
-                 multi_class=False, fit_intercept=True):
+                 multi_class=False, fit_intercept=True, intercept_scaling=1):
         self.penalty = penalty
         self.loss = loss
         self.dual = dual
         self.eps = eps
         self.C = C
         self.fit_intercept = fit_intercept
+        self.intercept_scaling = intercept_scaling
         self.multi_class = multi_class

         # Check that the arguments given are valid:

@@ -395,19 +396,19 @@ def predict(self, X):

     def _check_n_features(self, X):
         n_features = self.raw_coef_.shape[1]
-        if self.fit_intercept > 0: n_features -= 1
+        if self.fit_intercept: n_features -= 1
         if X.shape[1] != n_features:
             raise ValueError("X.shape[1] should be %d, not %d." % (n_features,
                              X.shape[1]))
     @property
     def intercept_(self):
-        if self.fit_intercept > 0:
-            return self.raw_coef_[:,-1]
+        if self.fit_intercept:
+            return self.intercept_scaling * self.raw_coef_[:,-1]
         return 0.0

     @property
     def coef_(self):
-        if self.fit_intercept > 0:
+        if self.fit_intercept:
             return self.raw_coef_[:,:-1]
         return self.raw_coef_

@@ -416,14 +417,11 @@ def predict_proba(self, T):
         raise NotImplementedError(
             'liblinear does not provide this functionality')

-
     def _get_bias(self):
-        """
-        Due to some peculiarities in liblinear, the bias parameter must be
-        a double indicating if the intercept should be computed:
-        positive for true, negative for false.
-        """
-        return int(self.fit_intercept) - .5
+        if self.fit_intercept:
+            return self.intercept_scaling
+        else:
+            return -1.0


 set_verbosity_wrap(0)
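
The _get_bias change is the heart of the commit: liblinear treats a positive bias as the value of the appended constant feature and a non-positive bias as "fit no intercept", so forwarding intercept_scaling (instead of the old +0.5/-0.5 flag encoding) is what makes the parameter effective. It also explains why a larger intercept_scaling shields the intercept from regularization: the penalty is paid on the synthetic weight w_b, while the reported intercept is intercept_scaling * w_b. A back-of-envelope check with hypothetical numbers:

    # l2 penalty paid for representing the same intercept b = 2.0
    b = 2.0
    for scaling in (1.0, 10.0, 100.0):
        w_b = b / scaling         # synthetic weight liblinear must learn
        print(scaling, w_b ** 2)  # penalty term: 4.0, then 0.04, then 0.0004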

scikits/learn/svm/liblinear.py
Lines changed: 11 additions & 0 deletions

@@ -34,6 +34,17 @@ class LinearSVC(BaseLibLinear, ClassifierMixin):
         perform multi-class SVM by Crammer and Singer. If active,
         options loss, penalty and dual will be ignored.

+    intercept_scaling : float, default: 1
+        When self.fit_intercept is True, the instance vector x becomes
+        [x, self.intercept_scaling], i.e. a "synthetic" feature with
+        constant value equal to intercept_scaling is appended to the
+        instance vector.
+        The intercept becomes intercept_scaling * synthetic feature weight.
+        Note: the synthetic feature weight is subject to l1/l2
+        regularization, like all other features.
+        To lessen the effect of regularization on the synthetic feature
+        weight (and therefore on the intercept), intercept_scaling has
+        to be increased.

     Attributes
     ----------
     `coef_` : array, shape = [n_features] if n_classes == 2 else [n_classes, n_features]

scikits/learn/svm/sparse/liblinear.py
Lines changed: 11 additions & 0 deletions

@@ -32,6 +32,17 @@ class LinearSVC(SparseBaseLibLinear, ClassifierMixin):
         Select the algorithm to either solve the dual or primal
         optimization problem.

+    intercept_scaling : float, default: 1
+        When self.fit_intercept is True, the instance vector x becomes
+        [x, self.intercept_scaling], i.e. a "synthetic" feature with
+        constant value equal to intercept_scaling is appended to the
+        instance vector.
+        The intercept becomes intercept_scaling * synthetic feature weight.
+        Note: the synthetic feature weight is subject to l1/l2
+        regularization, like all other features.
+        To lessen the effect of regularization on the synthetic feature
+        weight (and therefore on the intercept), intercept_scaling has
+        to be increased.

     Attributes
     ----------
     `coef_` : array, shape = [n_features] if n_classes == 2 else [n_classes, n_features]

scikits/learn/svm/tests/test_sparse.py
Lines changed: 7 additions & 0 deletions

@@ -6,6 +6,7 @@

 from nose.tools import assert_raises
 from scikits.learn.datasets.samples_generator import test_dataset_classif
+from . import test_svm

 # test sample 1
 X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]])

@@ -143,6 +144,12 @@ def test_sample_weights():
     clf.fit(X, Y, sample_weight=sample_weight)
     assert_array_equal(clf.predict(X[2]), [2.])

+def test_sparse_liblinear_intercept_handling():
+    """
+    Test that sparse liblinear honours the intercept_scaling param.
+    """
+    test_svm.test_dense_liblinear_intercept_handling(svm.sparse.LinearSVC)
+

 if __name__ == '__main__':
     import nose

scikits/learn/svm/tests/test_svm.py
Lines changed: 35 additions & 1 deletion

@@ -346,7 +346,7 @@ def test_LinearSVC():
     assert clf.fit_intercept

     assert_array_equal(clf.predict(T), true_result)
-    assert_array_almost_equal(clf.intercept_, [0], decimal=5)
+    assert_array_almost_equal(clf.intercept_, [0], decimal=3)

     # the same with l1 penalty
     clf = svm.LinearSVC(penalty='l1', dual=False).fit(X, Y)

@@ -368,6 +368,40 @@ def test_LinearSVC_iris():
     clf = svm.LinearSVC().fit(iris.data, iris.target)
     assert np.mean(clf.predict(iris.data) == iris.target) > 0.95

+def test_dense_liblinear_intercept_handling(classifier=svm.LinearSVC):
+    """
+    Test that dense liblinear honours the intercept_scaling param.
+    """
+    X = [[2, 1],
+         [3, 1],
+         [1, 3],
+         [2, 3]]
+    y = [0, 0, 1, 1]
+    clf = classifier(fit_intercept=True, penalty='l1', loss='l2',
+                     dual=False, C=1, eps=1e-7)
+    assert clf.intercept_scaling == 1, clf.intercept_scaling
+    assert clf.fit_intercept
+
+    # when intercept_scaling is low, the intercept value is heavily
+    # "penalized" by regularization
+    clf.intercept_scaling = 1
+    clf.fit(X, y)
+    assert_almost_equal(clf.intercept_, 0, decimal=5)
+
+    # when intercept_scaling is sufficiently high, the intercept value
+    # is not affected by regularization
+    clf.intercept_scaling = 100
+    clf.fit(X, y)
+    intercept1 = clf.intercept_
+    assert intercept1 > 1
+
+    # when intercept_scaling is sufficiently high, the intercept value
+    # doesn't depend on the intercept_scaling value
+    clf.intercept_scaling = 1000
+    clf.fit(X, y)
+    intercept2 = clf.intercept_
+    assert_array_almost_equal(intercept1, intercept2, decimal=2)
+

 if __name__ == '__main__':
     import nose
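
For readers wanting to try the new parameter, a short usage sketch mirroring the test above (assuming a scikits.learn checkout that includes this commit):

    from scikits.learn import svm

    X = [[2, 1], [3, 1], [1, 3], [2, 3]]
    y = [0, 0, 1, 1]

    clf = svm.LinearSVC(penalty='l1', loss='l2', dual=False, C=1,
                        fit_intercept=True, intercept_scaling=1)
    clf.fit(X, y)
    print(clf.intercept_)   # ~0: l1 regularization penalizes the intercept away

    clf.intercept_scaling = 100
    clf.fit(X, y)
    print(clf.intercept_)   # > 1: large scaling lets the intercept survive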
