COSMIT in cross-validation tests

amueller · amueller · commit d173ab11d8ea · 2012-12-02T19:29:11.000Z
diff --git a/sklearn/tests/test_cross_validation.py b/sklearn/tests/test_cross_validation.py
@@ -35,12 +35,14 @@ def __init__(self, a=0):
     def fit(self, X, Y=None, sample_weight=None, class_prior=None):
         if sample_weight is not None:
             assert_true(sample_weight.shape[0] == X.shape[0],
-            'MockClassifier extra fit_param sample_weight.shape[0] is {0}, '
-            'should be {1}'.format(sample_weight.shape[0], X.shape[0]))
+                        'MockClassifier extra fit_param sample_weight.shape[0]'
+                        ' is {0}, should be {1}'.format(sample_weight.shape[0],
+                                                        X.shape[0]))
         if class_prior is not None:
             assert_true(class_prior.shape[0] == len(np.unique(y)),
-            'MockClassifier extra fit_param class_prior.shape[0] is {0}, '
-            'should be {1}'.format(class_prior.shape[0], len(np.unique(y))))
+                        'MockClassifier extra fit_param class_prior.shape[0]'
+                        ' is {0}, should be {1}'.format(class_prior.shape[0],
+                                                        len(np.unique(y))))
         return self
 
     def predict(self, T):
@@ -144,26 +146,23 @@ def test_stratified_shuffle_split_init():
 
 
 def test_stratified_shuffle_split_iter():
-    ys = [
-        np.array([1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3]),
-        np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]),
-        np.array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2]),
-        np.array([1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]),
-        np.array([-1] * 800 + [1] * 50)
-        ]
+    ys = [np.array([1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3]),
+          np.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3]),
+          np.array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2]),
+          np.array([1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4]),
+          np.array([-1] * 800 + [1] * 50)
+          ]
 
     for y in ys:
         sss = cval.StratifiedShuffleSplit(y, 6, test_size=0.33,
                                           random_state=0, indices=True)
         for train, test in sss:
             assert_array_equal(unique(y[train]), unique(y[test]))
             # Checks if folds keep classes proportions
-            p_train = np.bincount(
-                unique(y[train], return_inverse=True)[1]
-                ) / float(len(y[train]))
-            p_test = np.bincount(
-                unique(y[test], return_inverse=True)[1]
-                ) / float(len(y[test]))
+            p_train = (np.bincount(unique(y[train], return_inverse=True)[1]) /
+                       float(len(y[train])))
+            p_test = (np.bincount(unique(y[test], return_inverse=True)[1]) /
+                      float(len(y[test])))
             assert_array_almost_equal(p_train, p_test, 1)
             assert_equal(y[train].size + y[test].size, y.size)
             assert_array_equal(np.lib.arraysetops.intersect1d(train, test), [])
@@ -245,27 +244,26 @@ class BrokenEstimator:
 
 def test_train_test_split_errors():
     assert_raises(ValueError, cval.train_test_split)
+    assert_raises(ValueError, cval.train_test_split, range(3), train_size=1.1)
+    assert_raises(ValueError, cval.train_test_split, range(3), test_size=0.6,
+                  train_size=0.6)
     assert_raises(ValueError, cval.train_test_split, range(3),
-            train_size=1.1)
+                  test_size=np.float32(0.6), train_size=np.float32(0.6))
     assert_raises(ValueError, cval.train_test_split, range(3),
-            test_size=0.6, train_size=0.6)
-    assert_raises(ValueError, cval.train_test_split, range(3),
-            test_size=np.float32(0.6), train_size=np.float32(0.6))
-    assert_raises(ValueError, cval.train_test_split, range(3),
-            test_size="wrong_type")
-    assert_raises(ValueError, cval.train_test_split, range(3),
-            test_size=2, train_size=4)
+                  test_size="wrong_type")
+    assert_raises(ValueError, cval.train_test_split, range(3), test_size=2,
+                  train_size=4)
     assert_raises(TypeError, cval.train_test_split, range(3),
-            some_argument=1.1)
+                  some_argument=1.1)
     assert_raises(ValueError, cval.train_test_split, range(3), range(42))
 
 
 def test_train_test_split():
     X = np.arange(100).reshape((10, 10))
     X_s = coo_matrix(X)
     y = range(10)
-    X_train, X_test, X_s_train, X_s_test, y_train, y_test = \
-            cval.train_test_split(X, X_s, y)
+    split = cval.train_test_split(X, X_s, y)
+    X_train, X_test, X_s_train, X_s_test, y_train, y_test = split
     assert_array_equal(X_train, X_s_train.toarray())
     assert_array_equal(X_test, X_s_test.toarray())
     assert_array_equal(X_train[:, 0], y_train * 10)
@@ -283,13 +281,13 @@ def test_cross_val_score_with_score_func_classification():
     # Correct classification score (aka. zero / one score) - should be the
     # same as the default estimator score
     zo_scores = cval.cross_val_score(clf, iris.data, iris.target,
-            score_func=zero_one_score, cv=5)
+                                     score_func=zero_one_score, cv=5)
     assert_array_almost_equal(zo_scores, [1., 0.97, 0.90, 0.97, 1.], 2)
 
     # F1 score (class are balanced so f1_score should be equal to zero/one
     # score
     f1_scores = cval.cross_val_score(clf, iris.data, iris.target,
-            score_func=f1_score, cv=5)
+                                     score_func=f1_score, cv=5)
     assert_array_almost_equal(f1_scores, [1., 0.97, 0.90, 0.97, 1.], 2)
 
 
@@ -309,13 +307,13 @@ def test_cross_val_score_with_score_func_regression():
 
     # Mean squared error
     mse_scores = cval.cross_val_score(reg, X, y, cv=5,
-            score_func=mean_squared_error)
+                                      score_func=mean_squared_error)
     expected_mse = np.array([763.07, 553.16, 274.38, 273.26, 1681.99])
     assert_array_almost_equal(mse_scores, expected_mse, 2)
 
     # Explained variance
     ev_scores = cval.cross_val_score(reg, X, y, cv=5,
-            score_func=explained_variance_score)
+                                     score_func=explained_variance_score)
     assert_array_almost_equal(ev_scores, [0.94, 0.97, 0.97, 0.99, 0.92], 2)
 
 
@@ -353,7 +351,7 @@ def test_permutation_score():
     y = np.mod(np.arange(len(y)), 3)
 
     score, scores, pvalue = cval.permutation_test_score(svm, X, y,
-            zero_one_score, cv)
+                                                        zero_one_score, cv)
 
     assert_less(score, 0.5)
     assert_greater(pvalue, 0.4)
@@ -411,11 +409,10 @@ def test_shufflesplit_errors():
     assert_raises(ValueError, cval.ShuffleSplit, 10, test_size=2.0)
     assert_raises(ValueError, cval.ShuffleSplit, 10, test_size=1.0)
     assert_raises(ValueError, cval.ShuffleSplit, 10, test_size=0.1,
-            train_size=0.95)
+                  train_size=0.95)
     assert_raises(ValueError, cval.ShuffleSplit, 10, test_size=11)
     assert_raises(ValueError, cval.ShuffleSplit, 10, test_size=10)
-    assert_raises(ValueError, cval.ShuffleSplit, 10, test_size=8,
-            train_size=3)
+    assert_raises(ValueError, cval.ShuffleSplit, 10, test_size=8, train_size=3)
     assert_raises(ValueError, cval.ShuffleSplit, 10, train_size=1j)