|
22 | 22 | data = np.column_stack([X, y]) |
23 | 23 |
|
24 | 24 | # Add some faulty data |
25 | | -outliers = np.array((10, 30, 200)) |
26 | | -data[outliers[0], :] = (1000, 1000) |
27 | | -data[outliers[1], :] = (-1000, -1000) |
28 | | -data[outliers[2], :] = (-100, -50) |
| 25 | +rng = np.random.RandomState(1000) |
| 26 | +outliers = np.unique(rng.randint(len(X), size=200)) |
| 27 | +data[outliers, :] += 50 + rng.rand(len(outliers), 2) * 10 |
29 | 28 |
|
30 | 29 | X = data[:, 0][:, np.newaxis] |
31 | 30 | y = data[:, 1] |
@@ -90,13 +89,16 @@ def test_ransac_max_trials(): |
90 | 89 | random_state=0) |
91 | 90 | assert_raises(ValueError, ransac_estimator.fit, X, y) |
92 | 91 |
|
93 | | - ransac_estimator = RANSACRegressor(base_estimator, min_samples=2, |
94 | | - residual_threshold=5, max_trials=11, |
95 | | - random_state=0) |
96 | | - assert getattr(ransac_estimator, 'n_trials_', None) is None |
97 | | - ransac_estimator.fit(X, y) |
98 | | - assert_equal(ransac_estimator.n_trials_, 2) |
99 | | - |
| 92 | + # There is only a ~1e-9 chance RANSAC will need this many trials, so this
| 93 | + # is a safe upper bound; a looser probability such as 1 - 1e-2 is not
| 94 | + # enough and can still be exceeded. 2 is what RANSAC defines as
| 95 | + # min_samples = X.shape[1] + 1.
| 95 | + max_trials = _dynamic_max_trials( |
| 96 | + len(X) - len(outliers), X.shape[0], 2, 1 - 1e-9) |
| 97 | + ransac_estimator = RANSACRegressor(base_estimator, min_samples=2) |
| 98 | + for i in range(50): |
| 99 | + ransac_estimator.set_params(min_samples=2, random_state=i) |
| 100 | + ransac_estimator.fit(X, y) |
| 101 | + assert_less(ransac_estimator.n_trials_, max_trials + 1) |
100 | 102 |
|
101 | 103 | def test_ransac_stop_n_inliers(): |
102 | 104 | base_estimator = LinearRegression() |
@@ -383,6 +385,7 @@ def test_ransac_residual_metric(): |
383 | 385 | assert_array_almost_equal(ransac_estimator0.predict(X), |
384 | 386 | ransac_estimator2.predict(X)) |
385 | 387 |
|
| 388 | + |
386 | 389 | def test_ransac_residual_loss(): |
387 | 390 | loss_multi1 = lambda y_true, y_pred: np.sum(np.abs(y_true - y_pred), axis=1) |
388 | 391 | loss_multi2 = lambda y_true, y_pred: np.sum((y_true - y_pred) ** 2, axis=1) |
|
0 commit comments