"""
============================================
Curve Fitting with Bayesian Ridge Regression
============================================

Fits a cubic polynomial to a noisy sinusoid using Bayesian ridge regression.

See :ref:`bayesian_ridge_regression` for more information on the regressor.

In general, when fitting a curve with a polynomial by Bayesian ridge
regression, the choice of initial values for the regularization parameters
(alpha, lambda) may be important, because they are estimated by an iterative
procedure whose result depends on those initial values.

In this example, the sinusoid is approximated by a polynomial using different
pairs of initial values.

When starting from the default values (alpha_init = 1/Var(y), which is about
1.90 for this data, and lambda_init = 1.), the bias of the resulting curve is
large and the variance is small, so lambda_init should be set relatively
small (e.g. 1.e-3) to reduce the bias.

Also, by evaluating the log marginal likelihood (L) of these models, we can
determine which one is better: the model with the larger L is the more
likely one.
| 26 | +""" |
| 27 | +print(__doc__) |
| 28 | + |
| 29 | +# Author: Yoshihiro Uchida <[email protected]> |
| 30 | + |
| 31 | +import numpy as np |
| 32 | +import matplotlib.pyplot as plt |
| 33 | + |
| 34 | +from sklearn.linear_model import BayesianRidge |
| 35 | + |
| 36 | + |
def func(x):
    """Ground-truth signal: a sinusoid with period 1."""
    return np.sin(2 * np.pi * x)


# #############################################################################
# Generate sinusoidal data with noise
size = 25
rng = np.random.RandomState(1234)
x_train = rng.uniform(0., 1., size)
y_train = func(x_train) + rng.normal(scale=0.1, size=size)
x_test = np.linspace(0., 1., 100)
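# x_train holds 25 noisy samples of the sinusoid; x_test is a dense grid used
# only for evaluating and plotting the predictions.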


# #############################################################################
# Fit by cubic polynomial
n_order = 3
X_train = np.vander(x_train, n_order + 1, increasing=True)
X_test = np.vander(x_test, n_order + 1, increasing=True)
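# With increasing=True, each row of the Vandermonde matrix holds the
# polynomial features [1, x, x**2, x**3] of one sample, so the regression
# learns the polynomial coefficients directly; the constant column plays the
# role of the intercept (hence fit_intercept=False below).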

# #############################################################################
# Plot the true and predicted curves with log marginal likelihood (L)
reg = BayesianRidge(tol=1e-6, fit_intercept=False, compute_score=True)
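# With compute_score=True, the log marginal likelihood at each iteration of
# the fit is stored in reg.scores_; its final value is reported in each plot.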
fig, axes = plt.subplots(1, 2, figsize=(8, 4))
for i, ax in enumerate(axes):
    # Bayesian ridge regression with different initial value pairs
    if i == 0:
        init = [1 / np.var(y_train), 1.]  # Default values
    elif i == 1:
        init = [1., 1e-3]
    reg.set_params(alpha_init=init[0], lambda_init=init[1])
    reg.fit(X_train, y_train)
    ymean, ystd = reg.predict(X_test, return_std=True)
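    # return_std=True also returns the standard deviation of the predictive
    # distribution at each test point, used for the shaded band below.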

    ax.plot(x_test, func(x_test), color="blue", label="sin($2\\pi x$)")
    ax.scatter(x_train, y_train, s=50, alpha=0.5, label="observation")
    ax.plot(x_test, ymean, color="red", label="predicted mean")
    ax.fill_between(x_test, ymean - ystd, ymean + ystd,
                    color="pink", alpha=0.5, label="predicted std")
    ax.set_ylim(-1.3, 1.3)
    ax.legend()
    title = "$\\alpha$_init$={:.2f},\\ \\lambda$_init$={}$".format(
        init[0], init[1])
    if i == 0:
        title += " (Default)"
    ax.set_title(title, fontsize=12)
    text = "$\\alpha={:.1f}$\n$\\lambda={:.3f}$\n$L={:.1f}$".format(
        reg.alpha_, reg.lambda_, reg.scores_[-1])
    ax.text(0.05, -1.0, text, fontsize=12)

plt.tight_layout()
plt.show()
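
# #############################################################################
# Optional: the final log marginal likelihood can also be used to pick
# lambda_init programmatically. This is a minimal sketch going beyond the
# example above; the candidate grid and the fixed alpha_init=1. are
# arbitrary choices made for illustration.
best_init, best_score = None, -np.inf
for lambda_init in [1., 1e-1, 1e-2, 1e-3, 1e-4]:
    reg.set_params(alpha_init=1., lambda_init=lambda_init)
    reg.fit(X_train, y_train)
    if reg.scores_[-1] > best_score:  # keep the initialization with largest L
        best_init, best_score = lambda_init, reg.scores_[-1]
print("lambda_init with the highest L: {} (L = {:.1f})".format(
    best_init, best_score))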