"print(__doc__)\n\n# Author: Gilles Louppe <
[email protected]>\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom sklearn.ensemble import BaggingRegressor\nfrom sklearn.tree import DecisionTreeRegressor\n\n# Settings\nn_repeat = 50 # Number of iterations for computing expectations\nn_train = 50 # Size of the training set\nn_test = 1000 # Size of the test set\nnoise = 0.1 # Standard deviation of the noise\nnp.random.seed(0)\n\n# Change this for exploring the bias-variance decomposition of other\n# estimators. This should work well for estimators with high variance (e.g.,\n# decision trees or KNN), but poorly for estimators with low variance (e.g.,\n# linear models).\nestimators = [(\"Tree\", DecisionTreeRegressor()),\n (\"Bagging(Tree)\", BaggingRegressor(DecisionTreeRegressor()))]\n\nn_estimators = len(estimators)\n\n\n# Generate data\ndef f(x):\n x = x.ravel()\n\n return np.exp(-x ** 2) + 1.5 * np.exp(-(x - 2) ** 2)\n\n\ndef generate(n_samples, noise, n_repeat=1):\n X = np.random.rand(n_samples) * 10 - 5\n X = np.sort(X)\n\n if n_repeat == 1:\n y = f(X) + np.random.normal(0.0, noise, n_samples)\n else:\n y = np.zeros((n_samples, n_repeat))\n\n for i in range(n_repeat):\n y[:, i] = f(X) + np.random.normal(0.0, noise, n_samples)\n\n X = X.reshape((n_samples, 1))\n\n return X, y\n\n\nX_train = []\ny_train = []\n\nfor i in range(n_repeat):\n X, y = generate(n_samples=n_train, noise=noise)\n X_train.append(X)\n y_train.append(y)\n\nX_test, y_test = generate(n_samples=n_test, noise=noise, n_repeat=n_repeat)\n\nplt.figure(figsize=(10, 8))\n\n# Loop over estimators to compare\nfor n, (name, estimator) in enumerate(estimators):\n # Compute predictions\n y_predict = np.zeros((n_test, n_repeat))\n\n for i in range(n_repeat):\n estimator.fit(X_train[i], y_train[i])\n y_predict[:, i] = estimator.predict(X_test)\n\n # Bias^2 + Variance + Noise decomposition of the mean squared error\n y_error = np.zeros(n_test)\n\n for i in range(n_repeat):\n for j in range(n_repeat):\n y_error += (y_test[:, j] - y_predict[:, i]) ** 2\n\n y_error /= (n_repeat * n_repeat)\n\n y_noise = np.var(y_test, axis=1)\n y_bias = (f(X_test) - np.mean(y_predict, axis=1)) ** 2\n y_var = np.var(y_predict, axis=1)\n\n print(\"{0}: {1:.4f} (error) = {2:.4f} (bias^2) \"\n \" + {3:.4f} (var) + {4:.4f} (noise)\".format(name,\n np.mean(y_error),\n np.mean(y_bias),\n np.mean(y_var),\n np.mean(y_noise)))\n\n # Plot figures\n plt.subplot(2, n_estimators, n + 1)\n plt.plot(X_test, f(X_test), \"b\", label=\"$f(x)$\")\n plt.plot(X_train[0], y_train[0], \".b\", label=\"LS ~ $y = f(x)+noise$\")\n\n for i in range(n_repeat):\n if i == 0:\n plt.plot(X_test, y_predict[:, i], \"r\", label=\"$\\^y(x)$\")\n else:\n plt.plot(X_test, y_predict[:, i], \"r\", alpha=0.05)\n\n plt.plot(X_test, np.mean(y_predict, axis=1), \"c\",\n label=\"$\\mathbb{E}_{LS} \\^y(x)$\")\n\n plt.xlim([-5, 5])\n plt.title(name)\n\n if n == n_estimators - 1:\n plt.legend(loc=(1.1, .5))\n\n plt.subplot(2, n_estimators, n_estimators + n + 1)\n plt.plot(X_test, y_error, \"r\", label=\"$error(x)$\")\n plt.plot(X_test, y_bias, \"b\", label=\"$bias^2(x)$\"),\n plt.plot(X_test, y_var, \"g\", label=\"$variance(x)$\"),\n plt.plot(X_test, y_noise, \"c\", label=\"$noise(x)$\")\n\n plt.xlim([-5, 5])\n plt.ylim([0, 0.1])\n\n if n == n_estimators - 1:\n\n plt.legend(loc=(1.1, .5))\n\nplt.subplots_adjust(right=.75)\nplt.show()"