
Commit a9444bc

Pushing the docs to dev/ for branch: master, commit 0e19710fa47deb4ca814b94ed64713da09ff89ba
1 parent a280f59 commit a9444bc

File tree: 1,008 files changed (+3064, -3047 lines)


dev/_downloads/plot_outlier_detection.ipynb

+1 −1 lines changed

@@ -26,7 +26,7 @@
 },
 "outputs": [],
 "source": [
-"import numpy as np\nfrom scipy import stats\nimport matplotlib.pyplot as plt\nimport matplotlib.font_manager\n\nfrom sklearn import svm\nfrom sklearn.covariance import EllipticEnvelope\nfrom sklearn.ensemble import IsolationForest\nfrom sklearn.neighbors import LocalOutlierFactor\n\nprint(__doc__)\n\nrng = np.random.RandomState(42)\n\n# Example settings\nn_samples = 200\noutliers_fraction = 0.25\nclusters_separation = [0, 1, 2]\n\n# define two outlier detection tools to be compared\nclassifiers = {\n \"One-Class SVM\": svm.OneClassSVM(nu=0.95 * outliers_fraction + 0.05,\n kernel=\"rbf\", gamma=0.1),\n \"Robust covariance\": EllipticEnvelope(contamination=outliers_fraction),\n \"Isolation Forest\": IsolationForest(max_samples=n_samples,\n contamination=outliers_fraction,\n random_state=rng),\n \"Local Outlier Factor\": LocalOutlierFactor(\n n_neighbors=35,\n contamination=outliers_fraction)}\n\n# Compare given classifiers under given settings\nxx, yy = np.meshgrid(np.linspace(-7, 7, 100), np.linspace(-7, 7, 100))\nn_inliers = int((1. - outliers_fraction) * n_samples)\nn_outliers = int(outliers_fraction * n_samples)\nground_truth = np.ones(n_samples, dtype=int)\nground_truth[-n_outliers:] = -1\n\n# Fit the problem with varying cluster separation\nfor i, offset in enumerate(clusters_separation):\n np.random.seed(42)\n # Data generation\n X1 = 0.3 * np.random.randn(n_inliers // 2, 2) - offset\n X2 = 0.3 * np.random.randn(n_inliers // 2, 2) + offset\n X = np.r_[X1, X2]\n # Add outliers\n X = np.r_[X, np.random.uniform(low=-6, high=6, size=(n_outliers, 2))]\n\n # Fit the model\n plt.figure(figsize=(9, 7))\n for i, (clf_name, clf) in enumerate(classifiers.items()):\n # fit the data and tag outliers\n if clf_name == \"Local Outlier Factor\":\n y_pred = clf.fit_predict(X)\n scores_pred = clf.negative_outlier_factor_\n else:\n clf.fit(X)\n scores_pred = clf.decision_function(X)\n y_pred = clf.predict(X)\n threshold = stats.scoreatpercentile(scores_pred,\n 100 * outliers_fraction)\n n_errors = (y_pred != ground_truth).sum()\n # plot the levels lines and the points\n if clf_name == \"Local Outlier Factor\":\n # decision_function is private for LOF\n Z = clf._decision_function(np.c_[xx.ravel(), yy.ravel()])\n else:\n Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])\n Z = Z.reshape(xx.shape)\n subplot = plt.subplot(2, 2, i + 1)\n subplot.contourf(xx, yy, Z, levels=np.linspace(Z.min(), threshold, 7),\n cmap=plt.cm.Blues_r)\n a = subplot.contour(xx, yy, Z, levels=[threshold],\n linewidths=2, colors='red')\n subplot.contourf(xx, yy, Z, levels=[threshold, Z.max()],\n colors='orange')\n b = subplot.scatter(X[:-n_outliers, 0], X[:-n_outliers, 1], c='white',\n s=20, edgecolor='k')\n c = subplot.scatter(X[-n_outliers:, 0], X[-n_outliers:, 1], c='black',\n s=20, edgecolor='k')\n subplot.axis('tight')\n subplot.legend(\n [a.collections[0], b, c],\n ['learned decision function', 'true inliers', 'true outliers'],\n prop=matplotlib.font_manager.FontProperties(size=10),\n loc='lower right')\n subplot.set_xlabel(\"%d. %s (errors: %d)\" % (i + 1, clf_name, n_errors))\n subplot.set_xlim((-7, 7))\n subplot.set_ylim((-7, 7))\n plt.subplots_adjust(0.04, 0.1, 0.96, 0.94, 0.1, 0.26)\n plt.suptitle(\"Outlier detection\")\n\nplt.show()"
+"import numpy as np\nfrom scipy import stats\nimport matplotlib.pyplot as plt\nimport matplotlib.font_manager\n\nfrom sklearn import svm\nfrom sklearn.covariance import EllipticEnvelope\nfrom sklearn.ensemble import IsolationForest\nfrom sklearn.neighbors import LocalOutlierFactor\n\nprint(__doc__)\n\nSEED = 42\nGRID_PRECISION = 100\n\nrng = np.random.RandomState(SEED)\n\n# Example settings\nn_samples = 200\noutliers_fraction = 0.25\nclusters_separation = (0, 1, 2)\n\n# define two outlier detection tools to be compared\nclassifiers = {\n \"One-Class SVM\": svm.OneClassSVM(nu=0.95 * outliers_fraction + 0.05,\n kernel=\"rbf\", gamma=0.1),\n \"Robust covariance\": EllipticEnvelope(contamination=outliers_fraction),\n \"Isolation Forest\": IsolationForest(max_samples=n_samples,\n contamination=outliers_fraction,\n random_state=rng),\n \"Local Outlier Factor\": LocalOutlierFactor(\n n_neighbors=35,\n contamination=outliers_fraction)}\n\n# Compare given classifiers under given settings\nxx, yy = np.meshgrid(np.linspace(-7, 7, GRID_PRECISION),\n np.linspace(-7, 7, GRID_PRECISION))\nn_outliers = int(outliers_fraction * n_samples)\nn_inliers = n_samples - n_outliers\nground_truth = np.ones(n_samples, dtype=int)\nground_truth[-n_outliers:] = -1\n\n# Fit the problem with varying cluster separation\nfor _, offset in enumerate(clusters_separation):\n np.random.seed(SEED)\n # Data generation\n X1 = 0.3 * np.random.randn(n_inliers // 2, 2) - offset\n X2 = 0.3 * np.random.randn(n_inliers // 2, 2) + offset\n X = np.concatenate([X1, X2], axis=0)\n # Add outliers\n X = np.concatenate([X, np.random.uniform(low=-6, high=6,\n size=(n_outliers, 2))], axis=0)\n\n # Fit the model\n plt.figure(figsize=(9, 7))\n for i, (clf_name, clf) in enumerate(classifiers.items()):\n # fit the data and tag outliers\n if clf_name == \"Local Outlier Factor\":\n y_pred = clf.fit_predict(X)\n scores_pred = clf.negative_outlier_factor_\n else:\n clf.fit(X)\n scores_pred = clf.decision_function(X)\n y_pred = clf.predict(X)\n threshold = stats.scoreatpercentile(scores_pred,\n 100 * outliers_fraction)\n n_errors = (y_pred != ground_truth).sum()\n # plot the levels lines and the points\n if clf_name == \"Local Outlier Factor\":\n # decision_function is private for LOF\n Z = clf._decision_function(np.c_[xx.ravel(), yy.ravel()])\n else:\n Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()])\n Z = Z.reshape(xx.shape)\n subplot = plt.subplot(2, 2, i + 1)\n subplot.contourf(xx, yy, Z, levels=np.linspace(Z.min(), threshold, 7),\n cmap=plt.cm.Blues_r)\n a = subplot.contour(xx, yy, Z, levels=[threshold],\n linewidths=2, colors='red')\n subplot.contourf(xx, yy, Z, levels=[threshold, Z.max()],\n colors='orange')\n b = subplot.scatter(X[:-n_outliers, 0], X[:-n_outliers, 1], c='white',\n s=20, edgecolor='k')\n c = subplot.scatter(X[-n_outliers:, 0], X[-n_outliers:, 1], c='black',\n s=20, edgecolor='k')\n subplot.axis('tight')\n subplot.legend(\n [a.collections[0], b, c],\n ['learned decision function', 'true inliers', 'true outliers'],\n prop=matplotlib.font_manager.FontProperties(size=10),\n loc='lower right')\n subplot.set_xlabel(\"%d. %s (errors: %d)\" % (i + 1, clf_name, n_errors))\n subplot.set_xlim((-7, 7))\n subplot.set_ylim((-7, 7))\n plt.subplots_adjust(0.04, 0.1, 0.96, 0.94, 0.1, 0.26)\n plt.suptitle(\"Outlier detection\")\n\nplt.show()"
 ]
 }
 ],
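
Both versions of this cell special-case "Local Outlier Factor" because, in the scikit-learn release the example targets, LOF only exposes fit_predict publicly, so the contour grid has to be scored through the private clf._decision_function. As a hedged aside (not part of this commit): scikit-learn 0.20 and later add a novelty=True mode that makes decision_function public, roughly as sketched here.

# Hedged sketch, not from this commit: scoring new points with LOF's
# public API via novelty=True (available in scikit-learn >= 0.20).
import numpy as np
from sklearn.neighbors import LocalOutlierFactor

rng = np.random.RandomState(42)
X_train = 0.3 * rng.randn(150, 2)                 # inlier-like training data
grid = rng.uniform(low=-6, high=6, size=(50, 2))  # points to score

lof = LocalOutlierFactor(n_neighbors=35, novelty=True)
lof.fit(X_train)                 # fit_predict is disabled in novelty mode
Z = lof.decision_function(grid)  # public replacement for _decision_function
print(Z.shape)                   # (50,); negative values flag outliers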

dev/_downloads/plot_outlier_detection.py

+13 −8 lines changed

@@ -43,12 +43,15 @@
 
 print(__doc__)
 
-rng = np.random.RandomState(42)
+SEED = 42
+GRID_PRECISION = 100
+
+rng = np.random.RandomState(SEED)
 
 # Example settings
 n_samples = 200
 outliers_fraction = 0.25
-clusters_separation = [0, 1, 2]
+clusters_separation = (0, 1, 2)
 
 # define two outlier detection tools to be compared
 classifiers = {
@@ -63,21 +66,23 @@
         contamination=outliers_fraction)}
 
 # Compare given classifiers under given settings
-xx, yy = np.meshgrid(np.linspace(-7, 7, 100), np.linspace(-7, 7, 100))
-n_inliers = int((1. - outliers_fraction) * n_samples)
+xx, yy = np.meshgrid(np.linspace(-7, 7, GRID_PRECISION),
+                     np.linspace(-7, 7, GRID_PRECISION))
 n_outliers = int(outliers_fraction * n_samples)
+n_inliers = n_samples - n_outliers
 ground_truth = np.ones(n_samples, dtype=int)
 ground_truth[-n_outliers:] = -1
 
 # Fit the problem with varying cluster separation
-for i, offset in enumerate(clusters_separation):
-    np.random.seed(42)
+for _, offset in enumerate(clusters_separation):
+    np.random.seed(SEED)
     # Data generation
     X1 = 0.3 * np.random.randn(n_inliers // 2, 2) - offset
     X2 = 0.3 * np.random.randn(n_inliers // 2, 2) + offset
-    X = np.r_[X1, X2]
+    X = np.concatenate([X1, X2], axis=0)
     # Add outliers
-    X = np.r_[X, np.random.uniform(low=-6, high=6, size=(n_outliers, 2))]
+    X = np.concatenate([X, np.random.uniform(low=-6, high=6,
+                                             size=(n_outliers, 2))], axis=0)
 
     # Fit the model
     plt.figure(figsize=(9, 7))
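
The substantive fix in this hunk is the inlier count: n_inliers = n_samples - n_outliers guarantees the generated X lines up with the 200-entry ground_truth, whereas int((1. - outliers_fraction) * n_samples) can lose a sample to floating-point rounding. The np.r_ to np.concatenate change is behavior-preserving for these 2-D arrays, since np.r_[X1, X2] stacks along the first axis exactly like np.concatenate([X1, X2], axis=0). A minimal sketch of the rounding issue follows (illustration only; the 0.33 fraction is a hypothetical setting, not from the commit):

# Illustration only: the old and new inlier counts can disagree when
# the fractions do not survive binary float rounding.
n_samples = 200
for outliers_fraction in (0.25, 0.33):
    n_outliers = int(outliers_fraction * n_samples)
    old_inliers = int((1. - outliers_fraction) * n_samples)  # old formula
    new_inliers = n_samples - n_outliers                     # new formula
    print(outliers_fraction, old_inliers, new_inliers,
          old_inliers + n_outliers == n_samples)
# 0.25 agrees (150, 150, True); 0.33 gives (133, 134, False), so the
# old formula could build X with fewer rows than ground_truth has.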

dev/_downloads/scikit-learn-docs.pdf

−781 Bytes (binary file not shown)

dev/_sources/auto_examples/applications/plot_face_recognition.rst.txt

+19 −19 lines changed

dev/_sources/auto_examples/applications/plot_model_complexity_influence.rst.txt

+15 −15 lines changed
