
Commit 98cd909 (parent: f9414ec)

Pushing the docs to dev/ for branch: master, commit c28ef9ef2a6980ecdb8a904c025fddf3bd477a7f

1,093 files changed: +3,484 −3,495 lines

Two binary files changed (−292 bytes, −289 bytes); contents not shown.

dev/_downloads/plot_changed_only_pprint_parameter.ipynb (+1 −1)

@@ -26,7 +26,7 @@
},
"outputs": [],
"source": [
- removed:
"print(__doc__)\n\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn import set_config\n\n\nlr = LogisticRegression(penalty='l1')\nprint('Default representation:')\nprint(lr)\n# LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n# intercept_scaling=1, l1_ratio=None, max_iter=100,\n# multi_class='warn', n_jobs=None, penalty='l1',\n# random_state=None, solver='warn', tol=0.0001, verbose=0,\n# warm_start=False)\n\nset_config(print_changed_only=True)\nprint('\\nWith changed_only option:')\nprint(lr)\n# LogisticRegression(penalty='l1')"
+ added:
"print(__doc__)\n\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn import set_config\n\n\nlr = LogisticRegression(penalty='l1')\nprint('Default representation:')\nprint(lr)\n# LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n# intercept_scaling=1, l1_ratio=None, max_iter=100,\n# multi_class='auto', n_jobs=None, penalty='l1',\n# random_state=None, solver='warn', tol=0.0001, verbose=0,\n# warm_start=False)\n\nset_config(print_changed_only=True)\nprint('\\nWith changed_only option:')\nprint(lr)\n# LogisticRegression(penalty='l1')"
]
}
],

dev/_downloads/plot_changed_only_pprint_parameter.py (+1 −1)

@@ -20,7 +20,7 @@
 print(lr)
 # LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
 #                    intercept_scaling=1, l1_ratio=None, max_iter=100,
-#                    multi_class='warn', n_jobs=None, penalty='l1',
+#                    multi_class='auto', n_jobs=None, penalty='l1',
 #                    random_state=None, solver='warn', tol=0.0001, verbose=0,
 #                    warm_start=False)
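A side note on the change above: the expected repr changed because LogisticRegression's multi_class default moved from the transitional 'warn' value to 'auto'. A minimal sketch for checking the default on an installed release (get_params is stable scikit-learn API; the reported value is 'auto' in the 0.22-era builds these docs target, and the parameter was deprecated and later removed in much newer releases):

from sklearn.linear_model import LogisticRegression

# Constructor defaults are visible through get_params(); on the
# scikit-learn version these docs were built against this prints 'auto'.
print(LogisticRegression().get_params()['multi_class'])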

dev/_downloads/plot_digits_classification_exercise.ipynb (+1 −1)

@@ -26,7 +26,7 @@
},
"outputs": [],
"source": [
- removed:
"print(__doc__)\n\nfrom sklearn import datasets, neighbors, linear_model\n\ndigits = datasets.load_digits()\nX_digits = digits.data / digits.data.max()\ny_digits = digits.target\n\nn_samples = len(X_digits)\n\nX_train = X_digits[:int(.9 * n_samples)]\ny_train = y_digits[:int(.9 * n_samples)]\nX_test = X_digits[int(.9 * n_samples):]\ny_test = y_digits[int(.9 * n_samples):]\n\nknn = neighbors.KNeighborsClassifier()\nlogistic = linear_model.LogisticRegression(max_iter=1000,\n multi_class='multinomial')\n\nprint('KNN score: %f' % knn.fit(X_train, y_train).score(X_test, y_test))\nprint('LogisticRegression score: %f'\n % logistic.fit(X_train, y_train).score(X_test, y_test))"
+ added:
"print(__doc__)\n\nfrom sklearn import datasets, neighbors, linear_model\n\ndigits = datasets.load_digits()\nX_digits = digits.data / digits.data.max()\ny_digits = digits.target\n\nn_samples = len(X_digits)\n\nX_train = X_digits[:int(.9 * n_samples)]\ny_train = y_digits[:int(.9 * n_samples)]\nX_test = X_digits[int(.9 * n_samples):]\ny_test = y_digits[int(.9 * n_samples):]\n\nknn = neighbors.KNeighborsClassifier()\nlogistic = linear_model.LogisticRegression(max_iter=1000)\n\nprint('KNN score: %f' % knn.fit(X_train, y_train).score(X_test, y_test))\nprint('LogisticRegression score: %f'\n % logistic.fit(X_train, y_train).score(X_test, y_test))"
]
}
],

dev/_downloads/plot_digits_classification_exercise.py (+1 −2)

@@ -26,8 +26,7 @@
 y_test = y_digits[int(.9 * n_samples):]

 knn = neighbors.KNeighborsClassifier()
-logistic = linear_model.LogisticRegression(max_iter=1000,
-                                           multi_class='multinomial')
+logistic = linear_model.LogisticRegression(max_iter=1000)

 print('KNN score: %f' % knn.fit(X_train, y_train).score(X_test, y_test))
 print('LogisticRegression score: %f'
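Dropping the explicit multi_class='multinomial' argument is safe for this example because the 'auto' default resolves to multinomial for a multiclass target whenever the solver supports it, as the default 'lbfgs' does. A minimal sketch of that equivalence (assumes a release, roughly 0.22 through 1.6, in which the multi_class parameter still exists):

import numpy as np
from sklearn import datasets, linear_model

X, y = datasets.load_digits(return_X_y=True)

# Same data and solver; one model relies on the 'auto' default, the other
# passes the old explicit argument. The fitted coefficients should match.
a = linear_model.LogisticRegression(max_iter=1000).fit(X, y)
b = linear_model.LogisticRegression(max_iter=1000,
                                    multi_class='multinomial').fit(X, y)
print(np.allclose(a.coef_, b.coef_))  # expected: True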

dev/_downloads/plot_iris_logistic.ipynb (+1 −1)

@@ -26,7 +26,7 @@
},
"outputs": [],
"source": [
- removed:
"print(__doc__)\n\n# Code source: Ga\u00ebl Varoquaux\n# Modified for documentation by Jaques Grobler\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn import datasets\n\n# import some data to play with\niris = datasets.load_iris()\nX = iris.data[:, :2] # we only take the first two features.\nY = iris.target\n\nlogreg = LogisticRegression(C=1e5, multi_class='multinomial')\n\n# Create an instance of Logistic Regression Classifier and fit the data.\nlogreg.fit(X, Y)\n\n# Plot the decision boundary. For that, we will assign a color to each\n# point in the mesh [x_min, x_max]x[y_min, y_max].\nx_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5\ny_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5\nh = .02 # step size in the mesh\nxx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))\nZ = logreg.predict(np.c_[xx.ravel(), yy.ravel()])\n\n# Put the result into a color plot\nZ = Z.reshape(xx.shape)\nplt.figure(1, figsize=(4, 3))\nplt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)\n\n# Plot also the training points\nplt.scatter(X[:, 0], X[:, 1], c=Y, edgecolors='k', cmap=plt.cm.Paired)\nplt.xlabel('Sepal length')\nplt.ylabel('Sepal width')\n\nplt.xlim(xx.min(), xx.max())\nplt.ylim(yy.min(), yy.max())\nplt.xticks(())\nplt.yticks(())\n\nplt.show()"
+ added:
"print(__doc__)\n\n# Code source: Ga\u00ebl Varoquaux\n# Modified for documentation by Jaques Grobler\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn.linear_model import LogisticRegression\nfrom sklearn import datasets\n\n# import some data to play with\niris = datasets.load_iris()\nX = iris.data[:, :2] # we only take the first two features.\nY = iris.target\n\nlogreg = LogisticRegression(C=1e5)\n\n# Create an instance of Logistic Regression Classifier and fit the data.\nlogreg.fit(X, Y)\n\n# Plot the decision boundary. For that, we will assign a color to each\n# point in the mesh [x_min, x_max]x[y_min, y_max].\nx_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5\ny_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5\nh = .02 # step size in the mesh\nxx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))\nZ = logreg.predict(np.c_[xx.ravel(), yy.ravel()])\n\n# Put the result into a color plot\nZ = Z.reshape(xx.shape)\nplt.figure(1, figsize=(4, 3))\nplt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)\n\n# Plot also the training points\nplt.scatter(X[:, 0], X[:, 1], c=Y, edgecolors='k', cmap=plt.cm.Paired)\nplt.xlabel('Sepal length')\nplt.ylabel('Sepal width')\n\nplt.xlim(xx.min(), xx.max())\nplt.ylim(yy.min(), yy.max())\nplt.xticks(())\nplt.yticks(())\n\nplt.show()"
]
}
],

dev/_downloads/plot_iris_logistic.py (+1 −1)

@@ -28,7 +28,7 @@
 X = iris.data[:, :2]  # we only take the first two features.
 Y = iris.target

-logreg = LogisticRegression(C=1e5, multi_class='multinomial')
+logreg = LogisticRegression(C=1e5)

 # Create an instance of Logistic Regression Classifier and fit the data.
 logreg.fit(X, Y)

dev/_downloads/plot_rbm_logistic_classification.ipynb (+1 −1)

@@ -26,7 +26,7 @@
},
"outputs": [],
"source": [
- removed:
"print(__doc__)\n\n# Authors: Yann N. Dauphin, Vlad Niculae, Gabriel Synnaeve\n# License: BSD\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom scipy.ndimage import convolve\nfrom sklearn import linear_model, datasets, metrics\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.neural_network import BernoulliRBM\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.base import clone\n\n\n# #############################################################################\n# Setting up\n\ndef nudge_dataset(X, Y):\n \"\"\"\n This produces a dataset 5 times bigger than the original one,\n by moving the 8x8 images in X around by 1px to left, right, down, up\n \"\"\"\n direction_vectors = [\n [[0, 1, 0],\n [0, 0, 0],\n [0, 0, 0]],\n\n [[0, 0, 0],\n [1, 0, 0],\n [0, 0, 0]],\n\n [[0, 0, 0],\n [0, 0, 1],\n [0, 0, 0]],\n\n [[0, 0, 0],\n [0, 0, 0],\n [0, 1, 0]]]\n\n def shift(x, w):\n return convolve(x.reshape((8, 8)), mode='constant', weights=w).ravel()\n\n X = np.concatenate([X] +\n [np.apply_along_axis(shift, 1, X, vector)\n for vector in direction_vectors])\n Y = np.concatenate([Y for _ in range(5)], axis=0)\n return X, Y\n\n\n# Load Data\ndigits = datasets.load_digits()\nX = np.asarray(digits.data, 'float32')\nX, Y = nudge_dataset(X, digits.target)\nX = (X - np.min(X, 0)) / (np.max(X, 0) + 0.0001) # 0-1 scaling\n\nX_train, X_test, Y_train, Y_test = train_test_split(\n X, Y, test_size=0.2, random_state=0)\n\n# Models we will use\nlogistic = linear_model.LogisticRegression(solver='newton-cg', tol=1,\n multi_class='multinomial')\nrbm = BernoulliRBM(random_state=0, verbose=True)\n\nrbm_features_classifier = Pipeline(\n steps=[('rbm', rbm), ('logistic', logistic)])\n\n# #############################################################################\n# Training\n\n# Hyper-parameters. These were set by cross-validation,\n# using a GridSearchCV. Here we are not performing cross-validation to\n# save time.\nrbm.learning_rate = 0.06\nrbm.n_iter = 20\n# More components tend to give better prediction performance, but larger\n# fitting time\nrbm.n_components = 100\nlogistic.C = 6000\n\n# Training RBM-Logistic Pipeline\nrbm_features_classifier.fit(X_train, Y_train)\n\n# Training the Logistic regression classifier directly on the pixel\nraw_pixel_classifier = clone(logistic)\nraw_pixel_classifier.C = 100.\nraw_pixel_classifier.fit(X_train, Y_train)\n\n# #############################################################################\n# Evaluation\n\nY_pred = rbm_features_classifier.predict(X_test)\nprint(\"Logistic regression using RBM features:\\n%s\\n\" % (\n metrics.classification_report(Y_test, Y_pred)))\n\nY_pred = raw_pixel_classifier.predict(X_test)\nprint(\"Logistic regression using raw pixel features:\\n%s\\n\" % (\n metrics.classification_report(Y_test, Y_pred)))\n\n# #############################################################################\n# Plotting\n\nplt.figure(figsize=(4.2, 4))\nfor i, comp in enumerate(rbm.components_):\n plt.subplot(10, 10, i + 1)\n plt.imshow(comp.reshape((8, 8)), cmap=plt.cm.gray_r,\n interpolation='nearest')\n plt.xticks(())\n plt.yticks(())\nplt.suptitle('100 components extracted by RBM', fontsize=16)\nplt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)\n\nplt.show()"
+ added:
"print(__doc__)\n\n# Authors: Yann N. Dauphin, Vlad Niculae, Gabriel Synnaeve\n# License: BSD\n\nimport numpy as np\nimport matplotlib.pyplot as plt\n\nfrom scipy.ndimage import convolve\nfrom sklearn import linear_model, datasets, metrics\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.neural_network import BernoulliRBM\nfrom sklearn.pipeline import Pipeline\nfrom sklearn.base import clone\n\n\n# #############################################################################\n# Setting up\n\ndef nudge_dataset(X, Y):\n \"\"\"\n This produces a dataset 5 times bigger than the original one,\n by moving the 8x8 images in X around by 1px to left, right, down, up\n \"\"\"\n direction_vectors = [\n [[0, 1, 0],\n [0, 0, 0],\n [0, 0, 0]],\n\n [[0, 0, 0],\n [1, 0, 0],\n [0, 0, 0]],\n\n [[0, 0, 0],\n [0, 0, 1],\n [0, 0, 0]],\n\n [[0, 0, 0],\n [0, 0, 0],\n [0, 1, 0]]]\n\n def shift(x, w):\n return convolve(x.reshape((8, 8)), mode='constant', weights=w).ravel()\n\n X = np.concatenate([X] +\n [np.apply_along_axis(shift, 1, X, vector)\n for vector in direction_vectors])\n Y = np.concatenate([Y for _ in range(5)], axis=0)\n return X, Y\n\n\n# Load Data\ndigits = datasets.load_digits()\nX = np.asarray(digits.data, 'float32')\nX, Y = nudge_dataset(X, digits.target)\nX = (X - np.min(X, 0)) / (np.max(X, 0) + 0.0001) # 0-1 scaling\n\nX_train, X_test, Y_train, Y_test = train_test_split(\n X, Y, test_size=0.2, random_state=0)\n\n# Models we will use\nlogistic = linear_model.LogisticRegression(solver='newton-cg', tol=1)\nrbm = BernoulliRBM(random_state=0, verbose=True)\n\nrbm_features_classifier = Pipeline(\n steps=[('rbm', rbm), ('logistic', logistic)])\n\n# #############################################################################\n# Training\n\n# Hyper-parameters. These were set by cross-validation,\n# using a GridSearchCV. Here we are not performing cross-validation to\n# save time.\nrbm.learning_rate = 0.06\nrbm.n_iter = 20\n# More components tend to give better prediction performance, but larger\n# fitting time\nrbm.n_components = 100\nlogistic.C = 6000\n\n# Training RBM-Logistic Pipeline\nrbm_features_classifier.fit(X_train, Y_train)\n\n# Training the Logistic regression classifier directly on the pixel\nraw_pixel_classifier = clone(logistic)\nraw_pixel_classifier.C = 100.\nraw_pixel_classifier.fit(X_train, Y_train)\n\n# #############################################################################\n# Evaluation\n\nY_pred = rbm_features_classifier.predict(X_test)\nprint(\"Logistic regression using RBM features:\\n%s\\n\" % (\n metrics.classification_report(Y_test, Y_pred)))\n\nY_pred = raw_pixel_classifier.predict(X_test)\nprint(\"Logistic regression using raw pixel features:\\n%s\\n\" % (\n metrics.classification_report(Y_test, Y_pred)))\n\n# #############################################################################\n# Plotting\n\nplt.figure(figsize=(4.2, 4))\nfor i, comp in enumerate(rbm.components_):\n plt.subplot(10, 10, i + 1)\n plt.imshow(comp.reshape((8, 8)), cmap=plt.cm.gray_r,\n interpolation='nearest')\n plt.xticks(())\n plt.yticks(())\nplt.suptitle('100 components extracted by RBM', fontsize=16)\nplt.subplots_adjust(0.08, 0.02, 0.92, 0.85, 0.08, 0.23)\n\nplt.show()"
]
}
],

dev/_downloads/plot_rbm_logistic_classification.py (+1 −2)

@@ -85,8 +85,7 @@ def shift(x, w):
     X, Y, test_size=0.2, random_state=0)

 # Models we will use
-logistic = linear_model.LogisticRegression(solver='newton-cg', tol=1,
-                                           multi_class='multinomial')
+logistic = linear_model.LogisticRegression(solver='newton-cg', tol=1)
 rbm = BernoulliRBM(random_state=0, verbose=True)

 rbm_features_classifier = Pipeline(
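Unrelated to the removed argument, this example leans on sklearn.base.clone: it returns a new, unfitted estimator with the same constructor parameters, which is why raw_pixel_classifier can be given its own C without touching the pipeline's logistic step. A small sketch of that behavior (clone is stable public API):

from sklearn.base import clone
from sklearn.linear_model import LogisticRegression

logistic = LogisticRegression(solver='newton-cg', tol=1)
raw = clone(logistic)     # unfitted copy with identical parameters
raw.C = 100.              # retune the copy only
print(logistic.C, raw.C)  # 1.0 100.0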

dev/_downloads/plot_sgd_comparison.ipynb (+1 −1)

@@ -26,7 +26,7 @@
},
"outputs": [],
"source": [
- removed:
"# Author: Rob Zinkov <rob at zinkov dot com>\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn import datasets\n\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import SGDClassifier, Perceptron\nfrom sklearn.linear_model import PassiveAggressiveClassifier\nfrom sklearn.linear_model import LogisticRegression\n\nheldout = [0.95, 0.90, 0.75, 0.50, 0.01]\nrounds = 20\ndigits = datasets.load_digits()\nX, y = digits.data, digits.target\n\nclassifiers = [\n (\"SGD\", SGDClassifier(max_iter=100, tol=1e-3)),\n (\"ASGD\", SGDClassifier(average=True, max_iter=1000, tol=1e-3)),\n (\"Perceptron\", Perceptron(tol=1e-3)),\n (\"Passive-Aggressive I\", PassiveAggressiveClassifier(loss='hinge',\n C=1.0, tol=1e-4)),\n (\"Passive-Aggressive II\", PassiveAggressiveClassifier(loss='squared_hinge',\n C=1.0, tol=1e-4)),\n (\"SAG\", LogisticRegression(solver='sag', tol=1e-1, C=1.e4 / X.shape[0],\n multi_class='auto'))\n]\n\nxx = 1. - np.array(heldout)\n\nfor name, clf in classifiers:\n print(\"training %s\" % name)\n rng = np.random.RandomState(42)\n yy = []\n for i in heldout:\n yy_ = []\n for r in range(rounds):\n X_train, X_test, y_train, y_test = \\\n train_test_split(X, y, test_size=i, random_state=rng)\n clf.fit(X_train, y_train)\n y_pred = clf.predict(X_test)\n yy_.append(1 - np.mean(y_pred == y_test))\n yy.append(np.mean(yy_))\n plt.plot(xx, yy, label=name)\n\nplt.legend(loc=\"upper right\")\nplt.xlabel(\"Proportion train\")\nplt.ylabel(\"Test Error Rate\")\nplt.show()"
+ added:
"# Author: Rob Zinkov <rob at zinkov dot com>\n# License: BSD 3 clause\n\nimport numpy as np\nimport matplotlib.pyplot as plt\nfrom sklearn import datasets\n\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.linear_model import SGDClassifier, Perceptron\nfrom sklearn.linear_model import PassiveAggressiveClassifier\nfrom sklearn.linear_model import LogisticRegression\n\nheldout = [0.95, 0.90, 0.75, 0.50, 0.01]\nrounds = 20\ndigits = datasets.load_digits()\nX, y = digits.data, digits.target\n\nclassifiers = [\n (\"SGD\", SGDClassifier(max_iter=100, tol=1e-3)),\n (\"ASGD\", SGDClassifier(average=True, max_iter=1000, tol=1e-3)),\n (\"Perceptron\", Perceptron(tol=1e-3)),\n (\"Passive-Aggressive I\", PassiveAggressiveClassifier(loss='hinge',\n C=1.0, tol=1e-4)),\n (\"Passive-Aggressive II\", PassiveAggressiveClassifier(loss='squared_hinge',\n C=1.0, tol=1e-4)),\n (\"SAG\", LogisticRegression(solver='sag', tol=1e-1, C=1.e4 / X.shape[0]))\n]\n\nxx = 1. - np.array(heldout)\n\nfor name, clf in classifiers:\n print(\"training %s\" % name)\n rng = np.random.RandomState(42)\n yy = []\n for i in heldout:\n yy_ = []\n for r in range(rounds):\n X_train, X_test, y_train, y_test = \\\n train_test_split(X, y, test_size=i, random_state=rng)\n clf.fit(X_train, y_train)\n y_pred = clf.predict(X_test)\n yy_.append(1 - np.mean(y_pred == y_test))\n yy.append(np.mean(yy_))\n plt.plot(xx, yy, label=name)\n\nplt.legend(loc=\"upper right\")\nplt.xlabel(\"Proportion train\")\nplt.ylabel(\"Test Error Rate\")\nplt.show()"
]
}
],

dev/_downloads/plot_sgd_comparison.py (+1 −2)

@@ -32,8 +32,7 @@
                                                          C=1.0, tol=1e-4)),
     ("Passive-Aggressive II", PassiveAggressiveClassifier(loss='squared_hinge',
                                                           C=1.0, tol=1e-4)),
-    ("SAG", LogisticRegression(solver='sag', tol=1e-1, C=1.e4 / X.shape[0],
-                               multi_class='auto'))
+    ("SAG", LogisticRegression(solver='sag', tol=1e-1, C=1.e4 / X.shape[0]))
 ]

 xx = 1. - np.array(heldout)
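One detail of the benchmark that survives the diff unchanged: random_state=rng passes a RandomState instance rather than an int, so every one of the 20 rounds draws a fresh split while the whole experiment remains reproducible from seed 42. A short sketch of the difference (train_test_split accepts either form for random_state):

import numpy as np
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

X, y = load_digits(return_X_y=True)
rng = np.random.RandomState(42)

# Consecutive calls with the same instance consume its random stream, so
# the two splits (almost surely) differ; an int seed would repeat one split.
X1, _, _, _ = train_test_split(X, y, test_size=0.5, random_state=rng)
X2, _, _, _ = train_test_split(X, y, test_size=0.5, random_state=rng)
print(np.array_equal(X1, X2))  # expected: False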
