|
| 1 | +""" |
| 2 | +Comparison between grid search and successive halving |
| 3 | +===================================================== |
| 4 | +
|
| 5 | +This example compares the parameter search performed by |
| 6 | +:class:`~sklearn.model_selection.HalvingGridSearchCV` and |
| 7 | +:class:`~sklearn.model_selection.GridSearchCV`. |
| 8 | +
|
| 9 | +""" |
| 10 | +from time import time |
| 11 | + |
| 12 | +import matplotlib.pyplot as plt |
| 13 | +import numpy as np |
| 14 | +import pandas as pd |
| 15 | + |
| 16 | +from sklearn.svm import SVC |
| 17 | +from sklearn import datasets |
| 18 | +from sklearn.model_selection import GridSearchCV |
| 19 | +from sklearn.experimental import enable_successive_halving # noqa |
| 20 | +from sklearn.model_selection import HalvingGridSearchCV |
| 21 | + |
| 22 | + |
print(__doc__)

# %%
# We first define the parameter space for an :class:`~sklearn.svm.SVC`
# estimator, and compute the time required to train a
# :class:`~sklearn.model_selection.HalvingGridSearchCV` instance, as well as a
# :class:`~sklearn.model_selection.GridSearchCV` instance.

# Fixed seed so the generated data and both searches are reproducible.
rng = np.random.RandomState(0)
X, y = datasets.make_classification(n_samples=1000, random_state=rng)

# 7 gamma values x 6 C values = 42 candidate combinations searched by
# both estimators.  These lists are also read by the plotting code below.
gammas = [1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7]
Cs = [1, 10, 100, 1e3, 1e4, 1e5]
param_grid = {'gamma': gammas, 'C': Cs}

clf = SVC(random_state=rng)

# Time the successive-halving search; factor=2 controls how aggressively
# candidates are eliminated between iterations.
tic = time()
gsh = HalvingGridSearchCV(estimator=clf, param_grid=param_grid, factor=2,
                          random_state=rng)
gsh.fit(X, y)
gsh_time = time() - tic

# Time the exhaustive grid search over the same parameter grid.
tic = time()
gs = GridSearchCV(estimator=clf, param_grid=param_grid)
gs.fit(X, y)
gs_time = time() - tic
| 50 | + |
| 51 | +# %% |
| 52 | +# We now plot heatmaps for both search estimators. |
| 53 | + |
| 54 | + |
def make_heatmap(ax, gs, is_sh=False, make_cbar=False):
    """Plot a heatmap of mean test scores for a fitted search estimator.

    Parameters
    ----------
    ax : matplotlib axes to draw the heatmap on.
    gs : fitted search estimator exposing ``cv_results_``
        (a ``GridSearchCV`` or ``HalvingGridSearchCV`` instance).
    is_sh : bool, default=False
        If True, ``gs`` is a successive-halving search: each candidate's
        score is taken from the last iteration it reached, and each cell
        is annotated with that iteration number.
    make_cbar : bool, default=False
        If True, attach a figure-level colorbar (reads the module-level
        ``fig``).

    Notes
    -----
    Also reads the module-level ``Cs`` and ``gammas`` lists for the tick
    labels, so the axes of ``cv_results_`` must match them.
    """
    results = pd.DataFrame.from_dict(gs.cv_results_)
    # NOTE: the original code built an unused 'params_str' column here
    # (results.params.apply(str)); it was never read, so it is removed.
    if is_sh:
        # SH dataframe: keep the mean_test_score from the highest iteration
        # each candidate reached (rows are sorted by 'iter' so 'last' picks
        # the final evaluation).
        scores_matrix = results.sort_values('iter').pivot_table(
            index='param_gamma', columns='param_C',
            values='mean_test_score', aggfunc='last'
        )
    else:
        # Exhaustive search: exactly one row per (gamma, C) pair.
        scores_matrix = results.pivot(index='param_gamma', columns='param_C',
                                      values='mean_test_score')

    im = ax.imshow(scores_matrix)

    ax.set_xticks(np.arange(len(Cs)))
    ax.set_xticklabels(['{:.0E}'.format(x) for x in Cs])
    ax.set_xlabel('C', fontsize=15)

    ax.set_yticks(np.arange(len(gammas)))
    ax.set_yticklabels(['{:.0E}'.format(x) for x in gammas])
    ax.set_ylabel('gamma', fontsize=15)

    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    if is_sh:
        # Annotate each cell with the last iteration at which the candidate
        # was evaluated: higher numbers mean the candidate survived longer.
        iterations = results.pivot_table(index='param_gamma',
                                         columns='param_C', values='iter',
                                         aggfunc='max').values
        for i in range(len(gammas)):
            for j in range(len(Cs)):
                ax.text(j, i, iterations[i, j],
                        ha="center", va="center", color="w", fontsize=20)

    if make_cbar:
        # Shared colorbar in its own axes on the right of the figure.
        fig.subplots_adjust(right=0.8)
        cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])
        fig.colorbar(im, cax=cbar_ax)
        cbar_ax.set_ylabel('mean_test_score', rotation=-90, va="bottom",
                           fontsize=15)
| 99 | + |
# Draw the two heatmaps side by side: successive halving on the left,
# exhaustive grid search (with the shared colorbar) on the right.
fig, (ax_sh, ax_full) = plt.subplots(ncols=2, sharey=True)

make_heatmap(ax_sh, gsh, is_sh=True)
ax_sh.set_title('Successive Halving\ntime = {:.3f}s'.format(gsh_time),
                fontsize=15)

make_heatmap(ax_full, gs, make_cbar=True)
ax_full.set_title('GridSearch\ntime = {:.3f}s'.format(gs_time), fontsize=15)

plt.show()
| 111 | + |
| 112 | +# %% |
| 113 | +# The heatmaps show the mean test score of the parameter combinations for an |
| 114 | +# :class:`~sklearn.svm.SVC` instance. The |
| 115 | +# :class:`~sklearn.model_selection.HalvingGridSearchCV` also shows the |
# iteration at which the combinations were last used. The combinations marked
| 117 | +# as ``0`` were only evaluated at the first iteration, while the ones with |
| 118 | +# ``5`` are the parameter combinations that are considered the best ones. |
| 119 | +# |
| 120 | +# We can see that the :class:`~sklearn.model_selection.HalvingGridSearchCV` |
| 121 | +# class is able to find parameter combinations that are just as accurate as |
| 122 | +# :class:`~sklearn.model_selection.GridSearchCV`, in much less time. |
0 commit comments