|
| 1 | +""" |
| 2 | +Benchmarks of Lasso vs LassoLARS |
| 3 | +
|
| 4 | +First, we fix the number of features and increase the number of |
| 5 | +samples. Then we plot the computation time as a function of |
| 6 | +the number of samples. |
| 7 | +
|
| 8 | +In the second benchmark, we fix the number of samples and increase the |
| 9 | +number of dimensions of the training set. Then we plot the computation |
| 10 | +time as a function of the number of dimensions. |
| 11 | +
|
| 12 | +In both cases, only 10% of the features are informative. |
| 13 | +""" |
| 14 | +import gc |
| 15 | +from time import time |
| 16 | +import numpy as np |
| 17 | + |
| 18 | +from bench_glm import make_data |
| 19 | + |
def bench(clf, X_train, Y_train, X_test, Y_test):
    """Time one ``fit`` call of ``clf`` and report its active-set size.

    Parameters
    ----------
    clf : estimator
        Object exposing ``fit(X, Y)``; the value returned by ``fit`` is
        used as the fitted model and must expose a ``coef_`` attribute.
    X_train, Y_train : array-like
        Training data handed to ``clf.fit``. ``X_train.shape[1]`` is used
        as the total number of features.
    X_test, Y_test :
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    delta : float
        Wall-clock time, in seconds, spent inside ``clf.fit``.
    """
    # Presumably run here so that a pending garbage collection is less
    # likely to fire inside the timed region.
    gc.collect()

    # Only the fit itself is timed.
    tstart = time()
    clf = clf.fit(X_train, Y_train)
    delta = time() - tstart

    # Active set = coefficients that are not exactly zero.
    as_size = int(np.sum(np.abs(clf.coef_) > 0))
    n_features = X_train.shape[1]
    # The message is labelled with a percent sign, so report an actual
    # percentage rather than the raw fraction.
    percent = 100.0 * as_size / n_features
    print("active set size: %s (%s %%)" % (as_size, percent))
    return delta
| 32 | + |
def compute_bench(alpha, n_samples, n_features):
    """Time Lasso and LassoLARS fits over a grid of problem sizes.

    One data set is generated with ``make_data`` for every
    ``(ns, nf)`` pair (samples in the outer loop, features in the inner
    loop), its training columns are scaled to unit Euclidean norm, and
    both estimators are timed on it with ``bench``.

    Parameters
    ----------
    alpha : float
        Regularization parameter passed to both estimators.
    n_samples : iterable of int
        Training-set sizes to benchmark.
    n_features : iterable of int
        Feature counts to benchmark.

    Returns
    -------
    lasso_results, larslasso_results : list of float
        Fit times in seconds as returned by ``bench``, one entry per
        ``(ns, nf)`` pair, in iteration order.
    """
    # Imported locally so the function also works when this module is
    # imported; the names are otherwise only bound when the file is run
    # as a script, which made this function fail with NameError.
    from scikits.learn.glm import Lasso, LassoLARS

    n_tests = 1000  # size of the test split requested from make_data

    lasso_results = []
    larslasso_results = []

    it = 0
    for ns in n_samples:
        for nf in n_features:
            it += 1
            print('============')
            print('Iteration %s' % it)
            print('============')

            # One tenth of the features; passed to make_data as ``k``
            # (presumably the number of informative features, per the
            # module docstring -- confirm against bench_glm.make_data).
            k = nf // 10
            X, Y, X_test, Y_test, _ = make_data(
                n_samples=ns, n_tests=n_tests, n_features=nf,
                noise=0.1, k=k)

            # Scale every training column to unit Euclidean norm, in place.
            X /= np.sqrt(np.sum(X ** 2, axis=0))

            print("benching Lasso: ")
            lasso_results.append(
                bench(Lasso(alpha=alpha, fit_intercept=False),
                      X, Y, X_test, Y_test))

            # NOTE(review): unlike Lasso above, LassoLARS is not given
            # fit_intercept=False (the original kept such a variant only
            # as commented-out code) -- confirm the comparison is fair.
            print("benching LassoLARS: ")
            larslasso_results.append(
                bench(LassoLARS(alpha=alpha, normalize=False),
                      X, Y, X_test, Y_test))

    return lasso_results, larslasso_results
| 69 | + |
if __name__ == '__main__':
    from scikits.learn.glm import Lasso, LassoLARS
    import pylab as pl

    # Regularization parameter shared by both benchmarks.
    alpha = 0.01

    def _draw_timings(x_values, lasso_times, lars_times, title, x_label):
        """Plot both timing curves on the current figure and show it."""
        pl.title(title)
        pl.plot(x_values, lasso_times, 'b-', label='Lasso')
        pl.plot(x_values, lars_times, 'r-', label='LassoLARS')
        pl.legend()
        pl.xlabel(x_label)
        pl.ylabel('time (in seconds)')
        pl.show()

    # Benchmark 1: fixed number of features, growing number of samples.
    n_features = 500
    list_n_samples = range(500, 10001, 500)
    lasso_results, larslasso_results = compute_bench(
        alpha, list_n_samples, [n_features])

    pl.close('all')
    _draw_timings(
        list_n_samples, lasso_results, larslasso_results,
        'Lasso benchmark (%d features - alpha=%s)' % (n_features, alpha),
        'number of samples')

    # Benchmark 2: fixed number of samples, growing number of features.
    n_samples = 500
    list_n_features = range(500, 3001, 500)
    lasso_results, larslasso_results = compute_bench(
        alpha, [n_samples], list_n_features)

    pl.figure()
    _draw_timings(
        list_n_features, lasso_results, larslasso_results,
        'Lasso benchmark (%d samples - alpha=%s)' % (n_samples, alpha),
        'number of features')
| 103 | + |
0 commit comments