ronnix
diff --git a/‎examples/glm/plot_lasso_lars.py‎
Lines changed: 17 additions & 2 deletions b/‎examples/glm/plot_lasso_lars.py‎
Lines changed: 17 additions & 2 deletions
diff --git a/‎scikits/learn/benchmarks/bench_lasso.py‎
Lines changed: 103 additions & 0 deletions b/‎scikits/learn/benchmarks/bench_lasso.py‎
Lines changed: 103 additions & 0 deletions
@@ -24,14 +24,29 @@
 # something's wrong with our dataset
 X[:, 6] = -X[:, 6]
 
+
+m, n = 200, 200
+np.random.seed(0)
+X = np.random.randn(m, n)
+y = np.random.randn(m)
+
+
+_xmean = X.mean(0)
+_ymean = y.mean(0)
+X = X - _xmean
+y = y - _ymean
+_norms = np.apply_along_axis (np.linalg.norm, 0, X)
+nonzeros = np.flatnonzero(_norms)
+X[:, nonzeros] /= _norms[nonzeros]
+
 ################################################################################
 # Demo path functions
 ################################################################################
 
-
+G = np.dot(X.T, X)
 print "Computing regularization path using the LARS ..."
 start = datetime.now()
-alphas, active, path = glm.lars_path(X, y, method='lasso', max_iter=12)
+alphas, active, path = glm.lars_path(X, y, Gram=G, method='lasso')
 print "This took ", datetime.now() - start
 
 alphas = np.sum(np.abs(path.T), axis=1)
 
@@ -0,0 +1,103 @@
+"""
+Benchmarks of Lasso vs LassoLARS
+
+First, we fix the number of features and increase the number of
+samples. Then we plot the computation time as a function of
+the number of samples.
+
+In the second benchmark, we fix the number of samples and increase the
+number of dimensions of the training set. Then we plot the computation
+time as a function of the number of dimensions.
+
+In both cases, only 10% of the features are informative.
+"""
+import gc
+from time import time
+import numpy as np
+
+from bench_glm import make_data
+
+def bench(clf, X_train, Y_train, X_test, Y_test):
+    """Time a single ``clf.fit(X_train, Y_train)`` call.
+
+    Parameters
+    ----------
+    clf : estimator
+        Object whose ``fit(X, y)`` returns the fitted estimator; its
+        ``coef_`` attribute is read after fitting.
+    X_train, Y_train : arrays
+        Training data, passed unchanged to ``fit``.
+    X_test, Y_test : arrays
+        Not used by this function; kept so existing callers keep working.
+
+    Returns
+    -------
+    delta : float
+        Wall-clock time, in seconds, spent inside ``fit``.
+    """
+    # Collect garbage before starting the clock, outside the timed region.
+    gc.collect()
+
+    tstart = time()
+    clf = clf.fit(X_train, Y_train)
+    delta = time() - tstart
+
+    # Active set = coefficients that are not exactly zero.
+    as_size = np.sum(np.abs(clf.coef_) > 0)
+    # The message is labelled with '%', so report a percentage of the
+    # feature count rather than a raw fraction.
+    print("active set size: %s (%s %%)"
+          % (as_size, 100.0 * as_size / X_train.shape[1]))
+    return delta
+
+def compute_bench(alpha, n_samples, n_features):
+    """Time Lasso and LassoLARS fits over a grid of problem sizes.
+
+    For every ``ns`` in ``n_samples`` (outer loop) and every ``nf`` in
+    ``n_features`` (inner loop), a dataset is generated with ``make_data``
+    using ``k = nf // 10``, its columns are scaled to unit Euclidean norm,
+    and both estimators are fitted and timed with ``bench``.
+
+    Parameters
+    ----------
+    alpha : float
+        Regularization parameter handed to both estimators.
+    n_samples : iterable of int
+        Sample counts to benchmark.
+    n_features : iterable of int
+        Feature counts to benchmark.
+
+    Returns
+    -------
+    lasso_results, larslasso_results : list of float
+        Fit times in seconds, one entry per (ns, nf) pair, in loop order.
+    """
+    # Imported here so the function also works when this module is imported;
+    # otherwise these names only exist after the ``__main__`` block has run.
+    from scikits.learn.glm import Lasso, LassoLARS
+
+    lasso_results = []
+    larslasso_results = []
+
+    n_tests = 1000
+    it = 0
+
+    for ns in n_samples:
+        for nf in n_features:
+            it += 1
+            print('============')
+            print('Iteration %s' % it)
+            print('============')
+            k = nf // 10
+            X, Y, X_test, Y_test, coef_ = make_data(
+                n_samples=ns, n_tests=n_tests, n_features=nf,
+                noise=0.1, k=k)
+
+            # Scale every column of X to unit Euclidean norm, in place.
+            X /= np.sqrt(np.sum(X**2, axis=0))
+
+            print("benching Lasso: ")
+            lasso = Lasso(alpha=alpha, fit_intercept=False)
+            lasso_results.append(bench(lasso, X, Y, X_test, Y_test))
+
+            print("benching LassoLARS: ")
+            # NOTE: unlike Lasso above, fit_intercept is not passed here, so
+            # LassoLARS runs with whatever its default for that option is.
+            lars = LassoLARS(alpha=alpha, normalize=False)
+            larslasso_results.append(bench(lars, X, Y, X_test, Y_test))
+
+    return lasso_results, larslasso_results
+
+if __name__ == '__main__':
+    from scikits.learn.glm import Lasso, LassoLARS
+    import pylab as pl
+
+    def _draw_timings(x_values, lasso_times, lars_times, title, xlabel):
+        # Plot both timing curves on the current figure and display it.
+        pl.title(title)
+        pl.plot(x_values, lasso_times, 'b-', label='Lasso')
+        pl.plot(x_values, lars_times, 'r-', label='LassoLARS')
+        pl.legend()
+        pl.xlabel(xlabel)
+        pl.ylabel('time (in seconds)')
+        pl.show()
+
+    alpha = 0.01  # regularization parameter
+
+    # Benchmark 1: fixed number of features, growing number of samples.
+    n_features = 500
+    list_n_samples = range(500, 10001, 500)
+    lasso_results, larslasso_results = compute_bench(
+        alpha, list_n_samples, [n_features])
+
+    pl.close('all')
+    _draw_timings(
+        list_n_samples, lasso_results, larslasso_results,
+        'Lasso benchmark (%d features - alpha=%s)' % (n_features, alpha),
+        'number of samples')
+
+    # Benchmark 2: fixed number of samples, growing number of features.
+    n_samples = 500
+    list_n_features = range(500, 3001, 500)
+    lasso_results, larslasso_results = compute_bench(
+        alpha, [n_samples], list_n_features)
+
+    pl.figure()
+    _draw_timings(
+        list_n_features, lasso_results, larslasso_results,
+        'Lasso benchmark (%d samples - alpha=%s)' % (n_samples, alpha),
+        'number of features')
+