netconstructor
diff --git a/.gitattributes b/.gitattributes
Lines changed: 1 addition & 0 deletions
diff --git a/.travis.yml b/.travis.yml
Lines changed: 4 additions & 3 deletions
diff --git a/benchmarks/bench_covertype.py b/benchmarks/bench_covertype.py
Lines changed: 39 additions & 60 deletions
diff --git a/benchmarks/bench_glm.py b/benchmarks/bench_glm.py
Lines changed: 1 addition & 1 deletion
diff --git a/benchmarks/bench_glmnet.py b/benchmarks/bench_glmnet.py
Lines changed: 13 additions & 13 deletions
diff --git a/benchmarks/bench_lasso.py b/benchmarks/bench_lasso.py
Lines changed: 6 additions & 8 deletions
@@ -16,6 +16,7 @@
 /sklearn/utils/arraybuilder.c -diff
 /sklearn/utils/arrayfuncs.c -diff
 /sklearn/utils/graph_shortest_path.c -diff
+/sklearn/utils/lgamma.c -diff
 /sklearn/utils/murmurhash.c -diff
 /sklearn/utils/seq_dataset.c -diff
 /sklearn/utils/sparsefuncs.c -diff
 
@@ -1,10 +1,11 @@
 language: python
 python:
     - "2.7"
-virtualenv: 
-    system_site_packages: true
 before_install:
+    - deactivate
     - sudo apt-get update -qq
-    - sudo apt-get install -qq python-scipy
+    - sudo apt-get install -qq python-scipy python-nose
+    - virtualenv --system-site-packages ~/virtualenv/this
+    - source ~/virtualenv/this/bin/activate
 install: python setup.py build_ext --inplace
 script: make test
@@ -40,29 +40,33 @@
 [1] http://archive.ics.uci.edu/ml/datasets/Covertype
 
 """
-from __future__ import division
+from __future__ import division, print_function
 
-print __doc__
+print(__doc__)
 
-# Author: Peter Prettenhoer <[email protected]>
+# Author: Peter Prettenhofer <[email protected]>
 # License: BSD Style.
 
-# $Id$
-
-from time import time
+import logging
 import os
 import sys
-import numpy as np
+from time import time
 from optparse import OptionParser
 
+import numpy as np
+
+from sklearn.datasets import fetch_covtype
 from sklearn.svm import LinearSVC
 from sklearn.linear_model import SGDClassifier
 from sklearn.naive_bayes import GaussianNB
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
 from sklearn import metrics
 from sklearn.externals.joblib import Memory
-from sklearn.utils import check_random_state
+
+logging.basicConfig(level=logging.INFO,
+                    format='%(asctime)s %(levelname)s %(message)s')
+logger = logging.getLogger(__name__)
 
 op = OptionParser()
 op.add_option("--classifiers",
@@ -80,8 +84,7 @@
 # estimators.
 op.add_option("--random-seed",
               dest="random_seed", default=13, type=int,
-              help="Common seed used by random number generator."
-              )
+              help="Common seed used by random number generator.")
 
 op.print_help()
 
@@ -97,57 +100,31 @@
 joblib_cache_folder = os.path.join(bench_folder, 'bench_covertype_data')
 m = Memory(joblib_cache_folder, mmap_mode='r')
 
-# Set seed for rng
-rng = check_random_state(opts.random_seed)
-
 
 # Load the data, then cache and memmap the train/test split
 @m.cache
 def load_data(dtype=np.float32, order='F'):
-    ######################################################################
-    ## Download the data, if not already on disk
-    if not os.path.exists(original_archive):
-        # Download the data
-        import urllib
-        print "Downloading data, Please Wait (11MB)..."
-        opener = urllib.urlopen(
-            'http://archive.ics.uci.edu/ml/'
-            'machine-learning-databases/covtype/covtype.data.gz')
-        open(original_archive, 'wb').write(opener.read())
-
     ######################################################################
     ## Load dataset
     print("Loading dataset...")
-    import gzip
-    f = gzip.open(original_archive)
-    X = np.fromstring(f.read().replace(",", " "), dtype=dtype, sep=" ",
-                      count=-1)
-    X = X.reshape((581012, 55))
+    data = fetch_covtype(download_if_missing=True, shuffle=True,
+                         random_state=opts.random_seed)
+    X, y = data.data, data.target
     if order.lower() == 'f':
         X = np.asfortranarray(X)
-    f.close()
 
     # class 1 vs. all others.
-    y = np.ones(X.shape[0]) * -1
-    y[np.where(X[:, -1] == 1)] = 1
-    X = X[:, :-1]
+    y[np.where(y != 1)] = -1
 
     ######################################################################
     ## Create train-test split (as [Joachims, 2006])
-    print("Creating train-test split...")
-    idx = np.arange(X.shape[0])
-    rng.shuffle(idx)
-    train_idx = idx[:522911]
-    test_idx = idx[522911:]
+    logger.info("Creating train-test split...")
+    n_train = 522911
 
-    X_train = X[train_idx]
-    y_train = y[train_idx]
-    X_test = X[test_idx]
-    y_test = y[test_idx]
-
-    # free memory
-    del X
-    del y
+    X_train = X[:n_train]
+    y_train = y[:n_train]
+    X_test = X[n_train:]
+    y_test = y[n_train:]
 
     ######################################################################
     ## Standardize first 10 features (the numerical ones)
@@ -172,12 +149,14 @@ def load_data(dtype=np.float32, order='F'):
 print("%s %d" % ("number of classes:".ljust(25),
                  np.unique(y_train).shape[0]))
 print("%s %s" % ("data type:".ljust(25), X_train.dtype))
-print("%s %d (pos=%d, neg=%d, size=%dMB)" % ("number of train samples:".ljust(25),
-                          X_train.shape[0], np.sum(y_train == 1),
-                          np.sum(y_train == -1), int(X_train.nbytes / 1e6)))
-print("%s %d (pos=%d, neg=%d, size=%dMB)" % ("number of test samples:".ljust(25),
-                          X_test.shape[0], np.sum(y_test == 1),
-                          np.sum(y_test == -1), int(X_test.nbytes / 1e6)))
+print("%s %d (pos=%d, neg=%d, size=%dMB)"
+      % ("number of train samples:".ljust(25),
+         X_train.shape[0], np.sum(y_train == 1),
+         np.sum(y_train == -1), int(X_train.nbytes / 1e6)))
+print("%s %d (pos=%d, neg=%d, size=%dMB)"
+      % ("number of test samples:".ljust(25),
+      X_test.shape[0], np.sum(y_test == 1),
+      np.sum(y_test == -1), int(X_test.nbytes / 1e6)))
 
 
 classifiers = dict()
@@ -204,7 +183,7 @@ def benchmark(clf):
     'dual': False,
     'tol': 1e-3,
     "random_state": opts.random_seed,
-    }
+}
 classifiers['liblinear'] = LinearSVC(**liblinear_parameters)
 
 ######################################################################
@@ -218,7 +197,7 @@ def benchmark(clf):
     'n_iter': 2,
     'n_jobs': opts.n_jobs,
     "random_state": opts.random_seed,
-    }
+}
 classifiers['SGD'] = SGDClassifier(**sgd_parameters)
 
 ######################################################################
@@ -255,21 +234,21 @@ def benchmark(clf):
         op.error('classifier %r unknown' % name)
         sys.exit(1)
 
-print("")
+print()
 print("Training Classifiers")
 print("====================")
-print("")
+print()
 err, train_time, test_time = {}, {}, {}
 for name in sorted(selected_classifiers):
     print("Training %s ..." % name)
     err[name], train_time[name], test_time[name] = benchmark(classifiers[name])
 
 ######################################################################
 ## Print classification performance
-print("")
+print()
 print("Classification performance:")
 print("===========================")
-print("")
+print()
 
 
 def print_row(clf_type, train_time, test_time, err):
@@ -284,5 +263,5 @@ def print_row(clf_type, train_time, test_time, err):
 
 for name in sorted(selected_classifiers, key=lambda name: err[name]):
     print_row(name, train_time[name], test_time[name], err[name])
-print("")
-print("")
+print()
+print()
@@ -24,7 +24,7 @@
 
     for i in range(n_iter):
 
-        print 'Iteration %s of %s' % (i, n_iter)
+        print('Iteration %s of %s' % (i, n_iter))
 
         n_samples, n_features = 10 * i + 3, 10 * i + 3
 
 
@@ -38,9 +38,9 @@ def bench(factory, X, Y, X_test, Y_test, ref_coef):
     delta = (time() - tstart)
     # stop time
 
-    print "duration: %0.3fs" % delta
-    print "rmse: %f" % rmse(Y_test, clf.predict(X_test))
-    print "mean coef abs diff: %f" % abs(ref_coef - clf.coef_.ravel()).mean()
+    print("duration: %0.3fs" % delta)
+    print("rmse: %f" % rmse(Y_test, clf.predict(X_test)))
+    print("mean coef abs diff: %f" % abs(ref_coef - clf.coef_.ravel()).mean())
     return delta
 
 
@@ -58,9 +58,9 @@ def bench(factory, X, Y, X_test, Y_test, ref_coef):
     n_informative = n_features / 10
     n_test_samples = 1000
     for i in range(1, n + 1):
-        print '=================='
-        print 'Iteration %s of %s' % (i, n)
-        print '=================='
+        print('==================')
+        print('Iteration %s of %s' % (i, n))
+        print('==================')
 
         X, Y, coef_ = make_regression(
             n_samples=(i * step) + n_test_samples, n_features=n_features,
@@ -71,9 +71,9 @@ def bench(factory, X, Y, X_test, Y_test, ref_coef):
         X = X[:(i * step)]
         Y = Y[:(i * step)]
 
-        print "benching scikit: "
+        print("benching scikit-learn: ")
         scikit_results.append(bench(ScikitLasso, X, Y, X_test, Y_test, coef_))
-        print "benching glmnet: "
+        print("benching glmnet: ")
         glmnet_results.append(bench(GlmnetLasso, X, Y, X_test, Y_test, coef_))
 
     pl.clf()
@@ -96,9 +96,9 @@ def bench(factory, X, Y, X_test, Y_test, ref_coef):
     n_samples = 500
 
     for i in range(1, n + 1):
-        print '=================='
-        print 'Iteration %02d of %02d' % (i, n)
-        print '=================='
+        print('==================')
+        print('Iteration %02d of %02d' % (i, n))
+        print('==================')
         n_features = i * step
         n_informative = n_features / 10
 
@@ -111,9 +111,9 @@ def bench(factory, X, Y, X_test, Y_test, ref_coef):
         X = X[:n_samples]
         Y = Y[:n_samples]
 
-        print "benching scikit: "
+        print("benching scikit-learn: ")
         scikit_results.append(bench(ScikitLasso, X, Y, X_test, Y_test, coef_))
-        print "benching glmnet: "
+        print("benching glmnet: ")
         glmnet_results.append(bench(GlmnetLasso, X, Y, X_test, Y_test, coef_))
 
     xx = np.arange(100, 100 + n * step, step)
 
@@ -19,20 +19,18 @@
 
 
 def compute_bench(alpha, n_samples, n_features, precompute):
-
     lasso_results = []
     lars_lasso_results = []
 
-    n_test_samples = 0
     it = 0
 
     for ns in n_samples:
         for nf in n_features:
             it += 1
-            print '=================='
-            print 'Iteration %s of %s' % (it, max(len(n_samples),
-                                          len(n_features)))
-            print '=================='
+            print('==================')
+            print('Iteration %s of %s' % (it, max(len(n_samples),
+                                          len(n_features))))
+            print('==================')
             n_informative = nf // 10
             X, Y, coef_ = make_regression(n_samples=ns, n_features=nf,
                                           n_informative=n_informative,
@@ -41,15 +39,15 @@ def compute_bench(alpha, n_samples, n_features, precompute):
             X /= np.sqrt(np.sum(X ** 2, axis=0))  # Normalize data
 
             gc.collect()
-            print "- benching Lasso"
+            print("- benching Lasso")
             clf = Lasso(alpha=alpha, fit_intercept=False,
                         precompute=precompute)
             tstart = time()
             clf.fit(X, Y)
             lasso_results.append(time() - tstart)
 
             gc.collect()
-            print "- benching LassoLars"
+            print("- benching LassoLars")
             clf = LassoLars(alpha=alpha, fit_intercept=False,
                             normalize=False, precompute=precompute)
             tstart = time()