
Commit b7b91ef

Merge branch 'master' into gbrt-interactions
2 parents: dd0deae + 9603b07

14 files changed: +343, -53 lines

doc/modules/manifold.rst

Lines changed: 3 additions & 2 deletions

@@ -359,7 +359,7 @@ tangent spaces to learn the embedding. LTSA can be performed with function
     :target: ../auto_examples/manifold/plot_lle_digits.html
     :align: center
     :scale: 50
-
+
 Complexity
 ----------

@@ -393,7 +393,8 @@ The overall complexity of standard LTSA is
 Multi-dimensional Scaling (MDS)
 ===============================

-Multidimensional scaling (:class:`MDS`) seeks a low-dimensional
+`Multidimensional scaling <http://en.wikipedia.org/wiki/Multidimensional_scaling>`_
+(:class:`MDS`) seeks a low-dimensional
 representation of the data in which the distances respect well the
 distances in the original high-dimensional space.
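The reworded sentence describes MDS as seeking a low-dimensional representation whose distances respect the original high-dimensional distances. A minimal sketch of that usage with :class:`MDS`, following the call pattern used in the new sphere example added in this commit (the toy data and parameter values here are illustrative, not part of the diff):

import numpy as np
from sklearn import manifold
from sklearn.metrics import euclidean_distances

# Toy high-dimensional data: 20 points in 5 dimensions.
rng = np.random.RandomState(0)
X = rng.rand(20, 5)

# MDS looks for a 2D configuration whose pairwise distances
# approximate the pairwise distances of the original data; the
# distance matrix is passed explicitly, as in plot_manifold_sphere.py.
mds = manifold.MDS(2, max_iter=100, n_init=1)
X_2d = mds.fit_transform(euclidean_distances(X))
print(X_2d.shape)  # (20, 2)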

examples/manifold/plot_compare_methods.py

Lines changed: 3 additions & 0 deletions

@@ -9,6 +9,9 @@
 For a discussion and comparison of these algorithms, see the
 :ref:`manifold module page <manifold>`

+For a similar example, where the methods are applied to a
+sphere dataset, see :ref:`example_manifold_plot_manifold_sphere.py`
+
 Note that the purpose of the MDS is to find a low-dimensional
 representation of the data (here 2D) in which the distances respect well
 the distances in the original high-dimensional space, unlike other
examples/manifold/plot_manifold_sphere.py

Lines changed: 127 additions & 0 deletions

@@ -0,0 +1,127 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-

"""
=============================================
Manifold Learning methods on a severed sphere
=============================================

An application of the different :ref:`manifold` techniques
on a spherical data-set. Here one can see the use of
dimensionality reduction in order to gain some intuition
regarding the manifold learning methods. Regarding the dataset,
the poles are cut from the sphere, as well as a thin slice down its
side. This enables the manifold learning techniques to
'spread it open' whilst projecting it onto two dimensions.

For a similar example, where the methods are applied to the
S-curve dataset, see :ref:`example_manifold_plot_compare_methods.py`

Note that the purpose of the :ref:`MDS <multidimensional_scaling>` is
to find a low-dimensional representation of the data (here 2D) in
which the distances respect well the distances in the original
high-dimensional space; unlike other manifold-learning algorithms,
it does not seek an isotropic representation of the data in
the low-dimensional space. Here the manifold problem matches fairly
well that of representing a flat map of the Earth, as with a
`map projection <http://en.wikipedia.org/wiki/Map_projection>`_.
"""

# Author: Jaques Grobler <[email protected]>
# License: BSD

print __doc__

from time import time

import numpy as np
import pylab as pl
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.ticker import NullFormatter

from sklearn import manifold
from sklearn.metrics import euclidean_distances
from sklearn.utils import check_random_state

# Next line to silence pyflakes.
Axes3D

# Variables for manifold learning.
n_neighbors = 10
n_samples = 1000

# Create our sphere.
random_state = check_random_state(0)
p = random_state.rand(n_samples) * (2 * np.pi - 0.55)
t = random_state.rand(n_samples) * np.pi

# Sever the poles from the sphere.
indices = ((t < (np.pi - (np.pi / 8))) & (t > ((np.pi / 8))))
colors = p[indices]
x, y, z = np.sin(t[indices]) * np.cos(p[indices]), \
    np.sin(t[indices]) * np.sin(p[indices]), \
    np.cos(t[indices])

# Plot our dataset.
fig = pl.figure(figsize=(15, 8))
pl.suptitle("Manifold Learning with %i points, %i neighbors"
            % (1000, n_neighbors), fontsize=14)

ax = fig.add_subplot(241, projection='3d')
ax.scatter(x, y, z, c=p[indices], cmap=pl.cm.rainbow)
try:
    # compatibility matplotlib < 1.0
    ax.view_init(40, -10)
except:
    pass

sphere_data = np.array([x, y, z]).T

# Perform Locally Linear Embedding Manifold learning.
methods = ['standard', 'ltsa', 'hessian', 'modified']
labels = ['LLE', 'LTSA', 'Hessian LLE', 'Modified LLE']

for i, method in enumerate(methods):
    t0 = time()
    trans_data = manifold\
        .LocallyLinearEmbedding(n_neighbors, 2,
                                method=method).fit_transform(sphere_data).T
    t1 = time()
    print "%s: %.2g sec" % (methods[i], t1 - t0)

    ax = fig.add_subplot(242 + i)
    pl.scatter(trans_data[0], trans_data[1], c=colors, cmap=pl.cm.rainbow)
    pl.title("%s (%.2g sec)" % (labels[i], t1 - t0))
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    pl.axis('tight')

# Perform Isomap Manifold learning.
t0 = time()
trans_data = manifold.Isomap(n_neighbors, n_components=2)\
    .fit_transform(sphere_data).T
t1 = time()
print "%s: %.2g sec" % ('ISO', t1 - t0)

ax = fig.add_subplot(246)
pl.scatter(trans_data[0], trans_data[1], c=colors, cmap=pl.cm.rainbow)
pl.title("%s (%.2g sec)" % ('Isomap', t1 - t0))
ax.xaxis.set_major_formatter(NullFormatter())
ax.yaxis.set_major_formatter(NullFormatter())
pl.axis('tight')

# Perform Multi-dimensional scaling.
t0 = time()
mds = manifold.MDS(2, max_iter=100, n_init=1)
trans_data = mds.fit_transform(euclidean_distances(sphere_data)).T
t1 = time()
print "MDS: %.2g sec" % (t1 - t0)

ax = fig.add_subplot(247)
pl.scatter(trans_data[0], trans_data[1], c=colors, cmap=pl.cm.rainbow)
pl.title("MDS (%.2g sec)" % (t1 - t0))
ax.xaxis.set_major_formatter(NullFormatter())
ax.yaxis.set_major_formatter(NullFormatter())
pl.axis('tight')

pl.show()

sklearn/cluster/tests/test_k_means.py

Lines changed: 1 addition & 1 deletion

@@ -207,7 +207,7 @@ def _get_mac_os_version():


 def test_k_means_plus_plus_init_2_jobs():
-    if _get_mac_os_version() == '10.7':
+    if _get_mac_os_version() >= '10.7':
         raise SkipTest('Multi-process bug in Mac OS X Lion (see issue #636)')
     k_means = KMeans(init="k-means++", n_clusters=n_clusters, n_jobs=2,
                      random_state=42).fit(X)
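The widened comparison skips the multi-process test on OS X 10.7 and on later releases, not only on 10.7 itself. A small sketch of that kind of check (the helper body below is an assumption; the real _get_mac_os_version lives earlier in test_k_means.py):

import platform

def _get_mac_os_version():
    # Assumed implementation: returns e.g. '10.7' on OS X Lion,
    # or an empty string on other platforms.
    version = platform.mac_ver()[0]
    return '.'.join(version.split('.')[:2]) if version else ''

version = _get_mac_os_version()
if version and version >= '10.7':   # lexicographic string comparison, as in the test
    print("skip: multi-process bug on OS X >= 10.7 (issue #636)")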

sklearn/feature_extraction/hashing.py

Lines changed: 1 addition & 2 deletions

@@ -1,7 +1,6 @@
 # Author: Lars Buitinck <[email protected]>
 # License: 3-clause BSD.

-import itertools
 import numbers

 import numpy as np

@@ -76,7 +75,7 @@ def _validate_params(n_features, input_type):
     if not isinstance(n_features, (numbers.Integral, np.integer)):
         raise TypeError("n_features must be integral, got %r (%s)."
                         % (n_features, type(n_features)))
-    elif n_features < 1 or n_features >= 2**31:
+    elif n_features < 1 or n_features >= 2 ** 31:
         raise ValueError("Invalid number of features (%d)." % n_features)

     if input_type not in ("dict", "pair", "string"):
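The reformatted bound keeps n_features to an integer in [1, 2**31). A standalone sketch mirroring the validation shown in the hunk (the helper name here is made up for illustration):

import numbers
import numpy as np

def validate_n_features(n_features):
    # Mirrors _validate_params in sklearn/feature_extraction/hashing.py.
    if not isinstance(n_features, (numbers.Integral, np.integer)):
        raise TypeError("n_features must be integral, got %r (%s)."
                        % (n_features, type(n_features)))
    elif n_features < 1 or n_features >= 2 ** 31:
        raise ValueError("Invalid number of features (%d)." % n_features)

validate_n_features(2 ** 20)       # fine: a typical hashing-trick table size
try:
    validate_n_features(2 ** 31)   # one past the signed 32-bit limit
except ValueError as exc:
    print(exc)                     # Invalid number of features (2147483648).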

sklearn/manifold/spectral_embedding.py

Lines changed: 4 additions & 1 deletion

@@ -264,7 +264,10 @@ def spectral_embedding(adjacency, n_components=8, eigen_solver=None,
     if eigen_solver == 'amg':
         # Use AMG to get a preconditioner and speed up the eigenvalue
         # problem.
+        if not sparse.issparse(laplacian):
+            warnings.warn("AMG works better for sparse matrices")
         laplacian = laplacian.astype(np.float)  # lobpcg needs native floats
+        laplacian = _set_diag(laplacian, 1)
         ml = smoothed_aggregation_solver(atleast2d_or_csr(laplacian))
         M = ml.aspreconditioner()
         X = random_state.rand(laplacian.shape[0], n_components + 1)

@@ -446,7 +449,7 @@ def fit(self, X, y=None):
         self.random_state = check_random_state(self.random_state)
         if isinstance(self.affinity, basestring):
             if self.affinity not in set(("nearest_neighbors", "rbf",
-                                         "precomputed")):
+                                         "precomputed")):
                 raise ValueError(("%s is not a valid affinity. Expected "
                                   "'precomputed', 'rbf', 'nearest_neighbors' "
                                   "or a callable.") % self.affinity)

sklearn/manifold/tests/test_spectral_embedding.py

Lines changed: 16 additions & 36 deletions

@@ -1,21 +1,19 @@
 from nose.tools import assert_true
 from nose.tools import assert_equal

-from scipy import sparse
 from scipy.sparse import csr_matrix
 from scipy.sparse import csc_matrix
 import numpy as np
-from numpy.testing import assert_almost_equal, assert_array_almost_equal
+from numpy.testing import assert_array_almost_equal

 from nose.tools import assert_raises
 from nose.plugins.skip import SkipTest

 from sklearn.manifold.spectral_embedding import SpectralEmbedding
 from sklearn.manifold.spectral_embedding import _graph_is_connected
 from sklearn.metrics.pairwise import rbf_kernel
-from sklearn.pipeline import Pipeline
 from sklearn.metrics import normalized_mutual_info_score
-from sklearn.cluster import KMeans, SpectralClustering
+from sklearn.cluster import KMeans
 from sklearn.datasets.samples_generator import make_blobs


@@ -84,7 +82,7 @@ def test_spectral_embedding_precomputed_affinity(seed=36):
     embed_rbf = se_rbf.fit_transform(S)
     assert_array_almost_equal(
         se_precomp.affinity_matrix_, se_rbf.affinity_matrix_)
-    assert_true(_check_with_col_sign_flipping(embed_precomp, embed_rbf, 0.01))
+    assert_true(_check_with_col_sign_flipping(embed_precomp, embed_rbf, 0.02))


 def test_spectral_embedding_callable_affinity(seed=36):

@@ -105,8 +103,9 @@ def test_spectral_embedding_callable_affinity(seed=36):
     embed_callable = se_callable.fit_transform(S)
     assert_array_almost_equal(
         se_callable.affinity_matrix_, se_rbf.affinity_matrix_)
+    assert_array_almost_equal(kern, se_rbf.affinity_matrix_)
     assert_true(
-        _check_with_col_sign_flipping(embed_rbf, embed_callable, 0.01))
+        _check_with_col_sign_flipping(embed_rbf, embed_callable, 0.02))


 def test_spectral_embedding_amg_solver(seed=36):

@@ -116,17 +115,14 @@ def test_spectral_embedding_amg_solver(seed=36):
     except ImportError:
         raise SkipTest

-    gamma = 0.9
-    se_amg = SpectralEmbedding(n_components=3, affinity="rbf",
-                               gamma=gamma, eigen_solver="amg",
+    se_amg = SpectralEmbedding(n_components=3, affinity="nearest_neighbors",
+                               eigen_solver="amg", n_neighbors=5,
                                random_state=np.random.RandomState(seed))
-    se_arpack = SpectralEmbedding(n_components=3, affinity="rbf",
-                                  gamma=gamma, eigen_solver="arpack",
+    se_arpack = SpectralEmbedding(n_components=3, affinity="nearest_neighbors",
+                                  eigen_solver="arpack", n_neighbors=5,
                                   random_state=np.random.RandomState(seed))
     embed_amg = se_amg.fit_transform(S)
     embed_arpack = se_arpack.fit_transform(S)
-    assert_array_almost_equal(
-        se_amg.affinity_matrix_, se_arpack.affinity_matrix_)
     assert_true(_check_with_col_sign_flipping(embed_amg, embed_arpack, 0.01))


@@ -151,33 +147,17 @@ def test_pipline_spectral_clustering(seed=36):

 def test_spectral_embedding_unknown_eigensolver(seed=36):
     """Test that SpectralClustering fails with an unknown eigensolver"""
-    centers = np.array([
-        [0., 0., 0.],
-        [10., 10., 10.],
-        [20., 20., 20.],
-    ])
-    X, true_labels = make_blobs(n_samples=100, centers=centers,
-                                cluster_std=1., random_state=42)
-
-    se_precomp = SpectralEmbedding(n_components=1, affinity="precomputed",
-                                   random_state=np.random.RandomState(seed),
-                                   eigen_solver="<unknown>")
-    assert_raises(ValueError, se_precomp.fit, S)
+    se = SpectralEmbedding(n_components=1, affinity="precomputed",
+                           random_state=np.random.RandomState(seed),
+                           eigen_solver="<unknown>")
+    assert_raises(ValueError, se.fit, S)


 def test_spectral_embedding_unknown_affinity(seed=36):
     """Test that SpectralClustering fails with an unknown affinity type"""
-    centers = np.array([
-        [0., 0., 0.],
-        [10., 10., 10.],
-        [20., 20., 20.],
-    ])
-    X, true_labels = make_blobs(n_samples=100, centers=centers,
-                                cluster_std=1., random_state=42)
-
-    se_precomp = SpectralEmbedding(n_components=1, affinity="<unknown>",
-                                   random_state=np.random.RandomState(seed))
-    assert_raises(ValueError, se_precomp.fit, S)
+    se = SpectralEmbedding(n_components=1, affinity="<unknown>",
+                           random_state=np.random.RandomState(seed))
+    assert_raises(ValueError, se.fit, S)


 def test_connectivity(seed=36):
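Several of the assertions above go through _check_with_col_sign_flipping, because eigenvector-based embeddings are only defined up to a per-column sign. A sketch of what such a comparison does (this body is an assumption; the real helper is defined near the top of the test file, outside this diff):

import numpy as np

def check_with_col_sign_flipping(A, B, tol=0.0):
    # Two embeddings agree if every column of A matches the corresponding
    # column of B up to a global sign flip, within the given tolerance.
    for k in range(A.shape[1]):
        same = ((A[:, k] - B[:, k]) ** 2).mean() <= tol ** 2
        flipped = ((A[:, k] + B[:, k]) ** 2).mean() <= tol ** 2
        if not (same or flipped):
            return False
    return True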
