fannix
diff --git a/‎sklearn/cluster/spectral.py‎
Lines changed: 44 additions & 13 deletions b/‎sklearn/cluster/spectral.py‎
Lines changed: 44 additions & 13 deletions
diff --git a/‎sklearn/cluster/tests/test_spectral.py‎
Lines changed: 31 additions & 2 deletions b/‎sklearn/cluster/tests/test_spectral.py‎
Lines changed: 31 additions & 2 deletions
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
 """Algorithms for spectral clustering"""
 
 # Author: Gael Varoquaux [email protected]
@@ -11,7 +12,7 @@
 from ..base import BaseEstimator, ClusterMixin
 from ..utils import check_random_state, as_float_array, deprecated
 from ..utils.extmath import norm
-from ..metrics.pairwise import rbf_kernel
+from ..metrics.pairwise import pairwise_kernels
 from ..neighbors import kneighbors_graph
 from ..manifold import spectral_embedding
 from .k_means_ import k_means
@@ -287,8 +288,9 @@ class SpectralClustering(BaseEstimator, ClusterMixin):
     If affinity is the adjacency matrix of a graph, this method can be
     used to find normalized graph cuts.
 
-    When calling ``fit``, an affinity matrix is constructed using either the
-    Gaussian (aka RBF) kernel of the euclidean distanced ``d(X, X)``::
+    When calling ``fit``, an affinity matrix is constructed using either a
+    kernel function such as the Gaussian (aka RBF) kernel of the euclidean
+    distance ``d(X, X)``::
 
             np.exp(-gamma * d(X,X) ** 2)
 
@@ -302,12 +304,27 @@ class SpectralClustering(BaseEstimator, ClusterMixin):
     n_clusters : integer, optional
         The dimension of the projection subspace.
 
-    affinity: string, 'nearest_neighbors', 'rbf' or 'precomputed'
+    affinity : string, array-like or callable, default 'rbf'
+        If a string, this may be one of 'nearest_neighbors', 'precomputed',
+        'rbf' or one of the kernels supported by
+        `sklearn.metrics.pairwise_kernels`.
+
+        Only kernels that produce similarity scores (non-negative values that
+        increase with similarity) should be used. This property is not checked
+        by the clustering algorithm.
 
     gamma: float
-        Scaling factor of Gaussian (rbf) affinity kernel. Ignored for
+        Scaling factor of the RBF, polynomial, exponential chi² and
+        sigmoid affinity kernels. Ignored for
         ``affinity='nearest_neighbors'``.
 
+    degree : float, default=3
+        Degree of the polynomial kernel. Ignored by other kernels.
+
+    coef0 : float, default=1
+        Zero coefficient for polynomial and sigmoid kernels.
+        Ignored by other kernels.
+
     n_neighbors: integer
         Number of neighbors to use when constructing the affinity matrix using
         the nearest neighbors method. Ignored for ``affinity='rbf'``.
@@ -338,6 +355,10 @@ class SpectralClustering(BaseEstimator, ClusterMixin):
         also be sensitive to initialization. Discretization is another approach
         which is less sensitive to random initialization.
 
+    kernel_params : dictionary of string to any, optional
+        Parameters (keyword arguments) and values for a kernel passed as
+        a callable object. Ignored by other kernels.
+
     Attributes
     ----------
     `affinity_matrix_` : array-like, shape (n_samples, n_samples)
@@ -381,7 +402,8 @@ class SpectralClustering(BaseEstimator, ClusterMixin):
 
     def __init__(self, n_clusters=8, eigen_solver=None, random_state=None,
                  n_init=10, gamma=1., affinity='rbf', n_neighbors=10, k=None,
-                 eigen_tol=0.0, assign_labels='kmeans', mode=None):
+                 eigen_tol=0.0, assign_labels='kmeans', mode=None,
+                 degree=3, coef0=1, kernel_params=None):
         if k is not None:
             warnings.warn("'k' was renamed to n_clusters and "
                           "will be removed in 0.15.",
@@ -402,6 +424,9 @@ def __init__(self, n_clusters=8, eigen_solver=None, random_state=None,
         self.n_neighbors = n_neighbors
         self.eigen_tol = eigen_tol
         self.assign_labels = assign_labels
+        self.degree = degree
+        self.coef0 = coef0
+        self.kernel_params = kernel_params
 
     def fit(self, X):
         """Creates an affinity matrix for X using the selected affinity,
@@ -419,18 +444,22 @@ def fit(self, X):
                           " a custom affinity matrix, "
                           "set ``affinity=precomputed``.")
 
-        if self.affinity == 'rbf':
-            self.affinity_matrix_ = rbf_kernel(X, gamma=self.gamma)
-
-        elif self.affinity == 'nearest_neighbors':
+        if self.affinity == 'nearest_neighbors':
             connectivity = kneighbors_graph(X, n_neighbors=self.n_neighbors)
             self.affinity_matrix_ = 0.5 * (connectivity + connectivity.T)
         elif self.affinity == 'precomputed':
             self.affinity_matrix_ = X
         else:
-            raise ValueError("Invalid 'affinity'. Expected 'rbf', "
-                             "'nearest_neighbors' or 'precomputed', got '%s'."
-                             % self.affinity)
+            params = self.kernel_params
+            if params is None:
+                params = {}
+            if not callable(self.affinity):
+                params['gamma'] = self.gamma
+                params['degree'] = self.degree
+                params['coef0'] = self.coef0
+            self.affinity_matrix_ = pairwise_kernels(X, metric=self.affinity,
+                                                     filter_params=True,
+                                                     **params)
 
         random_state = check_random_state(self.random_state)
         self.labels_ = spectral_clustering(self.affinity_matrix_,
@@ -457,3 +486,5 @@ def mode(self):
                 " 0.15.")
     def k(self):
         return self.n_clusters
+
+
@@ -1,11 +1,14 @@
 """Testing for Spectral Clustering methods"""
 
 from sklearn.externals.six.moves import cPickle
+from sklearn.metrics.pairwise import kernel_metrics
+
 dumps, loads = cPickle.dumps, cPickle.loads
 
 import numpy as np
 from scipy import sparse
 
+from sklearn.utils import check_random_state
 from sklearn.utils.testing import assert_equal
 from sklearn.utils.testing import assert_array_equal
 from sklearn.utils.testing import assert_raises
@@ -156,8 +159,8 @@ def test_affinities():
     # Note: in the following, random_state has been selected to have
     # a dataset that yields a stable eigen decomposition both when built
     # on OSX and Linux
-    X, y = make_blobs(n_samples=40, random_state=2, centers=[[1, 1], [-1, -1]],
-                      cluster_std=0.4)
+    X, y = make_blobs(n_samples=40, random_state=2,
+                      centers=[[1, 1], [-1, -1]], cluster_std=0.4)
     # nearest neighbors affinity
     sp = SpectralClustering(n_clusters=2, affinity='nearest_neighbors',
                             random_state=0)
@@ -168,6 +171,32 @@ def test_affinities():
     labels = sp.fit(X).labels_
     assert_equal(adjusted_rand_score(y, labels), 1)
 
+    X = check_random_state(10).rand(10, 5) * 10
+
+    kernels_available = kernel_metrics()
+    for kern in kernels_available:
+        # Additive chi^2 gives a negative similarity matrix which
+        # doesn't make sense for spectral clustering
+        if kern != 'additive_chi2':
+            sp = SpectralClustering(n_clusters=2, affinity=kern, random_state=0)
+            labels = sp.fit(X).labels_
+            print(labels)
+            assert_equal((X.shape[0],), labels.shape)
+
+    sp = SpectralClustering(n_clusters=2, affinity=lambda x, y: 1,
+                            random_state=0)
+    labels = sp.fit(X).labels_
+    assert_equal((X.shape[0],), labels.shape)
+
+    def histogram(x, y, **kwargs):
+        """Histogram kernel implemented as a callable."""
+        assert_equal(kwargs, {})    # no kernel_params that we didn't ask for
+        return np.minimum(x, y).sum()
+
+    sp = SpectralClustering(n_clusters=2, affinity=histogram, random_state=0)
+    labels = sp.fit(X).labels_
+    assert_equal((X.shape[0],), labels.shape)
+
     # raise error on unknown affinity
     sp = SpectralClustering(n_clusters=2, affinity='<unknown>')
     assert_raises(ValueError, sp.fit, X)