
Commit 88e096b

jnothman authored and rth committed
[MRG] TST add test for silhouette from its original publication (scikit-learn#10298)
1 parent 026395f commit 88e096b

File tree

1 file changed: +47 −0 lines changed


sklearn/metrics/cluster/tests/test_unsupervised.py

Lines changed: 47 additions & 0 deletions
@@ -81,6 +81,53 @@ def test_cluster_size_1():
     assert_array_equal(ss, [0, .5, .5, 0, 1, 1])


+def test_silhouette_paper_example():
+    # Explicitly check per-sample results against Rousseeuw (1987)
+    # Data from Table 1
+    lower = [5.58,
+             7.00, 6.50,
+             7.08, 7.00, 3.83,
+             4.83, 5.08, 8.17, 5.83,
+             2.17, 5.75, 6.67, 6.92, 4.92,
+             6.42, 5.00, 5.58, 6.00, 4.67, 6.42,
+             3.42, 5.50, 6.42, 6.42, 5.00, 3.92, 6.17,
+             2.50, 4.92, 6.25, 7.33, 4.50, 2.25, 6.33, 2.75,
+             6.08, 6.67, 4.25, 2.67, 6.00, 6.17, 6.17, 6.92, 6.17,
+             5.25, 6.83, 4.50, 3.75, 5.75, 5.42, 6.08, 5.83, 6.67, 3.67,
+             4.75, 3.00, 6.08, 6.67, 5.00, 5.58, 4.83, 6.17, 5.67, 6.50, 6.92]
+    D = np.zeros((12, 12))
+    D[np.tril_indices(12, -1)] = lower
+    D += D.T
+
+    names = ['BEL', 'BRA', 'CHI', 'CUB', 'EGY', 'FRA', 'IND', 'ISR', 'USA',
+             'USS', 'YUG', 'ZAI']
+
+    # Data from Figure 2
+    labels1 = [1, 1, 2, 2, 1, 1, 2, 1, 1, 2, 2, 1]
+    expected1 = {'USA': .43, 'BEL': .39, 'FRA': .35, 'ISR': .30, 'BRA': .22,
+                 'EGY': .20, 'ZAI': .19, 'CUB': .40, 'USS': .34, 'CHI': .33,
+                 'YUG': .26, 'IND': -.04}
+    score1 = .28
+
+    # Data from Figure 3
+    labels2 = [1, 2, 3, 3, 1, 1, 2, 1, 1, 3, 3, 2]
+    expected2 = {'USA': .47, 'FRA': .44, 'BEL': .42, 'ISR': .37, 'EGY': .02,
+                 'ZAI': .28, 'BRA': .25, 'IND': .17, 'CUB': .48, 'USS': .44,
+                 'YUG': .31, 'CHI': .31}
+    score2 = .33
+
+    for labels, expected, score in [(labels1, expected1, score1),
+                                    (labels2, expected2, score2)]:
+        expected = [expected[name] for name in names]
+        # we check to 2dp because that's what's in the paper
+        assert_almost_equal(expected, silhouette_samples(D, np.array(labels),
+                                                         metric='precomputed'),
+                            decimal=2)
+        assert_almost_equal(score, silhouette_score(D, np.array(labels),
+                                                    metric='precomputed'),
+                            decimal=2)
+
+
 def test_correct_labelsize():
     # Assert 1 < n_labels < n_samples
     dataset = datasets.load_iris()
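For reference, the quantity this test checks is the per-sample silhouette value s(i) = (b(i) - a(i)) / max(a(i), b(i)), where a(i) is the mean distance from sample i to the other members of its own cluster and b(i) is the smallest mean distance from i to the members of any other cluster. The sketch below is not part of the commit; the helper name `reference_silhouette` is made up for illustration, and it assumes every cluster has at least two members (true for both labellings above). It shows how the paper's values could be recomputed directly from the precomputed distance matrix D built in the test.

```python
import numpy as np


def reference_silhouette(D, labels):
    """Naive per-sample silhouette values from a precomputed distance matrix.

    Hypothetical helper for illustration only; assumes every cluster has
    at least two members.
    """
    labels = np.asarray(labels)
    s = np.empty(D.shape[0])
    for i in range(D.shape[0]):
        same = labels == labels[i]
        same[i] = False                      # exclude the sample itself
        a = D[i, same].mean()                # mean intra-cluster distance
        b = min(D[i, labels == k].mean()     # mean distance to the nearest other cluster
                for k in np.unique(labels) if k != labels[i])
        s[i] = (b - a) / max(a, b)
    return s
```

Rounding `reference_silhouette(D, labels1)` to two decimals should reproduce the Figure 2 values listed in the test, and agree with `silhouette_samples(D, np.array(labels1), metric='precomputed')`.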

0 commit comments

Comments
 (0)