Commit 2221387

Sebastian Saeger authored and jnothman committed
FIX n_iter_without_progress and min_grad_norm in TSNE
Adds tests for n_iter_without_progress and min_grad_norm
1 parent 2b0439c commit 2221387

2 files changed: +74 additions, -5 deletions


sklearn/manifold/t_sne.py

Lines changed: 9 additions & 5 deletions
@@ -546,15 +546,19 @@ class TSNE(BaseEstimator):
         least 200.
 
     n_iter_without_progress : int, optional (default: 30)
+        Only used if method='exact'
         Maximum number of iterations without progress before we abort the
-        optimization.
+        optimization. If method='barnes_hut' this parameter is fixed to
+        a value of 30 and cannot be changed.
 
         .. versionadded:: 0.17
            parameter *n_iter_without_progress* to control stopping criteria.
 
-    min_grad_norm : float, optional (default: 1E-7)
+    min_grad_norm : float, optional (default: 1e-7)
+        Only used if method='exact'
         If the gradient norm is below this threshold, the optimization will
-        be aborted.
+        be aborted. If method='barnes_hut' this parameter is fixed to a value
+        of 1e-3 and cannot be changed.
 
     metric : string or callable, optional
         The metric to use when calculating distance between instances in a
@@ -802,9 +806,9 @@ def _tsne(self, P, degrees_of_freedom, n_samples, random_state,
                                                self.n_components)
         params = X_embedded.ravel()
 
-        opt_args = {}
         opt_args = {"n_iter": 50, "momentum": 0.5, "it": 0,
                     "learning_rate": self.learning_rate,
+                    "n_iter_without_progress": self.n_iter_without_progress,
                     "verbose": self.verbose, "n_iter_check": 25,
                     "kwargs": dict(skip_num_points=skip_num_points)}
         if self.method == 'barnes_hut':
@@ -829,7 +833,7 @@ def _tsne(self, P, degrees_of_freedom, n_samples, random_state,
             opt_args['args'] = [P, degrees_of_freedom, n_samples,
                                 self.n_components]
             opt_args['min_error_diff'] = 0.0
-            opt_args['min_grad_norm'] = 0.0
+            opt_args['min_grad_norm'] = self.min_grad_norm
 
         # Early exaggeration
         P *= self.early_exaggeration
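Taken together, these hunks mean that with method='exact' the user-supplied stopping criteria are now actually forwarded to the gradient-descent loop: previously min_grad_norm was hard-coded to 0.0 and n_iter_without_progress was never passed into opt_args at all (the dead "opt_args = {}" assignment is also dropped). A minimal sketch of how the fixed parameters would be exercised after this commit; the data and parameter values here are arbitrary and purely illustrative:

import numpy as np
from sklearn.manifold import TSNE

# Arbitrary illustrative data: 100 points in 2 dimensions.
rng = np.random.RandomState(0)
X = rng.randn(100, 2)

# With method='exact', both stopping criteria are now honored:
# the optimization aborts after 5 iterations without progress,
# or as soon as the gradient norm drops below 1e-3.
tsne = TSNE(n_components=2, method='exact',
            n_iter_without_progress=5, min_grad_norm=1e-3,
            random_state=0, verbose=2)
X_embedded = tsne.fit_transform(X)
print(X_embedded.shape)  # (100, 2)

With method='barnes_hut', per the updated docstring, both values remain fixed (30 and 1e-3) and anything passed in is ignored.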

sklearn/manifold/tests/test_t_sne.py

Lines changed: 65 additions & 0 deletions
@@ -11,6 +11,7 @@
 from sklearn.utils.testing import assert_array_almost_equal
 from sklearn.utils.testing import assert_less
 from sklearn.utils.testing import assert_raises_regexp
+from sklearn.utils.testing import assert_in
 from sklearn.utils import check_random_state
 from sklearn.manifold.t_sne import _joint_probabilities
 from sklearn.manifold.t_sne import _joint_probabilities_nn
@@ -560,3 +561,67 @@ def test_index_offset():
     # Make sure translating between 1D and N-D indices are preserved
     assert_equal(_barnes_hut_tsne.test_index2offset(), 1)
     assert_equal(_barnes_hut_tsne.test_index_offset(), 1)
+
+
+def test_n_iter_without_progress():
+    # Make sure that the parameter n_iter_without_progress is used correctly
+    random_state = check_random_state(0)
+    X = random_state.randn(100, 2)
+    tsne = TSNE(n_iter_without_progress=2, verbose=2,
+                random_state=0, method='exact')
+
+    old_stdout = sys.stdout
+    sys.stdout = StringIO()
+    try:
+        tsne.fit_transform(X)
+    finally:
+        out = sys.stdout.getvalue()
+        sys.stdout.close()
+        sys.stdout = old_stdout
+
+    # The output needs to contain the value of n_iter_without_progress
+    assert_in("did not make any progress during the "
+              "last 2 episodes. Finished.", out)
+
+
+def test_min_grad_norm():
+    # Make sure that the parameter min_grad_norm is used correctly
+    random_state = check_random_state(0)
+    X = random_state.randn(100, 2)
+    min_grad_norm = 0.002
+    tsne = TSNE(min_grad_norm=min_grad_norm, verbose=2,
+                random_state=0, method='exact')
+
+    old_stdout = sys.stdout
+    sys.stdout = StringIO()
+    try:
+        tsne.fit_transform(X)
+    finally:
+        out = sys.stdout.getvalue()
+        sys.stdout.close()
+        sys.stdout = old_stdout
+
+    lines_out = out.split('\n')
+
+    # extract the gradient norm from the verbose output
+    gradient_norm_values = []
+    for line in lines_out:
+        # When the computation is Finished just an old gradient norm value
+        # is repeated that we do not need to store
+        if 'Finished' in line:
+            break
+
+        start_grad_norm = line.find('gradient norm')
+        if start_grad_norm >= 0:
+            line = line[start_grad_norm:]
+            line = line.replace('gradient norm = ', '')
+            gradient_norm_values.append(float(line))
+
+    # Compute how often the gradient norm is smaller than min_grad_norm
+    gradient_norm_values = np.array(gradient_norm_values)
+    n_smaller_gradient_norms = \
+        len(gradient_norm_values[gradient_norm_values <= min_grad_norm])
+
+    # The gradient norm can be smaller than min_grad_norm at most once,
+    # because in the moment it becomes smaller the optimization stops
+    assert_less_equal(n_smaller_gradient_norms, 1)
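Both new tests rely on the same pattern: TSNE with verbose=2 prints its stopping diagnostics to stdout, so the test redirects sys.stdout to a StringIO buffer, runs fit_transform, and asserts on the captured log. A self-contained sketch of that capture pattern, independent of TSNE; noisy_fit is a hypothetical stand-in, and io.StringIO is used here in place of whatever StringIO import the test module already has:

import sys
from io import StringIO

def noisy_fit():
    # Hypothetical stand-in for any routine that reports progress on
    # stdout, the way TSNE(verbose=2).fit_transform does.
    print("Iteration 25: gradient norm = 0.0015")
    print("Iteration 50: did not make any progress during the "
          "last 2 episodes. Finished.")

old_stdout = sys.stdout
sys.stdout = StringIO()
try:
    noisy_fit()            # everything printed lands in the buffer
finally:
    out = sys.stdout.getvalue()
    sys.stdout.close()
    sys.stdout = old_stdout  # always restore the real stdout

# The captured text can now be inspected, as the new tests do.
assert "Finished." in out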
