
Commit 43eb87e

TomDLT authored and glemaitre committed
[MRG+1] EHN: Avoid power computation if exponent is 1 in TSNE (scikit-learn#10610)
1 parent b27e285 commit 43eb87e

File tree

3 files changed: +23 -14 lines changed


doc/whats_new/v0.20.rst

Lines changed: 3 additions & 2 deletions
@@ -29,7 +29,7 @@ random sampling procedures.
 - :class:`neural_network.BaseMultilayerPerceptron` (bug fix)
 - :class:`neural_network.MLPRegressor` (bug fix)
 - :class:`neural_network.MLPClassifier` (bug fix)
-
+
 Details are listed in the changelog below.
 
 (While we are trying to better inform users by providing this information, we
@@ -88,7 +88,8 @@ Decomposition, manifold learning and clustering
   :user:`Leland McInnes <lmcinnes>` and :user:`Steve Astels <sastels>`.
 
 - Speed improvements for both 'exact' and 'barnes_hut' methods in
-  :class:`manifold.TSNE`. :issue:`10593` by `Tom Dupre la Tour`_.
+  :class:`manifold.TSNE`. :issue:`10593` and :issue:`10610` by
+  `Tom Dupre la Tour`_.
 
 
 Enhancements

sklearn/manifold/_barnes_hut_tsne.pyx

Lines changed: 16 additions & 9 deletions
@@ -50,7 +50,7 @@ cdef float compute_gradient(float[:] val_P,
                             float[:, :] tot_force,
                             quad_tree._QuadTree qt,
                             float theta,
-                            float dof,
+                            int dof,
                             long start,
                             long stop,
                             bint compute_error) nogil:
@@ -106,7 +106,7 @@ cdef float compute_gradient_positive(float[:] val_P,
                                      np.int64_t[:] indptr,
                                      float* pos_f,
                                      int n_dimensions,
-                                     float dof,
+                                     int dof,
                                      double sum_Q,
                                      np.int64_t start,
                                      int verbose,
@@ -122,9 +122,11 @@ cdef float compute_gradient_positive(float[:] val_P,
         long n_samples = indptr.shape[0] - 1
         float dij, qij, pij
         float C = 0.0
-        float exponent = (dof + 1.0) / -2.0
+        float exponent = (dof + 1.0) / 2.0
+        float float_dof = (float) (dof)
         float[3] buff
         clock_t t1 = 0, t2 = 0
+        float dt
 
     if verbose > 10:
         t1 = clock()
@@ -140,7 +142,9 @@ cdef float compute_gradient_positive(float[:] val_P,
             for ax in range(n_dimensions):
                 buff[ax] = pos_reference[i, ax] - pos_reference[j, ax]
                 dij += buff[ax] * buff[ax]
-            qij = ((1.0 + dij / dof) ** exponent)
+            qij = float_dof / (float_dof + dij)
+            if dof != 1:  # i.e. exponent != 1
+                qij **= exponent
             dij = pij * qij
 
             # only compute the error when needed
@@ -160,7 +164,7 @@ cdef void compute_gradient_negative(float[:, :] pos_reference,
                                     float* neg_f,
                                     quad_tree._QuadTree qt,
                                     double* sum_Q,
-                                    float dof,
+                                    int dof,
                                     float theta,
                                     long start,
                                     long stop) nogil:
@@ -174,8 +178,9 @@ cdef void compute_gradient_negative(float[:, :] pos_reference,
        long dta = 0
        long dtb = 0
        long offset = n_dimensions + 2
-       long* l
        float size, dist2s, mult
+       float exponent = (dof + 1.0) / 2.0
+       float float_dof = (float) (dof)
        double qijZ
        float[1] iQ
        float[3] force, neg_force, pos
@@ -202,12 +207,13 @@ cdef void compute_gradient_negative(float[:, :] pos_reference,
        # for the digits dataset, walking the tree
        # is about 10-15x more expensive than the
        # following for loop
-       exponent = (dof + 1.0) / -2.0
        for j in range(idx // offset):
 
            dist2s = summary[j * offset + n_dimensions]
            size = summary[j * offset + n_dimensions + 1]
-           qijZ = (1.0 + dist2s / dof) ** exponent  # 1/(1+dist)
+           qijZ = float_dof / (float_dof + dist2s)  # 1/(1+dist)
+           if dof != 1:  # i.e. exponent != 1
+               qijZ **= exponent
            sum_Q[0] += size * qijZ   # size of the node * q
            mult = size * qijZ * qijZ
            for ax in range(n_dimensions):
@@ -236,13 +242,14 @@ def gradient(float[:] val_P,
             float theta,
             int n_dimensions,
             int verbose,
-            float dof = 1.0,
+            int dof=1,
             long skip_num_points=0,
             bint compute_error=1):
    # This function is designed to be called from external Python
    # it passes the 'forces' array by reference and fills thats array
    # up in-place
    cdef float C
+   cdef int n
    n = pos_output.shape[0]
    assert val_P.itemsize == 4
    assert pos_output.itemsize == 4
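
Both kernels patched above evaluate the Student-t affinity q_ij, which is proportional to (1 + d_ij / dof) ** (-(dof + 1) / 2). For the default dof = 1 this is simply 1 / (1 + d_ij), so the commit first computes float_dof / (float_dof + dij) and applies the power only when dof != 1. A minimal NumPy sketch of the same shortcut (the function name and the checks are illustrative, not part of the commit):

import numpy as np

def student_t_affinity(sq_dists, dof=1):
    # Unnormalized Student-t affinities; skip the power when dof == 1,
    # mirroring the "if dof != 1" branch added in _barnes_hut_tsne.pyx.
    float_dof = float(dof)
    q = float_dof / (float_dof + sq_dists)   # equals (1 + d/dof) ** -1
    if dof != 1:                             # exponent differs from 1 only here
        q **= (float_dof + 1.0) / 2.0
    return q

# The shortcut agrees with the direct formula:
d = np.random.RandomState(0).rand(5)
assert np.allclose(student_t_affinity(d, dof=1), (1.0 + d) ** -1.0)
assert np.allclose(student_t_affinity(d, dof=2), (1.0 + d / 2.0) ** -1.5)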

sklearn/manifold/t_sne.py

Lines changed: 4 additions & 3 deletions
@@ -7,6 +7,7 @@
 # modifications of the algorithm:
 # * Fast Optimization for t-SNE:
 #   http://cseweb.ucsd.edu/~lvdmaaten/workshops/nips2010/papers/vandermaaten.pdf
+from __future__ import division
 
 from time import time
 import numpy as np
@@ -131,7 +132,7 @@ def _kl_divergence(params, P, degrees_of_freedom, n_samples, n_components,
     P : array, shape (n_samples * (n_samples-1) / 2,)
         Condensed joint probability matrix.
 
-    degrees_of_freedom : float
+    degrees_of_freedom : int
         Degrees of freedom of the Student's-t distribution.
 
     n_samples : int
@@ -207,7 +208,7 @@ def _kl_divergence_bh(params, P, degrees_of_freedom, n_samples, n_components,
         Sparse approximate joint probability matrix, computed only for the
         k nearest-neighbors and symmetrized.
 
-    degrees_of_freedom : float
+    degrees_of_freedom : int
         Degrees of freedom of the Student's-t distribution.
 
     n_samples : int
@@ -782,7 +783,7 @@ def _fit(self, X, skip_num_points=0):
        # degrees_of_freedom = n_components - 1 comes from
        # "Learning a Parametric Embedding by Preserving Local Structure"
        # Laurens van der Maaten, 2009.
-       degrees_of_freedom = max(self.n_components - 1.0, 1)
+       degrees_of_freedom = max(self.n_components - 1, 1)
 
        return self._tsne(P, degrees_of_freedom, n_samples,
                          X_embedded=X_embedded,
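
In t_sne.py the functional change is that degrees_of_freedom is now kept as an integer (max(self.n_components - 1, 1)); the docstrings are updated to match, and from __future__ import division is added so that, under Python 2, dividing the integer degrees_of_freedom by another integer still yields a float. A toy illustration of the pitfall that import avoids (the exponent expression below is hypothetical, not copied from t_sne.py):

from __future__ import division

# With n_components = 3, degrees_of_freedom is now the integer 2.
degrees_of_freedom = max(3 - 1, 1)

# True division keeps this at 1.5; without the __future__ import, Python 2
# would floor it to 1 and silently change any computation built on it.
exponent = (degrees_of_freedom + 1) / 2
print(exponent)   # 1.5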
