Skip to content

Commit 1ac9d68

Browse files
authored
DOC Update TSNE docstring (scikit-learn#18120)
1 parent e400fa8 commit 1ac9d68

File tree

1 file changed

+41
-38
lines changed

1 file changed

+41
-38
lines changed

sklearn/manifold/_t_sne.py

Lines changed: 41 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def _joint_probabilities(distances, desired_perplexity, verbose):
3737
3838
Parameters
3939
----------
40-
distances : array, shape (n_samples * (n_samples-1) / 2,)
40+
distances : ndarray of shape (n_samples * (n_samples-1) / 2,)
4141
Distances of samples are stored as condensed matrices, i.e.
4242
we omit the diagonal and duplicate entries and store everything
4343
in a one-dimensional array.
@@ -50,7 +50,7 @@ def _joint_probabilities(distances, desired_perplexity, verbose):
5050
5151
Returns
5252
-------
53-
P : array, shape (n_samples * (n_samples-1) / 2,)
53+
P : ndarray of shape (n_samples * (n_samples-1) / 2,)
5454
Condensed joint probability matrix.
5555
"""
5656
# Compute conditional probabilities such that they approximately match
@@ -74,9 +74,10 @@ def _joint_probabilities_nn(distances, desired_perplexity, verbose):
7474
7575
Parameters
7676
----------
77-
distances : CSR sparse matrix, shape (n_samples, n_samples)
77+
distances : sparse matrix of shape (n_samples, n_samples)
7878
Distances of samples to its n_neighbors nearest neighbors. All other
7979
distances are left to zero (and are not materialized in memory).
80+
Matrix should be of CSR format.
8081
8182
desired_perplexity : float
8283
Desired perplexity of the joint probability distributions.
@@ -86,8 +87,9 @@ def _joint_probabilities_nn(distances, desired_perplexity, verbose):
8687
8788
Returns
8889
-------
89-
P : csr sparse matrix, shape (n_samples, n_samples)
90-
Condensed joint probability matrix with only nearest neighbors.
90+
P : sparse matrix of shape (n_samples, n_samples)
91+
Condensed joint probability matrix with only nearest neighbors. Matrix
92+
will be of CSR format.
9193
"""
9294
t0 = time()
9395
# Compute conditional probabilities such that they approximately match
@@ -126,10 +128,10 @@ def _kl_divergence(params, P, degrees_of_freedom, n_samples, n_components,
126128
127129
Parameters
128130
----------
129-
params : array, shape (n_params,)
131+
params : ndarray of shape (n_params,)
130132
Unraveled embedding.
131133
132-
P : array, shape (n_samples * (n_samples-1) / 2,)
134+
P : ndarray of shape (n_samples * (n_samples-1) / 2,)
133135
Condensed joint probability matrix.
134136
135137
degrees_of_freedom : int
@@ -141,20 +143,20 @@ def _kl_divergence(params, P, degrees_of_freedom, n_samples, n_components,
141143
n_components : int
142144
Dimension of the embedded space.
143145
144-
skip_num_points : int (optional, default:0)
146+
skip_num_points : int, default=0
145147
This does not compute the gradient for points with indices below
146148
`skip_num_points`. This is useful when computing transforms of new
147149
data where you'd like to keep the old data fixed.
148150
149-
compute_error: bool (optional, default:True)
151+
compute_error : bool, default=True
150152
If False, the kl_divergence is not computed and returns NaN.
151153
152154
Returns
153155
-------
154156
kl_divergence : float
155157
Kullback-Leibler divergence of p_ij and q_ij.
156158
157-
grad : array, shape (n_params,)
159+
grad : ndarray of shape (n_params,)
158160
Unraveled gradient of the Kullback-Leibler divergence with respect to
159161
the embedding.
160162
"""
@@ -197,16 +199,16 @@ def _kl_divergence_bh(params, P, degrees_of_freedom, n_samples, n_components,
197199
"""t-SNE objective function: KL divergence of p_ijs and q_ijs.
198200
199201
Uses Barnes-Hut tree methods to calculate the gradient that
200-
runs in O(NlogN) instead of O(N^2)
202+
runs in O(NlogN) instead of O(N^2).
201203
202204
Parameters
203205
----------
204-
params : array, shape (n_params,)
206+
params : ndarray of shape (n_params,)
205207
Unraveled embedding.
206208
207-
P : csr sparse matrix, shape (n_samples, n_sample)
209+
P : sparse matrix of shape (n_samples, n_samples)
208210
Sparse approximate joint probability matrix, computed only for the
209-
k nearest-neighbors and symmetrized.
211+
k nearest-neighbors and symmetrized. Matrix should be of CSR format.
210212
211213
degrees_of_freedom : int
212214
Degrees of freedom of the Student's-t distribution.
@@ -217,7 +219,7 @@ def _kl_divergence_bh(params, P, degrees_of_freedom, n_samples, n_components,
217219
n_components : int
218220
Dimension of the embedded space.
219221
220-
angle : float (default: 0.5)
222+
angle : float, default=0.5
221223
This is the trade-off between speed and accuracy for Barnes-Hut T-SNE.
222224
'angle' is the angular size (referred to as theta in [3]) of a distant
223225
node as measured from a point. If this size is below 'angle' then it is
@@ -226,18 +228,18 @@ def _kl_divergence_bh(params, P, degrees_of_freedom, n_samples, n_components,
226228
in the range of 0.2 - 0.8. Angle less than 0.2 has quickly increasing
227229
computation time and angle greater 0.8 has quickly increasing error.
228230
229-
skip_num_points : int (optional, default:0)
231+
skip_num_points : int, default=0
230232
This does not compute the gradient for points with indices below
231233
`skip_num_points`. This is useful when computing transforms of new
232234
data where you'd like to keep the old data fixed.
233235
234-
verbose : int
236+
verbose : int, default=False
235237
Verbosity level.
236238
237-
compute_error: bool (optional, default:True)
239+
compute_error : bool, default=True
238240
If False, the kl_divergence is not computed and returns NaN.
239241
240-
num_threads : int (optional, default:1)
242+
num_threads : int, default=1
241243
Number of threads used to compute the gradient. This is set here to
242244
avoid calling _openmp_effective_n_threads for each gradient step.
243245
@@ -246,7 +248,7 @@ def _kl_divergence_bh(params, P, degrees_of_freedom, n_samples, n_components,
246248
kl_divergence : float
247249
Kullback-Leibler divergence of p_ij and q_ij.
248250
249-
grad : array, shape (n_params,)
251+
grad : ndarray of shape (n_params,)
250252
Unraveled gradient of the Kullback-Leibler divergence with respect to
251253
the embedding.
252254
"""
@@ -278,13 +280,13 @@ def _gradient_descent(objective, p0, it, n_iter,
278280
279281
Parameters
280282
----------
281-
objective : function or callable
283+
objective : callable
282284
Should return a tuple of cost and gradient for a given parameter
283285
vector. When expensive to compute, the cost can optionally
284286
be None and can be computed every n_iter_check steps using
285287
the objective_error function.
286288
287-
p0 : array-like, shape (n_params,)
289+
p0 : array-like of shape (n_params,)
288290
Initial parameter vector.
289291
290292
it : int
@@ -302,7 +304,7 @@ def _gradient_descent(objective, p0, it, n_iter,
302304
Maximum number of iterations without progress before we abort the
303305
optimization.
304306
305-
momentum : float, within (0.0, 1.0), default=0.8
307+
momentum : float within (0.0, 1.0), default=0.8
306308
The momentum generates a weight for previous gradients that decays
307309
exponentially.
308310
@@ -331,7 +333,7 @@ def _gradient_descent(objective, p0, it, n_iter,
331333
332334
Returns
333335
-------
334-
p : array, shape (n_params,)
336+
p : ndarray of shape (n_params,)
335337
Optimum parameters.
336338
337339
error : float
@@ -424,17 +426,17 @@ def trustworthiness(X, X_embedded, *, n_neighbors=5, metric='euclidean'):
424426
425427
Parameters
426428
----------
427-
X : array, shape (n_samples, n_features) or (n_samples, n_samples)
429+
X : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)
428430
If the metric is 'precomputed' X must be a square distance
429431
matrix. Otherwise it contains a sample per row.
430432
431-
X_embedded : array, shape (n_samples, n_components)
433+
X_embedded : ndarray of shape (n_samples, n_components)
432434
Embedding of the training data in low-dimensional space.
433435
434436
n_neighbors : int, default=5
435437
Number of neighbors k that will be considered.
436438
437-
metric : string, or callable, default='euclidean'
439+
metric : str or callable, default='euclidean'
438440
Which metric to use for computing pairwise distances between samples
439441
from the original input space. If metric is 'precomputed', X must be a
440442
matrix of pairwise distances or squared distances. Otherwise, see the
@@ -499,7 +501,7 @@ class TSNE(BaseEstimator):
499501
n_components : int, default=2
500502
Dimension of the embedded space.
501503
502-
perplexity : float, default=30
504+
perplexity : float, default=30.0
503505
The perplexity is related to the number of nearest neighbors that
504506
is used in other manifold learning algorithms. Larger datasets
505507
usually require a larger perplexity. Consider selecting a value
@@ -540,7 +542,7 @@ class TSNE(BaseEstimator):
540542
If the gradient norm is below this threshold, the optimization will
541543
be stopped.
542544
543-
metric : string or callable, default='euclidean'
545+
metric : str or callable, default='euclidean'
544546
The metric to use when calculating distance between instances in a
545547
feature array. If metric is a string, it must be one of the options
546548
allowed by scipy.spatial.distance.pdist for its metric parameter, or
@@ -552,7 +554,8 @@ class TSNE(BaseEstimator):
552554
the distance between them. The default is "euclidean" which is
553555
interpreted as squared euclidean distance.
554556
555-
init : string or numpy array, default="random"
557+
init : {'random', 'pca'} or ndarray of shape (n_samples, n_components), \
558+
default='random'
556559
Initialization of embedding. Possible options are 'random', 'pca',
557560
and a numpy array of shape (n_samples, n_components).
558561
PCA initialization cannot be used with precomputed distances and is
@@ -561,13 +564,13 @@ class TSNE(BaseEstimator):
561564
verbose : int, default=0
562565
Verbosity level.
563566
564-
random_state : int, RandomState instance, default=None
567+
random_state : int or RandomState instance, default=None
565568
Determines the random number generator. Pass an int for reproducible
566569
results across multiple function calls. Note that different
567570
initializations might result in different local minima of the cost
568571
function. See :term:`Glossary <random_state>`.
569572
570-
method : string, default='barnes_hut'
573+
method : str, default='barnes_hut'
571574
By default the gradient calculation algorithm uses Barnes-Hut
572575
approximation running in O(NlogN) time. method='exact'
573576
will run on the slower, but exact, algorithm in O(N^2) time. The
@@ -588,7 +591,7 @@ class TSNE(BaseEstimator):
588591
in the range of 0.2 - 0.8. Angle less than 0.2 has quickly increasing
589592
computation time and angle greater 0.8 has quickly increasing error.
590593
591-
n_jobs : int or None, default=None
594+
n_jobs : int, default=None
592595
The number of parallel jobs to run for neighbors search. This parameter
593596
has no impact when ``metric="precomputed"`` or
594597
(``metric="euclidean"`` and ``method="exact"``).
@@ -598,7 +601,7 @@ class TSNE(BaseEstimator):
598601
599602
.. versionadded:: 0.22
600603
601-
square_distances : {True, 'legacy'}, default='legacy'
604+
square_distances : True or 'legacy', default='legacy'
602605
Whether TSNE should square the distance values. ``'legacy'`` means
603606
that distance values are squared only when ``metric="euclidean"``.
604607
``True`` means that distance values are squared for all metrics.
@@ -613,7 +616,7 @@ class TSNE(BaseEstimator):
613616
614617
Attributes
615618
----------
616-
embedding_ : array-like, shape (n_samples, n_components)
619+
embedding_ : array-like of shape (n_samples, n_components)
617620
Stores the embedding vectors.
618621
619622
kl_divergence_ : float
@@ -910,7 +913,7 @@ def fit_transform(self, X, y=None):
910913
911914
Parameters
912915
----------
913-
X : array, shape (n_samples, n_features) or (n_samples, n_samples)
916+
X : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)
914917
If the metric is 'precomputed' X must be a square distance
915918
matrix. Otherwise it contains a sample per row. If the method
916919
is 'exact', X may be a sparse matrix of type 'csr', 'csc'
@@ -921,7 +924,7 @@ def fit_transform(self, X, y=None):
921924
922925
Returns
923926
-------
924-
X_new : array, shape (n_samples, n_components)
927+
X_new : ndarray of shape (n_samples, n_components)
925928
Embedding of the training data in low-dimensional space.
926929
"""
927930
embedding = self._fit(X)
@@ -933,7 +936,7 @@ def fit(self, X, y=None):
933936
934937
Parameters
935938
----------
936-
X : array, shape (n_samples, n_features) or (n_samples, n_samples)
939+
X : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)
937940
If the metric is 'precomputed' X must be a square distance
938941
matrix. Otherwise it contains a sample per row. If the method
939942
is 'exact', X may be a sparse matrix of type 'csr', 'csc'

0 commit comments

Comments
 (0)