@@ -37,7 +37,7 @@ def _joint_probabilities(distances, desired_perplexity, verbose):
3737
3838 Parameters
3939 ----------
40- distances : array, shape (n_samples * (n_samples-1) / 2,)
40+ distances : ndarray of shape (n_samples * (n_samples-1) / 2,)
4141 Distances of samples are stored as condensed matrices, i.e.
4242 we omit the diagonal and duplicate entries and store everything
4343 in a one-dimensional array.
@@ -50,7 +50,7 @@ def _joint_probabilities(distances, desired_perplexity, verbose):
5050
5151 Returns
5252 -------
53- P : array, shape (n_samples * (n_samples-1) / 2,)
53+ P : ndarray of shape (n_samples * (n_samples-1) / 2,)
5454 Condensed joint probability matrix.
5555 """
5656 # Compute conditional probabilities such that they approximately match
@@ -74,9 +74,10 @@ def _joint_probabilities_nn(distances, desired_perplexity, verbose):
7474
7575 Parameters
7676 ----------
77- distances : CSR sparse matrix, shape (n_samples, n_samples)
77+ distances : sparse matrix of shape (n_samples, n_samples)
7878 Distances of samples to its n_neighbors nearest neighbors. All other
7979 distances are left to zero (and are not materialized in memory).
80+ Matrix should be of CSR format.
8081
8182 desired_perplexity : float
8283 Desired perplexity of the joint probability distributions.
@@ -86,8 +87,9 @@ def _joint_probabilities_nn(distances, desired_perplexity, verbose):
8687
8788 Returns
8889 -------
89- P : csr sparse matrix, shape (n_samples, n_samples)
90- Condensed joint probability matrix with only nearest neighbors.
90+ P : sparse matrix of shape (n_samples, n_samples)
91+ Condensed joint probability matrix with only nearest neighbors. Matrix
92+ will be of CSR format.
9193 """
9294 t0 = time()
9395 # Compute conditional probabilities such that they approximately match
@@ -126,10 +128,10 @@ def _kl_divergence(params, P, degrees_of_freedom, n_samples, n_components,
126128
127129 Parameters
128130 ----------
129- params : array, shape (n_params,)
131+ params : ndarray of shape (n_params,)
130132 Unraveled embedding.
131133
132- P : array, shape (n_samples * (n_samples-1) / 2,)
134+ P : ndarray of shape (n_samples * (n_samples-1) / 2,)
133135 Condensed joint probability matrix.
134136
135137 degrees_of_freedom : int
@@ -141,20 +143,20 @@ def _kl_divergence(params, P, degrees_of_freedom, n_samples, n_components,
141143 n_components : int
142144 Dimension of the embedded space.
143145
144- skip_num_points : int (optional , default:0)
146+ skip_num_points : int, default=0
145147 This does not compute the gradient for points with indices below
146148 `skip_num_points`. This is useful when computing transforms of new
147149 data where you'd like to keep the old data fixed.
148150
149- compute_error: bool (optional , default: True)
151+ compute_error : bool, default=True
150152 If False, the kl_divergence is not computed and returns NaN.
151153
152154 Returns
153155 -------
154156 kl_divergence : float
155157 Kullback-Leibler divergence of p_ij and q_ij.
156158
157- grad : array, shape (n_params,)
159+ grad : ndarray of shape (n_params,)
158160 Unraveled gradient of the Kullback-Leibler divergence with respect to
159161 the embedding.
160162 """
@@ -197,16 +199,16 @@ def _kl_divergence_bh(params, P, degrees_of_freedom, n_samples, n_components,
197199 """t-SNE objective function: KL divergence of p_ijs and q_ijs.
198200
199201 Uses Barnes-Hut tree methods to calculate the gradient that
200- runs in O(NlogN) instead of O(N^2)
202+ runs in O(NlogN) instead of O(N^2).
201203
202204 Parameters
203205 ----------
204- params : array, shape (n_params,)
206+ params : ndarray of shape (n_params,)
205207 Unraveled embedding.
206208
207- P : csr sparse matrix, shape (n_samples, n_sample)
209+ P : sparse matrix of shape (n_samples, n_samples)
208210 Sparse approximate joint probability matrix, computed only for the
209- k nearest-neighbors and symmetrized.
211+ k nearest-neighbors and symmetrized. Matrix should be of CSR format.
210212
211213 degrees_of_freedom : int
212214 Degrees of freedom of the Student's-t distribution.
@@ -217,7 +219,7 @@ def _kl_divergence_bh(params, P, degrees_of_freedom, n_samples, n_components,
217219 n_components : int
218220 Dimension of the embedded space.
219221
220- angle : float ( default: 0.5)
222+ angle : float, default=0.5
221223 This is the trade-off between speed and accuracy for Barnes-Hut T-SNE.
222224 'angle' is the angular size (referred to as theta in [3]) of a distant
223225 node as measured from a point. If this size is below 'angle' then it is
@@ -226,18 +228,18 @@ def _kl_divergence_bh(params, P, degrees_of_freedom, n_samples, n_components,
226228 in the range of 0.2 - 0.8. Angle less than 0.2 has quickly increasing
227229 computation time and angle greater 0.8 has quickly increasing error.
228230
229- skip_num_points : int (optional , default:0)
231+ skip_num_points : int, default=0
230232 This does not compute the gradient for points with indices below
231233 `skip_num_points`. This is useful when computing transforms of new
232234 data where you'd like to keep the old data fixed.
233235
234- verbose : int
236+ verbose : int, default=False
235237 Verbosity level.
236238
237- compute_error: bool (optional , default: True)
239+ compute_error : bool, default=True
238240 If False, the kl_divergence is not computed and returns NaN.
239241
240- num_threads : int (optional , default:1)
242+ num_threads : int, default=1
241243 Number of threads used to compute the gradient. This is set here to
242244 avoid calling _openmp_effective_n_threads for each gradient step.
243245
@@ -246,7 +248,7 @@ def _kl_divergence_bh(params, P, degrees_of_freedom, n_samples, n_components,
246248 kl_divergence : float
247249 Kullback-Leibler divergence of p_ij and q_ij.
248250
249- grad : array, shape (n_params,)
251+ grad : ndarray of shape (n_params,)
250252 Unraveled gradient of the Kullback-Leibler divergence with respect to
251253 the embedding.
252254 """
@@ -278,13 +280,13 @@ def _gradient_descent(objective, p0, it, n_iter,
278280
279281 Parameters
280282 ----------
281- objective : function or callable
283+ objective : callable
282284 Should return a tuple of cost and gradient for a given parameter
283285 vector. When expensive to compute, the cost can optionally
284286 be None and can be computed every n_iter_check steps using
285287 the objective_error function.
286288
287- p0 : array-like, shape (n_params,)
289+ p0 : array-like of shape (n_params,)
288290 Initial parameter vector.
289291
290292 it : int
@@ -302,7 +304,7 @@ def _gradient_descent(objective, p0, it, n_iter,
302304 Maximum number of iterations without progress before we abort the
303305 optimization.
304306
305- momentum : float, within (0.0, 1.0), default=0.8
307+ momentum : float within (0.0, 1.0), default=0.8
306308 The momentum generates a weight for previous gradients that decays
307309 exponentially.
308310
@@ -331,7 +333,7 @@ def _gradient_descent(objective, p0, it, n_iter,
331333
332334 Returns
333335 -------
334- p : array, shape (n_params,)
336+ p : ndarray of shape (n_params,)
335337 Optimum parameters.
336338
337339 error : float
@@ -424,17 +426,17 @@ def trustworthiness(X, X_embedded, *, n_neighbors=5, metric='euclidean'):
424426
425427 Parameters
426428 ----------
427- X : array, shape (n_samples, n_features) or (n_samples, n_samples)
429+ X : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)
428430 If the metric is 'precomputed' X must be a square distance
429431 matrix. Otherwise it contains a sample per row.
430432
431- X_embedded : array, shape (n_samples, n_components)
433+ X_embedded : ndarray of shape (n_samples, n_components)
432434 Embedding of the training data in low-dimensional space.
433435
434436 n_neighbors : int, default=5
435437 Number of neighbors k that will be considered.
436438
437- metric : string, or callable, default='euclidean'
439+ metric : str or callable, default='euclidean'
438440 Which metric to use for computing pairwise distances between samples
439441 from the original input space. If metric is 'precomputed', X must be a
440442 matrix of pairwise distances or squared distances. Otherwise, see the
@@ -499,7 +501,7 @@ class TSNE(BaseEstimator):
499501 n_components : int, default=2
500502 Dimension of the embedded space.
501503
502- perplexity : float, default=30
504+ perplexity : float, default=30.0
503505 The perplexity is related to the number of nearest neighbors that
504506 is used in other manifold learning algorithms. Larger datasets
505507 usually require a larger perplexity. Consider selecting a value
@@ -540,7 +542,7 @@ class TSNE(BaseEstimator):
540542 If the gradient norm is below this threshold, the optimization will
541543 be stopped.
542544
543- metric : string or callable, default='euclidean'
545+ metric : str or callable, default='euclidean'
544546 The metric to use when calculating distance between instances in a
545547 feature array. If metric is a string, it must be one of the options
546548 allowed by scipy.spatial.distance.pdist for its metric parameter, or
@@ -552,7 +554,8 @@ class TSNE(BaseEstimator):
552554 the distance between them. The default is "euclidean" which is
553555 interpreted as squared euclidean distance.
554556
555- init : string or numpy array, default="random"
557+ init : {'random', 'pca'} or ndarray of shape (n_samples, n_components), \
558+ default='random'
556559 Initialization of embedding. Possible options are 'random', 'pca',
557560 and a numpy array of shape (n_samples, n_components).
558561 PCA initialization cannot be used with precomputed distances and is
@@ -561,13 +564,13 @@ class TSNE(BaseEstimator):
561564 verbose : int, default=0
562565 Verbosity level.
563566
564- random_state : int, RandomState instance, default=None
567+ random_state : int or RandomState instance, default=None
565568 Determines the random number generator. Pass an int for reproducible
566569 results across multiple function calls. Note that different
567570 initializations might result in different local minima of the cost
568571 function. See :term:`Glossary <random_state>`.
569572
570- method : string , default='barnes_hut'
573+ method : str, default='barnes_hut'
571574 By default the gradient calculation algorithm uses Barnes-Hut
572575 approximation running in O(NlogN) time. method='exact'
573576 will run on the slower, but exact, algorithm in O(N^2) time. The
@@ -588,7 +591,7 @@ class TSNE(BaseEstimator):
588591 in the range of 0.2 - 0.8. Angle less than 0.2 has quickly increasing
589592 computation time and angle greater 0.8 has quickly increasing error.
590593
591- n_jobs : int or None , default=None
594+ n_jobs : int, default=None
592595 The number of parallel jobs to run for neighbors search. This parameter
593596 has no impact when ``metric="precomputed"`` or
594597 (``metric="euclidean"`` and ``method="exact"``).
@@ -598,7 +601,7 @@ class TSNE(BaseEstimator):
598601
599602 .. versionadded:: 0.22
600603
601- square_distances : { True, 'legacy'} , default='legacy'
604+ square_distances : True or 'legacy', default='legacy'
602605 Whether TSNE should square the distance values. ``'legacy'`` means
603606 that distance values are squared only when ``metric="euclidean"``.
604607 ``True`` means that distance values are squared for all metrics.
@@ -613,7 +616,7 @@ class TSNE(BaseEstimator):
613616
614617 Attributes
615618 ----------
616- embedding_ : array-like, shape (n_samples, n_components)
619+ embedding_ : array-like of shape (n_samples, n_components)
617620 Stores the embedding vectors.
618621
619622 kl_divergence_ : float
@@ -910,7 +913,7 @@ def fit_transform(self, X, y=None):
910913
911914 Parameters
912915 ----------
913- X : array, shape (n_samples, n_features) or (n_samples, n_samples)
916+ X : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)
914917 If the metric is 'precomputed' X must be a square distance
915918 matrix. Otherwise it contains a sample per row. If the method
916919 is 'exact', X may be a sparse matrix of type 'csr', 'csc'
@@ -921,7 +924,7 @@ def fit_transform(self, X, y=None):
921924
922925 Returns
923926 -------
924- X_new : array, shape (n_samples, n_components)
927+ X_new : ndarray of shape (n_samples, n_components)
925928 Embedding of the training data in low-dimensional space.
926929 """
927930 embedding = self._fit(X)
@@ -933,7 +936,7 @@ def fit(self, X, y=None):
933936
934937 Parameters
935938 ----------
936- X : array, shape (n_samples, n_features) or (n_samples, n_samples)
939+ X : ndarray of shape (n_samples, n_features) or (n_samples, n_samples)
937940 If the metric is 'precomputed' X must be a square distance
938941 matrix. Otherwise it contains a sample per row. If the method
939942 is 'exact', X may be a sparse matrix of type 'csr', 'csc'
0 commit comments