Commit 5008c28

DOC Add None as option for random_state and fix docstring in kmeans module (scikit-learn#18335)
1 parent e325f16 commit 5008c28

sklearn/cluster/_kmeans.py

Lines changed: 28 additions & 32 deletions
@@ -1,4 +1,4 @@
-"""K-means clustering"""
+"""K-means clustering."""

 # Authors: Gael Varoquaux <[email protected]>
 #          Thomas Rueckstiess <[email protected]>
@@ -55,7 +55,7 @@ def _k_init(X, n_clusters, x_squared_norms, random_state, n_local_trials=None):
         should be double precision (dtype=np.float64).

     n_clusters : int
-        The number of seeds to choose
+        The number of seeds to choose.

     x_squared_norms : ndarray of shape (n_samples,)
         Squared Euclidean norm of each data point.
@@ -145,7 +145,7 @@ def _k_init(X, n_clusters, x_squared_norms, random_state, n_local_trials=None):
 # K-means batch estimation by EM (expectation maximization)

 def _tolerance(X, tol):
-    """Return a tolerance which is independent of the dataset"""
+    """Return a tolerance which is independent of the dataset."""
     if tol == 0:
         return 0
     if sp.issparse(X):
@@ -166,7 +166,7 @@ def k_means(X, n_clusters, *, sample_weight=None, init='k-means++',

     Parameters
     ----------
-    X : {array-like, sparse} matrix of shape (n_samples, n_features)
+    X : {array-like, sparse matrix} of shape (n_samples, n_features)
         The observations to cluster. It must be noted that the data
         will be converted to C ordering, which will cause a memory copy
         if the given data is not C-contiguous.
@@ -177,7 +177,7 @@ def k_means(X, n_clusters, *, sample_weight=None, init='k-means++',

     sample_weight : array-like of shape (n_samples,), default=None
         The weights for each observation in X. If None, all observations
-        are assigned equal weight
+        are assigned equal weight.

     init : {'k-means++', 'random'}, callable or array-like of shape \
             (n_clusters, n_features), default='k-means++'
@@ -227,7 +227,7 @@ def k_means(X, n_clusters, *, sample_weight=None, init='k-means++',
         in the cluster centers of two consecutive iterations to declare
         convergence.

-    random_state : int, RandomState instance, default=None
+    random_state : int, RandomState instance or None, default=None
         Determines random number generation for centroid initialization. Use
         an int to make the randomness deterministic.
         See :term:`Glossary <random_state>`.
@@ -315,7 +315,7 @@ def _kmeans_single_elkan(X, sample_weight, centers_init, max_iter=300,
         Maximum number of iterations of the k-means algorithm to run.

     verbose : bool, default=False
-        Verbosity mode
+        Verbosity mode.

     x_squared_norms : array-like, default=None
         Precomputed x_squared_norms.
@@ -659,7 +659,7 @@ class KMeans(TransformerMixin, ClusterMixin, BaseEstimator):
     verbose : int, default=0
         Verbosity mode.

-    random_state : int, RandomState instance, default=None
+    random_state : int, RandomState instance or None, default=None
         Determines random number generation for centroid initialization. Use
         an int to make the randomness deterministic.
         See :term:`Glossary <random_state>`.
@@ -843,7 +843,7 @@ def _check_params(self, X):
             self._n_init = 1

     def _validate_center_shape(self, X, centers):
-        """Check if centers is compatible with X and n_clusters"""
+        """Check if centers is compatible with X and n_clusters."""
         if centers.shape[0] != self.n_clusters:
             raise ValueError(
                 f"The shape of the initial centers {centers.shape} does not "
@@ -867,7 +867,7 @@ def _check_test_data(self, X):

     def _init_centroids(self, X, x_squared_norms, init, random_state,
                         init_size=None):
-        """Compute the initial centroids
+        """Compute the initial centroids.

         Parameters
         ----------
@@ -1066,7 +1066,7 @@ def fit_transform(self, X, y=None, sample_weight=None):

         Returns
         -------
-        X_new : array of shape (n_samples, n_clusters)
+        X_new : ndarray of shape (n_samples, n_clusters)
             X transformed in the new space.
         """
         # Currently, this just skips a copy of the data if it is not in
@@ -1079,7 +1079,7 @@ def transform(self, X):
         """Transform X to a cluster-distance space.

         In the new space, each dimension is the distance to the cluster
-        centers. Note that even if X is sparse, the array returned by
+        centers. Note that even if X is sparse, the array returned by
         `transform` will typically be dense.

         Parameters
@@ -1098,7 +1098,7 @@ def transform(self, X):
         return self._transform(X)

     def _transform(self, X):
-        """guts of transform method; no input validation"""
+        """Guts of transform method; no input validation."""
         return euclidean_distances(X, self.cluster_centers_)

     def predict(self, X, sample_weight=None):
@@ -1191,26 +1191,28 @@ def _mini_batch_step(X, sample_weight, x_squared_norms, centers, weight_sums,
     centers : ndarray of shape (k, n_features)
         The cluster centers. This array is MODIFIED IN PLACE

-    counts : ndarray of shape (k,)
-        The vector in which we keep track of the numbers of elements in a
-        cluster. This array is MODIFIED IN PLACE
+    old_center_buffer : int
+        Copy of old centers for monitoring convergence.
+
+    compute_squared_diff : bool
+        If set to False, the squared diff computation is skipped.

     distances : ndarray of shape (n_samples,), dtype=float, default=None
         If not None, should be a pre-allocated array that will be used to store
         the distances of each sample to its closest center.
         May not be None when random_reassign is True.

-    random_state : int, RandomState instance, default=None
+    random_reassign : bool, default=False
+        If True, centers with very low counts are randomly reassigned
+        to observations.
+
+    random_state : int, RandomState instance or None, default=None
         Determines random number generation for centroid initialization and to
         pick new clusters amongst observations with uniform probability. Use
         an int to make the randomness deterministic.
         See :term:`Glossary <random_state>`.

-    random_reassign : bool, default=None
-        If True, centers with very low counts are randomly reassigned
-        to observations.
-
-    reassignment_ratio : float, default=None
+    reassignment_ratio : float, default=.01
         Control the fraction of the maximum number of counts for a
         center to be reassigned. A higher value means that low count
         centers are more likely to be reassigned, which means that the
@@ -1220,12 +1222,6 @@ def _mini_batch_step(X, sample_weight, x_squared_norms, centers, weight_sums,
     verbose : bool, default=False
         Controls the verbosity.

-    compute_squared_diff : bool
-        If set to False, the squared diff computation is skipped.
-
-    old_center_buffer : int
-        Copy of old centers for monitoring convergence.
-
     Returns
     -------
     inertia : float
@@ -1315,7 +1311,7 @@ def _mini_batch_step(X, sample_weight, x_squared_norms, centers, weight_sums,
 def _mini_batch_convergence(model, iteration_idx, n_iter, tol,
                             n_samples, centers_squared_diff, batch_inertia,
                             context, verbose=0):
-    """Helper function to encapsulate the early stopping logic"""
+    """Helper function to encapsulate the early stopping logic."""
     # Normalize inertia to be able to compare values when
     # batch_size changes
     batch_inertia /= model.batch_size
@@ -1422,7 +1418,7 @@ class MiniBatchKMeans(KMeans):
         Compute label assignment and inertia for the complete dataset
         once the minibatch optimization has converged in fit.

-    random_state : int, RandomState instance, default=None
+    random_state : int, RandomState instance or None, default=None
         Determines random number generation for centroid initialization and
         random reassignment. Use an int to make the randomness deterministic.
         See :term:`Glossary <random_state>`.
@@ -1469,7 +1465,7 @@ class MiniBatchKMeans(KMeans):
     ----------

     cluster_centers_ : ndarray of shape (n_clusters, n_features)
-        Coordinates of cluster centers
+        Coordinates of cluster centers.

     labels_ : int
         Labels of each point (if compute_labels is set to True).
@@ -1755,7 +1751,7 @@ def fit(self, X, y=None, sample_weight=None):
     def _labels_inertia_minibatch(self, X, sample_weight):
         """Compute labels and inertia using mini batches.

-        This is slightly slower than doing everything at once but preventes
+        This is slightly slower than doing everything at once but prevents
         memory errors / segfaults.

         Parameters
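
For context, here is a minimal sketch of the behaviour the updated random_state docstrings describe. It is not part of this commit; the toy data and variable names are purely illustrative.

import numpy as np
from sklearn.cluster import KMeans

# Illustrative toy data; any (n_samples, n_features) array would do.
rng = np.random.RandomState(0)
X = rng.rand(100, 2)

# An int makes centroid initialization deterministic, so repeated fits
# on the same data yield identical cluster centers.
km_a = KMeans(n_clusters=3, random_state=42).fit(X)
km_b = KMeans(n_clusters=3, random_state=42).fit(X)
assert np.allclose(km_a.cluster_centers_, km_b.cluster_centers_)

# random_state=None (the default) draws from the global RNG instead,
# so results may vary between runs.
km_c = KMeans(n_clusters=3, random_state=None).fit(X)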

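Similarly, a small sketch of the cluster-distance space mentioned in the transform/fit_transform docstrings above, using a sparse input to show that the returned array is still dense. Again, this is illustrative only and assumes scikit-learn and SciPy are installed.

import numpy as np
from scipy import sparse
from sklearn.cluster import KMeans

# Illustrative sparse input; KMeans accepts CSR matrices.
X = sparse.random(50, 4, density=0.3, random_state=0, format="csr")

km = KMeans(n_clusters=3, random_state=0).fit(X)

# transform maps each sample into cluster-distance space: one column per
# cluster, holding the Euclidean distance to that cluster center.
D = km.transform(X)
print(D.shape)   # (50, 3)
print(type(D))   # numpy.ndarray -- dense even though X is sparse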

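Finally, a hypothetical usage sketch for the MiniBatchKMeans parameters documented above: random_state drives both centroid initialization and random reassignment, and reassignment_ratio defaults to 0.01. Data and names are illustrative.

import numpy as np
from sklearn.cluster import MiniBatchKMeans

rng = np.random.RandomState(0)
X = rng.rand(500, 2)

# random_state controls initialization and the random reassignment of
# low-count centers; reassignment_ratio sets how aggressively such
# centers are reassigned (higher = more reassignments).
mbk = MiniBatchKMeans(n_clusters=5, random_state=0,
                      reassignment_ratio=0.01).fit(X)
print(mbk.cluster_centers_.shape)   # (5, 2)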