pberkes
diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py
Lines changed: 3 additions & 2 deletions
diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py
Lines changed: 23 additions & 0 deletions
@@ -1534,8 +1534,9 @@ def _iter_indices(self, X, y, groups=None):
                                                   self.train_size)
 
         if y.ndim == 2:
-            # for multi-label y, map each distinct row to its string repr:
-            y = np.array([str(row) for row in y])
+            # for multi-label y, map each distinct row to a string repr
+            # using join because str(row) uses an ellipsis if len(row) > 1000
+            y = np.array([' '.join(row.astype('str')) for row in y])
 
         classes, y_indices = np.unique(y, return_inverse=True)
         n_classes = classes.shape[0]
 
@@ -726,6 +726,29 @@ def test_stratified_shuffle_split_multilabel():
         assert_equal(expected_ratio, np.mean(y_test[:, 0]))
 
 
+def test_stratified_shuffle_split_multilabel_many_labels():
+    # Regression test for PR #9922: for multilabel y with > 1000 labels,
+    # str(row) replaces the middle of the row with an ellipsis, so rows that
+    # differ only there could map to the same string and were not split
+    # correctly by the powerset (multilabel -> multiclass) transform. The two
+    # rows below share their first and last 3 entries and differ in between.
+    row_with_many_zeros = [1, 0, 1] + [0] * 1000 + [1, 0, 1]
+    row_with_many_ones = [1, 0, 1] + [1] * 1000 + [1, 0, 1]
+    y = np.array([row_with_many_zeros] * 10 + [row_with_many_ones] * 100)
+    X = np.ones_like(y)
+
+    sss = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=0)
+    train, test = next(sss.split(X=X, y=y))
+    y_train = y[train]
+    y_test = y[test]
+
+    # Whole rows must be stratified: column 4 lies in the differing middle
+    # section, so y[:, 4] alone identifies which of the two rows a sample is.
+    expected_ratio = np.mean(y[:, 4])
+    assert_equal(expected_ratio, np.mean(y_train[:, 4]))
+    assert_equal(expected_ratio, np.mean(y_test[:, 4]))
+
+
 def test_predefinedsplit_with_kfold_split():
     # Check that PredefinedSplit can reproduce a split generated by Kfold.
     folds = -1 * np.ones(10)