@@ -129,7 +129,7 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3,
129129 series_index , name )
130130
131131
132- def qcut (x , q , labels = None , retbins = False , precision = 3 ):
132+ def qcut (x , q , labels = None , retbins = False , precision = 3 , duplicates = 'raise' ):
133133 """
134134 Quantile-based discretization function. Discretize variable into
135135 equal-sized buckets based on rank or based on sample quantiles. For example
@@ -151,6 +151,10 @@ def qcut(x, q, labels=None, retbins=False, precision=3):
151151 as a scalar.
152152 precision : int
153153 The precision at which to store and display the bins labels
154+ duplicates : {default 'raise', 'drop'}, optional
155+ If bin edges are not unique, raise ValueError or drop the duplicates.
156+
157+ .. versionadded:: 0.20.0
154158
155159 Returns
156160 -------
@@ -187,22 +191,32 @@ def qcut(x, q, labels=None, retbins=False, precision=3):
187191 bins = algos .quantile (x , quantiles )
188192 fac , bins = _bins_to_cuts (x , bins , labels = labels ,
189193 precision = precision , include_lowest = True ,
190- dtype = dtype )
194+ dtype = dtype , duplicates = duplicates )
191195
192196 return _postprocess_for_cut (fac , bins , retbins , x_is_series ,
193197 series_index , name )
194198
195199
196200def _bins_to_cuts (x , bins , right = True , labels = None ,
197201 precision = 3 , include_lowest = False ,
198- dtype = None ):
202+ dtype = None , duplicates = 'raise' ):
203+
204+ if duplicates not in ['raise' , 'drop' ]:
205+ raise ValueError ("invalid value for 'duplicates' parameter, "
206+ "valid options are: raise, drop" )
207+
208+ unique_bins = algos .unique (bins )
209+ if len (unique_bins ) < len (bins ):
210+ if duplicates == 'raise' :
211+ raise ValueError ("Bin edges must be unique: {}. You "
212+ "can drop duplicate edges by setting "
213+ "'duplicates' param" .format (repr (bins )))
214+ else :
215+ bins = unique_bins
199216
200217 side = 'left' if right else 'right'
201218 ids = bins .searchsorted (x , side = side )
202219
203- if len (algos .unique (bins )) < len (bins ):
204- raise ValueError ('Bin edges must be unique: %s' % repr (bins ))
205-
206220 if include_lowest :
207221 ids [x == bins [0 ]] = 1
208222
0 commit comments