@@ -3661,7 +3661,7 @@ def ksdensity(dataset, bw_method=None):
36613661 Representation of a kernel-density estimate using Gaussian kernels.
36623662
36633663 Call signature::
3664- xmin, xmax, result = ksdensity(dataset, 'scott ')
3664+ kde_dict = ksdensity(dataset, 'silverman ')
36653665
36663666 Parameters
36673667 ----------
@@ -3714,22 +3714,22 @@ def ksdensity(dataset, bw_method=None):
37143714 """
37153715
37163716 # This implementation with minor modification was too good to pass up.
3717- # from scipy: https://github.com/scipy/scipy/blob/master/scipy/stats/kde.py
3717+ # from scipy: https://github.com/scipy/scipy/blob/master/scipy/stats/kde.py
37183718
3719- dataset = np .atleast_2d (dataset )
3719+ dataset = np .array ( np . atleast_2d (dataset ) )
37203720 xmin = dataset .min ()
37213721 xmax = dataset .max ()
37223722
37233723 if not dataset .size > 1 :
37243724 raise ValueError ("`dataset` input should have multiple elements." )
37253725
3726- d , n = dataset .shape
3726+ dim , num_dp = dataset .shape
37273727
37283728 # ----------------------------------------------
37293729 # Set Bandwidth, defaulted to Scott's Factor
37303730 # ----------------------------------------------
3731- scotts_factor = lambda : np .power (n , - 1. / (d + 4 ))
3732- silverman_factor = lambda : np .power (n * ( d + 2.0 )/ 4.0 , - 1. / (d + 4 ))
3731+ scotts_factor = lambda : np .power (num_dp , - 1. / (dim + 4 ))
3732+ silverman_factor = lambda : np .power (num_dp * ( dim + 2.0 )/ 4.0 , - 1. / (dim + 4 ))
37333733
37343734 # Default method to calculate bandwidth; replaced below according to `bw_method`
37353735 covariance_factor = scotts_factor
@@ -3740,7 +3740,7 @@ def ksdensity(dataset, bw_method=None):
37403740 covariance_factor = scotts_factor
37413741 elif bw_method == 'silverman' :
37423742 covariance_factor = silverman_factor
3743- elif np .isscalar (bw_method ) and not isinstance (bw_method , string_types ):
3743+ elif np .isscalar (bw_method ) and not isinstance (bw_method , six . string_types ):
37443744 covariance_factor = lambda : bw_method
37453745 else :
37463746 msg = "`bw_method` should be 'scott', 'silverman', or a scalar"
@@ -3752,53 +3752,54 @@ def ksdensity(dataset, bw_method=None):
37523752 factor = covariance_factor ()
37533753
37543754 # Cache covariance and inverse covariance of the data
3755- data_covariance = np .atleast_2d (np .cov (dataset , rowvar = 1 ,bias = False ))
3755+ data_covariance = np .atleast_2d (np .cov (dataset , rowvar = 1 , bias = False ))
37563756 data_inv_cov = np .linalg .inv (data_covariance )
37573757
37583758 covariance = data_covariance * factor ** 2
37593759 inv_cov = data_inv_cov / factor ** 2
3760- norm_factor = np .sqrt (np .linalg .det (2 * np .pi * covariance )) * n
3760+ norm_factor = np .sqrt (np .linalg .det (2 * np .pi * covariance )) * num_dp
37613761
37623762 # ----------------------------------------------
37633763 # Evaluate the estimated pdf on a set of points.
37643764 # ----------------------------------------------
3765- points = np .atleast_2d (np .arange (xmin ,xmax , (xmax - xmin )/ 100. ))
3765+ points = np .atleast_2d (np .arange (xmin , xmax , (xmax - xmin )/ 100. ))
37663766
3767- d1 , m1 = points .shape
3768- if d1 != d :
3769- if d1 == 1 and m1 == d :
3767+ dim_pts , num_dp_pts = np . array ( points ) .shape
3768+ if dim_pts != dim :
3769+ if dim_pts == 1 and num_dp_pts == num_dp :
37703770 # points was passed in as a row vector
3771- points = np .reshape (points , (d , 1 ))
3772- m1 = 1
3771+ points = np .reshape (points , (dim , 1 ))
3772+ num_dp_pts = 1
37733773 else :
3774- msg = "points have dimension %s, dataset has dimension %s" % (d1 , d )
3774+ msg = "points have dimension %s,\
3775+ dataset has dimension %s" % (dim_pts , dim )
37753776 raise ValueError (msg )
37763777
3777- result = np .zeros ((m1 ,), dtype = np .float )
3778+ result = np .zeros ((num_dp_pts ,), dtype = np .float )
37783779
3779- if m1 >= n :
3780+ if num_dp_pts >= num_dp :
37803781 # there are more points than data, so loop over data
3781- for i in range (n ):
3782+ for i in range (num_dp ):
37823783 diff = dataset [:, i , np .newaxis ] - points
37833784 tdiff = np .dot (inv_cov , diff )
3784- energy = np .sum (diff * tdiff ,axis = 0 ) / 2.0
3785+ energy = np .sum (diff * tdiff , axis = 0 ) / 2.0
37853786 result = result + np .exp (- energy )
37863787 else :
37873788 # loop over points
3788- for i in range (m ):
3789- diff = dataset - points [:, i , newaxis ]
3789+ for i in range (num_dp_pts ):
3790+ diff = dataset - points [:, i , np . newaxis ]
37903791 tdiff = np .dot (inv_cov , diff )
37913792 energy = np .sum (diff * tdiff , axis = 0 ) / 2.0
37923793 result [i ] = np .sum (np .exp (- energy ), axis = 0 )
37933794
37943795 result = result / norm_factor
37953796
37963797 return {
3797- 'xmin' : xmin ,
3798- 'xmax' : xmax ,
3799- 'mean' : np .mean (result ),
3800- 'median' : np .median (result ),
3801- 'result' : result
3798+ 'xmin' : xmin ,
3799+ 'xmax' : xmax ,
3800+ 'mean' : np .mean (dataset ),
3801+ 'median' : np .median (dataset ),
3802+ 'result' : result
38023803 }
38033804
38043805##################################################
0 commit comments