@@ -3656,12 +3656,12 @@ def stineman_interp(xi,x,y,yp=None):
36563656 1 / (dy1 + dy2 ),))
36573657 return yi
36583658
3659- def ksdensity ( dataset , bw_method = None ):
3659+ class GaussianKDE ( object ):
36603660 """
36613661 Representation of a kernel-density estimate using Gaussian kernels.
36623662
36633663 Call signature::
3664- kde_dict = ksdensity (dataset, 'silverman')
3664+ kde = gaussian_kde (dataset, 'silverman')
36653665
36663666 Parameters
36673667 ----------
@@ -3676,10 +3676,10 @@ def ksdensity(dataset, bw_method=None):
36763676 Attributes
36773677 ----------
36783678 dataset : ndarray
3679- The dataset with which `ksdensity ` was initialized.
3680- d : int
3679+ The dataset with which `gaussian_kde ` was initialized.
3680+ dim : int
36813681 Number of dimensions.
3682- n : int
3682+ num_dp : int
36833683 Number of datapoints.
36843684 factor : float
36853685 The bandwidth factor, obtained from `kde.covariance_factor`, with which
@@ -3690,117 +3690,127 @@ def ksdensity(dataset, bw_method=None):
36903690 inv_cov : ndarray
36913691 The inverse of `covariance`.
36923692
3693- Returns
3693+ Methods
36943694 -------
3695- A dictionary mapping each various aspects of the computed KDE.
3696- The dictionary has the following keys:
3697-
3698- xmin : number
3699- The min of the input dataset
3700- xmax : number
3701- The max of the input dataset
3702- mean : number
3703- The mean of the result
3704- median: number
3705- The median of the result
3706- result: (# of points,)-array
3707- The array of the evaluated PDF estimation
3708-
3709- Raises
3710- ------
3711- ValueError : if the dimensionality of the input points is different than
3712- the dimensionality of the KDE.
3695+ kde.evaluate(points) : ndarray
3696+ Evaluate the estimated pdf on a provided set of points.
3697+ kde(points) : ndarray
3698+ Same as kde.evaluate(points)
37133699
37143700 """
37153701
37163702 # This implementation with minor modification was too good to pass up.
37173703 # from scipy: https://github.com/scipy/scipy/blob/master/scipy/stats/kde.py
37183704
3719- dataset = np .array (np .atleast_2d (dataset ))
3720- xmin = dataset .min ()
3721- xmax = dataset .max ()
3705+ def __init__ (self , dataset , bw_method = None ):
3706+ self .dataset = np .atleast_2d (dataset )
3707+ if not np .array (self .dataset ).size > 1 :
3708+ raise ValueError ("`dataset` input should have multiple elements." )
3709+
3710+ self .dim , self .num_dp = np .array (self .dataset ).shape
3711+
3712+ if bw_method is None :
3713+ pass
3714+ elif bw_method == 'scott' :
3715+ self .covariance_factor = self .scotts_factor
3716+ elif bw_method == 'silverman' :
3717+ self .covariance_factor = self .silverman_factor
3718+ elif np .isscalar (bw_method ):
3719+ if not isinstance (bw_method , six .string_types ):
3720+ self ._bw_method = 'use constant'
3721+ self .covariance_factor = lambda : bw_method
3722+ elif callable (bw_method ):
3723+ self ._bw_method = bw_method
3724+ self .covariance_factor = lambda : self ._bw_method (self )
3725+ else :
3726+ msg = "`bw_method` should be 'scott', 'silverman', a scalar " \
3727+ "or a callable."
3728+ raise ValueError (msg )
3729+
3730+ # Computes the covariance matrix for each Gaussian kernel using
3731+ # covariance_factor().
3732+
3733+ self .factor = self .covariance_factor ()
3734+ # Cache covariance and inverse covariance of the data
3735+ if not hasattr (self , '_data_inv_cov' ):
3736+ self .data_covariance = np .atleast_2d (
3737+ np .cov (
3738+ self .dataset ,
3739+ rowvar = 1 ,
3740+ bias = False ))
3741+ self .data_inv_cov = np .linalg .inv (self .data_covariance )
3742+
3743+ self .covariance = self .data_covariance * self .factor ** 2
3744+ self .inv_cov = self .data_inv_cov / self .factor ** 2
3745+ self .norm_factor = np .sqrt (
3746+ np .linalg .det (
3747+ 2 * np .pi * self .covariance )) * self .num_dp
3748+
3749+ def scotts_factor (self ):
3750+ return np .power (self .num_dp , - 1. / (self .dim + 4 ))
3751+
3752+ def silverman_factor (self ):
3753+ return np .power (
3754+ self .num_dp * (self .dim + 2.0 ) / 4.0 , - 1. / (self .dim + 4 ))
3755+
3756+ # Default method to calculate bandwidth, can be overwritten by subclass
3757+ covariance_factor = scotts_factor
37223758
3723- if not dataset . size > 1 :
3724- raise ValueError ( "`dataset` input should have multiple elements." )
3759+ def evaluate ( self , points ) :
3760+ """Evaluate the estimated pdf on a set of points.
37253761
3726- dim , num_dp = dataset .shape
3762+ Parameters
3763+ ----------
3764+ points : (# of dimensions, # of points)-array
3765+ Alternatively, a (# of dimensions,) vector can be passed in and
3766+ treated as a single point.
37273767
3728- # ----------------------------------------------
3729- # Set Bandwidth, defaulted to Scott's Factor
3730- # ----------------------------------------------
3731- scotts_factor = lambda : np .power (num_dp , - 1. / (dim + 4 ))
3732- silverman_factor = lambda : np .power (num_dp * (dim + 2.0 )/ 4.0 , - 1. / (dim + 4 ))
3768+ Returns
3769+ -------
3770+ values : (# of points,)-array
3771+ The values at each point.
37333772
3734- # Default method to calculate bandwidth, can be overwritten by subclass
3735- covariance_factor = scotts_factor
3773+ Raises
3774+ ------
3775+ ValueError : if the dimensionality of the input points is different
3776+ than the dimensionality of the KDE.
37363777
3737- if bw_method is None :
3738- pass
3739- elif bw_method == 'scott' :
3740- covariance_factor = scotts_factor
3741- elif bw_method == 'silverman' :
3742- covariance_factor = silverman_factor
3743- elif np .isscalar (bw_method ) and not isinstance (bw_method , six .string_types ):
3744- covariance_factor = lambda : bw_method
3745- else :
3746- msg = "`bw_method` should be 'scott', 'silverman', or a scalar"
3747- raise ValueError (msg )
3748-
3749- # ---------------------------------------------------------------
3750- # Computes covariance matrix for each Gaussian kernel with factor
3751- # ---------------------------------------------------------------
3752- factor = covariance_factor ()
3753-
3754- # Cache covariance and inverse covariance of the data
3755- data_covariance = np .atleast_2d (np .cov (dataset , rowvar = 1 , bias = False ))
3756- data_inv_cov = np .linalg .inv (data_covariance )
3757-
3758- covariance = data_covariance * factor ** 2
3759- inv_cov = data_inv_cov / factor ** 2
3760- norm_factor = np .sqrt (np .linalg .det (2 * np .pi * covariance )) * num_dp
3761-
3762- # ----------------------------------------------
3763- # Evaluate the estimated pdf on a set of points.
3764- # ----------------------------------------------
3765- points = np .atleast_2d (np .arange (xmin , xmax , (xmax - xmin )/ 100. ))
3766-
3767- dim_pts , num_dp_pts = np .array (points ).shape
3768- if dim_pts != dim :
3769- if dim_pts == 1 and num_dp_pts == num_dp :
3770- # points was passed in as a row vector
3771- points = np .reshape (points , (dim , 1 ))
3772- num_dp_pts = 1
3778+ """
3779+ points = np .atleast_2d (points )
3780+
3781+ dim , num_m = np .array (points ).shape
3782+ if dim != self .dim :
3783+ if dim == 1 and num_m == self .dim :
3784+ # points was passed in as a row vector
3785+ points = np .reshape (points , (self .dim , 1 ))
3786+ num_m = 1
3787+ else :
3788+ msg = "points have dimension %s, dataset has dimension %s" % (
3789+ dim , self .dim )
3790+ raise ValueError (msg )
3791+
3792+ result = np .zeros ((num_m ,), dtype = np .float )
3793+
3794+ if num_m >= self .num_dp :
3795+ # there are more points than data, so loop over data
3796+ for i in range (self .num_dp ):
3797+ diff = self .dataset [:, i , np .newaxis ] - points
3798+ tdiff = np .dot (self .inv_cov , diff )
3799+ energy = np .sum (diff * tdiff , axis = 0 ) / 2.0
3800+ result = result + np .exp (- energy )
37733801 else :
3774- msg = "points have dimension %s,\
3775- dataset has dimension %s" % (dim_pts , dim )
3776- raise ValueError (msg )
3802+ # loop over points
3803+ for i in range (num_m ):
3804+ diff = self .dataset - points [:, i , np .newaxis ]
3805+ tdiff = np .dot (self .inv_cov , diff )
3806+ energy = np .sum (diff * tdiff , axis = 0 ) / 2.0
3807+ result [i ] = np .sum (np .exp (- energy ), axis = 0 )
37773808
3778- result = np . zeros (( num_dp_pts ,), dtype = np . float )
3809+ result = result / self . norm_factor
37793810
3780- if num_dp_pts >= num_dp :
3781- # there are more points than data, so loop over data
3782- for i in range (num_dp ):
3783- diff = dataset [:, i , np .newaxis ] - points
3784- tdiff = np .dot (inv_cov , diff )
3785- energy = np .sum (diff * tdiff , axis = 0 ) / 2.0
3786- result = result + np .exp (- energy )
3787- else :
3788- # loop over points
3789- for i in range (num_dp_pts ):
3790- diff = dataset - points [:, i , np .newaxis ]
3791- tdiff = np .dot (inv_cov , diff )
3792- energy = np .sum (diff * tdiff , axis = 0 ) / 2.0
3793- result [i ] = np .sum (np .exp (- energy ), axis = 0 )
3794-
3795- result = result / norm_factor
3796-
3797- return {
3798- 'xmin' : xmin ,
3799- 'xmax' : xmax ,
3800- 'mean' : np .mean (dataset ),
3801- 'median' : np .median (dataset ),
3802- 'result' : result
3803- }
3811+ return result
3812+
3813+ __call__ = evaluate
38043814
38053815##################################################
38063816# Code related to things in and around polygons
0 commit comments