Source code for mousestyles.kde

import numpy as np
from sklearn.neighbors import KernelDensity
from sklearn.grid_search import GridSearchCV


[docs]def kde(x, x_grid, symmetric_correction=False, cutoff=1): """ Return a numpy.ndarray object of estimated density Parameters ---------- x : numpy.ndarray data, as realiztions of variable X x_grid : numpy.ndarray the grid points for the estimated density symmetric_correction : boolean a method indicator. If False, do common gaussian kernel density estimation (kde). If True, do common gaussian kde on data generated from x concatenating with its reflection around the cutoff point. Then transform the estimated kde back by a factor of 2. Used for e.g. kde for nonnegative kernel estimation cutoff : float the axis of symmetry for symmetric correction Returns ------- pdf : numpy.ndarray estimated density at the specified grid points x_grid Examples -------- >>> kde(x = np.array([2,3,1,0]), x_grid=np.linspace(0, 5, 10)) array([ 0.17483395, 0.21599529, 0.23685855, 0.24007961, 0.22670763, 0.19365019, 0.14228937, 0.08552725, 0.04043597, 0.01463953]) >>> x1 = np.concatenate([norm(-1, 1.).rvs(400), norm(1, 0.3).rvs(100)]) >>> pdf1 = kde(x=x1, x_grid=np.linspace(0, 5, 100), symmetric_correction =True, cutoff=1) array([ 0.26625297, 0.26818492, 0.27105849, 0.27489486, 0.27968752, ... 0.07764054, 0.07239964, 0.06736559, 0.06254175, 0.05793043]) """ # if we want to do a symmetric correction to do kde, we transform # the data to be the symmetric format by adding the counterpart # obtained by reflecting the data around x=cutoff if symmetric_correction: x = np.concatenate([x, 2*cutoff - x]) # Use GridSearchCV to search for the best bandwidth by 5 fold # cross-validation max loglikelihood grid = GridSearchCV(KernelDensity(), {'bandwidth': np.linspace(0.01, 0.5, 10)}, cv=min(5, len(x))) grid.fit(x[:, None]) bandwidth = grid.best_params_['bandwidth'] # do desity estimation kde_skl = KernelDensity(bandwidth=bandwidth) kde_skl.fit(x[:, np.newaxis]) log_pdf = kde_skl.score_samples(x_grid[:, np.newaxis]) # transform the fuction score function to density if symmetric_correction: # transform back to the one-sided density in symmetric correction pdf = 2 * np.exp(log_pdf) else: # vanilla case pdf = np.exp(log_pdf) return pdf