# NOTE(review): reconstructed from a mangled diff paste; this is a method of
# class PeakHistogram — re-indent under the class when merging back.
def initialize1(self, bins=None, loss='mle', covariance_parameterization='givens'):
    """Estimate initial Gaussian-peak parameters from a rebinned histogram.

    The histogram is rebinned, background-suppressed, and a single Gaussian
    plus flat background is fitted to obtain a starting parameter vector for
    the full peak fit.

    Parameters
    ----------
    bins : None | int | 'knuth' | 'adaptive_knuth'
        Rebinning scheme. None keeps the original binning; an int gives the
        target number of bins per axis; the string options use the Knuth
        rule (optionally equalizing bin probability masses).
    loss : 'mle' | 'pearson_chi' | 'neumann_chi'
        Objective used for the initial fit.
    covariance_parameterization : 'givens' | 'cholesky' | 'full'
        Parameterization of the peak precision matrix.

    Returns
    -------
    numpy.ndarray
        Fitted parameter vector: [sqrt(intensity), center (ndims),
        precision parameters].

    Raises
    ------
    ValueError
        If `bins`, `loss` or `covariance_parameterization` is not one of the
        recognized options (previously an unrecognized value caused a
        NameError further down).
    """
    ###########################################################################
    # rebin data

    if isinstance(bins, str):
        if bins == 'knuth':
            bins = knuth_bins(self.data, min_bins=4, spread=1)
        elif bins == 'adaptive_knuth':
            # number of bins from the `knuth` algorithm, but edges chosen so
            # that bins carry comparable probability masses
            bins = knuth_bins(self.data, min_bins=4, spread=1)
            # 1d marginals of the histogram counts
            marginal_data = marginalize_1d(self.data, normalize=False)
            # quantiles; len(b)+2 makes an odd number of bins
            quant = [np.linspace(0, 1, min(len(b) + 2, self.shape[i]))
                     for i, b in enumerate(bins)]
            edges = [np.quantile(np.repeat(np.arange(1, md.size + 1), md.astype(int)),
                                 q[1:], method='inverted_cdf')
                     for md, q in zip(marginal_data, quant)]
            bins = [np.diff(e, prepend=0).astype(int) for e in edges]
            if _debug:
                plt.figure(constrained_layout=True, figsize=(10, 4))
                for i in range(self.ndims):
                    plt.subplot(1, self.ndims, i + 1)
                    plt.hlines(np.linspace(0, marginal_data[i].sum(), len(bins[i]) + 1),
                               0, marginal_data[i].size)
                    plt.vlines(edges[i], 0, marginal_data[i].sum())
                    plt.plot(marginal_data[i].cumsum(), '.', c='red')
                    plt.gca().set_box_aspect(1)
                plt.savefig(_debug_dir + '/adaptive_knuth_quantiles.png')
        else:
            # FIX: previously fell through and later raised NameError
            raise ValueError(f"unknown bins scheme '{bins}'")
    elif isinstance(bins, int):
        nbins = bins
        bins = [split_bins([s], nbins, recursive=False) for s in self.shape]
    elif bins is None:
        bins = [[1] * s for s in self.shape]

    # rebinned data: bin densities and bin-center coordinates
    fit_data, fit_points = self.get_grid_data(bins=bins, rebin_mode='density')
    fit_points = fit_points.reshape((-1, self.ndims))
    data_max, data_mean = fit_data.max(), fit_data.mean()
    # keep only the signal above the midpoint between mean and max: this
    # suppresses the background so only the peak core drives the initial fit
    fit_data -= data_mean + 0.5 * (data_max - data_mean)
    fit_data[fit_data < 0] = 0
    fit_data = fit_data.ravel()

    ###########################################################################
    # initialization and bounds on parameters

    # max peak intensity is parameterized by its square root so that the
    # effective intensity params[0]**2 is nonnegative
    intst_init = [np.sqrt(data_max - data_mean)]
    intst_lbnd = [-np.sqrt(data_max)]
    intst_ubnd = [np.sqrt(data_max)]

    # peak center: start at the box center, allow +/- 1/3 of the box radius
    cnt_init = [(lim[0] + lim[1]) / 2 for lim in self.limits]
    cnt_lbnd = [c - rad / 3 for c, rad in zip(cnt_init, self.radiuses)]
    cnt_ubnd = [c + rad / 3 for c, rad in zip(cnt_init, self.radiuses)]

    # initialization and bounds for the precision matrix
    if covariance_parameterization == 'givens':
        num_angles = (self.ndims * (self.ndims - 1)) // 2
        # bounds on the semiaxes of the `peak_std` ellipsoid: standard
        # deviations (square roots of the eigenvalues) of the covariance
        peak_std = 4
        ini_rads = [1/4 * rad / peak_std for rad in self.radiuses]    # initial 'peak_std' radius: 1/4 of the box radius
        max_rads = [5/6 * rad / peak_std for rad in self.radiuses]    # largest 'peak_std' radius: 5/6 of the box radius
        min_rads = [1/2 * res / peak_std for res in self.resolution]  # smallest: 1/2 of the smallest bin size
        # `num_angles` rotation angles followed by `ndims` square roots of
        # the eigenvalues of the precision matrix (inverse radii); note the
        # radius bounds invert: the largest radius gives the lower bound
        prec_init = [0] * num_angles + [1 / r for r in ini_rads]
        prec_lbnd = [-np.pi] * num_angles + [1 / r for r in max_rads]
        prec_ubnd = [np.pi] * num_angles + [1 / r for r in min_rads]
    elif covariance_parameterization == 'cholesky':
        num_chol = (self.ndims * (self.ndims + 1)) // 2
        # upper triangular part of the Cholesky factor of the precision matrix
        prec_init = list(np.eye(self.ndims)[np.triu_indices(self.ndims)])
        prec_lbnd = [-1000] * num_chol
        prec_ubnd = [1000] * num_chol
    elif covariance_parameterization == 'full':
        # arbitrary square root of the precision matrix
        prec_init = list(np.eye(self.ndims).ravel())
        prec_lbnd = [-1000] * (self.ndims ** 2)
        prec_ubnd = [1000] * (self.ndims ** 2)
    else:
        # FIX: previously fell through leaving prec_init undefined
        raise ValueError(
            f"unknown covariance_parameterization '{covariance_parameterization}'")

    # initialization and bounds for all parameters
    params_init = intst_init + cnt_init + prec_init
    params_lbnd = intst_lbnd + cnt_lbnd + prec_lbnd
    params_ubnd = intst_ubnd + cnt_ubnd + prec_ubnd

    ###########################################################################
    # fit densities of the bins in the rebinned histogram

    if loss in ('pearson_chi', 'neumann_chi'):
        # FIX: these branches referenced undefined names (`points`, `data`,
        # `nnz_mask`, `nnz_data`) and divided a masked residual by an
        # unmasked array; define everything from the rebinned data
        nnz_mask = fit_data > 0
        nnz_data = fit_data[nnz_mask]

        def residual(params):
            fit = params[0]**2 + gaussian_mixture(
                params[1:], fit_points, npeaks=1,
                covariance_parameterization=covariance_parameterization).ravel()
            res = fit[nnz_mask] - nnz_data
            # Pearson: variance from the model; Neumann: variance from data
            var = fit[nnz_mask] if loss == 'pearson_chi' else nnz_data
            return (res / np.sqrt(var)).ravel()

        result = least_squares(residual,
                               x0=params_init,
                               bounds=[params_lbnd, params_ubnd],
                               method='trf', verbose=0, max_nfev=1000)
    elif loss == 'mle':
        # FIX: ndims was hard-coded to 3; use the histogram's dimensionality
        gaussian_peak = Gaussian(ndims=self.ndims,
                                 covariance_parameterization=covariance_parameterization)

        class MLELoss(object):
            """Poisson negative log-likelihood with cached intermediates.

            scipy may evaluate func/grad/hess at the same point in any
            order; cached quantities are recomputed only when the parameter
            vector actually changes.
            """

            def __init__(self):
                # call counters, useful for profiling the optimizer
                self.func_calls = 0
                self.grad_calls = 0
                self.hess_calls = 0

            def __call__(self, params):
                self.func_calls += 1
                self.func_params = np.asarray(params).copy()
                # small offset keeps log() finite where the model vanishes
                self.fit = 0.01 + gaussian_peak(params, fit_points)
                return (self.fit - fit_data * np.log(self.fit)).sum()

            def gradient(self, params, *args, **kwargs):
                self.grad_calls += 1
                self.grad_params = np.asarray(params).copy()
                # refresh self.fit if called at a new point
                if not hasattr(self, 'func_params') or not np.all(self.grad_params == self.func_params):
                    self.__call__(params)
                self.dloss_dfit = 1 - fit_data / self.fit
                self.dloss_dpeak = gaussian_peak.gradient(
                    params, fit_points, dloss_dfit=self.dloss_dfit)
                return self.dloss_dpeak

            def hessian(self, params, *args, **kwargs):
                self.hess_calls += 1
                self.hess_params = np.asarray(params).copy()
                # refresh cached fit/gradient if called at a new point
                if not hasattr(self, 'func_params') or not np.all(self.hess_params == self.func_params):
                    self.__call__(params)
                if not hasattr(self, 'grad_params') or not np.all(self.hess_params == self.grad_params):
                    self.gradient(params)
                d2loss_dfit2 = fit_data / self.fit**2
                return gaussian_peak.hessian(params, fit_points,
                                             dloss_dfit=self.dloss_dfit,
                                             d2loss_dfit2=d2loss_dfit2)

        peak_loss = MLELoss()
        # NOTE(review): Newton-CG ignores `bounds` (scipy only warns); the
        # bounds are kept for switching back to the L-BFGS-B method
        result = minimize(peak_loss,
                          jac=peak_loss.gradient,
                          hess=peak_loss.hessian,
                          x0=params_init,
                          bounds=tuple(zip(params_lbnd, params_ubnd)),
                          method='Newton-CG',
                          options={'maxiter': 10, 'xtol': 1.e-6, 'disp': True})
    else:
        # FIX: previously fell through leaving `result` undefined
        raise ValueError(f"unknown loss '{loss}'")

    if _debug:
        # FIX: these diagnostics — and a bare exit() that killed the
        # process — used to run unconditionally, making the return below
        # unreachable; now gated behind the debug flag and exit() removed
        print(params_init)
        print(result.x)
        center, evecs, radii, v = ellipsoid_fit(fit_points[fit_data > 0, :])
        print(center)
        print(evecs)
        print(radii)
        print(v)

    return result.x