Commit 0274e1d2 authored by Viktor Reshniak's avatar Viktor Reshniak
Browse files

remove old initialization

parent 69d52965
Loading
Loading
Loading
Loading
+0 −201
Original line number Diff line number Diff line
@@ -1349,207 +1349,6 @@ class PeakHistogram(object):
			return data, points


	def initialize1(self, bins=None, loss='mle', covariance_parameterization='givens'):
		"""Estimate initial parameters for a single-Gaussian-peak fit on rebinned data.

		Rebins the histogram, builds initial values and box bounds for the peak
		intensity, center and precision-matrix parameterization, then fits them
		by minimizing the chosen loss.

		Parameters
		----------
		bins : str, int, list, or None
			'knuth' / 'adaptive_knuth' select automatic binning; an int requests
			that many bins per axis via `split_bins`; None keeps the original
			binning (one source bin per target bin).
		loss : str
			'pearson_chi', 'neumann_chi' (least-squares residuals) or 'mle'
			(Poisson negative log-likelihood minimized with Newton-CG).
		covariance_parameterization : str
			'givens' (angles + eigenvalue square roots), 'cholesky' (upper
			triangle of the Cholesky factor) or 'full' (arbitrary matrix
			square root) parameterization of the precision matrix.

		Returns
		-------
		Optimized parameter vector `result.x`.

		NOTE(review): this method appears to be debug/dead code — `exit()` is
		called unconditionally before the `return` statement, so the return is
		unreachable. The 'pearson_chi' and 'neumann_chi' branches reference
		`points`, `data`, `nnz_mask` and `nnz_data`, none of which are defined
		in this scope (presumably stale names for `fit_points`/`fit_data`);
		those branches would raise NameError if taken. Confirm before reuse.
		"""
		###########################################################################
		# rebin data

		if isinstance(bins,str):
			if bins=='knuth':
				bins = knuth_bins(self.data, min_bins=4, spread=1)
				# bins = knuth_bins(data, min_bins=4, max_bins=4, spread=0)
			elif bins=='adaptive_knuth':
				# rebin data using number of bins given by `knuth` algorithm but such that bins have comparable probability masses
				bins = knuth_bins(self.data, min_bins=4, spread=1)

				# 1d marginals
				marginal_data = marginalize_1d(self.data, normalize=False)

				# quantiles, note len(b)+2 to make odd number of bins
				quant = [ np.linspace(0,1,min(len(b)+2,self.shape[i])) for i,b in enumerate(bins) ]
				# per-axis bin edges at those quantiles of the marginal CDF; bin
				# widths (in units of original bins) recovered by differencing
				edges = [ np.quantile( np.repeat(np.arange(1,md.size+1), md.astype(int)), q[1:], method='inverted_cdf' ) for md,q in zip(marginal_data,quant) ]
				bins  = [ np.diff(e,prepend=0).astype(int) for e in edges ]

				if _debug:
					# visualize chosen quantile edges against the marginal CDFs
					plt.figure(constrained_layout=True, figsize=(10,4))
					for i in range(self.ndims):
						plt.subplot(1,self.ndims,i+1)
						plt.hlines(np.linspace(0,marginal_data[i].sum(),len(bins[i])+1), 0, marginal_data[i].size)
						plt.vlines(edges[i],0,marginal_data[i].sum())
						plt.plot(marginal_data[i].cumsum(), '.', c='red')
						plt.gca().set_box_aspect(1)
					plt.savefig(_debug_dir+'/adaptive_knuth_quantiles.png')
		elif isinstance(bins,int):
			nbins = bins
			bins  = [split_bins([s],nbins,recursive=False) for s in self.shape]
		elif bins is None:
			# identity rebinning: one source bin per target bin along each axis
			bins = [[1]*s for s in self.shape]

		# rebinned data
		fit_data, fit_points = self.get_grid_data(bins=bins, rebin_mode='density')
		fit_points = fit_points.reshape((-1,self.ndims))

		# threshold the data halfway between the mean and the max so that only
		# the peak region contributes to the fit; everything below is zeroed
		data_min, data_max, data_mean = fit_data.min(), fit_data.max(), fit_data.mean()
		fit_data -= data_mean + 0.5 * (data_max - data_mean)
		fit_data[fit_data<0] = 0
		fit_data = fit_data.ravel()

		# detector_mask = None
		# if self.detector_mask is None:
		# 	detector_mask = fit_data==fit_data
		# if self.detector_mask is not None:
		# 	detector_mask = rebin_histogram(self.detector_mask.astype(int), bins)>0
		# 	fit_data   = fit_data.ravel()[detector_mask.ravel()]
		# 	fit_points = fit_points[detector_mask.ravel(),:]

		###########################################################################
		# initialization and bounds on parameters

		# self.initialize(bins)

		###################################
		# # initialization and bounds for the background intensity
		# bkgr_init = [ np.sqrt(0.9*data_mean)] #+ [0]*self.ndims
		# bkgr_lbnd = [-np.sqrt(1.1*data_mean)] #+ [0]*self.ndims
		# bkgr_ubnd = [ np.sqrt(1.1*data_mean)] #+ [0]*self.ndims

		###################################
		# initialization and bounds for the max peak intensity
		# (parameterized as a square root so the fitted intensity is nonnegative)
		intst_init = [ np.sqrt(data_max-data_mean)]
		intst_lbnd = [-np.sqrt(data_max)]
		intst_ubnd = [ np.sqrt(data_max)]

		###################################
		# cnt_1d, std_1d = initial_parameters(hist_ws, bins)
		# params_init1 = initial_parameters(hist_ws, bins, detector_mask)

		# initialization and bounds for the peak center:
		# start at the box center, allow it to move by a third of the box radius
		# dcnt = [ rad/3 for rad in self.radiuses]
		# cnt_init = cnt_1d
		cnt_init = [(lim[0]+lim[1])/2 for lim in self.limits]
		cnt_lbnd = [c-rad/3 for c,rad in zip(cnt_init,self.radiuses)]
		cnt_ubnd = [c+rad/3 for c,rad in zip(cnt_init,self.radiuses)]

		# initialization and bounds for the precision matrix
		if covariance_parameterization=='givens':
			num_angles = (self.ndims*(self.ndims-1))//2
			# bounds on the semiaxes of the `peak_std` ellipsoid: standard deviations (square roots of the eigenvalues) of the covariance matrix
			peak_std = 4
			ini_rads = [ 1/4*rad/peak_std for rad in self.radiuses]   # initial  'peak_std' radius is 1/2 of the box radius
			max_rads = [ 5/6*rad/peak_std for rad in self.radiuses]   # largest  'peak_std' radius is 5/6 of the box radius
			min_rads = [ 1/2*res/peak_std for res in self.resolution] # smallest 'peak_std' radius is 1/2 of the smallest bin size
			# `num_angles` angles and `ndims` square roots of the eigenvalues of the precision matrix
			# (precision eigenvalues are reciprocals of the radii, so the
			# lower bound uses max_rads and the upper bound uses min_rads)
			# prec_init = [     0]*num_angles + std_1d
			prec_init = [     0]*num_angles + [ 1/r for r in ini_rads]
			prec_lbnd = [-np.pi]*num_angles + [ 1/r for r in max_rads]
			prec_ubnd = [ np.pi]*num_angles + [ 1/r for r in min_rads]
		elif covariance_parameterization=='cholesky':
			num_chol = (self.ndims*(self.ndims+1))//2
			# upper triangular part of the Cholesky factor of the precision matrix
			prec_init = list(np.eye(self.ndims)[np.triu_indices(self.ndims)])
			prec_lbnd = [-1000]*num_chol
			prec_ubnd = [ 1000]*num_chol
		elif covariance_parameterization=='full':
			# arbitrary square root of the precision matrix
			prec_init = list(np.eye(self.ndims).ravel())
			prec_lbnd = [-1000]*(self.ndims**2)
			prec_ubnd = [ 1000]*(self.ndims**2)

		# # initialization and bounds for the skewness
		# skew_init = [0]*self.ndims
		# skew_lbnd = [0]*self.ndims
		# skew_ubnd = [0]*self.ndims

		###################################
		# initialization and bounds for all parameters
		# layout: [intensity, center (ndims), precision parameters]
		# params_init = params_init1
		params_init = intst_init + cnt_init + prec_init #+ skew_init
		params_lbnd = intst_lbnd + cnt_lbnd + prec_lbnd #+ skew_lbnd
		params_ubnd = intst_ubnd + cnt_ubnd + prec_ubnd #+ skew_ubnd

		# # number of background and peak parameters
		# nbkgr = 1 #len(bkgr_init)
		# npeak = len(params_init) - nbkgr

		###########################################################################

		# residual to fit densities of the bins in the rebinned histogram
		# NOTE(review): the two chi-square branches below reference `points`,
		# `data`, `nnz_mask`, `nnz_data` which are undefined in this scope —
		# likely stale names from an older revision; they would raise NameError.
		if loss=='pearson_chi':
			def residual(params):
				# params[0]**2 is the (nonnegative) background level
				fit = params[0]**2
				fit = fit + gaussian_mixture(params[1:],points,npeaks=1,covariance_parameterization=covariance_parameterization).reshape(data.shape)
				res = fit[nnz_mask] - nnz_data
				# Pearson chi: residuals weighted by sqrt of the model value
				return (res/np.sqrt(fit)).ravel()
			result = least_squares(residual, #jac=jacobian_residual,
				x0=params_init, bounds=[params_lbnd,params_ubnd], method='trf', verbose=0, max_nfev=1000)
		elif loss=='neumann_chi':
			def residual(params):
				fit = params[0]**2
				fit = fit + gaussian_mixture(params[1:],points,npeaks=1,covariance_parameterization=covariance_parameterization).reshape(data.shape)
				res = fit[nnz_mask] - nnz_data
				# Neyman chi: residuals weighted by sqrt of the observed value
				return (res/np.sqrt(nnz_data)).ravel()
				# return (res/np.maximum(1,np.sqrt(nnz_data))).ravel()
				# return (res/np.sqrt(data[nnz_mask].size)).ravel()
			result = least_squares(residual, #jac=jacobian_residual,
				x0=params_init, bounds=[params_lbnd,params_ubnd], method='trf', verbose=0, max_nfev=1000)
		elif loss=='mle':
			# NOTE(review): ndims is hard-coded to 3 here even though the rest of
			# the method uses self.ndims — confirm intended.
			gaussian_peak = Gaussian(ndims=3, covariance_parameterization=covariance_parameterization)
			class MLELoss(object):
				"""Poisson negative log-likelihood with gradient and Hessian.

				Caches the model evaluation from __call__ so that gradient() and
				hessian() can reuse it when invoked at the same parameters; each
				re-evaluates earlier stages if called with different parameters.
				"""
				def __init__(self):
					# call counters for diagnostics
					self.func_calls = 0
					self.grad_calls = 0
					self.hess_calls = 0

				def __call__(self, params):
					self.func_calls+=1
					self.func_params = np.asarray(params).copy()

					# fit; 0.01 floor keeps log() and divisions finite
					self.fit = 0.01 + gaussian_peak(params, fit_points)

					# Poisson NLL up to a params-independent constant
					return (self.fit-fit_data*np.log(self.fit)).sum()

				def gradient(self, params, *args, **kwargs):
					self.grad_calls += 1
					self.grad_params = np.asarray(params).copy()
					# refresh cached self.fit if params differ from last __call__
					if not hasattr(self, 'func_params') or not np.all(self.grad_params==self.func_params):  g = self.__call__(params)

					self.dloss_dfit = 1 - fit_data/self.fit
					self.dloss_dpeak = gaussian_peak.gradient(params, fit_points, dloss_dfit=self.dloss_dfit)

					return self.dloss_dpeak

				def hessian(self, params, *args, **kwargs):
					self.hess_calls += 1
					self.hess_params = np.asarray(params).copy()
					# refresh cached fit/gradient if params differ from the cached ones
					if not hasattr(self, 'func_params') or not np.all(self.hess_params==self.func_params):  g = self.__call__(params)
					if not hasattr(self, 'grad_params') or not np.all(self.hess_params==self.grad_params): dg = self.gradient(params)

					d2loss_dfit2 = fit_data/self.fit**2
					d2loss = gaussian_peak.hessian(params,fit_points,dloss_dfit=self.dloss_dfit,d2loss_dfit2=d2loss_dfit2)

					return d2loss
			peak_loss = MLELoss()
			result = minimize(peak_loss,
				jac  = peak_loss.gradient,
				hess = peak_loss.hessian,
				x0=params_init, bounds=tuple(zip(params_lbnd,params_ubnd)),
				# method='L-BFGS-B', options={'maxiter':1000, 'maxfun':1000, 'maxls':20, 'maxcor':100, 'ftol':1.e-6, 'gtol':1.e-6, 'disp':True}
				# method='BFGS', options={'maxiter':1000, 'gtol':1.e-6, 'norm':np.inf, 'disp':True}
				method='Newton-CG', options={'maxiter':10, 'xtol':1.e-6, 'disp':True}
				)


		# NOTE(review): debug block — exit() terminates the process, making the
		# return below unreachable; the hard-coded indices 7,8,9 assume the
		# 3-d 'givens' parameter layout (1 intensity + 3 center + 3 angles).
		print(params_init)
		print(result.x)
		center, evecs, radii, v = ellipsoid_fit(fit_points[fit_data>0,:])
		print(center)
		print(evecs)
		print(radii, 1/params_init[7],1/params_init[8],1/params_init[9])
		print(v)
		exit()
		return result.x


	def initialize(self, points, data, ellipsoid=True):
		# basic statistics
		data_min, data_max, data_mean = data.min(), data.max(), data.mean()