Commit 5a6f5b82 authored by Viktor Reshniak's avatar Viktor Reshniak
Browse files

add Gaussian class

parent 8ddfa542
Loading
Loading
Loading
Loading
+328 −47
Original line number Diff line number Diff line
@@ -425,12 +425,215 @@ def mahalanobis_distance(mu, sqrtP, x):
	return np.sqrt(squared_mahalanobis_distance(mu, sqrtP, x))


def gaussian_mixture(params, x, npeaks=1, covariance_parameterization='givens', return_gradient=False, return_hessian=False):
	"""Compute nD Gaussian mixture at points in `x`.
	Parameters are interpreted as follows:

class Gaussian(object):
	def __init__(self, ndims, covariance_parameterization='givens'):
		self.covariance_parameterization = covariance_parameterization

		# number of parameters
		self.ncnt  = ndims
		self.nskew = 0#ndims
		if covariance_parameterization=='full':
			self.ncov = ndims**2
		else:
			self.ncov = (ndims*(ndims+1))//2

		# number of dimensions
		self.ndims = ndims

		# number of parameters
		self.nparams = 1 + self.ncnt + self.ncov + self.nskew

		# number of angles
		if covariance_parameterization=='givens':
			self.nangles = (ndims*(ndims-1))//2
		else:
			self.nangles = None


	def __call__(self, params, x):
		if len(params)!=self.nparams:
			raise ValueError(f"`params` array is of wrong size, must have `len(params) = 1 + ncnt + ncov + nskew = {self.nparams}`, got {len(params)}")

		start = time.time()

		# convert to numpy array
		params = np.array(params).ravel()

		# parameters of the model
		intst = params[0]
		cnt   = params[1:1+self.ncnt]
		sqrtP = params[1+self.ncnt:1+self.ncnt+self.ncov]
		# skew  = params[1+ncnt+ncov:1+ncnt+ncov+nskew].reshape((1,-1))

		start = time.time()
		# square root of the precision matrix
		if self.covariance_parameterization=='full':
			sqrtP_i = sqrtP.reshape((self.ndims,self.ndims))
		elif self.covariance_parameterization=='cholesky':
			sqrtP_i  = np.zeros((self.ndims,self.ndims))
			triu_ind = np.triu_indices(self.ndims)
			diag_ind = np.diag_indices(self.ndims)
			# fill upper triangular part
			sqrtP_i[triu_ind] = sqrtP
			# positive diagonal makes Cholesky decomposition unique
			sqrtP_i[diag_ind] = np.exp(sqrtP_i[diag_ind])
		elif self.covariance_parameterization=='givens':
			# inverse rotation matrix
			self.R,self.dR,self.d2R = rotation_matrix(sqrtP[:self.nangles],True,True)
			# square roots of the eigenvalues of the precision matrix
			self.sqrtD = sqrtP[self.nangles:] #.reshape((ndims,1))
			# square root of the precision matrix, i.e., diag(sqrt_eig) @ R
			self.sqrtP = self.sqrtD[:,np.newaxis] * self.R

		# base Gaussian
		self.g = intst**2 * np.exp(-0.5*squared_mahalanobis_distance(cnt, self.sqrtP, x))

		# modulated Gaussian
		# g = g * (1+erf(skew@(x-cnt.reshape((ndims,1))))/np.sqrt(2)).ravel()

		if _profile:
			self.func_time = time.time()-start
			print(f'func: {self.func_time} sec')

		return self.g


	def gradient(self, params, x, dloss_dfit=None, *args, **kwargs):
		if len(params)!=self.nparams:
			raise ValueError(f"`params` array is of wrong size, must have `len(params) = 1 + ncnt + ncov + nskew = {self.nparams}`, got {len(params)}")

		start = time.time()

		# convert to numpy array
		params = np.array(params).ravel()

		# parameters of the model
		intst = params[0]
		cnt   = params[1:1+self.ncnt]
		sqrtP = params[1+self.ncnt:1+self.ncnt+self.ncov]

		# define useful quantities
		self.P = self.sqrtP.T @ self.sqrtP								# (ndims,ndims)
		self.dsqrtP_dangle = self.sqrtD.reshape((1,-1,1)) * self.dR		# (nangles,ndims,ndims)

		# cache quantities for reuse
		self.xcnt       =  x - cnt.reshape((1,self.ndims))													# (npoints,ndims)
		self.Rxcnt      =  np.einsum('ip,kp->ki',self.R,self.xcnt) 											# (npoints,ndims)
		self.sqrtPxcnt_ = -np.einsum('ip,kp->ki',self.sqrtP,self.xcnt) 										# (npoints,ndims)
		self.Pxcnt      =  np.einsum('ip,kp->ki',self.P,self.xcnt)											# (npoints,ndims)
		self.dsqrtPxcnt_dangle = np.einsum('ijp,kp->kij',self.dsqrtP_dangle,self.xcnt)						# (npoints,nangles,ndims)
		self.sqrtPxcnt_dsqrtPxcnt_dangle_ = np.einsum('kp,kip->ki',self.sqrtPxcnt_,self.dsqrtPxcnt_dangle)	# (npoints,nangles)

		self.dg_dintst = self.g[:,np.newaxis] * (2/intst)							# (npoints,1)
		self.dg_dcnt   = self.g[:,np.newaxis] * self.Pxcnt							# (npoints,ndims)
		self.dg_dangle = self.g[:,np.newaxis] * self.sqrtPxcnt_dsqrtPxcnt_dangle_	# (npoints,ndims)
		self.dg_dsqrtD = self.g[:,np.newaxis] * (self.sqrtPxcnt_ * self.Rxcnt)		# (npoints,ndims)
		# dg_dskew  = np.zeros_like(dg_dcnt)							# (npoints,ndims)

		self.dg = np.concatenate((self.dg_dintst,self.dg_dcnt,self.dg_dangle,self.dg_dsqrtD), axis=-1)

		if dloss_dfit is not None:
			dg = np.einsum('k,ki->i',dloss_dfit,self.dg)

		if _profile:
			self.grad_time = time.time()-start
			print(f'grad: {self.grad_time} sec, {self.grad_time/self.func_time}')

		return dg

	def hessian(self, params, x, dloss_dfit=None, d2loss_dfit2=None, *args, **kwargs):
		if len(params)!=self.nparams:
			raise ValueError(f"`params` array is of wrong size, must have `len(params) = 1 + ncnt + ncov + nskew = {self.nparams}`, got {len(params)}")

		start = time.time()

		# convert to numpy array
		params = np.array(params).ravel()

		# parameters of the model
		intst = params[0]
		cnt   = params[1:1+self.ncnt]
		sqrtP = params[1+self.ncnt:1+self.ncnt+self.ncov]

		dRxcnt_dangle       = np.einsum('ijp,kp->kij',self.dR,self.xcnt)			# (npoints,nangle,ndims)
		d2sqrtP_dangle2     = np.einsum('m,ijmn->ijmn',self.sqrtD,self.d2R)			# (nangle,nangle,ndims,ndims)
		d2sqrtPxcnt_dangle2 = np.einsum('ijmn,kn->kijm',d2sqrtP_dangle2,self.xcnt)	# (npoints,nangle,nangle,ndims)
		# print(f'{time.time()-start}  cache'); start1 = time.time()

		d2g_dintst2       = self.dg_dintst[:,np.newaxis,:] / intst		# (npoints,1,1)
		d2g_dintst_dcnt   = self.dg_dcnt[:,np.newaxis,:]   * (2/intst)	# (npoints,1,ndims)
		d2g_dintst_dangle = self.dg_dangle[:,np.newaxis,:] * (2/intst)	# (npoints,1,ndims)
		d2g_dintst_dsqrtD = self.dg_dsqrtD[:,np.newaxis,:] * (2/intst)	# (npoints,1,ndims)
		# print(f'{time.time()-start1}  d2g_dintst'); start1 = time.time()

		d2g_dcnt2  = np.einsum('ki,kj->kij',self.dg_dcnt,self.Pxcnt)
		d2g_dcnt2 -= np.einsum('k,ij->kij',self.g,self.P)
		# print(f'{time.time()-start1}  d2g_dcnt2'); start1 = time.time()
		#
		d2g_dcnt_dangle  = np.einsum('ip,kjp->kij',self.sqrtP.T,self.dsqrtPxcnt_dangle)
		d2g_dcnt_dangle -= np.einsum('kp,jpi->kij',self.sqrtPxcnt_,self.dsqrtP_dangle)
		d2g_dcnt_dangle *= self.g.reshape((-1,1,1))
		d2g_dcnt_dangle += np.einsum('ki,kj->kij',self.dg_dcnt,np.einsum('kp,kjp->kj',self.sqrtPxcnt_,self.dsqrtPxcnt_dangle))
		# print(f'{time.time()-start1}  d2g_dcnt_dangle'); start1 = time.time()
		#
		d2g_dcnt_dsqrtD = (np.einsum('ki,kj->kij',self.dg_dcnt,self.Rxcnt) - np.einsum('k,ji->kij',self.g,2*self.R)) * self.sqrtPxcnt_[:,np.newaxis,:]
		# print(f'{time.time()-start1}  d2g_dcnt_dsqrtD'); start1 = time.time()

		d2g_dangle2  = np.einsum('kp,kijp->kij',self.sqrtPxcnt_,d2sqrtPxcnt_dangle2)
		d2g_dangle2 -= np.einsum('kip,kjp->kij',self.dsqrtPxcnt_dangle,self.dsqrtPxcnt_dangle)
		d2g_dangle2 *= self.g.reshape((-1,1,1))
		d2g_dangle2 += np.einsum('ki,kj->kij',self.dg_dangle,self.sqrtPxcnt_dsqrtPxcnt_dangle_)
		# print(f'{time.time()-start1}  d2g_dangle2'); start1 = time.time()
		#
		d2g_dangle_dsqrtD = ( self.dg_dangle[...,np.newaxis]*self.Rxcnt[:,np.newaxis,:] + (2*self.g.reshape((-1,1,1)))*dRxcnt_dangle ) * self.sqrtPxcnt_[:,np.newaxis,:]
		# print(f'{time.time()-start1}  d2g_dangle_dsqrtD'); start1 = time.time()

		axi,axj = np.diag_indices(self.ndims)
		d2g_dsqrtD2 = self.dg_dsqrtD[...,np.newaxis] * (-self.sqrtD.reshape((1,1,-1)))
		d2g_dsqrtD2[:,axi,axj] -= self.g[...,np.newaxis]
		d2g_dsqrtD2 *= (self.Rxcnt**2)[:,np.newaxis,:]
		# print(f'{time.time()-start1}  d2g_dsqrtD2'); start1 = time.time()


		dloss_dfit   = dloss_dfit.reshape((-1,1,1))
		d2loss_dfit2 = d2loss_dfit2.reshape((-1,1))

		d2g_dintst2       = (dloss_dfit*d2g_dintst2).sum(axis=0)
		d2g_dintst_dcnt   = (dloss_dfit*d2g_dintst_dcnt).sum(axis=0)
		d2g_dintst_dangle = (dloss_dfit*d2g_dintst_dangle).sum(axis=0)
		d2g_dintst_dsqrtD = (dloss_dfit*d2g_dintst_dsqrtD).sum(axis=0)

		d2g_dcnt2       = (dloss_dfit*d2g_dcnt2).sum(axis=0)
		d2g_dcnt_dangle = (dloss_dfit*d2g_dcnt_dangle).sum(axis=0)
		d2g_dcnt_dsqrtD = (dloss_dfit*d2g_dcnt_dsqrtD).sum(axis=0)

		d2g_dangle2       = (dloss_dfit*d2g_dangle2).sum(axis=0)
		d2g_dangle_dsqrtD = (dloss_dfit*d2g_dangle_dsqrtD).sum(axis=0)

		d2g_dsqrtD2 = (dloss_dfit*d2g_dsqrtD2).sum(axis=0)

		d2g = np.block([
			[d2g_dintst2,         d2g_dintst_dcnt,   d2g_dintst_dangle,   d2g_dintst_dsqrtD],
			[d2g_dintst_dcnt.T,   d2g_dcnt2,         d2g_dcnt_dangle,     d2g_dcnt_dsqrtD  ],
			[d2g_dintst_dangle.T, d2g_dcnt_dangle.T, d2g_dangle2,         d2g_dangle_dsqrtD],
			[d2g_dintst_dsqrtD.T, d2g_dcnt_dsqrtD.T, d2g_dangle_dsqrtD.T, d2g_dsqrtD2      ],
			])
		d2g += ((d2loss_dfit2*self.dg)[...,np.newaxis]*self.dg[:,np.newaxis,:]).sum(axis=0)


		# print(f'{time.time()-start1}  block'); start1 = time.time()
		if _profile:
			hess_time = time.time()-start
			print(f'hess: {hess_time} sec, {hess_time/self.func_time}')

		return d2g



def gaussian_mixture(params, x, npeaks=1, covariance_parameterization='givens', return_gradient=False, return_hessian=False):
	"""Compute nD Gaussian mixture at points in `x`.
	Parameters are interpreted as follows:
		- first `npeaks` values are the max peak intensities of each peak
		- next `npeaks*ndims` values are the coordinates of each of the peak centers
		- remaining values are the parameters of the precision matrix for each of the peaks
@@ -444,19 +647,21 @@ def gaussian_mixture(params, x, npeaks=1, covariance_parameterization='givens',
	-------
	  (npoints,) ndarray, values of the Gaussian at `x`
	"""
	# number of points
	npoints = x.shape[0]
	# number of dimensions
	ndims = x.shape[1]

	# number of parameters
	ncnt  = ndims
	nskew = ndims
	nskew = 0#ndims
	if covariance_parameterization=='full':
		ncov = ndims**2
	else:
		ncov = (ndims*(ndims+1))//2
	nparams = 1 + ndims + ncov + ndims
	nparams = 1 + ncnt + ncov + nskew
	if len(params)!=npeaks*nparams:
		raise ValueError(f"`params` array is of wrong size, must have `len(params) = npeaks * (1 + ndims + ndims*(ndims+1)/2 + ndims) = {npeaks*nparams}`, got {len(params)}")
		raise ValueError(f"`params` array is of wrong size, must have `len(params) = npeaks * (1 + ncnt + ncov + nskew) = {npeaks*nparams}`, got {len(params)}")

	# convert to numpy array
	params = np.array(params).reshape((npeaks,nparams))
@@ -469,6 +674,7 @@ def gaussian_mixture(params, x, npeaks=1, covariance_parameterization='givens',
		sqrtP = params[i][1+ncnt:1+ncnt+ncov]
		skew  = params[i][1+ncnt+ncov:1+ncnt+ncov+nskew].reshape((1,-1))

		start = time.time()
		# square root of the precision matrix
		if covariance_parameterization=='full':
			sqrtP_i = sqrtP.reshape((ndims,ndims))
@@ -490,7 +696,7 @@ def gaussian_mixture(params, x, npeaks=1, covariance_parameterization='givens',
			sqrtP_i = sqrtD_i[:,np.newaxis] * R

		# base Gaussian
		gi = intst**2 * np.exp(-0.5*squared_mahalanobis_distance(cnt, sqrtP_i, x)).reshape((-1,1))
		gi = intst**2 * np.exp(-0.5*squared_mahalanobis_distance(cnt, sqrtP_i, x)) #.reshape((-1,1))

		# # modulated Gaussian
		# # gi = gi * (1+erf(skew@(x-cnt.reshape((ndims,1))))/np.sqrt(2)).ravel()
@@ -498,47 +704,77 @@ def gaussian_mixture(params, x, npeaks=1, covariance_parameterization='givens',
		# total
		g = g + gi

		if _profile:
			func_time = time.time()-start
			print(f'func: {func_time} sec')

		start = time.time()
		if return_gradient or return_hessian:
			# define useful quantities
			P_i = sqrtP_i.T @ sqrtP_i						# (ndims,ndims)
			dsqrtP_dangle   = sqrtD_i[:,np.newaxis] * dR		# (ndims,ndims)
			d2sqrtP_dangle2 = sqrtD_i.reshape(1,1,-1,1) * d2R	# (ndims,ndims,ndims,ndims)
			dsqrtP_dangle = sqrtD_i.reshape((1,-1,1)) * dR	# (nangles,ndims,ndims)

			# start1 = time.time()
			# # cache quantities for reuse
			# xcnt      = x.reshape(-1,ndims,1) - cnt.reshape((1,ndims,1))									# (npoints,ndims), after squeezing last dimension
			# Rxcnt     = R[np.newaxis,...]       @ xcnt														# (npoints,ndims)
			# sqrtPxcnt = sqrtP_i[np.newaxis,...] @ xcnt														# (npoints,ndims)
			# Pxcnt     = P_i[np.newaxis,...]     @ xcnt														# (npoints,ndims)
			# dsqrtPxcnt_dangle = dsqrtP_dangle[np.newaxis,...] @ xcnt[:,np.newaxis,...]						# (npoints,ndims,ndims)
			# sqrtPxcnt_dsqrtPxcnt_dangle = (sqrtPxcnt[:,np.newaxis,...] * dsqrtPxcnt_dangle[...,np.newaxis]).sum(axis=-2)	# (npoints,ndims)
			# print(f'{time.time()-start1}')
			# # exit()

			# Rxcnt = Rxcnt.squeeze(-1)
			# Pxcnt = Pxcnt.squeeze(-1)
			# sqrtPxcnt = sqrtPxcnt.squeeze(-1)
			# dsqrtPxcnt_dangle = dsqrtPxcnt_dangle.squeeze(-1)
			# sqrtPxcnt_dsqrtPxcnt_dangle = sqrtPxcnt_dsqrtPxcnt_dangle.squeeze(-1)

			# cache quantities for reuse
			xcnt      = x.reshape(-1,ndims,1) - cnt.reshape((1,ndims,1))									# (npoints,ndims), after squeezing last dimension
			Rxcnt     = R[np.newaxis,...]       @ xcnt														# (npoints,ndims)
			sqrtPxcnt = sqrtP_i[np.newaxis,...] @ xcnt														# (npoints,ndims)
			Pxcnt     = P_i[np.newaxis,...]     @ xcnt														# (npoints,ndims)
			dRxcnt_dangle       = dR[np.newaxis,...] @ xcnt[:,np.newaxis,...]								# (npoints,ndims)
			dsqrtPxcnt_dangle   = dsqrtP_dangle[np.newaxis,...]   @ xcnt[:,np.newaxis,...]					# (npoints,ndims,ndims)
			d2sqrtPxcnt_dangle2 = d2sqrtP_dangle2[np.newaxis,...] @ xcnt[:,np.newaxis,np.newaxis,...]		# (npoints,ndims,ndims,ndims)
			sqrtPxcnt_dsqrtPxcnt_dangle = (sqrtPxcnt[:,np.newaxis,...] * dsqrtPxcnt_dangle).sum(axis=-2)	# (npoints,ndims)

			Rxcnt = Rxcnt.squeeze(-1)
			Pxcnt = Pxcnt.squeeze(-1)
			sqrtPxcnt = sqrtPxcnt.squeeze(-1)
			dRxcnt_dangle = dRxcnt_dangle.squeeze(-1)
			dsqrtPxcnt_dangle   = dsqrtPxcnt_dangle.squeeze(-1)
			d2sqrtPxcnt_dangle2 = d2sqrtPxcnt_dangle2.squeeze(-1)
			sqrtPxcnt_dsqrtPxcnt_dangle = sqrtPxcnt_dsqrtPxcnt_dangle.squeeze(-1)

			dg_dintst =  gi * (2/intst)						# (npoints,1)
			dg_dcnt   =  gi * Pxcnt							# (npoints,ndims)
			dg_dangle = -gi * sqrtPxcnt_dsqrtPxcnt_dangle	# (npoints,ndims)
			dg_dsqrtD = -gi * (sqrtPxcnt * Rxcnt)			# (npoints,ndims)
			dg_dskew  = np.zeros_like(dg_dcnt)				# (npoints,ndims)

			dg = np.concatenate((dg_dintst,dg_dcnt,dg_dangle,dg_dsqrtD,dg_dskew), axis=-1)
			xcnt       = x - cnt.reshape((1,ndims))												# (npoints,ndims)
			Rxcnt      =  np.einsum('ip,kp->ki',R,xcnt) 											# (npoints,ndims)
			sqrtPxcnt_ = -np.einsum('ip,kp->ki',sqrtP_i,xcnt) 									# (npoints,ndims)
			Pxcnt      =  np.einsum('ip,kp->ki',P_i,xcnt)										# (npoints,ndims)
			dsqrtPxcnt_dangle = np.einsum('ijp,kp->kij',dsqrtP_dangle,xcnt)						# (npoints,nangles,ndims)
			sqrtPxcnt_dsqrtPxcnt_dangle_ = np.einsum('kp,kip->ki',sqrtPxcnt_,dsqrtPxcnt_dangle)	# (npoints,nangles)

			dg_dintst = gi[:,np.newaxis] * (2/intst)					# (npoints,1)
			dg_dcnt   = gi[:,np.newaxis] * Pxcnt						# (npoints,ndims)
			dg_dangle = gi[:,np.newaxis] * sqrtPxcnt_dsqrtPxcnt_dangle_	# (npoints,ndims)
			dg_dsqrtD = gi[:,np.newaxis] * (sqrtPxcnt_ * Rxcnt)			# (npoints,ndims)
			# dg_dskew  = np.zeros_like(dg_dcnt)							# (npoints,ndims)

			dg = [dg_dintst,dg_dcnt,dg_dangle,dg_dsqrtD]
			# dg = np.concatenate((dg_dintst,dg_dcnt,dg_dangle,dg_dsqrtD), axis=-1)
			# dg = gi[:,np.newaxis] * np.concatenate((2/intst*np.ones((Pxcnt.shape[0],1)),Pxcnt,sqrtPxcnt_dsqrtPxcnt_dangle,sqrtPxcnt*Rxcnt), axis=-1)
			# dg = np.einsum('k,ki->ki',gi,np.concatenate((2/intst*np.ones((Pxcnt.shape[0],1)),Pxcnt,-sqrtPxcnt_dsqrtPxcnt_dangle,-sqrtPxcnt * Rxcnt), axis=-1))
		if _profile:
			grad_time = time.time()-start
			print(f'grad: {grad_time} sec, {grad_time/func_time}')
		# exit()

		start = time.time()
		if return_hessian:
			# batched transpose
			T = lambda a: a.transpose((0,2,1))

			dRxcnt_dangle       = np.einsum('ijp,kp->kij',dR,xcnt)					# (npoints,nangle,ndims)
			d2sqrtP_dangle2     = np.einsum('m,ijmn->ijmn',sqrtD_i,d2R)				# (nangle,nangle,ndims,ndims)
			d2sqrtPxcnt_dangle2 = np.einsum('ijmn,kn->kijm',d2sqrtP_dangle2,xcnt)	# (npoints,nangle,nangle,ndims)
			# d2sqrtP_dangle2     = sqrtD_i.reshape(1,1,-1,1) * d2R
			# d2sqrtPxcnt_dangle2 = np.einsum('m,ijmn,kn->kijm',sqrtD_i,d2R,xcnt)
			print(f'{time.time()-start}  cache'); start1 = time.time()
			# dRxcnt_dangle = dR[np.newaxis,...] @ xcnt[:,np.newaxis,...]								# (npoints,ndims)
			# dRxcnt_dangle = dRxcnt_dangle.squeeze(-1)
			# d2sqrtPxcnt_dangle2 = d2sqrtP_dangle2[np.newaxis,...] @ xcnt[:,np.newaxis,np.newaxis,...]		# (npoints,ndims,ndims,ndims)
			# d2sqrtPxcnt_dangle2 = d2sqrtPxcnt_dangle2.squeeze(-1)

			d2g_dintst2       = dg_dintst[:,np.newaxis,:] / intst		# (npoints,1,1)
			d2g_dintst_dcnt   = dg_dcnt[:,np.newaxis,:]   * (2/intst)	# (npoints,1,ndims)
			d2g_dintst_dangle = dg_dangle[:,np.newaxis,:] * (2/intst)	# (npoints,1,ndims)
			d2g_dintst_dsqrtD = dg_dsqrtD[:,np.newaxis,:] * (2/intst)	# (npoints,1,ndims)

			print(f'{time.time()-start1}  d2g_dintst'); start1 = time.time()

			# np.einsum('->',Pxcnt,np.einsum_path('kp,kjp->jk',sqrtPxcnt,dsqrtPxcnt_dangle))

@@ -553,17 +789,37 @@ def gaussian_mixture(params, x, npeaks=1, covariance_parameterization='givens',
			# 	- Pxcnt[...,np.newaxis] * (sqrtPxcnt[:,np.newaxis,:]*dsqrtPxcnt_dangle).sum(axis=-1)[:,np.newaxis,:] )
			# d2g_dcnt_dangle = gi.reshape((-1,1,1)) * ( np.einsum('ip,kjp->kij',sqrtP_i.T,dsqrtPxcnt_dangle) + np.einsum('kp,ijp->kij',sqrtPxcnt,dsqrtP_dangle.transpose((2,0,1))) - np.einsum('ki,kp,kjp->kij',Pxcnt,sqrtPxcnt,dsqrtPxcnt_dangle) )

			d2g_dcnt2 = np.einsum('ki,kj->kij',dg_dcnt,Pxcnt) - np.einsum('kn,ij->kij',gi,P_i)
			d2g_dcnt_dangle = (np.einsum('pi,kjp->kij',sqrtP_i,dsqrtPxcnt_dangle) + np.einsum('kp,jpi->kij',sqrtPxcnt,dsqrtP_dangle)) * gi[...,np.newaxis] - np.einsum('ki,kp,kjp->kij',dg_dcnt,sqrtPxcnt,dsqrtPxcnt_dangle)
			d2g_dcnt_dsqrtD = ( gi[...,np.newaxis]*(2*R.T) - dg_dcnt[...,np.newaxis]*Rxcnt[:,np.newaxis,:] ) * sqrtPxcnt[:,np.newaxis,:]

			d2g_dangle2 = -np.einsum('ki,kj->kij',dg_dangle,sqrtPxcnt_dsqrtPxcnt_dangle) - ( np.einsum('kip,kjp->kij',dsqrtPxcnt_dangle,dsqrtPxcnt_dangle) + np.einsum('kp,kijp->kij',sqrtPxcnt,d2sqrtPxcnt_dangle2) ) * gi[...,np.newaxis]
			d2g_dangle_dsqrtD = - ( dg_dangle[...,np.newaxis]*Rxcnt[:,np.newaxis,:] + (2*gi[...,np.newaxis])*dRxcnt_dangle ) * sqrtPxcnt[:,np.newaxis,:]
			d2g_dcnt2  = np.einsum('ki,kj->kij',dg_dcnt,Pxcnt)
			d2g_dcnt2 -= np.einsum('k,ij->kij',gi,P_i)
			print(f'{time.time()-start1}  d2g_dcnt2'); start1 = time.time()
			#
			d2g_dcnt_dangle  = np.einsum('ip,kjp->kij',sqrtP_i.T,dsqrtPxcnt_dangle)
			d2g_dcnt_dangle -= np.einsum('kp,jpi->kij',sqrtPxcnt_,dsqrtP_dangle)
			d2g_dcnt_dangle *= gi.reshape((-1,1,1))
			# d2g_dcnt_dangle  = (np.einsum('pi,kjp->kij',sqrtP_i,dsqrtPxcnt_dangle) + np.einsum('kp,jpi->kij',sqrtPxcnt,dsqrtP_dangle)) * gi.reshape((-1,1,1))
			# d2g_dcnt_dangle -= np.einsum('ki,kp,kjp->kij',dg_dcnt,sqrtPxcnt,dsqrtPxcnt_dangle)
			d2g_dcnt_dangle += np.einsum('ki,kj->kij',dg_dcnt,np.einsum('kp,kjp->kj',sqrtPxcnt_,dsqrtPxcnt_dangle))
			print(f'{time.time()-start1}  d2g_dcnt_dangle'); start1 = time.time()
			#
			# d2g_dcnt_dsqrtD = ( gi.reshape((-1,1,1))*(2*R.T) - dg_dcnt[...,np.newaxis]*Rxcnt[:,np.newaxis,:] ) * sqrtPxcnt[:,np.newaxis,:]
			d2g_dcnt_dsqrtD = (np.einsum('ki,kj->kij',dg_dcnt,Rxcnt) - np.einsum('k,ji->kij',gi,2*R)) * sqrtPxcnt_[:,np.newaxis,:]
			# d2g_dcnt_dsqrtD = np.einsum('kij,kj->kij',(np.einsum('k,ji->kij',gi,2*R) - np.einsum('ki,kj->kij',dg_dcnt,Rxcnt)), sqrtPxcnt)
			print(f'{time.time()-start1}  d2g_dcnt_dsqrtD'); start1 = time.time()

			d2g_dangle2  = np.einsum('kp,kijp->kij',sqrtPxcnt_,d2sqrtPxcnt_dangle2)
			d2g_dangle2 -= np.einsum('kip,kjp->kij',dsqrtPxcnt_dangle,dsqrtPxcnt_dangle)
			d2g_dangle2 *= gi.reshape((-1,1,1))
			d2g_dangle2 += np.einsum('ki,kj->kij',dg_dangle,sqrtPxcnt_dsqrtPxcnt_dangle_)
			print(f'{time.time()-start1}  d2g_dangle2'); start1 = time.time()
			#
			d2g_dangle_dsqrtD = ( dg_dangle[...,np.newaxis]*Rxcnt[:,np.newaxis,:] + (2*gi.reshape((-1,1,1)))*dRxcnt_dangle ) * sqrtPxcnt_[:,np.newaxis,:]
			print(f'{time.time()-start1}  d2g_dangle_dsqrtD'); start1 = time.time()

			axi,axj = np.diag_indices(ndims)
			d2g_dsqrtD2 = dg_dsqrtD[...,np.newaxis] * (-sqrtD_i.reshape((1,1,-1)))
			d2g_dsqrtD2[:,axi,axj] -= gi
			d2g_dsqrtD2[:,axi,axj] -= gi[...,np.newaxis]
			d2g_dsqrtD2 = d2g_dsqrtD2 * (Rxcnt**2)[:,np.newaxis,:]
			print(f'{time.time()-start1}  d2g_dsqrtD2'); start1 = time.time()

			d2g = np.block([
				[d2g_dintst2,          d2g_dintst_dcnt,    d2g_dintst_dangle,    d2g_dintst_dsqrtD],
@@ -571,15 +827,40 @@ def gaussian_mixture(params, x, npeaks=1, covariance_parameterization='givens',
				[T(d2g_dintst_dangle), T(d2g_dcnt_dangle), d2g_dangle2,          d2g_dangle_dsqrtD],
				[T(d2g_dintst_dsqrtD), T(d2g_dcnt_dsqrtD), T(d2g_dangle_dsqrtD), d2g_dsqrtD2      ],
				])
			d2g = np.concatenate((d2g,np.zeros((d2g.shape[0],d2g.shape[1],ndims))), axis=2)
			d2g = np.concatenate((d2g,np.zeros((d2g.shape[0],ndims,d2g.shape[2]))), axis=1)
			# d2g = np.zeros((npoints,params.size,params.size))
			# #
			# d2g[:,:1,:1] = d2g_dintst2
			# d2g[:,:1,1:ndims+1] = d2g_dintst_dcnt
			# d2g[:,:1,ndims+1:ndims+nangles+1] = d2g_dintst_dangle
			# d2g[:,:1,ndims+nangles+1:] = d2g_dintst_dsqrtD
			# #
			# d2g[:,1:ndims+1,1:ndims+1] = d2g_dcnt2
			# d2g[:,1:ndims+1,ndims+1:ndims+nangles+1] = d2g_dcnt_dangle
			# d2g[:,1:ndims+1,ndims+nangles+1:] = d2g_dcnt_dsqrtD
			# #
			# d2g[:,ndims+1:ndims+nangles+1,ndims+1:ndims+nangles+1] = d2g_dangle2
			# d2g[:,ndims+1:ndims+nangles+1,ndims+1:ndims+nangles+1:] = d2g_dangle_dsqrtD
			# #
			# d2g[:,ndims+1:ndims+nangles+1:,ndims+1:ndims+nangles+1:] = d2g_dsqrtD2


			# def hessp(xx, p, *args):
			# 	Hp = np.zeros()

			print(f'{time.time()-start1}  block'); start1 = time.time()
			# d2g = np.concatenate((d2g,np.zeros((d2g.shape[0],d2g.shape[1],ndims))), axis=2)
			# d2g = np.concatenate((d2g,np.zeros((d2g.shape[0],ndims,d2g.shape[2]))), axis=1)
		if _profile:
			hess_time = time.time()-start
			print(f'hess: {hess_time} sec, {hess_time/func_time}')
		# exit()

	if return_hessian:
		return g, dg, d2g
		return g.ravel(), dg, d2g
	if return_gradient:
		return g, dg
		return g.ravel(), dg
	else:
		return g
		return g.ravel()


from scipy.optimize.optimize import MemoizeJac