Commit 98eabad7 authored by Viktor Reshniak

update bfgs

parent a1ec648c
+47 −156
@@ -4,6 +4,7 @@ from pathlib import Path

import numpy as np
from scipy.optimize import minimize as scipy_minimize
from scipy.optimize._optimize import _prepare_scalar_function, _LineSearchError, _epsilon, _line_search_wolfe12, _status_message, OptimizeResult
from scipy.linalg import svd
from numpy.linalg import eig
from scipy.special import loggamma, erf
@@ -20,143 +21,24 @@ from mantid.kernel import V3D
from mantid import config
config['Q.convention'] = "Crystallography"

# from ellipsoid import EllipsoidTool


from scipy.optimize._optimize import _check_unknown_options, _prepare_scalar_function, vecnorm, _LineSearchError, _epsilon, _line_search_wolfe12, Inf, _status_message, OptimizeResult
from numpy import asarray


np.set_printoptions(precision=2, linewidth=200, formatter={'float':lambda x: f'{x:9.2e}'})
_debug_dir = 'debug'
_debug = False
_debug = True
_profile = True
_verbose = False



def fmin_bfgs_2(f, x0, fprime=None, args=(), gtol=1e-5, norm=Inf,
			  epsilon=_epsilon, maxiter=None, full_output=0, disp=1,
			  retall=0, callback=None, init_hess=None):
def minimize_bfgs(fun, x0, args=(), jac=None, hess=None, callback=None,
				   gtol=1e-5, norm=np.inf, eps=1.e-6, maxiter=None, maxfun=None, hess_reeval=20,
				   disp=False, return_all=False, finite_diff_rel_step=None, H0=None):
	"""
	Minimize a function using the BFGS algorithm.
	Parameters
	----------
	f : callable ``f(x,*args)``
		Objective function to be minimized.
	x0 : ndarray
		Initial guess.
	fprime : callable ``f'(x,*args)``, optional
		Gradient of f.
	args : tuple, optional
		Extra arguments passed to f and fprime.
	gtol : float, optional
		Gradient norm must be less than `gtol` before successful termination.
	norm : float, optional
		Order of norm (Inf is max, -Inf is min)
	epsilon : int or ndarray, optional
		If `fprime` is approximated, use this value for the step size.
	callback : callable, optional
		An optional user-supplied function to call after each
		iteration. Called as ``callback(xk)``, where ``xk`` is the
		current parameter vector.
	maxiter : int, optional
		Maximum number of iterations to perform.
	full_output : bool, optional
		If True, return ``fopt``, ``func_calls``, ``grad_calls``, and
		``warnflag`` in addition to ``xopt``.
	disp : bool, optional
		Print convergence message if True.
	retall : bool, optional
		Return a list of results at each iteration if True.
	Returns
	-------
	xopt : ndarray
		Parameters which minimize f, i.e., ``f(xopt) == fopt``.
	fopt : float
		Minimum value.
	gopt : ndarray
		Value of gradient at minimum, f'(xopt), which should be near 0.
	Bopt : ndarray
		Value of 1/f''(xopt), i.e., the inverse Hessian matrix.
	func_calls : int
		Number of function calls made.
	grad_calls : int
		Number of gradient calls made.
	warnflag : integer
		1 : Maximum number of iterations exceeded.
		2 : Gradient and/or function calls not changing.
		3 : NaN result encountered.
	allvecs : list
		The value of `xopt` at each iteration. Only returned if `retall` is
		True.
	Notes
	-----
	Optimize the function, `f`, whose gradient is given by `fprime`
	using the quasi-Newton method of Broyden, Fletcher, Goldfarb,
	and Shanno (BFGS).
	See Also
	--------
	minimize: Interface to minimization algorithms for multivariate
		functions. See ``method='BFGS'`` in particular.
	References
	----------
	Wright and Nocedal, 'Numerical Optimization', 1999, p. 198.
	Examples
	--------
	>>> from scipy.optimize import fmin_bfgs
	>>> def quadratic_cost(x, Q):
	...     return x @ Q @ x
	...
	>>> x0 = np.array([-3, -4])
	>>> cost_weight =  np.diag([1., 10.])
	>>> # Note that a trailing comma is necessary for a tuple with a single element
	>>> fmin_bfgs(quadratic_cost, x0, args=(cost_weight,))
	Optimization terminated successfully.
			Current function value: 0.000000
			Iterations: 7                   # may vary
			Function evaluations: 24        # may vary
			Gradient evaluations: 8         # may vary
	array([ 2.85169950e-06, -4.61820139e-07])
	>>> def quadratic_cost_grad(x, Q):
	...     return 2 * Q @ x
	...
	>>> fmin_bfgs(quadratic_cost, x0, quadratic_cost_grad, args=(cost_weight,))
	Optimization terminated successfully.
			Current function value: 0.000000
			Iterations: 7
			Function evaluations: 8
			Gradient evaluations: 8
	array([ 2.85916637e-06, -4.54371951e-07])
	"""
	opts = {'gtol': gtol,
			'norm': norm,
			'eps': epsilon,
			'disp': disp,
			'maxiter': maxiter,
			'return_all': retall}

	res = _minimize_bfgs_2(f, x0, args, fprime, callback=callback, init_hess=init_hess, **opts)
	return res
	# if full_output:
	# 	retlist = (res['x'], res['fun'], res['jac'], res['hess_inv'],
	# 			   res['nfev'], res['njev'], res['status'])
	# 	if retall:
	# 		retlist += (res['allvecs'], )
	# 	return retlist
	# else:
	# 	if retall:
	# 		return res['x'], res['allvecs']
	# 	else:
	# 		return res['x']
	Source: https://github.com/scipy/scipy/blob/v1.9.3/scipy/optimize/_optimize.py

	Minimization of scalar function of one or more variables using the BFGS algorithm.

def _minimize_bfgs_2(fun, x0, args=(), jac=None, callback=None,
				   gtol=1e-5, norm=np.inf, eps=1.e-6, maxiter=None,
				   disp=False, return_all=False, finite_diff_rel_step=None, init_hess=None,
				   **unknown_options):
	"""
	Minimization of scalar function of one or more variables using the
	BFGS algorithm.
	Options
	-------
	disp : bool
@@ -164,16 +46,14 @@ def _minimize_bfgs_2(fun, x0, args=(), jac=None, callback=None,
	maxiter : int
		Maximum number of iterations to perform.
	gtol : float
		Gradient norm must be less than `gtol` before successful
		termination.
		Gradient norm must be less than `gtol` before successful termination.
	norm : float
		Order of norm (Inf is max, -Inf is min).
	eps : float or ndarray
		If `jac is None` the absolute step size used for numerical
		approximation of the jacobian via forward differences.
	return_all : bool, optional
		Set to True to return a list of the best solution at each of the
		iterations.
		Set to True to return a list of the best solution at each of the iterations.
	finite_diff_rel_step : None or array_like, optional
		If `jac in ['2-point', '3-point', 'cs']` the relative step size to
		use for numerical approximation of the jacobian. The absolute step
@@ -182,29 +62,30 @@ def _minimize_bfgs_2(fun, x0, args=(), jac=None, callback=None,
		the sign of `h` is ignored. If None (default) then step is selected
		automatically.
	"""
	_check_unknown_options(unknown_options)
	retall = return_all

	x0 = asarray(x0).flatten()
	x0 = np.asarray(x0).flatten()
	if x0.ndim == 0:
		x0.shape = (1,)
	if maxiter is None:
		maxiter = len(x0) * 200
	if maxfun is None:
		maxfun = len(x0) * 200

	sf = _prepare_scalar_function(fun, x0, jac, args=args, epsilon=eps,
								  finite_diff_rel_step=finite_diff_rel_step)
	sf = _prepare_scalar_function(fun, x0, jac, args=args, epsilon=eps, finite_diff_rel_step=finite_diff_rel_step)

	f = sf.fun
	myfprime = sf.grad
	f, df = sf.fun, sf.grad

	old_fval = f(x0)
	gfk = myfprime(x0)
	gfk = df(x0)

	k = 0
	N = len(x0)
	I = np.eye(N, dtype=int)
	if init_hess is not None:
		Hk = init_hess
	if H0 is not None:
		Hk = np.linalg.pinv(H0)
	elif hess is not None:
		Hk = np.linalg.pinv(hess(x0))
	else:
		Hk = I
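
The seeding logic above lets the caller supply either a precomputed Hessian estimate (H0) or a Hessian callable (hess); both are inverted with np.linalg.pinv, and the identity is used otherwise. A minimal usage sketch under stated assumptions: the module name (here bfgs_utils) is hypothetical since the file path is not shown in this diff, and SciPy's Rosenbrock helpers stand in for the real objective. Note that the fallback and periodic re-evaluation paths later in the loop call hess(xk) unconditionally, so a callable hess appears to be needed in practice whenever a line search can fail.

import numpy as np
from scipy.optimize import rosen, rosen_der, rosen_hess
from bfgs_utils import minimize_bfgs  # hypothetical module name; not shown in the diff

x0 = np.array([-1.2, 1.0])

# seed the inverse Hessian from the exact Hessian at x0; the estimate is also
# refreshed inside the loop (controlled by hess_reeval, see the update below)
res = minimize_bfgs(rosen, x0, jac=rosen_der, hess=rosen_hess, gtol=1e-6, hess_reeval=20, maxfun=500)

# or supply a precomputed Hessian estimate H0; it takes precedence over hess
# for seeding and is inverted with np.linalg.pinv
res = minimize_bfgs(rosen, x0, jac=rosen_der, hess=rosen_hess, H0=np.eye(2))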

@@ -215,15 +96,22 @@ def _minimize_bfgs_2(fun, x0, args=(), jac=None, callback=None,
	if retall:
		allvecs = [x0]
	warnflag = 0
	gnorm = vecnorm(gfk, ord=norm)
	while (gnorm > gtol) and (k < maxiter):
	gnorm = np.linalg.norm(gfk, ord=norm)
	hess_fvals = 0
	while (gnorm > gtol) and (k < maxiter) and (sf.nfev < maxfun):
		pk = -np.dot(Hk, gfk)
		try:
			alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
					 _line_search_wolfe12(f, myfprime, xk, pk, gfk,
										  old_fval, old_old_fval, amin=1e-100, amax=1e100)
			alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = _line_search_wolfe12(f, df, xk, pk, gfk, old_fval, old_old_fval, amin=1e-100, amax=1e100)
		except _LineSearchError:
			# Line search failed to find a better solution.
			# Line search failed to find a better solution;
			# retry with the exact Hessian.
			hess_fvals = 0
			Hk = np.linalg.pinv(hess(xk))
			pk = -np.dot(Hk, gfk)
			try:
				alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = _line_search_wolfe12(f, df, xk, pk, gfk, old_fval, old_old_fval, amin=1e-100, amax=1e100)
			except _LineSearchError:
				# break if failed again
				warnflag = 2
				break

@@ -233,20 +121,19 @@ def _minimize_bfgs_2(fun, x0, args=(), jac=None, callback=None,
		sk = xkp1 - xk
		xk = xkp1
		if gfkp1 is None:
			gfkp1 = myfprime(xkp1)
			gfkp1 = df(xkp1)

		yk  = gfkp1 - gfk
		gfk = gfkp1
		if callback is not None:
			callback(xk)
		k += 1
		gnorm = vecnorm(gfk, ord=norm)
		gnorm = np.linalg.norm(gfk, ord=norm)
		if (gnorm <= gtol):
			break

		if not np.isfinite(old_fval):
			# We correctly found +-Inf as optimal value, or something went
			# wrong.
			# We correctly found +-Inf as optimal value, or something went wrong.
			warnflag = 2
			break

@@ -259,10 +146,14 @@ def _minimize_bfgs_2(fun, x0, args=(), jac=None, callback=None,
		else:
			rhok = 1. / rhok_inv

		hess_fvals += sf.nfev
		if hess_fvals < hess_reeval:
			A1 = I - sk[:, np.newaxis] * yk[np.newaxis, :] * rhok
			A2 = I - yk[:, np.newaxis] * sk[np.newaxis, :] * rhok
		Hk = np.dot(A1, np.dot(Hk, A2)) + (rhok * sk[:, np.newaxis] *
												 sk[np.newaxis, :])
			Hk = np.dot(A1, np.dot(Hk, A2)) + (rhok * sk[:, np.newaxis] * sk[np.newaxis, :])
		else:
			hess_fvals = 0
			Hk = np.linalg.pinv(hess(xk))

	fval = old_fval
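
For reference, the branch guarded by hess_fvals < hess_reeval applies the standard BFGS inverse-Hessian update H_{k+1} = (I - rho*s*y^T) H_k (I - rho*y*s^T) + rho*s*s^T with rho = 1/(y^T s); once the budget is exhausted, the estimate is replaced by the pseudo-inverse of the exact Hessian. A small self-contained check (illustrative only, not part of the commit) that the update written this way satisfies the secant condition H_{k+1} y_k = s_k:

import numpy as np

rng = np.random.default_rng(0)
n = 5
I = np.eye(n)
Hk = np.eye(n)                            # current inverse-Hessian estimate
sk = rng.standard_normal(n)               # step x_{k+1} - x_k
yk = sk + 0.1 * rng.standard_normal(n)    # gradient difference; y^T s > 0 here since s^T s dominates
rhok = 1.0 / np.dot(yk, sk)

A1 = I - sk[:, np.newaxis] * yk[np.newaxis, :] * rhok
A2 = I - yk[:, np.newaxis] * sk[np.newaxis, :] * rhok
Hkp1 = A1 @ Hk @ A2 + rhok * sk[:, np.newaxis] * sk[np.newaxis, :]

# secant condition of the BFGS inverse update
assert np.allclose(Hkp1 @ yk, sk)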