# Copyright (c) 2023, The University of Texas at Austin
# & Georgia Institute of Technology
#
# All Rights reserved.
# See file COPYRIGHT for details.
#
# This file is part of the SOUPy package. For more information see
# https://github.com/hippylib/soupy/
#
# SOUPy is free software; you can redistribute it and/or modify it under the
# terms of the GNU General Public License (as published by the Free
# Software Foundation) version 3.0 dated June 2007.
import math
from hippylib import ParameterList
from ..modeling.controlCostHessian import ControlCostHessian
from ..modeling.variables import CONTROL
from .cgSolverSteihaug import CGSolverSteihaug
[docs]
def LS_ParameterList():
"""
Generate a ParameterList for line search globalization. \
type :code:`LS_ParameterList().showMe()` for default values and their descriptions
"""
parameters = {}
parameters["c_armijo"] = [1e-4, "Armijo constant for sufficient reduction"]
parameters["max_backtracking_iter"] = [10, "Maximum number of backtracking iterations"]
return ParameterList(parameters)
[docs]
def TR_ParameterList():
"""
Generate a ParameterList for Trust Region globalization. \
type :code:`RT_ParameterList().showMe()` for default values and their descriptions
"""
parameters = {}
parameters["eta"] = [0.05, "Reject step if (actual reduction)/(predicted reduction) < eta"]
return ParameterList(parameters)
[docs]
def InexactNewtonCG_ParameterList():
"""
Generate a ParameterList for InexactNewtonCG. \
type :code:`InexactNewtonCG_ParameterList().showMe()` for default values and their descriptions
"""
parameters = {}
parameters["rel_tolerance"] = [1e-6, "we converge when sqrt(g,g)/sqrt(g_0,g_0) <= rel_tolerance"]
parameters["abs_tolerance"] = [1e-12, "we converge when sqrt(g,g) <= abs_tolerance"]
parameters["gdz_tolerance"] = [1e-18, "we converge when (g,dz) <= gdz_tolerance"]
parameters["max_iter"] = [20, "maximum number of iterations"]
parameters["globalization"] = ["LS", "Globalization technique: line search (LS) or trust region (TR)"]
parameters["print_level"] = [0, "Control verbosity of printing screen"]
parameters["GN_iter"] = [5, "Number of Gauss Newton iterations before switching to Newton"]
parameters["cg_coarse_tolerance"] = [.5, "Coarsest tolerance for the CG method (Eisenstat-Walker)"]
parameters["cg_max_iter"] = [100, "Maximum CG iterations"]
parameters["LS"] = [LS_ParameterList(), "Sublist containing LS globalization parameters"]
parameters["TR"] = [TR_ParameterList(), "Sublist containing TR globalization parameters"]
return ParameterList(parameters)
[docs]
class InexactNewtonCG:
"""
Inexact Newton-CG method to solve constrained optimization problems in the reduced parameter space.
The Newton system is solved inexactly by early termination of CG iterations via Eisenstat-Walker
(to prevent oversolving) and Steihaug (to avoid negative curvature) criteria.
Globalization is performed using one of the following methods:
- line search (LS) based on the armijo sufficient reduction condition; or
- trust region (TR) based on the prior preconditioned norm of the update direction.
The stopping criterion is based on a control on the norm of the gradient and a control of the
inner product between the gradient and the Newton direction.
The user must provide a model that describes the forward problem, cost functionals, and all the
derivatives for the gradient and the Hessian.
More specifically the model object should implement following methods:
- :code:`cost(z)` -> evaluate the cost functional
- :code:`grad(g)` -> evaluate the gradient and store to :code:`g`
- :code:`hessian(zhat, Hzhat)` -> apply the cost Hessian
Type :code:`help(Model)` for additional information
"""
termination_reasons = [
"Maximum number of Iteration reached", #0
"Norm of the gradient less than tolerance", #1
"Maximum number of backtracking reached", #2
"Norm of (g, dz) less than tolerance" #3
]
def __init__(self, cost_functional, parameters=InexactNewtonCG_ParameterList(),
preconditioner=None,
norm_weighting=None,
callback = None):
"""
Constructor for the SGD solver
:param cost_functional: The cost functional object
:type cost_functional: :py:class:`soupy.ControlCostFunctional` or similar
:param parameters: The parameters of the solver.
Type :code:`InexactNewtonCG_ParameterList().showMe()` for list of default parameters
and their descriptions.
:type parameters: :py:class:`hippylib.ParameterList`.
:param preconditioner: Optional preconditioner for the Hessian inverse with method :code:`mult`
and :code:`solve` Has optional method :code:`setLinearizationPoint`
:param norm_weighting: Weighting matrix for the norm with method :code:`mult`
"""
self.cost_functional = cost_functional
self.parameters = parameters
self.preconditioner = preconditioner
self.norm_weighting = norm_weighting
self.it = 0
self.converged = False
self.total_cg_iter = 0
self.ncalls = 0
self.reason = 0
self.final_grad_norm = 0
self.callback = callback
[docs]
def solve(self, z):
"""
Solve the constrained optimization problem with initial guess :code:`z`
:param z: The initial guess
:type z: :py:class:`dolfin.Vector`
:return: The optimization solution :code:`z` and a dictionary of information
.. note:: The input :code:`z` will be overwritten
"""
if self.cost_functional is None:
raise TypeError("Cost functional cannot be of type None")
if self.parameters["globalization"] == "LS":
return self._solve_ls(z)
elif self.parameters["globalization"] == "TR":
return self._solve_tr(z)
else:
raise ValueError(self.parameters["globalization"])
def _solve_ls(self, z):
"""
Solve the constrained optimization problem with initial guess :code:`x`.
"""
rel_tol = self.parameters["rel_tolerance"]
abs_tol = self.parameters["abs_tolerance"]
max_iter = self.parameters["max_iter"]
print_level = self.parameters["print_level"]
GN_iter = self.parameters["GN_iter"]
cg_coarse_tolerance = self.parameters["cg_coarse_tolerance"]
cg_max_iter = self.parameters["cg_max_iter"]
c_armijo = self.parameters["LS"]["c_armijo"]
max_backtracking_iter = self.parameters["LS"]["max_backtracking_iter"]
self.it = 0
self.converged = False
self.ncalls += 1
zhat = self.cost_functional.generate_vector(CONTROL)
zstar = self.cost_functional.generate_vector(CONTROL)
mg = self.cost_functional.generate_vector(CONTROL)
cost_old = self.cost_functional.cost(z, order=2)
while (self.it < max_iter) and (self.converged == False):
# Compute the gradient
gradnorm = self.cost_functional.grad(mg)
if self.it == 0:
gradnorm_ini = gradnorm
tol = max(abs_tol, gradnorm_ini*rel_tol)
# check if solution is reached
if (gradnorm < tol) and (self.it > 0):
self.converged = True
self.reason = 1
break
self.it += 1
tolcg = min(cg_coarse_tolerance, math.sqrt(gradnorm/gradnorm_ini))
cost_hessian = ControlCostHessian(self.cost_functional)
solver = CGSolverSteihaug()
solver.set_operator(cost_hessian)
if self.preconditioner is not None:
if hasattr(self.preconditioner, 'setLinearizationPoint'):
self.preconditioner.setLinearizationPoint(z)
solver.set_preconditioner(self.preconditioner)
solver.parameters["rel_tolerance"] = tolcg
solver.parameters["max_iter"] = cg_max_iter
solver.parameters["zero_initial_guess"] = True
solver.parameters["print_level"] = print_level-1
# Solve by CG
solver.solve(zhat, -mg)
self.total_cg_iter += cost_hessian.ncalls
alpha = 1.0
descent = 0
n_backtrack = 0
mg_zhat = mg.inner(zhat)
while descent == 0 and n_backtrack < max_backtracking_iter:
# Update the step with line search step
zstar.zero()
zstar.axpy(1.0, z)
zstar.axpy(alpha, zhat)
cost_new = self.cost_functional.cost(zstar, order=0)
# Check if armijo conditions are satisfied
if (cost_new < cost_old + alpha * c_armijo * mg_zhat) or (-mg_zhat <= self.parameters["gdz_tolerance"]):
# conditions are satisfied
cost_old = cost_new
descent = 1
z.zero()
z.axpy(1.0, zstar)
# Evaluate cost functional and prepare Hessian
self.cost_functional.cost(z, order=2)
else:
# backtrack
n_backtrack += 1
alpha *= 0.5
if(print_level >= 0) and (self.it == 1):
print( "\n{0:3} {1:3} {2:15} {3:15} {4:15} {5:15}".format(
"It", "cg_it", "cost", "(g,dz)", "||g||L2", "alpha", "tolcg") )
if print_level >= 0:
print( "{0:3d} {1:3d} {2:15e} {3:15e} {4:15e} {5:15e} {6:15e}".format(
self.it, cost_hessian.ncalls, cost_new, mg_zhat, gradnorm, alpha, tolcg) )
if self.callback:
self.callback(self.it, z)
if n_backtrack == max_backtracking_iter:
self.converged = False
self.reason = 2
break
if -mg_zhat <= self.parameters["gdz_tolerance"]:
self.converged = True
self.reason = 3
break
self.final_grad_norm = gradnorm
self.final_cost = cost_new
result = dict()
result["termination_reason"] = InexactNewtonCG.termination_reasons[self.reason]
result["final_cost"] = cost_new
result["final_grad_norm"] = gradnorm
return z, result
def _solve_tr(self,z):
rel_tol = self.parameters["rel_tolerance"]
abs_tol = self.parameters["abs_tolerance"]
max_iter = self.parameters["max_iter"]
print_level = self.parameters["print_level"]
GN_iter = self.parameters["GN_iter"]
cg_coarse_tolerance = self.parameters["cg_coarse_tolerance"]
cg_max_iter = self.parameters["cg_max_iter"]
eta_TR = self.parameters["TR"]["eta"]
delta_TR = None
self.it = 0
self.converged = False
self.ncalls += 1
zhat = self.cost_functional.generate_vector(CONTROL)
R_zhat = self.cost_functional.generate_vector(CONTROL)
zstar = self.cost_functional.generate_vector(CONTROL)
mg = self.cost_functional.generate_vector(CONTROL)
cost_old = self.cost_functional.cost(z, order=2)
while (self.it < max_iter) and (self.converged == False):
gradnorm = self.cost_functional.grad(mg)
if self.it == 0:
gradnorm_ini = gradnorm
tol = max(abs_tol, gradnorm_ini*rel_tol)
# check if solution is reached
if (gradnorm < tol) and (self.it > 0):
self.converged = True
self.reason = 1
break
self.it += 1
tolcg = min(cg_coarse_tolerance, math.sqrt(gradnorm/gradnorm_ini))
cost_hessian = ControlCostHessian(self.cost_functional)
solver = CGSolverSteihaug()
solver.set_operator(cost_hessian)
if self.preconditioner is not None:
if hasattr(self.preconditioner, 'setLinearizationPoint'):
self.preconditioner.setLinearizationPoint(z)
solver.set_preconditioner(self.preconditioner)
if self.it > 1:
solver.set_TR(delta_TR, self.norm_weighting)
solver.parameters["rel_tolerance"] = tolcg
self.parameters["max_iter"] = cg_max_iter
solver.parameters["zero_initial_guess"] = True
solver.parameters["print_level"] = print_level-1
solver.solve(zhat, -mg)
self.total_cg_iter += cost_hessian.ncalls
if self.it == 1:
if self.norm_weighting is not None:
self.norm_weighting.mult(zhat,R_zhat)
zhat_Rnorm = R_zhat.inner(zhat)
delta_TR = max(math.sqrt(zhat_Rnorm),1)
else:
# Use the l2 norm
zhat_Rnorm = zhat.inner(zhat)
delta_TR = max(math.sqrt(zhat_Rnorm),1)
zstar.zero()
zstar.axpy(1.0, z)
zstar.axpy(1.0, zhat)
cost_star = self.cost_functional.cost(zstar, order=0)
ACTUAL_RED = cost_old - cost_star
#Calculate Predicted Reduction
Hzhat = self.cost_functional.generate_vector(CONTROL)
Hzhat.zero()
cost_hessian.mult(zhat, Hzhat)
mg_zhat = mg.inner(zhat)
PRED_RED = -0.5*zhat.inner(Hzhat) - mg_zhat
# print( "PREDICTED REDUCTION", PRED_RED, "ACTUAL REDUCTION", ACTUAL_RED)
rho_TR = ACTUAL_RED/PRED_RED
# Nocedal and Wright Trust Region conditions (page 69)
if rho_TR < 0.25:
delta_TR *= 0.5
elif rho_TR > 0.75 and solver.reasonid == 3:
delta_TR *= 2.0
# print( "rho_TR", rho_TR, "eta_TR", eta_TR, "rho_TR > eta_TR?", rho_TR > eta_TR , "\n")
if rho_TR > eta_TR:
# accept the step and update
z.zero()
z.axpy(1.0, zstar)
cost_old = cost_star
accept_step = True
# Run cost and prepare for Hessian
self.cost_functional.cost(z, order=2)
else:
accept_step = False
if self.callback:
self.callback(self.it, z)
if(print_level >= 0) and (self.it == 1):
print( "\n{0:3} {1:3} {2:15} {3:15} {4:14} {5:14} {6:14} {7:11} {8:14}".format(
"It", "cg_it", "cost", "(g,dz)", "||g||L2", "TR Radius", "rho_TR", "Accept Step","tolcg") )
if print_level >= 0:
print( "{0:3d} {1:3d} {2:15e} {3:15e} {4:14e} {5:14e} {6:14e} {7:11} {8:14e}".format(
self.it, cost_hessian.ncalls, cost_old, mg_zhat, gradnorm, delta_TR, rho_TR, accept_step,tolcg) )
#TR radius can make this term arbitrarily small and prematurely exit.
if -mg_zhat <= self.parameters["gdz_tolerance"]:
self.converged = True
self.reason = 3
break
self.final_grad_norm = gradnorm
self.final_cost = cost_old
result = dict()
result["termination_reason"] = InexactNewtonCG.termination_reasons[self.reason]
result["final_cost"] = cost_old
result["final_grad_norm"] = gradnorm
return z, result