Source code for soupy.optimization.inexactNewtonCG

# Copyright (c) 2023, The University of Texas at Austin 
# & Georgia Institute of Technology
#
# All Rights reserved.
# See file COPYRIGHT for details.
#
# This file is part of the SOUPy package. For more information see
# https://github.com/hippylib/soupy/
#
# SOUPy is free software; you can redistribute it and/or modify it under the
# terms of the GNU General Public License (as published by the Free
# Software Foundation) version 3.0 dated June 2007.


import math

from hippylib import ParameterList 

from ..modeling.controlCostHessian import ControlCostHessian
from ..modeling.variables import CONTROL
from .cgSolverSteihaug import CGSolverSteihaug



[docs]
def LS_ParameterList():
    """
    Generate a ParameterList for line search globalization. \
        type :code:`LS_ParameterList().showMe()` for default values and their descriptions
    """
    parameters = {}
    parameters["c_armijo"]              = [1e-4, "Armijo constant for sufficient reduction"]
    parameters["max_backtracking_iter"] = [10, "Maximum number of backtracking iterations"]
    
    return ParameterList(parameters)



[docs]
def TR_ParameterList():
    """
    Generate a ParameterList for Trust Region globalization. \
        type :code:`RT_ParameterList().showMe()` for default values and their descriptions
    """
    parameters = {}
    parameters["eta"] = [0.05, "Reject step if (actual reduction)/(predicted reduction) < eta"]
    
    return ParameterList(parameters)



[docs]
def InexactNewtonCG_ParameterList():
    """
    Generate a ParameterList for InexactNewtonCG. \
        type :code:`InexactNewtonCG_ParameterList().showMe()` for default values and their descriptions
    """
    parameters = {}
    parameters["rel_tolerance"]         = [1e-6, "we converge when sqrt(g,g)/sqrt(g_0,g_0) <= rel_tolerance"]
    parameters["abs_tolerance"]         = [1e-12, "we converge when sqrt(g,g) <= abs_tolerance"]
    parameters["gdz_tolerance"]         = [1e-18, "we converge when (g,dz) <= gdz_tolerance"]
    parameters["max_iter"]              = [20, "maximum number of iterations"]
    parameters["globalization"]         = ["LS", "Globalization technique: line search (LS)  or trust region (TR)"]
    parameters["print_level"]           = [0, "Control verbosity of printing screen"]
    parameters["GN_iter"]               = [5, "Number of Gauss Newton iterations before switching to Newton"]
    parameters["cg_coarse_tolerance"]   = [.5, "Coarsest tolerance for the CG method (Eisenstat-Walker)"]
    parameters["cg_max_iter"]           = [100, "Maximum CG iterations"]
    parameters["LS"]                    = [LS_ParameterList(), "Sublist containing LS globalization parameters"]
    parameters["TR"]                    = [TR_ParameterList(), "Sublist containing TR globalization parameters"]
    
    return ParameterList(parameters)

  
    


[docs]
class InexactNewtonCG:
    
    """
    Inexact Newton-CG method to solve constrained optimization problems in the reduced parameter space.
    The Newton system is solved inexactly by early termination of CG iterations via Eisenstat-Walker
    (to prevent oversolving) and Steihaug (to avoid negative curvature) criteria.
    Globalization is performed using one of the following methods:

    - line search (LS) based on the armijo sufficient reduction condition; or
    - trust region (TR) based on the prior preconditioned norm of the update direction.

    The stopping criterion is based on a control on the norm of the gradient and a control of the
    inner product between the gradient and the Newton direction.
       
    The user must provide a model that describes the forward problem, cost functionals, and all the
    derivatives for the gradient and the Hessian.
    
    More specifically the model object should implement following methods:
        - :code:`cost(z)` -> evaluate the cost functional
        - :code:`grad(g)` -> evaluate the gradient and store to :code:`g`
        - :code:`hessian(zhat, Hzhat)` -> apply the cost Hessian
       
    Type :code:`help(Model)` for additional information
    """
    termination_reasons = [
                           "Maximum number of Iteration reached",      #0
                           "Norm of the gradient less than tolerance", #1
                           "Maximum number of backtracking reached",   #2
                           "Norm of (g, dz) less than tolerance"       #3
                           ]
    
    def __init__(self, cost_functional, parameters=InexactNewtonCG_ParameterList(), 
                preconditioner=None,
                norm_weighting=None,
                callback = None):
        """
        Constructor for the SGD solver

        :param cost_functional: The cost functional object
        :type cost_functional: :py:class:`soupy.ControlCostFunctional` or similar
        :param parameters: The parameters of the solver.
            Type :code:`InexactNewtonCG_ParameterList().showMe()` for list of default parameters
            and their descriptions.
        :type parameters: :py:class:`hippylib.ParameterList`.
        :param preconditioner: Optional preconditioner for the Hessian inverse with method :code:`mult`
            and :code:`solve` Has optional method :code:`setLinearizationPoint`
        :param norm_weighting: Weighting matrix for the norm with method :code:`mult` 
        """

        self.cost_functional = cost_functional
        self.parameters = parameters
        self.preconditioner = preconditioner
        self.norm_weighting = norm_weighting 

        self.it = 0
        self.converged = False
        self.total_cg_iter = 0
        self.ncalls = 0
        self.reason = 0
        self.final_grad_norm = 0
        self.callback = callback

        

[docs]
    def solve(self, z):
        """
        Solve the constrained optimization problem with initial guess :code:`z`

        :param z: The initial guess
        :type z: :py:class:`dolfin.Vector` 

        :return: The optimization solution :code:`z` and a dictionary of information

        .. note:: The input :code:`z` will be overwritten 
        """
        if self.cost_functional is None:
            raise TypeError("Cost functional cannot be of type None")
        if self.parameters["globalization"] == "LS":
            return self._solve_ls(z)
        elif self.parameters["globalization"] == "TR":
            return self._solve_tr(z)
        else:
            raise ValueError(self.parameters["globalization"])

        
    def _solve_ls(self, z):
        """
        Solve the constrained optimization problem with initial guess :code:`x`.
        """
        rel_tol = self.parameters["rel_tolerance"]
        abs_tol = self.parameters["abs_tolerance"]
        max_iter = self.parameters["max_iter"]
        print_level = self.parameters["print_level"]
        GN_iter = self.parameters["GN_iter"]
        cg_coarse_tolerance = self.parameters["cg_coarse_tolerance"]
        cg_max_iter         = self.parameters["cg_max_iter"]
        
        c_armijo = self.parameters["LS"]["c_armijo"]
        max_backtracking_iter = self.parameters["LS"]["max_backtracking_iter"]
        
        self.it = 0
        self.converged = False
        self.ncalls += 1
        
        zhat = self.cost_functional.generate_vector(CONTROL)    
        zstar = self.cost_functional.generate_vector(CONTROL)    
        mg = self.cost_functional.generate_vector(CONTROL)
                
        cost_old = self.cost_functional.cost(z, order=2)
        
        while (self.it < max_iter) and (self.converged == False):
            # Compute the gradient 
            gradnorm = self.cost_functional.grad(mg)
            
            if self.it == 0:
                gradnorm_ini = gradnorm
                tol = max(abs_tol, gradnorm_ini*rel_tol)
                
            # check if solution is reached
            if (gradnorm < tol) and (self.it > 0):
                self.converged = True
                self.reason = 1
                break
            
            self.it += 1
            
            tolcg = min(cg_coarse_tolerance, math.sqrt(gradnorm/gradnorm_ini))
            
            cost_hessian = ControlCostHessian(self.cost_functional)
            solver = CGSolverSteihaug()
            solver.set_operator(cost_hessian)

            if self.preconditioner is not None:
                if hasattr(self.preconditioner, 'setLinearizationPoint'):
                    self.preconditioner.setLinearizationPoint(z)
                solver.set_preconditioner(self.preconditioner)

            solver.parameters["rel_tolerance"] = tolcg
            solver.parameters["max_iter"] = cg_max_iter
            solver.parameters["zero_initial_guess"] = True
            solver.parameters["print_level"] = print_level-1

            # Solve by CG  
            solver.solve(zhat, -mg)
            self.total_cg_iter += cost_hessian.ncalls
            
            alpha = 1.0
            descent = 0
            n_backtrack = 0
            
            mg_zhat = mg.inner(zhat)
            
            while descent == 0 and n_backtrack < max_backtracking_iter:
                # Update the step with line search step 
                zstar.zero()
                zstar.axpy(1.0, z)
                zstar.axpy(alpha, zhat)
                cost_new = self.cost_functional.cost(zstar, order=0)
                  
                # Check if armijo conditions are satisfied
                if (cost_new < cost_old + alpha * c_armijo * mg_zhat) or (-mg_zhat <= self.parameters["gdz_tolerance"]):
                    # conditions are satisfied 
                    cost_old = cost_new
                    descent = 1
                    z.zero()
                    z.axpy(1.0, zstar)

                    # Evaluate cost functional and prepare Hessian 
                    self.cost_functional.cost(z, order=2)
                else:
                    # backtrack 
                    n_backtrack += 1
                    alpha *= 0.5
                            
            if(print_level >= 0) and (self.it == 1):
                print( "\n{0:3} {1:3} {2:15} {3:15} {4:15} {5:15}".format(
                      "It", "cg_it", "cost", "(g,dz)", "||g||L2", "alpha", "tolcg") )
                
            if print_level >= 0:
                print( "{0:3d} {1:3d} {2:15e} {3:15e} {4:15e} {5:15e} {6:15e}".format(
                        self.it, cost_hessian.ncalls, cost_new, mg_zhat, gradnorm, alpha, tolcg) )
                
            if self.callback:
                self.callback(self.it, z)
                
                
            if n_backtrack == max_backtracking_iter:
                self.converged = False
                self.reason = 2
                break
            
            if -mg_zhat <= self.parameters["gdz_tolerance"]:
                self.converged = True
                self.reason = 3
                break
                            
        self.final_grad_norm = gradnorm
        self.final_cost      = cost_new

        result = dict()
        result["termination_reason"] = InexactNewtonCG.termination_reasons[self.reason]
        result["final_cost"] = cost_new
        result["final_grad_norm"] = gradnorm

        return z, result 
    
    def _solve_tr(self,z):
        rel_tol = self.parameters["rel_tolerance"]
        abs_tol = self.parameters["abs_tolerance"]
        max_iter = self.parameters["max_iter"]
        print_level = self.parameters["print_level"]
        GN_iter = self.parameters["GN_iter"]
        cg_coarse_tolerance = self.parameters["cg_coarse_tolerance"]
        cg_max_iter         = self.parameters["cg_max_iter"]
        
        eta_TR = self.parameters["TR"]["eta"]
        delta_TR = None
        
        self.it = 0
        self.converged = False
        self.ncalls += 1
        
        zhat = self.cost_functional.generate_vector(CONTROL) 
        R_zhat = self.cost_functional.generate_vector(CONTROL)   
        zstar = self.cost_functional.generate_vector(CONTROL)
        mg = self.cost_functional.generate_vector(CONTROL)
        
        cost_old = self.cost_functional.cost(z, order=2)

        while (self.it < max_iter) and (self.converged == False):
            gradnorm = self.cost_functional.grad(mg)
            
            if self.it == 0:
                gradnorm_ini = gradnorm
                tol = max(abs_tol, gradnorm_ini*rel_tol)
                
            # check if solution is reached
            if (gradnorm < tol) and (self.it > 0):
                self.converged = True
                self.reason = 1
                break
            
            self.it += 1
            
            tolcg = min(cg_coarse_tolerance, math.sqrt(gradnorm/gradnorm_ini))
            
            cost_hessian = ControlCostHessian(self.cost_functional)
            solver = CGSolverSteihaug()
            solver.set_operator(cost_hessian)

            if self.preconditioner is not None:
                if hasattr(self.preconditioner, 'setLinearizationPoint'):
                    self.preconditioner.setLinearizationPoint(z)
                solver.set_preconditioner(self.preconditioner)

            if self.it > 1:
                solver.set_TR(delta_TR, self.norm_weighting)

            solver.parameters["rel_tolerance"] = tolcg
            self.parameters["max_iter"]        = cg_max_iter
            solver.parameters["zero_initial_guess"] = True
            solver.parameters["print_level"] = print_level-1
            
            solver.solve(zhat, -mg)
            self.total_cg_iter += cost_hessian.ncalls

            if self.it == 1:
                if self.norm_weighting is not None:
                    self.norm_weighting.mult(zhat,R_zhat)
                    zhat_Rnorm = R_zhat.inner(zhat)
                    delta_TR = max(math.sqrt(zhat_Rnorm),1)
                else:
                    # Use the l2 norm 
                    zhat_Rnorm = zhat.inner(zhat)
                    delta_TR = max(math.sqrt(zhat_Rnorm),1)

            zstar.zero()
            zstar.axpy(1.0, z)
            zstar.axpy(1.0, zhat)
            cost_star = self.cost_functional.cost(zstar, order=0)


            ACTUAL_RED = cost_old - cost_star
            #Calculate Predicted Reduction
            Hzhat = self.cost_functional.generate_vector(CONTROL)
            Hzhat.zero()
            cost_hessian.mult(zhat, Hzhat)
            mg_zhat = mg.inner(zhat)
            PRED_RED = -0.5*zhat.inner(Hzhat) - mg_zhat
            # print( "PREDICTED REDUCTION", PRED_RED, "ACTUAL REDUCTION", ACTUAL_RED)
            rho_TR = ACTUAL_RED/PRED_RED


            # Nocedal and Wright Trust Region conditions (page 69)
            if rho_TR < 0.25:
                delta_TR *= 0.5
            elif rho_TR > 0.75 and solver.reasonid == 3:
                delta_TR *= 2.0
            

            # print( "rho_TR", rho_TR, "eta_TR", eta_TR, "rho_TR > eta_TR?", rho_TR > eta_TR , "\n")
            if rho_TR > eta_TR:
                # accept the step and update 
                z.zero()
                z.axpy(1.0, zstar)
                cost_old = cost_star
                accept_step = True

                # Run cost and prepare for Hessian 
                self.cost_functional.cost(z, order=2)
            else:
                accept_step = False
                
            if self.callback:
                self.callback(self.it, z)
                
            if(print_level >= 0) and (self.it == 1):
                print( "\n{0:3} {1:3} {2:15} {3:15} {4:14} {5:14} {6:14} {7:11} {8:14}".format(
                      "It", "cg_it", "cost", "(g,dz)", "||g||L2", "TR Radius", "rho_TR", "Accept Step","tolcg") )
                
            if print_level >= 0:
                print( "{0:3d} {1:3d} {2:15e} {3:15e} {4:14e} {5:14e} {6:14e} {7:11} {8:14e}".format(
                        self.it, cost_hessian.ncalls, cost_old, mg_zhat, gradnorm, delta_TR, rho_TR, accept_step,tolcg) )

            #TR radius can make this term arbitrarily small and prematurely exit.
            if -mg_zhat <= self.parameters["gdz_tolerance"]:
                self.converged = True
                self.reason = 3
                break
                            
        self.final_grad_norm = gradnorm
        self.final_cost      = cost_old
        
        result = dict()
        result["termination_reason"] = InexactNewtonCG.termination_reasons[self.reason]
        result["final_cost"] = cost_old
        result["final_grad_norm"] = gradnorm

        return z, result