Source code for PEPit.examples.unconstrained_convex_minimization.gradient_descent_lc

import numpy as np
import scipy.optimize as optimize

from PEPit import PEP
from PEPit import Point

from PEPit.functions import SmoothStronglyConvexFunction
from PEPit.operators import SymmetricLinearOperator
from PEPit.operators import SkewSymmetricLinearOperator
from PEPit.operators import LinearOperator


def wc_gradient_descent_lc(mug, Lg, typeM, muM, LM, gamma, n, verbose=1):
    """
    Consider the convex minimization problem

    .. math:: g_\\star \\triangleq \\min_x g(Mx),

    where :math:`g` is an :math:`L_g`-smooth, :math:`\\mu_g`-strongly convex function and :math:`M` is a general,
    symmetric or skew-symmetric matrix with :math:`\\mu_M \\leqslant \\|M\\| \\leqslant L_M`.
    Note: for general and skew-symmetric matrices, :math:`\\mu_M` must be set to 0.

    This code computes a worst-case guarantee for **gradient descent** with fixed step-size :math:`\\gamma` on a
    function :math:`F` defined as the composition of a function :math:`G` and a linear operator :math:`M`.
    That is, it computes the smallest possible :math:`\\tau(n, \\mu_g, L_g, \\mu_M, L_M, \\gamma)` such that the guarantee

    .. math:: g(Mx_n) - g_\\star \\leqslant \\tau(n, \\mu_g, L_g, \\mu_M, L_M, \\gamma) \\|x_0 - x_\\star\\|^2

    is valid, where :math:`x_n` is the output of gradient descent run on :math:`F` with fixed step-size :math:`\\gamma`,
    where :math:`x_\\star` is a minimizer of :math:`F(x) = g(Mx)`, and where :math:`g_\\star = g(M x_\\star)`.

    In short, for given values of :math:`n`, :math:`\\mu_g`, :math:`L_g`, :math:`\\mu_M`, :math:`L_M`, and :math:`\\gamma`,
    :math:`\\tau(n, \\mu_g, L_g, \\mu_M, L_M, \\gamma)` is computed as the worst-case value of :math:`g(Mx_n)-g_\\star`
    when :math:`\\|x_0 - x_\\star\\|^2 \\leqslant 1`.

    **Algorithm**: Gradient descent on such a function is described by

    .. math:: x_{t+1} = x_t - \\gamma M^T \\nabla g(Mx_t),

    where :math:`\\gamma` is a step-size.

    **Theoretical guarantee**: When :math:`\\gamma \\leqslant \\frac{2}{L}`, :math:`0 \\leqslant \\mu_g \\leqslant L_g`,
    and :math:`0 \\leqslant \\mu_M \\leqslant L_M`, the following **tight** theoretical guarantee is **conjectured**
    in [1, Conjecture 4.2], and the associated **lower** theoretical guarantee is stated in [1, Conjecture 4.3]:

    .. math:: g(Mx_n)-g_\\star \\leqslant \\frac{L}{2} \\max\\left\\{\\frac{\\kappa_g {M^*}^2}{\\kappa_g - 1 + (1-\\kappa_g {M^*}^2 L \\gamma)^{-2n}}, (1-L\\gamma)^{2n} \\right\\} \\|x_0-x_\\star\\|^2,

    where :math:`L = L_g L_M^2`, :math:`\\kappa_g = \\frac{\\mu_g}{L_g}`, :math:`\\kappa_M = \\frac{\\mu_M}{L_M}`, and
    :math:`M^* = \\mathrm{proj}_{[\\kappa_M, 1]} \\left(\\sqrt{\\frac{h_0}{L\\gamma}}\\right)` with :math:`h_0` the solution of

    .. math:: (1-\\kappa_g)(1-\\kappa_g h_0)^{2n+1} = 1 - (2n+1)\\kappa_g h_0.

    **References**:

    `[1] N. Bousselmi, J. Hendrickx, F. Glineur (2023).
    Interpolation Conditions for Linear Operators and applications to Performance Estimation Problems.
    arXiv preprint
    <https://arxiv.org/pdf/2302.08781.pdf>`_

    Args:
        mug (float): the strong convexity parameter of :math:`g(y)`.
        Lg (float): the smoothness parameter of :math:`g(y)`.
        typeM (string): type of matrix :math:`M` ("gen", "sym" or "skew").
        muM (float): lower bound on :math:`\\|M\\|` (if typeM != "sym", then muM must be set to zero).
        LM (float): upper bound on :math:`\\|M\\|`.
        gamma (float): step-size.
        n (int): number of iterations.
        verbose (int): Level of information details to print.

                        - -1: No verbose at all.
                        - 0: This example's output.
                        - 1: This example's output + PEPit information.
                        - 2: This example's output + PEPit information + CVXPY details.

    Returns:
        pepit_tau (float): worst-case value
        theoretical_tau (float): theoretical value

    Example:
        >>> Lg = 3.; mug = 0.3
        >>> typeM = "sym"; LM = 1.; muM = 0.
        >>> L = Lg * LM ** 2
        >>> pepit_tau, theoretical_tau = wc_gradient_descent_lc(mug=mug, Lg=Lg, typeM=typeM, muM=muM, LM=LM, gamma=1 / L, n=3, verbose=1)
        (PEPit) Setting up the problem: size of the Gram matrix: 16x16
        (PEPit) Setting up the problem: performance measure is the minimum of 1 element(s)
        (PEPit) Setting up the problem: Adding initial conditions and general constraints ...
        (PEPit) Setting up the problem: initial conditions and general constraints (2 constraint(s) added)
        (PEPit) Setting up the problem: interpolation conditions for 3 function(s)
                    Function 1 : Adding 20 scalar constraint(s) ...
                    Function 1 : 20 scalar constraint(s) added
                    Function 2 : Adding 20 scalar constraint(s) ...
                    Function 2 : 20 scalar constraint(s) added
                    Function 2 : Adding 2 lmi constraint(s) ...
                     Size of PSD matrix 1: 5x5
                     Size of PSD matrix 2: 4x4
                    Function 2 : 2 lmi constraint(s) added
                    Function 3 : Adding 0 scalar constraint(s) ...
                    Function 3 : 0 scalar constraint(s) added
        (PEPit) Setting up the problem: additional constraints for 0 function(s)
        (PEPit) Compiling SDP
        (PEPit) Calling SDP solver
        (PEPit) Solver status: optimal (wrapper:cvxpy, solver: MOSEK); optimal value: 0.16380641240039548
        (PEPit) Primal feasibility check:
                The solver found a Gram matrix that is positive semi-definite up to an error of 8.261931885644884e-09
                All required PSD matrices are indeed positive semi-definite
                All the primal scalar constraints are verified up to an error of 1.7219835096598865e-08
        (PEPit) Dual feasibility check:
                The solver found a residual matrix that is positive semi-definite
                All the dual matrices to lmi are positive semi-definite
                All the dual scalar values associated with inequality constraints are nonnegative
        (PEPit) The worst-case guarantee proof is perfectly reconstituted up to an error of 5.282963298309745e-07
        (PEPit) Final upper bound (dual): 0.1638061803039686 and lower bound (primal example): 0.16380641240039548
        (PEPit) Duality gap: absolute: -2.320964268831549e-07 and relative: -1.4168946348439444e-06
        *** Example file: worst-case performance of gradient descent on g(Mx) with fixed step-sizes ***
            PEPit guarantee:         f(x_n)-f_* <= 0.163806 ||x_0 - x_*||^2
            Theoretical guarantee:   f(x_n)-f_* <= 0.16379 ||x_0 - x_*||^2

    """

    # Instantiate PEP
    problem = PEP()

    # Declare a strongly convex smooth function G and a linear operator M
    G = problem.declare_function(SmoothStronglyConvexFunction, mu=mug, L=Lg)
    if typeM == "gen":
        M = problem.declare_function(function_class=LinearOperator, L=LM)
    elif typeM == "sym":
        M = problem.declare_function(function_class=SymmetricLinearOperator, mu=muM, L=LM)
    elif typeM == "skew":
        M = problem.declare_function(function_class=SkewSymmetricLinearOperator, L=LM)
    else:
        raise ValueError("The argument 'typeM' must be 'gen', 'sym' or 'skew'."
                         " Got {}".format(typeM))

    # Define the starting point x0 of the algorithm
    x0 = problem.set_initial_point()

    # Define the unique optimal point xs = x_* of F(x) = g(Mx) and the corresponding function value fs = f_*
    xs = Point()                # xs
    ys = M.gradient(xs)         # ys = M xs
    us, fs = G.oracle(ys)       # us = \nabla g(ys) and fs = F(xs) = g(M xs)
    if typeM == "gen":
        vs = M.T.gradient(us)   # vs = M^T \nabla g(ys) = \nabla F(xs)
    elif typeM == "sym":
        vs = M.gradient(us)     # vs = M \nabla g(ys) = \nabla F(xs)
    elif typeM == "skew":
        vs = -M.gradient(us)    # vs = -M \nabla g(ys) = M^T \nabla g(ys) = \nabla F(xs)
    else:
        raise ValueError("The argument 'typeM' must be 'gen', 'sym' or 'skew'."
                         " Got {}".format(typeM))
    problem.add_constraint(vs ** 2 == 0)  # vs = \nabla F(xs) = 0

    # Set the initial constraint that is the distance between x0 and x^*
    problem.set_initial_condition((x0 - xs) ** 2 <= 1)

    # Run n steps of the GD method on F
    x = x0
    for _ in range(n):
        y = M.gradient(x)           # y = Mx
        u = G.gradient(y)           # u = \nabla g(y)
        if typeM == "gen":
            v = M.T.gradient(u)     # v = M^T u
        elif typeM == "sym":
            v = M.gradient(u)       # v = M u
        elif typeM == "skew":
            v = -M.gradient(u)      # v = M^T u = -M u
        else:
            raise ValueError("The argument 'typeM' must be 'gen', 'sym' or 'skew'."
                             " Got {}".format(typeM))
        x = x - gamma * v

    # Set the performance metric to the function values accuracy
    problem.set_performance_metric(G(M.gradient(x)) - fs)

    # Solve the PEP
    pepit_verbose = max(verbose, 0)
    pepit_tau = problem.solve(verbose=pepit_verbose)

    # Compute theoretical guarantee (for comparison)
    L = Lg * LM ** 2
    kappag = mug / Lg
    kappaM = muM / LM

    def fun(x):
        return (1 - (2 * n + 1) * x) * (1 - x) ** (-2 * n - 1) - 1 + kappag

    x = optimize.fsolve(fun, 0.5, xtol=1e-10, full_output=True)[0]
    h0 = x / kappag

    t = np.sqrt(h0[0] / (L * gamma))
    if t < kappaM:
        M_star = kappaM
    elif t > 1:
        M_star = 1
    else:
        M_star = t

    theoretical_tau = 0.5 * L * np.max((kappag * M_star ** 2 / (
            kappag - 1 + (1 - kappag * L * gamma * M_star ** 2) ** (-2 * n)), (1 - L * gamma) ** (2 * n)))

    # Print conclusion if required
    if verbose != -1:
        print('*** Example file: worst-case performance of gradient descent on g(Mx) with fixed step-sizes ***')
        print('\tPEPit guarantee:\t f(x_n)-f_* <= {:.6} ||x_0 - x_*||^2'.format(pepit_tau))
        print('\tTheoretical guarantee:\t f(x_n)-f_* <= {:.6} ||x_0 - x_*||^2'.format(theoretical_tau))

    # Return the worst-case guarantee of the evaluated method (and the reference theoretical value)
    return pepit_tau, theoretical_tau
if __name__ == "__main__": Lg = 3 mug = 0.3 typeM = "gen" LM = 1. muM = 0.1 pepit_tau, theoretical_tau = wc_gradient_descent_lc(mug=mug, Lg=Lg, typeM=typeM, muM=muM, LM=LM, gamma=1 / (Lg * LM ** 2), n=3, verbose=1)