bs_opt module

Interface to scipy.optimize:

  • ScalarFunctionAndGradient, ProximalFunction type aliases
  • an OptimizeParams class
  • check_gradient_scalar_function checks whether an analytical gradient is correct
  • acc_grad_descent: accelerated gradient descent for convex, possibly non-smooth functions
  • minimize_some_fixed: minimizes a function with some parameter values possibly fixed and some possibly within bounds, using L-BFGS-B
  • minimize_free: minimizes a function with some parameter values possibly within bounds
  • dfp_update, bfgs_update: compute updates to the inverse Hessian
  • armijo_alpha, barzilai_borwein_alpha: two ways of computing the step length
  • print_optimization_results, print_constrained_optimization_results format the results.

ProximalFunction = Callable[[np.ndarray, float, Iterable], np.ndarray] module-attribute

Type of a proximal operator h(x, t, pars); returns an array of the same shape as x.
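
For illustration, a soft-thresholding operator (the proximal map of the scaled L1 norm) has this shape; the function below is a hypothetical example, not part of the module:

import numpy as np

def prox_l1(x: np.ndarray, t: float, pars) -> np.ndarray:
    # proximal map of t * lam * ||.||_1 (soft thresholding);
    # pars is assumed to carry the penalty weight lam as its first element
    lam = pars[0]
    return np.sign(x) * np.maximum(np.abs(x) - t * lam, 0.0)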

ScalarFunctionAndGradient = Callable[[np.ndarray, Iterable, Optional[bool]], Union[float, tuple[float, np.ndarray]]] module-attribute

Type of f(v, args, gr) that returns a scalar value and also a gradient if gr is True.
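
For illustration, a quadratic objective conforming to this signature could look as follows (hypothetical, not part of the module):

import numpy as np

def quad_fg(v: np.ndarray, args, gr: bool = False):
    # 0.5 * v'Av, where args is assumed to carry the matrix A as its first element
    A = args[0]
    val = 0.5 * float(v @ A @ v)
    return (val, A @ v) if gr else val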

OptimizeParams dataclass

used for optimization; combines values, bounds and initial values for a parameter vector

Source code in bs_python_utils/bs_opt.py
@dataclass
class OptimizeParams:
    """used for optimization; combines values, bounds and initial values for a parameter vector
    """

    params_values: np.ndarray | None
    params_bounds: list[tuple] | None
    params_init: np.ndarray | None
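
For example (illustrative values only, assuming the package is importable as bs_python_utils.bs_opt), a two-parameter problem with box bounds could be described as:

import numpy as np
from bs_python_utils.bs_opt import OptimizeParams

params = OptimizeParams(
    params_values=None,                       # no current values yet
    params_bounds=[(0.0, 1.0), (-1.0, 1.0)],  # one (low, high) pair per parameter
    params_init=np.array([0.5, 0.0]),         # starting point for the optimizer
)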

acc_grad_descent(grad_f, x_init, other_params, prox_h=None, print_result=False, verbose=False, tol=1e-09, alpha=1.01, beta=0.5, maxiter=10000)

Minimizes (f+h) by Accelerated Gradient Descent where f is smooth and convex and h is convex.

By default h is zero. The convergence criterion is that the largest component of the absolute value of the gradient must be smaller than tol.

Parameters:

  • grad_f (Callable, required): gradient of f; should return an (n) array from an (n) array and the other_params object
  • x_init (ndarray, required): initial guess, shape (n)
  • other_params (Iterable, required): an iterable with additional parameters
  • prox_h (ProximalFunction | None, default None): proximal projector of h, if any
  • print_result (bool, default False): if True, print a convergence summary even when verbose is False
  • verbose (bool, default False): if True, print the remaining gradient error every 10 iterations
  • tol (float, default 1e-09): convergence criterion on grad_f
  • alpha (float, default 1.01): ceiling on the step multiplier
  • beta (float, default 0.5): floor on the step multiplier
  • maxiter (int, default 10000): maximum number of iterations

Returns:

  • tuple[ndarray, int]: the candidate solution and a convergence code (0 if successful, 1 if not).

Source code in bs_python_utils/bs_opt.py
@timeit
def acc_grad_descent(
    grad_f: Callable,
    x_init: np.ndarray,
    other_params: Iterable,
    prox_h: ProximalFunction | None = None,
    print_result: bool = False,
    verbose: bool = False,
    tol: float = 1e-9,
    alpha: float = 1.01,
    beta: float = 0.5,
    maxiter: int = 10000,
) -> tuple[np.ndarray, int]:
    """
    Minimizes `(f+h)` by Accelerated Gradient Descent where `f` is smooth and convex  and `h` is convex.

    By default `h` is zero.
    The convergence criterion is that the largest component of the absolute value of the gradient must be smaller than `tol`.


    Args:
        grad_f: gradient of `f`; should return an `(n)` array from an `(n)` array and the `other_params` object
        x_init: initial guess, shape `(n)`
        other_params: an iterable with additional parameters
        prox_h: proximal projector of `h`, if any
        print_result: if `True`, print a convergence summary even when `verbose` is `False`
        verbose: if `True`, print the remaining gradient error every 10 iterations
        tol: convergence criterion on grad_f
        alpha: ceiling on step multiplier
        beta: floor on step multiplier
        maxiter: max number of iterations

    Returns:
        the candidate solution, and a convergence code (0 if successful, 1 if not).
    """

    # no proximal projection if no h
    local_prox_h: ProximalFunction = prox_h if prox_h else lambda x, t, p: x

    x = x_init.copy()
    y = x_init.copy()

    #  for stepsize we use Barzilai-Borwein
    t, g = barzilai_borwein_alpha(grad_f, y, other_params)

    grad_err_init = npmaxabs(g)

    if verbose:
        print(f"agd: grad_err_init={grad_err_init}")

    n_iter = 0
    theta = 1.0

    while n_iter < maxiter:
        grad_err = npmaxabs(g)
        if grad_err < tol:
            break
        xi = x
        yi = y
        x = y - t * g
        x = local_prox_h(x, t, other_params)

        theta = 2.0 / (1.0 + sqrt(1.0 + 4.0 / theta / theta))

        if np.dot(y - x, x - xi) > 0:  # wrong direction, we restart
            x = xi
            y = x
            theta = 1.0
        else:
            y = x + (1.0 - theta) * (x - xi)

        gi = g
        g = grad_f(y, other_params)
        ndy = spla.norm(y - yi)
        t_hat = 0.5 * ndy * ndy / abs(np.dot(y - yi, gi - g))
        t = min(alpha * t, max(beta * t, t_hat))

        n_iter += 1

        if verbose and n_iter % 10 == 0:
            print(f" AGD with grad_err = {grad_err} after {n_iter} iterations")

    x_conv = y

    ret_code = 0 if grad_err < tol else 1

    if verbose or print_result:
        if ret_code == 0:
            print_stars(
                f" AGD converged with grad_err = {grad_err} after {n_iter} iterations"
            )
        else:
            print_stars(
                f" Problem in AGD: grad_err = {grad_err} after {n_iter} iterations"
            )

    return x_conv, ret_code
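
A minimal usage sketch, assuming we minimize f(x) = 0.5 * ||x - b||^2 (so that grad_f(x, other_params) = x - b) with no h term; the target b is illustrative:

import numpy as np
from bs_python_utils.bs_opt import acc_grad_descent

b = np.array([1.0, -2.0, 3.0])

def grad_f(x: np.ndarray, other_params) -> np.ndarray:
    # gradient of 0.5 * ||x - b||^2; other_params is unused in this sketch
    return x - b

x_sol, ret_code = acc_grad_descent(grad_f, np.zeros(3), other_params=[], tol=1e-8)
# x_sol should be close to b and ret_code should be 0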

armijo_alpha(f, x, d, args, alpha_init=1.0, beta=0.5, max_iter=100, tol=0.0)

Given a function f we are minimizing, computes the step size alpha to take in the direction d using the Armijo rule.

Parameters:

  • f (Callable, required): the function
  • x (ndarray, required): the current point
  • d (ndarray, required): the direction we are taking
  • args (Iterable, required): other arguments passed to f
  • alpha_init (float, default 1.0): the initial step size
  • beta (float, default 0.5): the step size reduction factor
  • max_iter (int, default 100): the maximum number of iterations
  • tol (float, default 0.0): a tolerance

Returns:

  • float: the step size alpha.

Source code in bs_python_utils/bs_opt.py
def armijo_alpha(
    f: Callable,
    x: np.ndarray,
    d: np.ndarray,
    args: Iterable,
    alpha_init: float = 1.0,
    beta: float = 0.5,
    max_iter: int = 100,
    tol: float = 0.0,
) -> float:
    """Given a function `f` we are minimizing, computes the step size `alpha`
    to take in the direction `d` using the Armijo rule.

    Args:
        f: the function
        x: the current point
        d: the direction we are taking
        args: other arguments passed to `f`
        alpha_init: the initial step size
        beta: the step size reduction factor
        max_iter: the maximum number of iterations
        tol: a tolerance

    Returns:
        the step size `alpha`.
    """
    f0 = f(x, args)
    alpha = alpha_init
    for _ in range(max_iter):
        x1 = x + alpha * d
        f1 = f(x1, args)
        if f1 < f0 + tol:
            return alpha
        alpha *= beta
    else:
        bs_error_abort("Too many iterations")
    return alpha
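
An illustrative call on a simple quadratic, taking the negative gradient as the search direction:

import numpy as np
from bs_python_utils.bs_opt import armijo_alpha

def f(x: np.ndarray, args) -> float:
    # 0.5 * ||x||^2; args is unused in this sketch
    return 0.5 * float(x @ x)

x = np.array([2.0, -1.0])
d = -x  # descent direction: the negative gradient at x
alpha = armijo_alpha(f, x, d, args=[])
# with these values the full step alpha = 1.0 already decreases f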

barzilai_borwein_alpha(grad_f, x, args)

Given a function f we are minimizing, computes the step size alpha to take in the opposite direction of the gradient using the Barzilai-Borwein rule.

Parameters:

  • grad_f (Callable, required): the gradient of the function
  • x (ndarray, required): the current point
  • args (Iterable, required): other arguments passed to f

Returns:

  • tuple[float, ndarray]: the step size alpha and the gradient g at the point x.

Source code in bs_python_utils/bs_opt.py
def barzilai_borwein_alpha(
    grad_f: Callable, x: np.ndarray, args: Iterable
) -> tuple[float, np.ndarray]:
    """Given a function `f` we are minimizing, computes the step size `alpha`
    to take in the opposite direction of the gradient using the Barzilai-Borwein rule.

    Args:
        grad_f: the gradient of the function
        x: the current point
        args: other arguments passed to `f`

    Returns:
        the step size `alpha` and the gradient `g` at the point `x`.
    """
    g = grad_f(x, args)
    alpha = 1.0 / spla.norm(g)
    x_hat = x - alpha * g
    g_hat = grad_f(x_hat, args)
    norm_dg = spla.norm(g - g_hat)
    norm_dg2 = norm_dg * norm_dg
    alpha = np.abs(np.dot(x - x_hat, g - g_hat)) / norm_dg2
    return alpha, g
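
An illustrative call on the quadratic 0.5 * x'Ax, whose gradient is A x (the matrix A below is made up for the sketch):

import numpy as np
from bs_python_utils.bs_opt import barzilai_borwein_alpha

A = np.diag([1.0, 4.0])

def grad_f(x: np.ndarray, args) -> np.ndarray:
    # gradient of 0.5 * x'Ax; args is unused in this sketch
    return A @ x

alpha, g = barzilai_borwein_alpha(grad_f, np.array([1.0, 1.0]), args=[])
# alpha is the Barzilai-Borwein step |s'y| / ||y||^2, with s = x - x_hat and y = g(x) - g(x_hat)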

bfgs_update(hess_inv, gradient_diff, x_diff)

Runs a BFGS update for the inverse Hessian.

Parameters:

  • hess_inv (ndarray, required): the current inverse Hessian
  • gradient_diff (ndarray, required): the update in the gradient
  • x_diff (ndarray, required): the update in x

Returns:

  • ndarray: the updated inverse Hessian.

Source code in bs_python_utils/bs_opt.py
def bfgs_update(
    hess_inv: np.ndarray, gradient_diff: np.ndarray, x_diff: np.ndarray
) -> np.ndarray:
    """Runs a BFGS update for the inverse Hessian.

    Args:
        hess_inv: the current inverse Hessian
        gradient_diff: the update in the gradient
        x_diff: the update in x

    Returns:
        the updated inverse Hessian.
    """
    xdt = x_diff.T
    xpg = xdt @ gradient_diff
    hdg = hess_inv @ gradient_diff
    dgp_hdg = gradient_diff.T @ hdg
    u = x_diff / xpg - hdg / dgp_hdg
    hess_inv_new = dfp_update(hess_inv, gradient_diff, x_diff) + dgp_hdg * (u @ u.T)
    return cast(np.ndarray, hess_inv_new)
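
A quick sketch; given the x_diff.T and the outer products in the source, the difference vectors are assumed to be passed as (n, 1) column vectors:

import numpy as np
from bs_python_utils.bs_opt import bfgs_update

hess_inv = np.eye(3)                               # current inverse Hessian approximation
x_diff = np.array([[0.1], [0.0], [-0.2]])          # step s_k as an (n, 1) column
gradient_diff = np.array([[0.3], [0.1], [-0.4]])   # gradient change y_k as an (n, 1) column

h_new = bfgs_update(hess_inv, gradient_diff, x_diff)
print(np.allclose(h_new @ gradient_diff, x_diff))  # secant condition: expected True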

check_gradient_scalar_function(fg, p, args, mode='central', EPS=1e-06)

Checks the gradient of a scalar function.

Parameters:

  • fg (ScalarFunctionAndGradient, required): should return the scalar value, and the gradient if its gr argument is True
  • p (ndarray, required): where we are checking the gradient
  • args (Iterable, required): other arguments passed to fg
  • mode (str, default 'central'): "central" or "forward" derivatives
  • EPS (float, default 1e-06): the step for forward or central derivatives

Returns:

  • TwoArrays: the analytic and numeric gradients.

Source code in bs_python_utils/bs_opt.py
def check_gradient_scalar_function(
    fg: ScalarFunctionAndGradient,
    p: np.ndarray,
    args: Iterable,
    mode: str = "central",
    EPS: float = 1e-6,
) -> TwoArrays:
    """Checks the gradient of a scalar function.

    Args:
        fg: should return the scalar value, and the gradient if its `gr` argument is `True`
        p: where we are checking the gradient
        args: other arguments passed to `fg`
        mode: "central" or "forward" derivatives
        EPS: the step for forward or central derivatives

    Returns:
        the analytic and numeric gradients.
    """
    f0, f_grad = fg(p, args, gr=True)  # type: ignore
    f0 = cast(float, f0)

    print_stars("checking the gradient: analytic, numeric")

    g = np.zeros_like(p)
    if mode == "central":
        for i, x in enumerate(p):
            p1 = p.copy()
            p1[i] = x + EPS
            f_plus = cast(float, fg(p1, args, gr=False))  # type: ignore
            p1[i] -= 2.0 * EPS
            f_minus = cast(float, fg(p1, args, gr=False))  # type: ignore
            g[i] = (f_plus - f_minus) / (2.0 * EPS)
            print(f"{i}: {f_grad[i]}, {g[i]}")
    elif mode == "forward":
        for i, x in enumerate(p):
            p1 = p.copy()
            p1[i] = x + EPS
            f_plus = cast(float, fg(p1, args, gr=False))  # type: ignore
            g[i] = (f_plus - f0) / EPS
            print(f"{i}: {f_grad[i]}, {g[i]}")
    else:
        bs_error_abort("mode must be 'central' or 'forward'")

    return f_grad, g
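
A usage sketch with a quadratic objective whose analytical gradient we want to verify (the matrix A is illustrative):

import numpy as np
from bs_python_utils.bs_opt import check_gradient_scalar_function

A = np.array([[2.0, 0.5], [0.5, 1.0]])

def fg(v: np.ndarray, args, gr: bool = False):
    # 0.5 * v'Av and, if requested, its gradient A v; args is unused in this sketch
    val = 0.5 * float(v @ A @ v)
    return (val, A @ v) if gr else val

analytic, numeric = check_gradient_scalar_function(fg, np.array([1.0, -1.0]), args=[])
# the printed columns (and the returned arrays) should agree to roughly EPS accuracy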

dfp_update(hess_inv, gradient_diff, x_diff)

Runs a DFP update for the inverse Hessian.

Parameters:

  • hess_inv (ndarray, required): the current inverse Hessian
  • gradient_diff (ndarray, required): the update in the gradient
  • x_diff (ndarray, required): the update in x

Returns:

  • ndarray: the updated inverse Hessian.

Source code in bs_python_utils/bs_opt.py
def dfp_update(
    hess_inv: np.ndarray, gradient_diff: np.ndarray, x_diff: np.ndarray
) -> np.ndarray:
    """Runs a DFP update for the inverse Hessian.

    Args:
        hess_inv: the current inverse Hessian
        gradient_diff: the update in the gradient
        x_diff: the update in x

    Returns:
        the updated inverse Hessian.
    """
    xdt = x_diff.T
    xxp = x_diff @ xdt
    xpg = xdt @ gradient_diff
    hdg = hess_inv @ gradient_diff
    dgp_hdg = gradient_diff.T @ hdg
    hess_inv_new = hess_inv + xxp / xpg - (hdg @ hdg.T) / dgp_hdg
    return cast(np.ndarray, hess_inv_new)
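
As with bfgs_update above, a quick check (column-vector convention assumed) that the updated inverse Hessian satisfies the secant condition:

import numpy as np
from bs_python_utils.bs_opt import dfp_update

hess_inv = np.eye(2)
x_diff = np.array([[0.5], [-0.1]])        # step s_k
gradient_diff = np.array([[0.2], [0.4]])  # gradient change y_k

h_new = dfp_update(hess_inv, gradient_diff, x_diff)
print(np.allclose(h_new @ gradient_diff, x_diff))  # expected: True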

minimize_free(obj, grad_obj, x_init, args, options=None, bounds=None)

Minimize a function on all of its variables, using BFGS or L-BFGS-B.

Parameters:

  • obj (Callable, required): the original function
  • grad_obj (Callable, required): its gradient function
  • x_init (ndarray, required): the initial values of all variables
  • args (Iterable, required): other parameters
  • options (dict | None, default None): any options passed on to scipy.optimize.minimize
  • bounds (list[tuple[float, float]] | None, default None): the bounds on all variables, if any

Returns:

  • Any: the result of optimization, on all variables.

Source code in bs_python_utils/bs_opt.py
def minimize_free(
    obj: Callable,
    grad_obj: Callable,
    x_init: np.ndarray,
    args: Iterable,
    options: dict | None = None,
    bounds: list[tuple[float, float]] | None = None,
) -> Any:
    """
    Minimize a function on all of its variables, using BFGS or L-BFGS-B.

    Args:
        obj: the original function
        grad_obj: its gradient function
        x_init: the initial values of all variables
        args: other parameters
        options: any options passed on to `scipy.optimize.minimize`
        bounds: the bounds on all variables, if any

    Returns:
        the result of optimization, on all variables.
    """
    if bounds is None:
        resopt = spopt.minimize(
            obj,
            x_init,
            method="BFGS",
            args=args,
            options=options,
            jac=grad_obj,
        )
    else:
        resopt = spopt.minimize(
            obj,
            x_init,
            method="L-BFGS-B",
            args=args,
            options=options,
            jac=grad_obj,
            bounds=bounds,
        )

    return resopt
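
A usage sketch on a simple quadratic; the objective follows the module's f(x, args) convention, with args passed as a list (scipy wraps a non-tuple args into a single extra argument) so that it reaches the functions as one object. The result can then be displayed with print_optimization_results, documented below; the target vector is illustrative:

import numpy as np
from bs_python_utils.bs_opt import minimize_free, print_optimization_results

def obj(x: np.ndarray, args) -> float:
    # half squared distance to the target args[0]
    return 0.5 * float((x - args[0]) @ (x - args[0]))

def grad_obj(x: np.ndarray, args) -> np.ndarray:
    return x - args[0]

target = np.array([1.0, 2.0])
resopt = minimize_free(obj, grad_obj, x_init=np.zeros(2), args=[target])
print_optimization_results(resopt, title="Minimizing a quadratic")
# resopt.x should be close to target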

minimize_some_fixed(obj, grad_obj, x_init, args, fixed_vars, fixed_vals, options=None, bounds=None, time_execution=False)

Minimize a function with some variables fixed, using L-BFGS-B.

Parameters:

  • obj (Callable, required): the original function
  • grad_obj (Callable, required): its gradient function
  • fixed_vars (list[int] | None, required): a list of the indices of the variables whose values are fixed
  • fixed_vals (ndarray | None, required): their fixed values
  • x_init (ndarray, required): the initial values of all variables (those of the fixed variables are not used)
  • args (Iterable, required): other parameters
  • options (dict | None, default None): any options passed on to scipy.optimize.minimize
  • bounds (list[tuple[float, float]] | None, default None): the bounds on all variables (those of the fixed variables are not used)
  • time_execution (bool, default False): if True, time the execution and print the result

Returns:

  • Any: the result of optimization, on all variables.

Source code in bs_python_utils/bs_opt.py
def minimize_some_fixed(
    obj: Callable,
    grad_obj: Callable,
    x_init: np.ndarray,
    args: Iterable,
    fixed_vars: list[int] | None,
    fixed_vals: np.ndarray | None,
    options: dict | None = None,
    bounds: list[tuple[float, float]] | None = None,
    time_execution: bool = False,
) -> Any:
    """
    Minimize a function with some variables fixed, using L-BFGS-B.

    Args:
        obj: the original function
        grad_obj: its gradient function
        fixed_vars: a list of the indices of the variables whose values are fixed
        fixed_vals: their fixed values
        x_init: the initial values of all variables (those of the fixed variables are not used)
        args: other parameters
        options: any options passed on to `scipy.optimize.minimize`
        bounds: the bounds on all variables (those of the fixed variables are not used)
        time_execution: if `True`, time the execution and print the result

    Returns:
        the result of optimization, on all variables.
    """
    if time_execution:
        time_start = perf_counter()
    if fixed_vars is None:
        resopt = spopt.minimize(
            obj,
            x_init,
            method="L-BFGS-B",
            args=args,
            options=options,
            jac=grad_obj,
            bounds=bounds,
        )
    else:
        fixed_vars = cast(list, fixed_vars)
        n_fixed = check_vector(fixed_vals)
        fixed_vals = cast(np.ndarray, fixed_vals)
        if len(fixed_vars) != n_fixed:
            bs_error_abort(
                f"fixed_vars has {len(fixed_vars)} indices but fixed_vals has"
                f" {fixed_vals.size} elements."
            )
        fixed_obj, fixed_grad_obj = _fix_some(obj, grad_obj, fixed_vars, fixed_vals)

        # drop fixed variables and the corresponding bounds
        n = len(x_init)
        not_fixed = np.ones(n, dtype=bool)
        not_fixed[fixed_vars] = False
        t_init = x_init[not_fixed]
        t_bounds = (
            None if bounds is None else [bounds[i] for i in range(n) if not_fixed[i]]
        )

        resopt = spopt.minimize(
            fixed_obj,
            t_init,
            method="L-BFGS-B",
            args=args,
            options=options,
            jac=fixed_grad_obj,
            bounds=t_bounds,
        )

        # now re-fill the values of the variables
        t = resopt.x
        t_full = list(t)
        for i, i_coef in enumerate(fixed_vars):
            t_full.insert(i_coef, fixed_vals[i])
        resopt.x = t_full

        # and re-fill the values of the gradients
        g = grad_obj(np.array(t_full), args)
        resopt.jac = g

    if time_execution:
        time_end = perf_counter()
        print(
            "\nTime elapsed in minimization:"
            f" {time_end - time_start: >.3f} seconds.\n"
        )

    return resopt
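
A sketch fixing the second of three variables at 0.0 while bounding all of them; as above, args is passed as a list and the objective follows the f(x, args) convention (the internal _fix_some wrapper, not shown on this page, is assumed to forward args unchanged):

import numpy as np
from bs_python_utils.bs_opt import minimize_some_fixed

def obj(x: np.ndarray, args) -> float:
    # half squared distance to the target args[0]
    return 0.5 * float((x - args[0]) @ (x - args[0]))

def grad_obj(x: np.ndarray, args) -> np.ndarray:
    return x - args[0]

target = np.array([1.0, 2.0, 3.0])
resopt = minimize_some_fixed(
    obj,
    grad_obj,
    x_init=np.zeros(3),
    args=[target],
    fixed_vars=[1],              # freeze the second coordinate...
    fixed_vals=np.array([0.0]),  # ...at the value 0.0
    bounds=[(0.0, 10.0)] * 3,
)
# resopt.x should be close to [1.0, 0.0, 3.0]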

print_constrained_optimization_results(resus, title='Minimizing', print_constr=False, print_multipliers=False)

print results from constrained optimization.

Parameters:

  • resus (OptimizeResult, required): results from optimization
  • title (str, default 'Minimizing'): a title
  • print_constr (bool, default False): if True, print the values of the constraints
  • print_multipliers (bool, default False): if True, print the values of the multipliers

Returns:

  • None: just prints.

Source code in bs_python_utils/bs_opt.py
def print_constrained_optimization_results(
    resus: spopt.OptimizeResult,
    title: str = "Minimizing",
    print_constr: bool = False,
    print_multipliers: bool = False,
) -> None:
    """print results from constrained optimization.

    Args:
        resus: results from optimization
        title: a title
        print_constr: if `True`, print the values of the constraints
        print_multipliers: if `True`, print the values of the multipliers

    Returns:
        just prints.
    """
    print_stars(title)
    print(resus.message)
    if resus.success:
        print(f"Successful! in {resus.nit} iterations")
        print(f" evaluated {resus.nfev} functions and {resus.njev} gradients")
        print(f"Minimized value is {resus.fun}")
        print(f"The Lagrangian norm is {resus.optimality}")
        print(f"The largest constraint violation is {resus.constr_violation}")
        if print_multipliers:
            print(f"The multipliers are {resus.v}")
        if print_constr:
            print(f"The values of the constraints are {resus.constr}")
    else:
        print_stars("Constrained minimization failed!")
    return

print_optimization_results(resus, title='Minimizing')

print results from unconstrained optimization.

Parameters:

  • resus (OptimizeResult, required): results from optimization
  • title (str, default 'Minimizing'): a title

Returns:

  • None: just prints.

Source code in bs_python_utils/bs_opt.py
def print_optimization_results(
    resus: spopt.OptimizeResult, title: str = "Minimizing"
) -> None:
    """print results from unconstrained optimization.

    Args:
        resus: results from optimization
        title: a title

    Returns:
        just prints.
    """
    print_stars(title)
    print(resus.message)
    if resus.success:
        print(f"Successful! in {resus.nit} iterations")
        print(f" evaluated {resus.nfev} functions and {resus.njev} gradients")
        print("\nMinimizer and grad_f:")
        print(np.column_stack((resus.x, resus.jac)))
        print(f"Minimized value is {resus.fun}")
    else:
        print_stars("Minimization failed!")
    return