Lecture 1: Optimization

In [1]:
import numpy as np
import scipy as sp
import scipy.optimize as opt
import matplotlib.pyplot as plt
In [2]:
# this requires clean.mplstyle
plt.style.use('clean')
In [3]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
In [4]:
import tools.visual as viz
import tools.solve as sol

Globals

In [5]:
xlim = -2, 2

Test Functions

In [6]:
def linear(x):
    return x
In [7]:
def quadratic(x):
    return x**2 - 4*x
In [8]:
def cubic(x):
    return x**3 - x - 1
In [9]:
fig, axs = plt.subplots(ncols=3, figsize=(12, 3.5))
viz.plot(linear, *xlim, zero=0, color='k', ax=axs[0])
viz.plot(quadratic, *xlim, zero=0, color='k', ax=axs[1])
viz.plot(cubic, *xlim, zero=0, color='k', ax=axs[2])
axs[0].set_title('Linear'); axs[1].set_title('Quadratic'); axs[2].set_title('Cubic');

Solving Methods

Here we just define the function that updates from one iteration to the next. Note that newton uses a finite difference approximation for the derivative.

In [10]:
def bisect(f, ab):
    a, b = ab
    c = 0.5*(a+b)
    fa, fb, fc = f(a), f(b), f(c)
    if fa*fc <= 0:
        b = c
    else:
        a = c
    return a, b
In [11]:
def secant(f, ab):
    a, b = ab
    fa, fb = f(a), f(b)
    m = (fb-fa)/(b-a)
    c = b - fb/m
    fc = f(c)
    return b, c
In [12]:
def newton(f, x, ϵ=1e-8):
     = x + ϵ
    fx,  = f(x), f()
    fp = (-fx)/ϵ
    xp = x - fx/fp
    return xp

These are all run with initial states the converge!

In [13]:
fig, axs = plt.subplots(nrows=3, ncols=3, figsize=(13, 10))
sol.solve_univar(bisect, linear, (-1, 1.7), xlim, ax=axs[0][0])
sol.solve_univar(secant, linear, (-1, 1.7), xlim, ax=axs[1][0])
sol.solve_univar(newton, linear, -1, xlim, ax=axs[2][0])
sol.solve_univar(bisect, quadratic, (-1, 1.7), xlim, ax=axs[0][1])
sol.solve_univar(secant, quadratic, (-1, 1.7), xlim, ax=axs[1][1])
sol.solve_univar(newton, quadratic, -1, xlim, ax=axs[2][1])
sol.solve_univar(bisect, cubic, (-1, 1.7), xlim, ax=axs[0][2])
sol.solve_univar(secant, cubic, (0.3, 1.7), xlim, ax=axs[1][2])
sol.solve_univar(newton, cubic, 1, xlim, ax=axs[2][2]);
axs[0][0].set_title('Linear'); axs[0][1].set_title('Quadratic'); axs[0][2].set_title('Cubic');
axs[0][0].set_ylabel('Bisection Method'); axs[1][0].set_ylabel('Secant Method');
axs[2][0].set_ylabel('Newton\'s Method');

For certain initial states, secant and newton go for some wild rides, but they usually seem to converge.

In [14]:
fig, axs = plt.subplots(ncols=2, figsize=(10, 3.5))
sol.solve_univar(newton, cubic, -1, (-4, 4), ax=axs[0])
sol.solve_univar(secant, cubic, (-2, -1), (-23, 10), ax=axs[1]);
axs[0].set_title('Newton\'s Method'); axs[1].set_title('Secant Method');
axs[0].set_ylabel('Cubic');

Multivariate Solving

I'm going to use the builtin scipy methods here, rather than rolling my own. Because the functions defined above will also operate pointwise on vectors, we could pass them to solve diagonal systems. But that's essentially equivalent to solving $N$ unidimensional system. Below is a simple N-dimensional system of equations implementing $$ f(x,y) = \left[\begin{array}{c} x^3 + x - y^3 - y \\ x^3 + x + y^3 + y \end{array}\right] = 0 $$ The first component has solutions of the form $x = y$, while the second component has solutions of the form $x = -y$, meaning the joint system has the unique solutions $(x, y) = (0, 0)$.

In [15]:
def goodcube(z):
    return z**3 + z
def multifun(xy):
    x, y = xy
    return goodcube(x) - goodcube(y), goodcube(x) + goodcube(y)
In [16]:
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(10, 7.5))
sol.solve_multivar(multifun, (0.1, -0.1), 'broyden1', axs=axs[0])
sol.solve_multivar(multifun, (0.1, -0.1), 'anderson', axs=axs[1]);
axs[0][0].set_title('Input Variable'); axs[0][1].set_title('Function Value');
axs[0][0].set_ylabel('Broyden\'s Method'); axs[1][0].set_ylabel('Anderson Mixing');

Optimization Methods

In [17]:
def basin1(x):
    return x**2
In [18]:
def valleys1(x):
    return x**4 - 3*x**2 - x + 2
In [19]:
fig, axs = plt.subplots(ncols=2, figsize=(10.5, 3))
viz.plot(basin1, *xlim, color='k', ax=axs[0])
viz.plot(valleys1, *xlim, color='k', ax=axs[1])
axs[0].set_title('Basin'); axs[1].set_title('Valleys');
In [20]:
phi = sp.constants.golden
def golden(f, ab):
    a, b = ab
    step = (b-a)/phi
    c = b - step
    d = a + step
    if f(c) < f(d):
        b = d
    else:
        a = c
    return a, b
In [21]:
def gradient(f, x, Δ=0.1, ϵ=1e-8):
     = x + ϵ
    fx,  = f(x), f()
    fp = (-fx)/ϵ
    xp = x - Δ*fp
    return xp
In [22]:
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(10, 7))
sol.optim_univar(golden, basin1, (-1.5, 1.5), xlim, vmin=None, ax=axs[0][0])
sol.optim_univar(gradient, basin1, 1, xlim, vmin=None, ax=axs[1][0])
sol.optim_univar(golden, valleys1, (-1.8, 1.8), xlim, vmin=None, ax=axs[0][1])
sol.optim_univar(gradient, valleys1, 0, xlim, vmin=None, ax=axs[1][1])
axs[0][0].set_title('Basin'); axs[0][1].set_title('Valleys');
axs[0][0].set_ylabel('Golden Section'); axs[1][0].set_ylabel('Gradient Descent');

Multivariate Optimization

Check out this list for some real spoilers: https://en.wikipedia.org/wiki/Test_functions_for_optimization

In [23]:
def basinN(x):
    return np.sum(basin1(x))
In [24]:
def valleysN(x):
    return np.sum(valleys1(x))
In [25]:
def himmelblau(xy):
    x, y = xy
    return (x**2+y-11)**2 + (x+y**2-7)**2
In [26]:
fig, axs = plt.subplots(nrows=2, ncols=3, figsize=(12, 7))
sol.optim_multivar(basinN, (1, 1), method='nelder-mead', ax=axs[0][0])
sol.optim_multivar(basinN, (1, 1), method='bfgs', ax=axs[1][0])
sol.optim_multivar(valleysN, (0, 0), method='nelder-mead', ax=axs[0][1])
sol.optim_multivar(valleysN, (0, 0), method='bfgs', ax=axs[1][1])
sol.optim_multivar(himmelblau, (0, 0), method='nelder-mead', ax=axs[0][2])
sol.optim_multivar(himmelblau, (0, 0), method='bfgs', ax=axs[1][2]);
axs[0][0].set_title('Basin 2D'); axs[0][1].set_title('Valleys 2D'); axs[0][2].set_title('Himmelblau');
axs[0][0].set_ylabel('Nelder-Mead'); axs[1][0].set_ylabel('BFGS');

Homotopy

We define our target function to be $$ f(x,p) = x^2 - 2px + 1 $$ this yields the solutions in $x$ for given $p$ of $$ x^{\ast}(p) = p \pm \sqrt{p^2-1} $$

In [27]:
func = lambda x, p: x**2 - 2*p*x + 1
In [28]:
xlim1 = -5, 5
kargs = dict(color='k', linewidth=1)
fig, ax = plt.subplots(figsize=(7, 4))
viz.plot(lambda x: func(x, -2), *xlim1, ax=ax, zero=None)
viz.plot(lambda x: func(x, -1), *xlim1, ax=ax, zero=None, **kargs)
viz.plot(lambda x: func(x, 0), *xlim1, ax=ax, zero=None, **kargs)
viz.plot(lambda x: func(x, 1), *xlim1, ax=ax, zero=None, **kargs)
viz.plot(lambda x: func(x, 2), *xlim1, ax=ax, zero=None)
viz.plot(lambda x: np.zeros_like(x), *xlim1, ax=ax, zero=None, linestyle='--', **kargs)
ax.set_ylim(-4, 4); ax.set_xlim(*xlim1);

We'll run our homotopy from $p = -2$ to $p = 2$. Note that lift pushes our starting parameter slightly into the complex domain. We could also have just pushed our homotopy equation slightly into complex.

In [29]:
lift = 0.01j
p0, p1 = -2.0, 2.0
In [30]:
par = lambda t: (1-t)*p0 + t*p1
hom0 = lambda x, t: func(x, par(t))
hom = lambda x, t: hom0(x, t) + lift
In [31]:
x0a = p0 - np.sqrt(p0**2-1)
x0b = p0 + np.sqrt(p0**2-1)
print(x0a, x0b)
-3.732050807568877 -0.2679491924311228

Now let's run the homotopy path

In [32]:
xp1, tp1 = sol.homotopy(hom, x0a, dt=0.01)
xp2, tp2 = sol.homotopy(hom, x0b, dt=0.01)
pp1, pp2 = par(tp1), par(tp2)

_, axs = plt.subplots(ncols=2, figsize=(10, 4), gridspec_kw=dict(width_ratios=[2, 1]))
viz.plot_homotopy(pp1, (xp1, xp2), rzero=pp1, czero=np.zeros_like(pp1), axs=axs)
axs[0].set_xlim(-4, 4); axs[1].set_xlim(-2, 2)
axs[0].set_ylim(-2, 2); axs[1].set_ylim(-2, 2);

Looks cool! Now see what happens when lift = 0.0.

In [33]:
xp1, tp1 = sol.homotopy(hom0, x0a, dt=0.01)
xp2, tp2 = sol.homotopy(hom0, x0b, dt=0.01)
pp1, pp2 = par(tp1), par(tp2)

_, axs = plt.subplots(ncols=2, figsize=(10, 4), gridspec_kw=dict(width_ratios=[2, 1]))
viz.plot_homotopy(pp1, (xp1, xp2), rzero=pp1, czero=np.zeros_like(pp1), axs=axs)
axs[0].set_xlim(-4, 4); axs[1].set_xlim(-2, 2)
axs[0].set_ylim(-2, 2); axs[1].set_ylim(-2, 2);

Yikes!

In [ ]: