Lecture 1: Optimization

In [1]:
import numpy as np
import scipy as sp
import scipy.optimize as opt
import matplotlib.pyplot as plt
In [2]:
# this requires clean.mplstyle
plt.style.use('clean')
In [3]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
In [4]:
import tools.visual as viz
import tools.solve as sol

Globals

In [5]:
xlim = -2, 2

Test Functions

In [6]:
def linear(x):
    return x
In [7]:
def quadratic(x):
    return x**2 - 4*x
In [8]:
def cubic(x):
    return x**3 - x - 1
In [9]:
fig, axs = plt.subplots(ncols=3, figsize=(12, 3.5))
viz.plot(linear, *xlim, zero=0, color='k', ax=axs[0])
viz.plot(quadratic, *xlim, zero=0, color='k', ax=axs[1])
viz.plot(cubic, *xlim, zero=0, color='k', ax=axs[2])
axs[0].set_title('Linear'); axs[1].set_title('Quadratic'); axs[2].set_title('Cubic');
No description has been provided for this image

Solving Methods

Here we just define the function that updates from one iteration to the next. Note that newton uses a finite difference approximation for the derivative.

In [10]:
def bisect(f, ab):
    a, b = ab
    c = 0.5*(a+b)
    fa, fb, fc = f(a), f(b), f(c)
    if fa*fc <= 0:
        b = c
    else:
        a = c
    return a, b
In [11]:
def secant(f, ab):
    a, b = ab
    fa, fb = f(a), f(b)
    m = (fb-fa)/(b-a)
    c = b - fb/m
    fc = f(c)
    return b, c
In [12]:
def newton(f, x, ϵ=1e-8):
     = x + ϵ
    fx,  = f(x), f()
    fp = (-fx)/ϵ
    xp = x - fx/fp
    return xp

These are all run with initial states the converge!

In [13]:
fig, axs = plt.subplots(nrows=3, ncols=3, figsize=(13, 10))
sol.solve_univar(bisect, linear, (-1, 1.7), xlim, ax=axs[0][0])
sol.solve_univar(secant, linear, (-1, 1.7), xlim, ax=axs[1][0])
sol.solve_univar(newton, linear, -1, xlim, ax=axs[2][0])
sol.solve_univar(bisect, quadratic, (-1, 1.7), xlim, ax=axs[0][1])
sol.solve_univar(secant, quadratic, (-1, 1.7), xlim, ax=axs[1][1])
sol.solve_univar(newton, quadratic, -1, xlim, ax=axs[2][1])
sol.solve_univar(bisect, cubic, (-1, 1.7), xlim, ax=axs[0][2])
sol.solve_univar(secant, cubic, (0.3, 1.7), xlim, ax=axs[1][2])
sol.solve_univar(newton, cubic, 1, xlim, ax=axs[2][2]);
axs[0][0].set_title('Linear'); axs[0][1].set_title('Quadratic'); axs[0][2].set_title('Cubic');
axs[0][0].set_ylabel('Bisection Method'); axs[1][0].set_ylabel('Secant Method');
axs[2][0].set_ylabel('Newton\'s Method');
No description has been provided for this image

For certain initial states, secant and newton go for some wild rides, but they usually seem to converge.

In [14]:
fig, axs = plt.subplots(ncols=2, figsize=(10, 3.5))
sol.solve_univar(newton, cubic, -1, (-4, 4), ax=axs[0])
sol.solve_univar(secant, cubic, (-2, -1), (-23, 10), ax=axs[1]);
axs[0].set_title('Newton\'s Method'); axs[1].set_title('Secant Method');
axs[0].set_ylabel('Cubic');
No description has been provided for this image

Multivariate Solving

I'm going to use the builtin scipy methods here, rather than rolling my own. Because the functions defined above will also operate pointwise on vectors, we could pass them to solve diagonal systems. But that's essentially equivalent to solving $N$ unidimensional system. Below is a simple N-dimensional system of equations implementing $$ f(x,y) = \left[\begin{array}{c} x^3 + x - y^3 - y \\ x^3 + x + y^3 + y \end{array}\right] = 0 $$ The first component has solutions of the form $x = y$, while the second component has solutions of the form $x = -y$, meaning the joint system has the unique solutions $(x, y) = (0, 0)$.

In [15]:
def goodcube(z):
    return z**3 + z
def multifun(xy):
    x, y = xy
    return goodcube(x) - goodcube(y), goodcube(x) + goodcube(y)
In [16]:
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(10, 7.5))
sol.solve_multivar(multifun, (0.1, -0.1), 'broyden1', axs=axs[0])
sol.solve_multivar(multifun, (0.1, -0.1), 'anderson', axs=axs[1]);
axs[0][0].set_title('Input Variable'); axs[0][1].set_title('Function Value');
axs[0][0].set_ylabel('Broyden\'s Method'); axs[1][0].set_ylabel('Anderson Mixing');
No description has been provided for this image

Optimization Methods

In [17]:
def basin1(x):
    return x**2
In [18]:
def valleys1(x):
    return x**4 - 3*x**2 - x + 2
In [19]:
fig, axs = plt.subplots(ncols=2, figsize=(10.5, 3))
viz.plot(basin1, *xlim, color='k', ax=axs[0])
viz.plot(valleys1, *xlim, color='k', ax=axs[1])
axs[0].set_title('Basin'); axs[1].set_title('Valleys');
No description has been provided for this image
In [20]:
phi = sp.constants.golden
def golden(f, ab):
    a, b = ab
    step = (b-a)/phi
    c = b - step
    d = a + step
    if f(c) < f(d):
        b = d
    else:
        a = c
    return a, b
In [21]:
def gradient(f, x, Δ=0.1, ϵ=1e-8):
     = x + ϵ
    fx,  = f(x), f()
    fp = (-fx)/ϵ
    xp = x - Δ*fp
    return xp
In [22]:
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(10, 7))
sol.optim_univar(golden, basin1, (-1.5, 1.5), xlim, vmin=None, ax=axs[0][0])
sol.optim_univar(gradient, basin1, 1, xlim, vmin=None, ax=axs[1][0])
sol.optim_univar(golden, valleys1, (-1.8, 1.8), xlim, vmin=None, ax=axs[0][1])
sol.optim_univar(gradient, valleys1, 0, xlim, vmin=None, ax=axs[1][1])
axs[0][0].set_title('Basin'); axs[0][1].set_title('Valleys');
axs[0][0].set_ylabel('Golden Section'); axs[1][0].set_ylabel('Gradient Descent');
No description has been provided for this image

Multivariate Optimization

Check out this list for some real spoilers: https://en.wikipedia.org/wiki/Test_functions_for_optimization

In [23]:
def basinN(x):
    return np.sum(basin1(x))
In [24]:
def valleysN(x):
    return np.sum(valleys1(x))
In [25]:
def himmelblau(xy):
    x, y = xy
    return (x**2+y-11)**2 + (x+y**2-7)**2
In [26]:
fig, axs = plt.subplots(nrows=2, ncols=3, figsize=(12, 7))
sol.optim_multivar(basinN, (1, 1), method='nelder-mead', ax=axs[0][0])
sol.optim_multivar(basinN, (1, 1), method='bfgs', ax=axs[1][0])
sol.optim_multivar(valleysN, (0, 0), method='nelder-mead', ax=axs[0][1])
sol.optim_multivar(valleysN, (0, 0), method='bfgs', ax=axs[1][1])
sol.optim_multivar(himmelblau, (0, 0), method='nelder-mead', ax=axs[0][2])
sol.optim_multivar(himmelblau, (0, 0), method='bfgs', ax=axs[1][2]);
axs[0][0].set_title('Basin 2D'); axs[0][1].set_title('Valleys 2D'); axs[0][2].set_title('Himmelblau');
axs[0][0].set_ylabel('Nelder-Mead'); axs[1][0].set_ylabel('BFGS');
No description has been provided for this image

Homotopy

We define our target function to be $$ f(x,p) = x^2 - 2px + 1 $$ this yields the solutions in $x$ for given $p$ of $$ x^{\ast}(p) = p \pm \sqrt{p^2-1} $$

In [27]:
func = lambda x, p: x**2 - 2*p*x + 1
In [28]:
xlim1 = -5, 5
kargs = dict(color='k', linewidth=1)
fig, ax = plt.subplots(figsize=(7, 4))
viz.plot(lambda x: func(x, -2), *xlim1, ax=ax, zero=None)
viz.plot(lambda x: func(x, -1), *xlim1, ax=ax, zero=None, **kargs)
viz.plot(lambda x: func(x, 0), *xlim1, ax=ax, zero=None, **kargs)
viz.plot(lambda x: func(x, 1), *xlim1, ax=ax, zero=None, **kargs)
viz.plot(lambda x: func(x, 2), *xlim1, ax=ax, zero=None)
viz.plot(lambda x: np.zeros_like(x), *xlim1, ax=ax, zero=None, linestyle='--', **kargs)
ax.set_ylim(-4, 4); ax.set_xlim(*xlim1);
No description has been provided for this image

We'll run our homotopy from $p = -2$ to $p = 2$. Note that lift pushes our starting parameter slightly into the complex domain. We could also have just pushed our homotopy equation slightly into complex.

In [29]:
lift = 0.01j
p0, p1 = -2.0, 2.0
In [30]:
par = lambda t: (1-t)*p0 + t*p1
hom0 = lambda x, t: func(x, par(t))
hom = lambda x, t: hom0(x, t) + lift
In [31]:
x0a = p0 - np.sqrt(p0**2-1)
x0b = p0 + np.sqrt(p0**2-1)
print(x0a, x0b)
-3.732050807568877 -0.2679491924311228

Now let's run the homotopy path

In [32]:
xp1, tp1 = sol.homotopy(hom, x0a, dt=0.01)
xp2, tp2 = sol.homotopy(hom, x0b, dt=0.01)
pp1, pp2 = par(tp1), par(tp2)

_, axs = plt.subplots(ncols=2, figsize=(10, 4), gridspec_kw=dict(width_ratios=[2, 1]))
viz.plot_homotopy(pp1, (xp1, xp2), rzero=pp1, czero=np.zeros_like(pp1), axs=axs)
axs[0].set_xlim(-4, 4); axs[1].set_xlim(-2, 2)
axs[0].set_ylim(-2, 2); axs[1].set_ylim(-2, 2);
No description has been provided for this image

Looks cool! Now see what happens when lift = 0.0.

In [33]:
xp1, tp1 = sol.homotopy(hom0, x0a, dt=0.01)
xp2, tp2 = sol.homotopy(hom0, x0b, dt=0.01)
pp1, pp2 = par(tp1), par(tp2)

_, axs = plt.subplots(ncols=2, figsize=(10, 4), gridspec_kw=dict(width_ratios=[2, 1]))
viz.plot_homotopy(pp1, (xp1, xp2), rzero=pp1, czero=np.zeros_like(pp1), axs=axs)
axs[0].set_xlim(-4, 4); axs[1].set_xlim(-2, 2)
axs[0].set_ylim(-2, 2); axs[1].set_ylim(-2, 2);
No description has been provided for this image

Yikes!

In [ ]: