import numpy as np
import scipy as sp
import scipy.optimize as opt
import scipy.constants  # needed so sp.constants.golden resolves below
import matplotlib.pyplot as plt
# this requires clean.mplstyle
plt.style.use('clean')
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import tools.visual as viz
import tools.solve as sol
xlim = -2, 2
def linear(x):
    return x

def quadratic(x):
    return x**2 - 4*x

def cubic(x):
    return x**3 - x - 1
fig, axs = plt.subplots(ncols=3, figsize=(12, 3.5))
viz.plot(linear, *xlim, zero=0, color='k', ax=axs[0])
viz.plot(quadratic, *xlim, zero=0, color='k', ax=axs[1])
viz.plot(cubic, *xlim, zero=0, color='k', ax=axs[2])
axs[0].set_title('Linear'); axs[1].set_title('Quadratic'); axs[2].set_title('Cubic');
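For reference, we can pin down the roots these methods should find with `scipy.optimize.brentq` (the cubic's only real root is approximately 1.3247):

print(opt.brentq(linear, -1, 1))     # 0.0
print(opt.brentq(quadratic, -1, 1))  # 0.0 (the only root inside the plot window)
print(opt.brentq(cubic, 1, 2))       # ~1.3247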
Here we just define, for each method, the function that updates the state from one iteration to the next; note that `newton` uses a finite-difference approximation for the derivative. A sketch of the kind of driver loop that applies these updates follows the definitions below.
def bisect(f, ab):
    # keep a bracket [a, b] containing a sign change and halve it each step
    a, b = ab
    c = 0.5*(a+b)
    fa, fc = f(a), f(c)
    if fa*fc <= 0:
        b = c
    else:
        a = c
    return a, b
def secant(f, ab):
    # fit a line through the two most recent iterates and jump to its root
    a, b = ab
    fa, fb = f(a), f(b)
    m = (fb-fa)/(b-a)
    c = b - fb/m
    return b, c
def newton(f, x, ϵ=1e-8):
    # Newton step with a forward-difference approximation to f'(x)
    xϵ = x + ϵ
    fx, fϵ = f(x), f(xϵ)
    fp = (fϵ-fx)/ϵ
    xp = x - fx/fp
    return xp
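The `sol.solve_univar` calls below handle the iteration and plotting. That module isn't shown here, so as a minimal sketch of the kind of driver it might use (the `tol` and `max_iter` names are purely illustrative):

def iterate(update, f, state, tol=1e-12, max_iter=100):
    # apply the one-step update until the state stops moving
    for _ in range(max_iter):
        new = update(f, state)
        if np.max(np.abs(np.subtract(new, state))) < tol:
            return new
        state = new
    return state

The same loop handles the bracketing methods (tuple state) and `newton` (scalar state), and it works just as well for the optimization updates later on.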
These are all run with initial states that converge!
fig, axs = plt.subplots(nrows=3, ncols=3, figsize=(13, 10))
sol.solve_univar(bisect, linear, (-1, 1.7), xlim, ax=axs[0][0])
sol.solve_univar(secant, linear, (-1, 1.7), xlim, ax=axs[1][0])
sol.solve_univar(newton, linear, -1, xlim, ax=axs[2][0])
sol.solve_univar(bisect, quadratic, (-1, 1.7), xlim, ax=axs[0][1])
sol.solve_univar(secant, quadratic, (-1, 1.7), xlim, ax=axs[1][1])
sol.solve_univar(newton, quadratic, -1, xlim, ax=axs[2][1])
sol.solve_univar(bisect, cubic, (-1, 1.7), xlim, ax=axs[0][2])
sol.solve_univar(secant, cubic, (0.3, 1.7), xlim, ax=axs[1][2])
sol.solve_univar(newton, cubic, 1, xlim, ax=axs[2][2]);
axs[0][0].set_title('Linear'); axs[0][1].set_title('Quadratic'); axs[0][2].set_title('Cubic');
axs[0][0].set_ylabel('Bisection Method'); axs[1][0].set_ylabel('Secant Method');
axs[2][0].set_ylabel('Newton\'s Method');
For certain initial states, `secant` and `newton` go for some wild rides, but they usually seem to converge.
fig, axs = plt.subplots(ncols=2, figsize=(10, 3.5))
sol.solve_univar(newton, cubic, -1, (-4, 4), ax=axs[0])
sol.solve_univar(secant, cubic, (-2, -1), (-23, 10), ax=axs[1]);
axs[0].set_title('Newton\'s Method'); axs[1].set_title('Secant Method');
axs[0].set_ylabel('Cubic');
I'm going to use the built-in `scipy` methods here, rather than rolling my own. Because the functions defined above also operate pointwise on vectors, we could pass them vectors to solve diagonal systems, but that's essentially equivalent to solving $N$ unidimensional systems. Below is a simple two-dimensional system of equations:
$$ f(x,y) = \left[\begin{array}{c}
x^3 + x - y^3 - y \\
x^3 + x + y^3 + y
\end{array}\right] = 0 $$
The first component has solutions of the form $x = y$, while the second component has solutions of the form $x = -y$ (the map $z \mapsto z^3 + z$ is odd and strictly increasing), so the joint system has the unique solution $(x, y) = (0, 0)$.
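As an aside on the diagonal-system point above: because `newton` uses only elementwise NumPy operations, handing it a vector of starting points runs $N$ independent iterations in a single call. A small sketch, reusing `cubic` from earlier:

x = np.array([1.0, 1.5, 2.0])
for _ in range(50):
    x = newton(cubic, x)  # each entry is an independent Newton iteration
print(x)  # all entries converge to the cubic's real root, ~1.3247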
def goodcube(z):
    # odd and strictly increasing, so goodcube(x) = goodcube(y) implies x = y
    return z**3 + z

def multifun(xy):
    x, y = xy
    return goodcube(x) - goodcube(y), goodcube(x) + goodcube(y)
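Since `sol.solve_multivar` presumably wraps scipy's solvers, here's a quick cross-check calling `scipy.optimize.root` directly with the same starting point and method:

res = opt.root(multifun, (0.1, -0.1), method='broyden1')
print(res.x)  # should land near the unique solution (0, 0)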
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(10, 7.5))
sol.solve_multivar(multifun, (0.1, -0.1), 'broyden1', axs=axs[0])
sol.solve_multivar(multifun, (0.1, -0.1), 'anderson', axs=axs[1]);
axs[0][0].set_title('Input Variable'); axs[0][1].set_title('Function Value');
axs[0][0].set_ylabel('Broyden\'s Method'); axs[1][0].set_ylabel('Anderson Mixing');
def basin1(x):
    return x**2

def valleys1(x):
    return x**4 - 3*x**2 - x + 2
fig, axs = plt.subplots(ncols=2, figsize=(10.5, 3))
viz.plot(basin1, *xlim, color='k', ax=axs[0])
viz.plot(valleys1, *xlim, color='k', ax=axs[1])
axs[0].set_title('Basin'); axs[1].set_title('Valleys');
phi = sp.constants.golden
def golden(f, ab):
    # shrink the bracket [a, b] by a factor of 1/phi each step, keeping
    # the interior point with the smaller value inside the new bracket
    a, b = ab
    step = (b-a)/phi
    c = b - step
    d = a + step
    if f(c) < f(d):
        b = d
    else:
        a = c
    return a, b
def gradient(f, x, Δ=0.1, ϵ=1e-8):
    # fixed-step gradient descent with a forward-difference derivative
    xϵ = x + ϵ
    fx, fϵ = f(x), f(xϵ)
    fp = (fϵ-fx)/ϵ
    xp = x - Δ*fp
    return xp
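A quick sanity check of both updates on `basin1`, iterating by hand (the loop counts here are arbitrary):

# golden section: the bracket shrinks by a factor of 1/phi per step
ab = (-1.5, 1.5)
for _ in range(40):
    ab = golden(basin1, ab)
print(0.5*(ab[0] + ab[1]))  # ~0, the minimizer

# gradient descent: on x**2 the update is x - 0.1*2x = 0.8*x, geometric decay
x = 1.0
for _ in range(50):
    x = gradient(basin1, x)
print(x)  # ~0 as well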
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(10, 7))
sol.optim_univar(golden, basin1, (-1.5, 1.5), xlim, vmin=None, ax=axs[0][0])
sol.optim_univar(gradient, basin1, 1, xlim, vmin=None, ax=axs[1][0])
sol.optim_univar(golden, valleys1, (-1.8, 1.8), xlim, vmin=None, ax=axs[0][1])
sol.optim_univar(gradient, valleys1, 0, xlim, vmin=None, ax=axs[1][1])
axs[0][0].set_title('Basin'); axs[0][1].set_title('Valleys');
axs[0][0].set_ylabel('Golden Section'); axs[1][0].set_ylabel('Gradient Descent');
Check out this list for some real spoilers: https://en.wikipedia.org/wiki/Test_functions_for_optimization
def basinN(x):
    return np.sum(basin1(x))

def valleysN(x):
    return np.sum(valleys1(x))

def himmelblau(xy):
    x, y = xy
    return (x**2+y-11)**2 + (x+y**2-7)**2
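Himmelblau's function has four global minima, all with value zero. As a cross-check on the plots below, `scipy.optimize.minimize` started from the same point should find one of them:

res = opt.minimize(himmelblau, (0, 0), method='BFGS')
print(res.x, res.fun)  # one of the four minima, with value ~0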
fig, axs = plt.subplots(nrows=2, ncols=3, figsize=(12, 7))
sol.optim_multivar(basinN, (1, 1), method='nelder-mead', ax=axs[0][0])
sol.optim_multivar(basinN, (1, 1), method='bfgs', ax=axs[1][0])
sol.optim_multivar(valleysN, (0, 0), method='nelder-mead', ax=axs[0][1])
sol.optim_multivar(valleysN, (0, 0), method='bfgs', ax=axs[1][1])
sol.optim_multivar(himmelblau, (0, 0), method='nelder-mead', ax=axs[0][2])
sol.optim_multivar(himmelblau, (0, 0), method='bfgs', ax=axs[1][2]);
axs[0][0].set_title('Basin 2D'); axs[0][1].set_title('Valleys 2D'); axs[0][2].set_title('Himmelblau');
axs[0][0].set_ylabel('Nelder-Mead'); axs[1][0].set_ylabel('BFGS');
We define our target function to be $$ f(x,p) = x^2 - 2px + 1 $$ Completing the square gives $(x-p)^2 = p^2 - 1$, so the solutions in $x$ for a given $p$ are $$ x^{\ast}(p) = p \pm \sqrt{p^2-1} $$ which are real only when $|p| \geq 1$.
func = lambda x, p: x**2 - 2*p*x + 1
xlim1 = -5, 5
kargs = dict(color='k', linewidth=1)
fig, ax = plt.subplots(figsize=(7, 4))
viz.plot(lambda x: func(x, -2), *xlim1, ax=ax, zero=None)
viz.plot(lambda x: func(x, -1), *xlim1, ax=ax, zero=None, **kargs)
viz.plot(lambda x: func(x, 0), *xlim1, ax=ax, zero=None, **kargs)
viz.plot(lambda x: func(x, 1), *xlim1, ax=ax, zero=None, **kargs)
viz.plot(lambda x: func(x, 2), *xlim1, ax=ax, zero=None)
viz.plot(lambda x: np.zeros_like(x), *xlim1, ax=ax, zero=None, linestyle='--', **kargs)
ax.set_ylim(-4, 4); ax.set_xlim(*xlim1);
We'll run our homotopy from $p = -2$ to $p = 2$. Note that `lift` pushes our homotopy equation slightly into the complex domain, which lets the solution path steer around the region $|p| < 1$ where the roots are not real. We could also have just pushed the starting value slightly into the complex plane.
lift = 0.01j
p0, p1 = -2.0, 2.0
par = lambda t: (1-t)*p0 + t*p1
hom0 = lambda x, t: func(x, par(t))
hom = lambda x, t: hom0(x, t) + lift
x0a = p0 - np.sqrt(p0**2-1)
x0b = p0 + np.sqrt(p0**2-1)
print(x0a, x0b)
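The `sol.homotopy` routine isn't shown here; as a rough sketch of what such a routine might do under the same interface (the name and details are illustrative), a corrector-only continuation steps $t$ from 0 to 1 and re-solves for a complex $x$ with Newton at each step:

def homotopy_sketch(h, x0, dt=0.01):
    # hypothetical stand-in for sol.homotopy: track a root of h(x, t) = 0
    # as t goes from 0 to 1, polishing x with complex Newton steps
    x, xs, ts = complex(x0), [complex(x0)], [0.0]
    for t in np.arange(dt, 1 + dt/2, dt):
        for _ in range(20):
            fp = (h(x + 1e-8, t) - h(x, t))/1e-8  # finite-difference f'
            x = x - h(x, t)/fp
        xs.append(x)
        ts.append(t)
    return np.array(xs), np.array(ts)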
Now let's run the homotopy paths from both starting roots.
xp1, tp1 = sol.homotopy(hom, x0a, dt=0.01)
xp2, tp2 = sol.homotopy(hom, x0b, dt=0.01)
pp1, pp2 = par(tp1), par(tp2)
_, axs = plt.subplots(ncols=2, figsize=(10, 4), gridspec_kw=dict(width_ratios=[2, 1]))
viz.plot_homotopy(pp1, (xp1, xp2), rzero=pp1, czero=np.zeros_like(pp1), axs=axs)
axs[0].set_xlim(-4, 4); axs[1].set_xlim(-2, 2)
axs[0].set_ylim(-2, 2); axs[1].set_ylim(-2, 2);
Looks cool! Now see what happens when `lift = 0.0`.
xp1, tp1 = sol.homotopy(hom0, x0a, dt=0.01)
xp2, tp2 = sol.homotopy(hom0, x0b, dt=0.01)
pp1, pp2 = par(tp1), par(tp2)
_, axs = plt.subplots(ncols=2, figsize=(10, 4), gridspec_kw=dict(width_ratios=[2, 1]))
viz.plot_homotopy(pp1, (xp1, xp2), rzero=pp1, czero=np.zeros_like(pp1), axs=axs)
axs[0].set_xlim(-4, 4); axs[1].set_xlim(-2, 2)
axs[0].set_ylim(-2, 2); axs[1].set_ylim(-2, 2);
Yikes! With no imaginary component, the two real roots collide at $p = -1$ (where $p^2 - 1 = 0$) and the path has nowhere to go.