import numpy as np
import matplotlib.pyplot as plt
#Plotting functions
delta = 0.1
def plot_surface(f, x_min, x_max, y_min, y_max):
    '''Filled contour plot of f over [x_min, x_max] x [y_min, y_max].'''
    x = np.arange(x_min, x_max + delta, delta)
    y = np.arange(y_min, y_max + delta, delta)
    X1, X2 = np.meshgrid(x, y)
    Z = np.array([f(z) for z in zip(X1.ravel(), X2.ravel())])
    Z = Z.reshape(X1.shape)  # meshgrid gives shape (len(y), len(x)), not (len(x), len(y))
    plt.contourf(X1, X2, Z, 10, cmap='bone', origin='lower', alpha=0.85)
    plt.colorbar()
def show_descent(f, x_hist, f_hist):
    '''Left: iterates plotted on a contour map of f. Right: objective value per iteration.'''
    plt.subplot(1, 2, 1)
    X1, X2 = zip(*[(x[0], x[1]) for x in x_hist])
    lo, hi = min(X1 + X2) - 1, max(X1 + X2) + 1  # square window covering all iterates
    plot_surface(f, lo, hi, lo, hi)
    plt.plot(X1, X2, 'o-', markersize=10)
    plt.subplot(1, 2, 2)
    plt.plot(f_hist, 'rx-')
    plt.show()
def stepsize(x, p, f, t, c, mode):
    '''Chooses a stepsize. Supported modes:
       "c"          constant c
       "c/sqrt(t)"  decaying, c/sqrt(t+1)
       "c/t"        decaying, c/(t+1)
       "linesearch" backtracking until f decreases
    '''
    if mode == "c/sqrt(t)":
        return c / np.sqrt(t + 1)
    elif mode == "c":
        return c
    elif mode == "c/t":
        return c / (t + 1)
    elif mode == "linesearch":
        alpha = 1.0
        epsilon = 10**(-6)
        gamma = 0.5  # shrink factor
        fx = f(x)    # evaluate f(x) once, not on every loop iteration
        print("alpha", alpha)
        while f(x + alpha*p) > fx - epsilon and alpha > epsilon:
            alpha *= gamma
            print("alpha", alpha)
        return alpha
    else:
        assert 0, "unsupported stepsize mode " + mode
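# Aside: the "linesearch" mode above only asks for raw decrease of f. A standard
# Armijo backtracking rule additionally demands a decrease proportional to the
# directional derivative g(x).p. The sketch below is not part of the original
# routine; armijo_stepsize, c1, and alpha_min are illustrative names, and it
# assumes the gradient at x has already been computed.
def armijo_stepsize(x, p, grad_x, f, c1=10**(-4), gamma=0.5, alpha_min=10**(-10)):
    alpha = 1.0
    fx = f(x)
    slope = np.dot(grad_x, p)  # negative whenever p is a descent direction
    while f(x + alpha*p) > fx + c1*alpha*slope and alpha > alpha_min:
        alpha *= gamma  # shrink until sufficient decrease (or alpha bottoms out)
    return alpha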
def gradientDescent(x_init, f, g, stepsize_rule='c/sqrt(t)', c=0.1, max_iters=1000, eps=10**(-6)):
    '''Minimizes function f with gradient g; returns the iterate and objective histories.'''
    x = x_init
    x_hist = [x]
    f_hist = [f(x)]
    print("iter", -1, "x=", x, "f(x)=", f_hist[-1])
    for t in range(max_iters):
        p = -g(x)                                       # choose a direction
        alpha = stepsize(x, p, f, t, c, stepsize_rule)  # choose a stepsize
        if alpha <= eps:                                # safeguard: stepsize collapsed
            break
        print("iter", t, "x=", x, "f(x)=", f(x), "p=", p, "alpha=", alpha)
        x = x + alpha*p                                 # go!
        x_hist.append(x)                                # log x
        f_hist.append(f(x))                             # log function value
        if np.linalg.norm(x - x_hist[-2]) < eps:        # converged: step was tiny
            break
    return x_hist, f_hist
# A convex quadratic test problem and its analytic gradient.
f = lambda x: x[0]**2 + x[1]**2 + x[0]*x[1] + 3*x[0] + x[1]
g = lambda x: np.array([2*x[0] + x[1] + 3, 2*x[1] + x[0] + 1])
x_init = np.array([3.0, 5.0])  # float dtype so the iterates stay floating-point
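# Sanity checks (a sketch; x_star, fd, and h are illustrative names, not part of
# the original script). Setting g(x) = 0 gives the linear system
# [[2, 1], [1, 2]] x = [-3, -1], so the minimizer is available in closed form.
x_star = np.linalg.solve(np.array([[2.0, 1.0], [1.0, 2.0]]), np.array([-3.0, -1.0]))
print("closed-form minimizer:", x_star, "f(x*)=", f(x_star))  # (-5/3, 1/3), f(x*) = -7/3

# Central-difference check of the analytic gradient at x_init.
h = 10**(-6)
fd = np.array([(f(x_init + h*e) - f(x_init - h*e)) / (2*h) for e in np.eye(2)])
print("analytic g:", g(x_init), "finite-difference g:", fd)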
#First look at a constant stepsize (change c to take larger or smaller steps)
x_hist, f_hist = gradientDescent(x_init, f, g, stepsize_rule="c", c=0.1)
show_descent(f, x_hist, f_hist)
#Decaying stepsize of 1/t
x_hist, f_hist = gradientDescent(x_init, f, g, stepsize_rule="c/t", c=1)
show_descent(f, x_hist, f_hist)
#Decaying stepsize of 1/sqrt(t)
x_hist, f_hist = gradientDescent(x_init, f, g, stepsize_rule="c/sqrt(t)", c=0.1)
show_descent(f, x_hist, f_hist)
#Backtracking linesearch
x_hist, f_hist = gradientDescent(x_init, f, g, stepsize_rule="linesearch", c=0.1)
show_descent(f, x_hist, f_hist)
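# Optional comparison (a sketch; rule, c_val, and fs are illustrative names).
# Since the closed-form minimum is f(x*) = -7/3, every rule's final objective
# should approach that value; this prints how close each one gets.
for rule, c_val in [("c", 0.1), ("c/t", 1), ("c/sqrt(t)", 0.1), ("linesearch", 0.1)]:
    _, fs = gradientDescent(x_init, f, g, stepsize_rule=rule, c=c_val)
    print(rule, "iterations:", len(fs) - 1, "final f:", fs[-1])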