compare_adagrad_adadelta.py (created by @xiaohan2012, Jan 29, 2015)
    """
    Comparing adagrad, adadelta and constant learning in gradient descent(the seddle point function y^2 - x^2)
    Reference:
    1. comparison on several learning rate update scheme: http://ml.memect.com/archive/2014-12-12/short.html#3786866375172817
    2. Saddle point, http://en.wikipedia.org/wiki/Saddle_point
    """
    import numpy as np
    import theano
    import theano.tensor as T

rho = 0.95
epsilon = 0.00001
gamma = 0.1

const_lr = 0.01

init_x = [0.1, 0.1]
x = theano.shared(
    np.array(init_x, dtype=theano.config.floatX),
    borrow=True,
    name="x"
)

tolerate = 0.01

params = [x]
param_shapes = [(2,)]

# cost = 0.5 * (x[0]-2) ** 2 + (x[1]-2) ** 2
cost = x[0] ** 2 - x[1] ** 2
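# Gradient of the saddle cost: d(cost)/dx0 = 2 * x[0], d(cost)/dx1 = -2 * x[1].
# Gradient descent therefore pulls x[0] towards 0 but pushes x[1] away from it,
# so starting from (0.1, 0.1) every scheme slides off the saddle along the
# x[1] direction; the plots at the end show each trajectory in the
# (x[0], x[1]) plane.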

param_grads = [T.grad(cost, param) for param in params]


def make_func(x, cost, updates, init_x):
    # Reset x to the starting point and compile a Theano function that applies
    # one optimizer step per call, returning the current x and cost.
    x.set_value(np.asarray(init_x, dtype=theano.config.floatX))
    f = theano.function(
        inputs=[],
        outputs=[x, cost],
        updates=updates
    )
    return f

def simulate(f, n_epoch_max=100):
    # Run the compiled update function for up to n_epoch_max epochs and
    # record the trajectory of x after every step.
    epoch = 0
    used_epochs = 0
    xs = []
    print "##################"
    while epoch < n_epoch_max:
        x_val, cost_val = f()
        xs.append(x_val)
        # if abs(cost_val) < tolerate:
        #     break
        epoch += 1
        used_epochs += 1
    return xs, used_epochs


###############
#  ADADELTA   #
###############
print "Using AdaDelta with rho = %f and epsilon = %f" % (rho, epsilon)
egs = [
    theano.shared(
        value=np.zeros(param_shape, dtype=theano.config.floatX),
        borrow=True,
        name="Eg:" + param.name
    )
    for param_shape, param in zip(param_shapes, params)
]

exs = [
    theano.shared(
        value=np.zeros(param_shape, dtype=theano.config.floatX),
        borrow=True,
        name="Ex:" + param.name
    )
    for param_shape, param in zip(param_shapes, params)
]

new_egs = [
    rho * eg + (1 - rho) * g ** 2
    for eg, g in zip(egs, param_grads)
]

delta_x = [
    -(T.sqrt(ex + epsilon) / T.sqrt(new_eg + epsilon)) * g
    for new_eg, ex, g in zip(new_egs, exs, param_grads)
]

new_exs = [
    rho * ex + (1 - rho) * (dx ** 2)
    for ex, dx in zip(exs, delta_x)
]

egs_updates = zip(egs, new_egs)
exs_updates = zip(exs, new_exs)
param_updates = [
    (p, p + dx)
    for dx, p in zip(delta_x, params)
]

updates = egs_updates + exs_updates + param_updates

f = make_func(x, cost, updates, init_x)
adadelta_xs, adadelta_epochs = simulate(f)

##############
#  ADAGRAD   #
##############
print "Using AdaGrad with gamma = %f and epsilon = %f" % (gamma, epsilon)
grad_hists = [
    theano.shared(
        value=np.zeros(param_shape, dtype=theano.config.floatX),
        borrow=True,
        name="grad_hist:" + param.name
    )
    for param_shape, param in zip(param_shapes, params)
]

new_grad_hists = [
    g_hist + g ** 2
    for g_hist, g in zip(grad_hists, param_grads)
]

# Standard AdaGrad step: scale gamma by 1 / (sqrt(G_t) + eps), where G_t is
# the updated gradient history for this epoch.
param_updates = [
    (param, param - theano.printing.Print("lr")(gamma / (T.sqrt(new_g_hist) + epsilon)) * param_grad)
    for param, param_grad, new_g_hist in zip(params, param_grads, new_grad_hists)
]

grad_hist_update = zip(grad_hists, new_grad_hists)

updates = grad_hist_update + param_updates

f = make_func(x, cost, updates, init_x)
adagrad_xs, adagrad_epochs = simulate(f)

###############
# constant lr #
###############
print "Using constant learning rate %f" % (const_lr)

updates = [
    (param, param - const_lr * param_grad)
    for param, param_grad in zip(params, param_grads)
]

f = make_func(x, cost, updates, init_x)
const_lr_xs, const_lr_epochs = simulate(f)

from matplotlib import pyplot as plt


def myplot(data, style, title, plot_number, total):
    # Plot one optimizer's trajectory of (x[0], x[1]) values in its own panel.
    plt.subplot(1, total, plot_number)
    x, y = zip(*data)
    plt.plot(x, y, style)
    plt.title(title)
    plt.xlim([-10, 10])
    plt.ylim([-10, 10])
myplot(adadelta_xs,
       'ro-',
       "AdaDelta (%d epochs)" % (adadelta_epochs),
       1, 3)

myplot(adagrad_xs,
       'ro-',
       "AdaGrad (%d epochs)" % (adagrad_epochs),
       2, 3)

myplot(const_lr_xs,
       'ro-',
       "ConstLR (%d epochs)" % (const_lr_epochs),
       3, 3)

plt.show()
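# To reproduce the figure (assuming a Python 2 environment with Theano and
# matplotlib installed):
#   python compare_adagrad_adadelta.py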