Forked from xiaohan2012/compare_adagrad_adadelta.py
Created January 11, 2017 05:50
Revisions
xiaohan2012 created this gist on Jan 29, 2015.
compare_adagrad_adadelta.py

"""
Comparing AdaGrad, AdaDelta and a constant learning rate in gradient descent
on the saddle-point function cost = x[0]^2 - x[1]^2.

References:
1. Comparison of several learning rate update schemes: http://ml.memect.com/archive/2014-12-12/short.html#3786866375172817
2. Saddle point: http://en.wikipedia.org/wiki/Saddle_point
"""
import numpy as np
import theano
import theano.tensor as T

rho = 0.95          # decay rate for AdaDelta's running averages
epsilon = 0.00001   # smoothing constant
gamma = 0.1         # AdaGrad base learning rate
const_lr = 0.01     # constant learning rate baseline
init_x = [0.1, 0.1]

x = theano.shared(
    np.array(init_x, dtype=theano.config.floatX),
    borrow=True,
    name="x"
)

tolerate = 0.01

params = [x]
param_shapes = [(2,)]

# cost = 0.5 * (x[0] - 2) ** 2 + (x[1] - 2) ** 2
# Saddle-point objective: a minimum along x[0], a maximum along x[1].
cost = x[0] ** 2 - x[1] ** 2

param_grads = [T.grad(cost, param) for param in params]


def make_func(x, cost, updates, init_x):
    """Reset x to its initial value and compile one optimisation step."""
    x.set_value(init_x)
    f = theano.function(
        inputs=[],
        outputs=[x, cost],
        updates=updates
    )
    return f


def simulate(f, n_epoch_max=100):
    """Run the compiled step repeatedly and record the visited points."""
    epoch = 0
    used_epochs = 0
    xs = []
    print "##################"
    while epoch < n_epoch_max:
        x_val, cost_val = f()
        xs.append(x_val)
        # if abs(cost_val) < tolerate:
        #     break
        epoch += 1
        used_epochs += 1
    return xs, used_epochs


###############
#  ADADELTA   #
###############
print "Using AdaDelta with rho = %f and epsilon = %f" % (rho, epsilon)

# Running averages of squared gradients, E[g^2]
egs = [
    theano.shared(
        value=np.zeros(param_shape, dtype=theano.config.floatX),
        borrow=True,
        name="Eg:" + param.name
    )
    for param_shape, param in zip(param_shapes, params)
]

# Running averages of squared parameter updates, E[dx^2]
exs = [
    theano.shared(
        value=np.zeros(param_shape, dtype=theano.config.floatX),
        borrow=True,
        name="Ex:" + param.name
    )
    for param_shape, param in zip(param_shapes, params)
]

new_egs = [
    rho * eg + (1 - rho) * g ** 2
    for eg, g in zip(egs, param_grads)
]

delta_x = [
    -(T.sqrt(ex + epsilon) / T.sqrt(new_eg + epsilon)) * g
    for new_eg, ex, g in zip(new_egs, exs, param_grads)
]

new_exs = [
    rho * ex + (1 - rho) * (dx ** 2)
    for ex, dx in zip(exs, delta_x)
]

egs_updates = zip(egs, new_egs)
exs_updates = zip(exs, new_exs)
param_updates = [
    (p, p + dx)
    for dx, g, p in zip(delta_x, param_grads, params)
]

updates = egs_updates + exs_updates + param_updates

f = make_func(x, cost, updates, init_x)
adadelta_xs, adadelta_epochs = simulate(f)


##############
#  ADAGRAD   #
##############
print "Using AdaGrad with gamma = %f and epsilon = %f" % (gamma, epsilon)

# Accumulated sums of squared gradients
grad_hists = [
    theano.shared(
        value=np.zeros(param_shape, dtype=theano.config.floatX),
        borrow=True,
        name="grad_hist:" + param.name
    )
    for param_shape, param in zip(param_shapes, params)
]

new_grad_hists = [
    g_hist + g ** 2
    for g_hist, g in zip(grad_hists, param_grads)
]

# Per-parameter step size shrinks as the squared-gradient history grows.
param_updates = [
    (param, param - theano.printing.Print("lr")(gamma * epsilon / (T.sqrt(g_hist) + epsilon)) * param_grad)
    for param, param_grad, g_hist in zip(params, param_grads, grad_hists)
]

grad_hist_update = zip(grad_hists, new_grad_hists)

updates = grad_hist_update + param_updates

f = make_func(x, cost, updates, init_x)
adagrad_xs, adagrad_epochs = simulate(f)


###############
# constant lr #
###############
print "Using constant learning rate %f" % (const_lr)

updates = [
    (param, param - const_lr * param_grad)
    for param, param_grad in zip(params, param_grads)
]

f = make_func(x, cost, updates, init_x)
const_lr_xs, const_lr_epochs = simulate(f)


from matplotlib import pyplot as plt
def myplot(data, style, title, plot_number, total):
    plt.subplot(1, total, plot_number)
    x, y = zip(*data)
    plt.plot(x, y, style)
    plt.title(title)
    plt.xlim([-10, 10])
    plt.ylim([-10, 10])

myplot(adadelta_xs, 'ro-', "AdaDelta(%d epochs)" % (adadelta_epochs), 1, 3)
myplot(adagrad_xs, 'ro-', "AdaGrad(%d epochs)" % (adagrad_epochs), 2, 3)
myplot(const_lr_xs, 'ro-', "ConstLR(%d epochs)" % (const_lr_epochs), 3, 3)

plt.show()
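For readers without a working Theano install, the AdaDelta recursion used above can be sanity-checked with a plain NumPy sketch on the same saddle cost x[0]^2 - x[1]^2. The names saddle_grad, adadelta_step, E_g2 and E_dx2 below are illustrative and not part of the original script.

# NumPy-only sketch of the AdaDelta update (illustrative, not from the gist).
import numpy as np

rho, epsilon = 0.95, 1e-5

def saddle_grad(x_np):
    # Analytic gradient of cost = x[0]**2 - x[1]**2.
    return np.array([2.0 * x_np[0], -2.0 * x_np[1]])

def adadelta_step(x_np, E_g2, E_dx2):
    g = saddle_grad(x_np)
    E_g2 = rho * E_g2 + (1 - rho) * g ** 2                          # E[g^2]
    dx = -(np.sqrt(E_dx2 + epsilon) / np.sqrt(E_g2 + epsilon)) * g  # scaled step
    E_dx2 = rho * E_dx2 + (1 - rho) * dx ** 2                       # E[dx^2]
    return x_np + dx, E_g2, E_dx2

x_np = np.array([0.1, 0.1])
E_g2, E_dx2 = np.zeros(2), np.zeros(2)
for _ in range(100):
    x_np, E_g2, E_dx2 = adadelta_step(x_np, E_g2, E_dx2)
print(x_np)  # x[0] shrinks toward the saddle; x[1] grows along the descending direction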