compare_adagrad_adadelta.py (created by @xiaohan2012, Jan 29, 2015)
    """
    Comparing adagrad, adadelta and constant learning in gradient descent(the seddle point function y^2 - x^2)
    Reference:
    1. comparison on several learning rate update scheme: http://ml.memect.com/archive/2014-12-12/short.html#3786866375172817
    2. Saddle point, http://en.wikipedia.org/wiki/Saddle_point
    """
    import numpy as np
    import theano
    import theano.tensor as T

rho = 0.95
epsilon = 0.00001
gamma = 0.1

const_lr = 0.01

init_x = [0.1, 0.1]
x = theano.shared(
    np.array(init_x, dtype=theano.config.floatX),
    borrow=True,
    name="x"
)

tolerate = 0.01

params = [x]
param_shapes = [(2,)]

# cost = 0.5 * (x[0]-2) ** 2 + (x[1]-2) ** 2
cost = x[0] ** 2 - x[1] ** 2
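# Gradient of the saddle cost: d(cost)/dx0 = 2 * x[0], d(cost)/dx1 = -2 * x[1].
# Gradient descent therefore pulls x[0] towards 0 but pushes x[1] away from it,
# so starting from (0.1, 0.1) every scheme slides off the saddle along the
# x[1] direction; the plots at the end show each trajectory in the
# (x[0], x[1]) plane.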

param_grads = [T.grad(cost, param) for param in params]


def make_func(x, cost, updates, init_x):
    # Reset x to the starting point and compile a Theano function that applies
    # one optimizer step per call, returning the current x and cost.
    x.set_value(np.asarray(init_x, dtype=theano.config.floatX))
    f = theano.function(
        inputs=[],
        outputs=[x, cost],
        updates=updates
    )
    return f

def simulate(f, n_epoch_max=100):
    # Run the compiled update function for up to n_epoch_max epochs and
    # record the trajectory of x after every step.
    epoch = 0
    used_epochs = 0
    xs = []
    print "##################"
    while epoch < n_epoch_max:
        x_val, cost_val = f()
        xs.append(x_val)
        # if abs(cost_val) < tolerate:
        #     break
        epoch += 1
        used_epochs += 1
    return xs, used_epochs


###############
#  ADADELTA   #
###############
print "Using AdaDelta with rho = %f and epsilon = %f" % (rho, epsilon)
egs = [
    theano.shared(
        value=np.zeros(param_shape, dtype=theano.config.floatX),
        borrow=True,
        name="Eg:" + param.name
    )
    for param_shape, param in zip(param_shapes, params)
]

exs = [
    theano.shared(
        value=np.zeros(param_shape, dtype=theano.config.floatX),
        borrow=True,
        name="Ex:" + param.name
    )
    for param_shape, param in zip(param_shapes, params)
]

new_egs = [
    rho * eg + (1 - rho) * g ** 2
    for eg, g in zip(egs, param_grads)
]

delta_x = [
    -(T.sqrt(ex + epsilon) / T.sqrt(new_eg + epsilon)) * g
    for new_eg, ex, g in zip(new_egs, exs, param_grads)
]

new_exs = [
    rho * ex + (1 - rho) * (dx ** 2)
    for ex, dx in zip(exs, delta_x)
]

egs_updates = zip(egs, new_egs)
exs_updates = zip(exs, new_exs)
param_updates = [
    (p, p + dx)
    for dx, p in zip(delta_x, params)
]

updates = egs_updates + exs_updates + param_updates

f = make_func(x, cost, updates, init_x)
adadelta_xs, adadelta_epochs = simulate(f)

##############
#  ADAGRAD   #
##############
print "Using AdaGrad with gamma = %f and epsilon = %f" % (gamma, epsilon)
grad_hists = [
    theano.shared(
        value=np.zeros(param_shape, dtype=theano.config.floatX),
        borrow=True,
        name="grad_hist:" + param.name
    )
    for param_shape, param in zip(param_shapes, params)
]

new_grad_hists = [
    g_hist + g ** 2
    for g_hist, g in zip(grad_hists, param_grads)
]

# Standard AdaGrad step: scale gamma by 1 / (sqrt(G_t) + eps), where G_t is
# the updated gradient history for this epoch.
param_updates = [
    (param, param - theano.printing.Print("lr")(gamma / (T.sqrt(new_g_hist) + epsilon)) * param_grad)
    for param, param_grad, new_g_hist in zip(params, param_grads, new_grad_hists)
]

grad_hist_update = zip(grad_hists, new_grad_hists)

updates = grad_hist_update + param_updates

f = make_func(x, cost, updates, init_x)
adagrad_xs, adagrad_epochs = simulate(f)

###############
# constant lr #
###############
print "Using constant learning rate %f" % (const_lr)

updates = [
    (param, param - const_lr * param_grad)
    for param, param_grad in zip(params, param_grads)
]

f = make_func(x, cost, updates, init_x)
const_lr_xs, const_lr_epochs = simulate(f)

from matplotlib import pyplot as plt


def myplot(data, style, title, plot_number, total):
    # Plot one optimizer's trajectory of (x[0], x[1]) values in its own panel.
    plt.subplot(1, total, plot_number)
    x, y = zip(*data)
    plt.plot(x, y, style)
    plt.title(title)
    plt.xlim([-10, 10])
    plt.ylim([-10, 10])
myplot(adadelta_xs,
       'ro-',
       "AdaDelta (%d epochs)" % (adadelta_epochs),
       1, 3)

myplot(adagrad_xs,
       'ro-',
       "AdaGrad (%d epochs)" % (adagrad_epochs),
       2, 3)

myplot(const_lr_xs,
       'ro-',
       "ConstLR (%d epochs)" % (const_lr_epochs),
       3, 3)

plt.show()
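# To reproduce the figure (assuming a Python 2 environment with Theano and
# matplotlib installed):
#   python compare_adagrad_adadelta.py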