sgd-for-scipy.py
Forked by @kasakh from jcmgray/sgd-for-scipy.py (original gist created Jun 18, 2020; fork created January 11, 2022).
``scipy.optimize.minimize``-compatible implementations of stochastic gradient descent with momentum, RMSProp, and Adam.

import numpy as np
from scipy.optimize import OptimizeResult


def sgd(
    fun,
    x0,
    jac,
    args=(),
    learning_rate=0.001,
    mass=0.9,
    startiter=0,
    maxiter=1000,
    callback=None,
    **kwargs
):
    """``scipy.optimize.minimize`` compatible implementation of stochastic
    gradient descent with momentum.
    Adapted from ``autograd/misc/optimizers.py``.
    """
    x = x0
    velocity = np.zeros_like(x)

    for i in range(startiter, startiter + maxiter):
        g = jac(x)

        # a callback returning a truthy value stops the optimization early
        if callback and callback(x):
            break

        # momentum update: exponential moving average of past gradients
        velocity = mass * velocity - (1.0 - mass) * g
        x = x + learning_rate * velocity

    i += 1
    return OptimizeResult(x=x, fun=fun(x), jac=g, nit=i, nfev=i, success=True)
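

# A quick sanity check for ``sgd`` (a hypothetical helper, not part of the
# original gist): minimizing a simple quadratic should drive ``x`` to zero,
# and a ``callback`` returning a truthy value stops the loop early.
def _demo_sgd():
    quad = lambda x: 0.5 * np.sum(x**2)
    res = sgd(
        quad,
        x0=np.ones(3),
        jac=lambda x: x,
        learning_rate=0.1,
        maxiter=500,
        callback=lambda x: np.linalg.norm(x) < 1e-6,
    )
    assert res.success and np.linalg.norm(res.x) < 1e-3
    return res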


def rmsprop(
    fun,
    x0,
    jac,
    args=(),
    learning_rate=0.1,
    gamma=0.9,
    eps=1e-8,
    startiter=0,
    maxiter=1000,
    callback=None,
    **kwargs
):
    """``scipy.optimize.minimize`` compatible implementation of root mean
    squared prop: see the Adagrad paper for details.
    Adapted from ``autograd/misc/optimizers.py``.
    """
    x = x0
    avg_sq_grad = np.ones_like(x)

    for i in range(startiter, startiter + maxiter):
        g = jac(x)

        if callback and callback(x):
            break

        # exponential moving average of squared gradients scales the step size
        avg_sq_grad = avg_sq_grad * gamma + g**2 * (1 - gamma)
        x = x - learning_rate * g / (np.sqrt(avg_sq_grad) + eps)

    i += 1
    return OptimizeResult(x=x, fun=fun(x), jac=g, nit=i, nfev=i, success=True)
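

# Example callback factory (hypothetical, not part of the original gist): the
# optimizers above stop as soon as ``callback(x)`` returns a truthy value, so
# a callback can both track the best iterate seen and trigger early stopping,
# e.g. ``cb, best = make_best_tracking_callback(fun)`` then
# ``rmsprop(fun, x0, jac, callback=cb)``.
def make_best_tracking_callback(fun, target=1e-8):
    best = {"fun": np.inf, "x": None}

    def callback(x):
        fx = fun(x)
        if fx < best["fun"]:
            best["fun"], best["x"] = fx, x.copy()
        return fx < target  # truthy -> stop the optimizer early

    return callback, best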


def adam(
    fun,
    x0,
    jac,
    args=(),
    learning_rate=0.001,
    beta1=0.9,
    beta2=0.999,
    eps=1e-8,
    startiter=0,
    maxiter=1000,
    callback=None,
    **kwargs
):
    """``scipy.optimize.minimize`` compatible implementation of ADAM -
    [http://arxiv.org/pdf/1412.6980.pdf].
    Adapted from ``autograd/misc/optimizers.py``.
    """
    x = x0
    m = np.zeros_like(x)
    v = np.zeros_like(x)

    for i in range(startiter, startiter + maxiter):
        g = jac(x)

        if callback and callback(x):
            break

        m = (1 - beta1) * g + beta1 * m  # first moment estimate.
        v = (1 - beta2) * (g**2) + beta2 * v  # second moment estimate.
        mhat = m / (1 - beta1**(i + 1))  # bias correction.
        vhat = v / (1 - beta2**(i + 1))
        x = x - learning_rate * mhat / (np.sqrt(vhat) + eps)

    i += 1
    return OptimizeResult(x=x, fun=fun(x), jac=g, nit=i, nfev=i, success=True)
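

# Usage sketch (not part of the original gist): each optimizer follows the
# calling convention that ``scipy.optimize.minimize`` expects from a custom
# ``method=`` callable, so it can be passed in directly.  Extra keywords that
# ``minimize`` forwards (hess, bounds, constraints, ...) are absorbed by
# ``**kwargs``, and the contents of ``options`` arrive as keyword arguments.
if __name__ == "__main__":
    from scipy.optimize import minimize, rosen, rosen_der

    x0 = np.zeros(5)
    res = minimize(
        rosen,
        x0,
        jac=rosen_der,
        method=adam,
        options={"learning_rate": 0.01, "maxiter": 5000},
    )
    print(res.x, res.fun, res.nit)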