vectorised_autodiff_example.py (gist by @sradc, created October 11, 2021)
    # minimal example, using code from: https://sidsite.com/posts/autodiff/

    from collections import defaultdict
    import matplotlib.pyplot as plt
    import numpy as np

class Variable:
    def __init__(self, value, local_gradients=[]):
        # `local_gradients` holds (child_variable, local_derivative) pairs.
        self.value = value
        self.local_gradients = local_gradients

    def __add__(self, other):
        return add(self, other)

    def __mul__(self, other):
        return mul(self, other)

    def __sub__(self, other):
        return add(self, neg(other))

def get_gradients(variable):
    """ Compute the first derivatives of `variable`
    with respect to child variables.
    """
    gradients = defaultdict(lambda: 0)

    def compute_gradients(variable, path_value):
        for child_variable, local_gradient in variable.local_gradients:
            # "Multiply the edges of a path":
            value_of_path_to_child = path_value * local_gradient
            # "Add together the different paths":
            gradients[child_variable] += value_of_path_to_child
            # recurse through graph:
            compute_gradients(child_variable, value_of_path_to_child)

    compute_gradients(variable, path_value=1)
    # (path_value=1 is from `variable` differentiated w.r.t. itself)
    return gradients

def add(a, b):
    value = a.value + b.value
    local_gradients = (
        (a, 1),
        (b, 1)
    )
    return Variable(value, local_gradients)

def mul(a, b):
    value = a.value * b.value
    local_gradients = (
        (a, b.value),
        (b, a.value)
    )
    return Variable(value, local_gradients)

def neg(a):
    value = -1 * a.value
    local_gradients = (
        (a, -1),
    )
    return Variable(value, local_gradients)
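
# Illustrative sanity check (an added example, not from the linked post), using
# only the pieces defined above: for y = a * b + a, we expect
# dy/da = b + 1 = 3 and dy/db = a = 3.
_a = Variable(3.0)
_b = Variable(2.0)
_grads = get_gradients(_a * _b + _a)
assert _grads[_a] == 3.0 and _grads[_b] == 3.0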

    # convert NumPy array into array of Variable objects:
    to_var = np.vectorize(lambda x : Variable(x))

    # get values from array of Variable objects:
    to_vals = np.vectorize(lambda variable : variable.value)
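# (For example, to_var(np.ones(2)) gives an object array of two Variable
#  instances, and to_vals maps it back to array([1., 1.]).)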

# Create and train a single linear layer
    np.random.seed(0)

def update_weights(weights, gradients, lrate):
    # Gradient-descent step: mutate each weight Variable's value in place.
    for _, weight in np.ndenumerate(weights):
        weight.value -= lrate * gradients[weight]

    input_size = 50
    output_size = 10
    lrate = 0.001

    x = to_var(np.random.random(input_size))
    y_true = to_var(np.random.random(output_size))
    weights = to_var(np.random.random((input_size, output_size)))
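# (Shapes: x is (50,), weights is (50, 10), so np.dot(x, weights) gives a
#  (10,) object array of Variables matching y_true.)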

loss_vals = []
for i in range(100):
    y_pred = np.dot(x, weights)
    loss = np.sum((y_true - y_pred) * (y_true - y_pred))
    loss_vals.append(loss.value)
    gradients = get_gradients(loss)
    update_weights(weights, gradients, lrate)

    plt.plot(loss_vals)
    plt.xlabel("Time step")
    plt.ylabel("Loss")
    plt.title("Single linear layer learning")
    plt.show()