sradc created this gist on October 11, 2021.
# minimal example, using code from: https://sidsite.com/posts/autodiff/
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np


class Variable:
    """A scalar node in the computational graph.

    `local_gradients` holds (child_variable, local_gradient) pairs:
    the derivative of this variable with respect to each direct child.
    """

    def __init__(self, value, local_gradients=()):
        self.value = value
        self.local_gradients = local_gradients

    def __add__(self, other):
        return add(self, other)

    def __mul__(self, other):
        return mul(self, other)

    def __sub__(self, other):
        return add(self, neg(other))


def get_gradients(variable):
    """Compute the first derivatives of `variable`
    with respect to child variables.
    """
    gradients = defaultdict(lambda: 0)

    def compute_gradients(variable, path_value):
        for child_variable, local_gradient in variable.local_gradients:
            # "Multiply the edges of a path":
            value_of_path_to_child = path_value * local_gradient
            # "Add together the different paths":
            gradients[child_variable] += value_of_path_to_child
            # recurse through graph:
            compute_gradients(child_variable, value_of_path_to_child)

    compute_gradients(variable, path_value=1)
    # (path_value=1 is from `variable` differentiated w.r.t. itself)
    return gradients


def add(a, b):
    value = a.value + b.value
    # d(a + b)/da = 1, d(a + b)/db = 1
    local_gradients = (
        (a, 1),
        (b, 1)
    )
    return Variable(value, local_gradients)


def mul(a, b):
    value = a.value * b.value
    # d(a * b)/da = b, d(a * b)/db = a
    local_gradients = (
        (a, b.value),
        (b, a.value)
    )
    return Variable(value, local_gradients)


def neg(a):
    value = -1 * a.value
    # d(-a)/da = -1
    local_gradients = (
        (a, -1),
    )
    return Variable(value, local_gradients)


# convert NumPy array into array of Variable objects:
to_var = np.vectorize(lambda x: Variable(x))

# get values from array of Variable objects:
to_vals = np.vectorize(lambda variable: variable.value)

# Set up and train a single linear layer on random data.
np.random.seed(0)


def update_weights(weights, gradients, lrate):
    # Gradient descent step; `gradients` is keyed by Variable identity.
    for _, weight in np.ndenumerate(weights):
        weight.value -= lrate * gradients[weight]


input_size = 50
output_size = 10
lrate = 0.001

x = to_var(np.random.random(input_size))
y_true = to_var(np.random.random(output_size))
weights = to_var(np.random.random((input_size, output_size)))

loss_vals = []
for i in range(100):
    # np.dot works on object arrays via Variable's __add__ and __mul__:
    y_pred = np.dot(x, weights)
    loss = np.sum((y_true - y_pred) * (y_true - y_pred))  # sum of squared errors
    loss_vals.append(loss.value)
    gradients = get_gradients(loss)
    update_weights(weights, gradients, lrate)

plt.plot(loss_vals)
plt.xlabel("Time step")
plt.ylabel("Loss")
plt.title("Single linear layer learning")
plt.show()
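
# --- Added sketch, not part of the original gist ---
# A quick sanity check of `get_gradients` on a scalar expression,
# using only the classes defined above. For f(a, b) = a*b + a - b,
# reverse mode should give df/da = b + 1 and df/db = a - 1.
a = Variable(4.0)
b = Variable(3.0)
f = a * b + a - b  # 4*3 + 4 - 3 = 13

grads = get_gradients(f)
print(f.value)   # 13.0
print(grads[a])  # df/da = b + 1 = 4.0
print(grads[b])  # df/db = a - 1 = 3.0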
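
# --- Added sketch, not part of the original gist ---
# Numerically verify the autodiff gradient of the loss for one weight
# with a forward finite difference. `loss_of` is a helper introduced
# here for illustration.
def loss_of(weights):
    y_pred = np.dot(x, weights)
    return np.sum((y_true - y_pred) * (y_true - y_pred))

loss = loss_of(weights)
w = weights[0, 0]
g_autodiff = get_gradients(loss)[w]

eps = 1e-6
w.value += eps
g_numeric = (loss_of(weights).value - loss.value) / eps
w.value -= eps  # restore the weight

print(g_autodiff, g_numeric)  # should agree to several decimal places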