sradc created this gist on October 11, 2021.
# minimal example, using code from: https://sidsite.com/posts/autodiff/
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np


class Variable:
    """A scalar node in the computational graph.

    `local_gradients` holds (child_variable, local_gradient) pairs:
    the derivative of this variable with respect to each direct child.
    """

    def __init__(self, value, local_gradients=()):
        self.value = value
        self.local_gradients = local_gradients

    def __add__(self, other):
        return add(self, other)

    def __mul__(self, other):
        return mul(self, other)

    def __sub__(self, other):
        return add(self, neg(other))


def get_gradients(variable):
    """Compute the first derivatives of `variable`
    with respect to child variables.
    """
    gradients = defaultdict(lambda: 0)

    def compute_gradients(variable, path_value):
        for child_variable, local_gradient in variable.local_gradients:
            # "Multiply the edges of a path":
            value_of_path_to_child = path_value * local_gradient
            # "Add together the different paths":
            gradients[child_variable] += value_of_path_to_child
            # recurse through graph:
            compute_gradients(child_variable, value_of_path_to_child)

    compute_gradients(variable, path_value=1)
    # (path_value=1 is from `variable` differentiated w.r.t. itself)
    return gradients


def add(a, b):
    value = a.value + b.value
    # d(a + b)/da = 1, d(a + b)/db = 1
    local_gradients = (
        (a, 1),
        (b, 1)
    )
    return Variable(value, local_gradients)


def mul(a, b):
    value = a.value * b.value
    # d(a * b)/da = b, d(a * b)/db = a
    local_gradients = (
        (a, b.value),
        (b, a.value)
    )
    return Variable(value, local_gradients)


def neg(a):
    value = -1 * a.value
    # d(-a)/da = -1
    local_gradients = (
        (a, -1),
    )
    return Variable(value, local_gradients)


# convert NumPy array into array of Variable objects:
to_var = np.vectorize(lambda x: Variable(x))

# get values from array of Variable objects:
to_vals = np.vectorize(lambda variable: variable.value)

# Set up and train a single linear layer on random data.
np.random.seed(0)


def update_weights(weights, gradients, lrate):
    # Gradient descent step; `gradients` is keyed by Variable identity.
    for _, weight in np.ndenumerate(weights):
        weight.value -= lrate * gradients[weight]


input_size = 50
output_size = 10
lrate = 0.001

x = to_var(np.random.random(input_size))
y_true = to_var(np.random.random(output_size))
weights = to_var(np.random.random((input_size, output_size)))

loss_vals = []
for i in range(100):
    # np.dot works on object arrays via Variable's __add__ and __mul__:
    y_pred = np.dot(x, weights)
    loss = np.sum((y_true - y_pred) * (y_true - y_pred))  # sum of squared errors
    loss_vals.append(loss.value)
    gradients = get_gradients(loss)
    update_weights(weights, gradients, lrate)

plt.plot(loss_vals)
plt.xlabel("Time step")
plt.ylabel("Loss")
plt.title("Single linear layer learning")
plt.show()
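
# --- Added sketch, not part of the original gist ---
# A quick sanity check of `get_gradients` on a scalar expression,
# using only the classes defined above. For f(a, b) = a*b + a - b,
# reverse mode should give df/da = b + 1 and df/db = a - 1.
a = Variable(4.0)
b = Variable(3.0)
f = a * b + a - b  # 4*3 + 4 - 3 = 13

grads = get_gradients(f)
print(f.value)   # 13.0
print(grads[a])  # df/da = b + 1 = 4.0
print(grads[b])  # df/db = a - 1 = 3.0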
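
# --- Added sketch, not part of the original gist ---
# Numerically verify the autodiff gradient of the loss for one weight
# with a forward finite difference. `loss_of` is a helper introduced
# here for illustration.
def loss_of(weights):
    y_pred = np.dot(x, weights)
    return np.sum((y_true - y_pred) * (y_true - y_pred))

loss = loss_of(weights)
w = weights[0, 0]
g_autodiff = get_gradients(loss)[w]

eps = 1e-6
w.value += eps
g_numeric = (loss_of(weights).value - loss.value) / eps
w.value -= eps  # restore the weight

print(g_autodiff, g_numeric)  # should agree to several decimal places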