
@riveSunder
Created June 26, 2020 19:20

autograd_mlp.py
    """
    Simple MLP demo using [autograd](https://github.com/HIPS/autograd)
    With l1 and l2 regularization.
    Depends on autograd and scikit-learn (the latter for the mini digits dataset)
    pip install autograd scikit-learn
    """

from autograd import numpy as np
from autograd import grad
from autograd import elementwise_grad as egrad
import sklearn.datasets as datasets
import time
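
# autograd wraps numpy, so any function built from `autograd.numpy` ops can
# be differentiated automatically with `grad` / `elementwise_grad` below.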

# ELU activation (grad_elu is unused below)
elu = lambda x: x * (x >= 0.) + (np.exp(x) - 1) * (x < 0.)
grad_elu = grad(elu)

# row-wise softmax, shifted by the max for numerical stability
softmax = lambda x: np.exp(x - np.max(x)) / np.sum(np.exp(x - np.max(x)), axis=1)[:, np.newaxis]

get_label = lambda x: 1. * np.argmax(x, axis=1)
accuracy = lambda tgt, pred: np.sum(get_label(pred) == get_label(tgt)) / tgt.shape[0]

# standardize to zero mean, unit variance (over the whole array)
normalize = lambda x: (x - np.mean(x)) / np.std(x)
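
# Illustrative sanity check (not part of the original gist): each row of the
# softmax output is a probability distribution and should sum to one, e.g.
#
#   probs = softmax(np.random.randn(4, 10))
#   assert np.allclose(np.sum(probs, axis=1), 1.0)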

def labels_to_one_hot(tgts):
    number_classes = np.max(tgts) + 1
    number_samples = tgts.shape[0]

    one_hot = np.zeros((number_samples, number_classes))

    for ii in range(number_samples):
        one_hot[ii, tgts[ii]] = 1

    return one_hot
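
# For example (illustrative): labels_to_one_hot(np.array([0, 2, 1])) gives
#
#   [[1., 0., 0.],
#    [0., 0., 1.],
#    [0., 1., 0.]]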

def mlp_forward(x, weights, activations):

    for ii in range(len(weights)):
        x = np.matmul(x, weights[ii])
        x = activations[ii](x)

    return x
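
# Each layer is just activation(x @ W); note there are no bias terms, and the
# final activation is expected to map outputs to class probabilities.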

# cross-entropy loss, summed (not averaged) over the batch
ce_loss = lambda y_tgts, y_pred: -np.sum(y_tgts * np.log(y_pred))

def get_loss(weights, activations, batch):

    y_pred = mlp_forward(batch[0], weights, activations)

    my_loss = ce_loss(batch[1], y_pred)
    # L2 penalty on the weights
    my_loss += 1e-1 * np.sum([np.sum(layer**2) for layer in weights])
    # L1 penalty on the weights
    my_loss += 1e-2 * np.sum([np.sum(np.abs(layer)) for layer in weights])

    return my_loss
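
# The full objective is CE + 0.1 * sum(w**2) + 0.01 * sum(|w|); autograd
# differentiates through both penalty terms, so the SGD update below needs
# no separate weight-decay step.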

    if __name__ == "__main__":

    print("loading digits datasets")
    [xx, tgts] = datasets.load_digits(return_X_y=True)

    xx = normalize(xx)

    print("convert labels to one-hot encoding")
    one_hot = labels_to_one_hot(tgts)

    # split into training, test, and validation sets

    num_val = int(0.1 * xx.shape[0])

    # re-seeding before each shuffle applies the same permutation to the
    # inputs and to the labels, keeping them aligned
    np.random.seed(1337)
    np.random.shuffle(xx)

    np.random.seed(1337)
    np.random.shuffle(one_hot)

    x_val = xx[:num_val, ...]
    x_test = xx[num_val:num_val*2, ...]
    x_train = xx[2*num_val:, ...]

    y_val = one_hot[:num_val, ...]
    y_test = one_hot[num_val:num_val*2, ...]
    y_train = one_hot[2*num_val:, ...]

    # some parameters
    init_scale = 1e-2
    lr = 1e-3
    max_epochs = 300
    disp_every = 10
    batch_size = 128

    print("initializing mlp weights")
    dim_x, dim_y, dim_h = x_train.shape[1], y_train.shape[1], 128

    wx2h = init_scale * np.random.randn(dim_x, dim_h)
    wh2h = init_scale * np.random.randn(dim_h, dim_h)
    wh2y = init_scale * np.random.randn(dim_h, dim_y)

    weights = [wx2h, wh2h, wh2y]
    activations = [elu, elu, softmax]
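
    # i.e. a 64 -> 128 -> 128 -> 10 network for the 8x8 sklearn digits: two
    # ELU hidden layers, softmax output probabilities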

    grad_loss = egrad(get_loss)
    smooth_loss = 300.
    smooth_acc = 0.0
    loss_decay = 0.1
    t0 = time.time()
    for epoch in range(max_epochs):
        t1 = time.time()
        for batch_start in range(0, x_train.shape[0] - batch_size, batch_size):

            my_batch = [x_train[batch_start:batch_start+batch_size],
                        y_train[batch_start:batch_start+batch_size]]

            smooth_loss = (1 - loss_decay) * smooth_loss \
                    + loss_decay * get_loss(weights, activations, my_batch)

            y_pred = mlp_forward(my_batch[0], weights, activations)

            smooth_acc = (1 - loss_decay) * smooth_acc \
                    + loss_decay * accuracy(my_batch[1], y_pred)

            my_grad = grad_loss(weights, activations, my_batch)

            for params, grads in zip(weights, my_grad):
                params -= lr * grads
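
        # Note: `params -= lr * grads` mutates each array inside `weights` in
        # place, so this is a plain SGD step on all three weight matrices.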

        if epoch % disp_every == 0:

            y_pred = mlp_forward(x_val, weights, activations)
            val_loss = ce_loss(y_val, y_pred)
            val_acc = accuracy(y_val, y_pred)

            t2 = time.time()
            print("epoch {}, training loss {:.2e}, train acc: {:.2e}, "
                    "val loss {:.2e}, val accuracy {:.2e}".format(
                    epoch, smooth_loss, smooth_acc, val_loss, val_acc))
            print("total time: {:.2f}, epoch time {:.2f}".format(t2 - t0, t2 - t1))