Last active: January 9, 2023 15:12
Revisions
macournoyer renamed this gist
Dec 16, 2015 · 1 changed file with 0 additions and 0 deletions.
File renamed without changes.
macournoyer revised this gist
Dec 16, 2015 · 1 changed file with 11 additions and 0 deletions.
$ python xor.py
Training:
  Epoch   0 MSE: 1.765
  Epoch 100 MSE: 0.015
  Epoch 200 MSE: 0.005
  * Target MSE reached *
Evaluating:
  1 XOR 0 = 1 ( 0.904) Error: 0.096
  0 XOR 1 = 1 ( 0.908) Error: 0.092
  1 XOR 1 = 0 (-0.008) Error: 0.008
  0 XOR 0 = 0 ( 0.000) Error: 0.000
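A note on reading these numbers: each evaluation line shows the rounded prediction, the raw tanh output in parentheses, and target minus output as the error. The MSE reported during training is the mean of the squared per-example errors collected over one epoch. A minimal sketch of that computation (the errors array is copied from the evaluation lines above; the snippet itself is illustrative and not part of the gist):

import numpy as np

# The training loop's metric: mean of the squared (target - output) errors.
errors = np.array([0.096, 0.092, -0.008, 0.000])
print("MSE: %.3f" % (errors ** 2).mean())  # prints "MSE: 0.004"

This lands slightly below the 0.005 reported at epoch 200 because the training-time errors were collected while the weights were still being updated within the epoch.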
macournoyer created this gist
Dec 16, 2015 · 2 changed files with 116 additions and 0 deletions.
Two files were added: nn.py (the framework, imported as nn below) and xor.py (training and evaluation on XOR, run as python xor.py).

nn.py:

import numpy as np

# Activation functions
def tanh(x):
    return np.tanh(x)

# Derivative of tanh from its output
def dtanh(y):
    return 1 - y ** 2

# The neural network framework
class Layer:
    def __init__(self, num_inputs, num_outputs):
        # Init all weights between [-1 .. 1].
        # Each input is connected to all outputs.
        # One row per input and one column per output.
        self.weights = np.random.uniform(-1, 1, (num_inputs, num_outputs))

    def forward(self, input):
        self.output = tanh(input.dot(self.weights))
        return self.output

    def computeGradient(self, error):
        self.delta = error * dtanh(self.output)
        # Return the error propagated back to the previous layer
        return self.delta.dot(self.weights.T)

    def updateWeights(self, input, learning_rate):
        self.weights += input.T.dot(self.delta) * learning_rate

class Network:
    def __init__(self, layers):
        self.layers = layers

    def forward(self, input):
        output = input
        for layer in self.layers:
            output = layer.forward(output)
        return output

    def backprop(self, input, error, learning_rate):
        # Compute deltas at each layer, starting from the last one
        for layer in reversed(self.layers):
            error = layer.computeGradient(error)
        # Update the weights
        for layer in self.layers:
            layer.updateWeights(input, learning_rate)
            input = layer.output

xor.py:

import numpy as np
import nn

# Training examples
examples = [
    # _ XOR _ = _
    [ np.array([[ 0, 0 ]]), np.array([[ 0 ]]) ],
    [ np.array([[ 0, 1 ]]), np.array([[ 1 ]]) ],
    [ np.array([[ 1, 0 ]]), np.array([[ 1 ]]) ],
    [ np.array([[ 1, 1 ]]), np.array([[ 0 ]]) ]
]

# Seed the random generator to get consistent results
np.random.seed(0)

# Build the model
num_inputs = 2
num_hidden = 20  # nodes in the hidden layer
num_output = 1

network = nn.Network([
    nn.Layer(num_inputs, num_hidden),
    nn.Layer(num_hidden, num_output)
])

# Train the model
print("Training:")

learning_rate = 0.1
target_mse = 0.01

for epoch in range(500):
    errors = []
    for input, target in examples:
        # Forward
        output = network.forward(input)
        # Compute the error
        error = target - output
        errors.append(error)
        # Back-propagate the error
        network.backprop(input, error, learning_rate)

    # Compute the Mean Squared Error over all examples every 100 epochs
    if epoch % 100 == 0:
        mse = (np.array(errors) ** 2).mean()
        print("  Epoch %3d MSE: %.3f" % (epoch, mse))
        if mse <= target_mse:
            print("  * Target MSE reached *")
            break

# Evaluate the model
def eval(x, y):
    output = network.forward(x)
    normalized = int(round(output[0]))
    error = y - output[0]
    return "%d (% .3f) Error: %.3f" % (normalized, output[0], error)

print("Evaluating:")
print("  1 XOR 0 = " + eval(np.array([1, 0]), 1))
print("  0 XOR 1 = " + eval(np.array([0, 1]), 1))
print("  1 XOR 1 = " + eval(np.array([1, 1]), 0))
print("  0 XOR 0 = " + eval(np.array([0, 0]), 0))
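The update in updateWeights is plain gradient descent on the squared error: delta = error * dtanh(output) is the local gradient, and input.T.dot(delta) * learning_rate nudges the weights toward the target. A minimal sketch to sanity-check that analytically, not part of the gist: it assumes the framework above is saved as nn.py, and the loss L = 0.5 * sum((target - output)^2) is my assumption for the check.

# gradcheck.py (hypothetical helper, not in the gist)
import numpy as np
import nn

np.random.seed(1)
layer = nn.Layer(2, 3)
x = np.array([[0.5, -0.2]])
t = np.array([[0.1, 0.7, -0.3]])

# Assumed loss for the check: L = 0.5 * sum((t - tanh(x.W))^2)
def loss(weights):
    out = np.tanh(x.dot(weights))
    return 0.5 * ((t - out) ** 2).sum()

# Analytic gradient via the framework. computeGradient stores
# layer.delta = (t - out) * (1 - out**2), so dL/dW = -x.T.dot(delta)
# (minus sign because the framework propagates target - output).
out = layer.forward(x)
layer.computeGradient(t - out)
analytic = -x.T.dot(layer.delta)

# Numerical gradient by central differences
eps = 1e-5
numeric = np.zeros_like(layer.weights)
for i in range(layer.weights.shape[0]):
    for j in range(layer.weights.shape[1]):
        w_plus = layer.weights.copy()
        w_minus = layer.weights.copy()
        w_plus[i, j] += eps
        w_minus[i, j] -= eps
        numeric[i, j] = (loss(w_plus) - loss(w_minus)) / (2 * eps)

print("max abs difference:", np.abs(analytic - numeric).max())

If the framework's backward pass is correct, the printed difference should be on the order of 1e-9 or smaller; a large value would mean the analytic and numerical gradients disagree.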