Revisions
bquast revised this gist
Aug 8, 2016. 1 changed file with 88 additions and 46 deletions.
@@ -1,3 +1,5 @@
set.seed(1)

# define some functions
## convert integer to binary
@@ -12,41 +14,56 @@ int2bin <- function(integer, length=8)
sigmoid <- function(x, k=1, x0=0)
  1 / (1+exp( -k*(x-x0) ))

## sigmoid derivative
sigmoid_output_to_derivative <- function(x)
  x*(1-x)

## tanh derivative
tanh_output_to_derivative <- function(x)
  1-x^2

# create training numbers
X1 = sample(0:1023, 100000, replace=TRUE)
X2 = sample(0:1023, 100000, replace=TRUE)

# create training response numbers
Y <- X1 + X2

# convert to binary
X1b <- int2bin(X1, length=10)
X2b <- int2bin(X2, length=10)
Yb  <- int2bin(Y,  length=10)

# input variables
alpha       = 0.1
alpha_decay = 0.999
momentum    = 0.1
init_weight = 1
batch_size  = 20
input_dim   = 2
hidden_dim  = 8
output_dim  = 1
binary_dim  = 10
largest_number = 2^binary_dim
output_size = 100

# initialise neural network weights
synapse_0_i = matrix(runif(n = input_dim *hidden_dim, min=-init_weight, max=init_weight), nrow=input_dim)
synapse_0_f = matrix(runif(n = input_dim *hidden_dim, min=-init_weight, max=init_weight), nrow=input_dim)
synapse_0_o = matrix(runif(n = input_dim *hidden_dim, min=-init_weight, max=init_weight), nrow=input_dim)
synapse_0_c = matrix(runif(n = input_dim *hidden_dim, min=-init_weight, max=init_weight), nrow=input_dim)
synapse_1   = matrix(runif(n = hidden_dim*output_dim, min=-init_weight, max=init_weight), nrow=hidden_dim)
synapse_h_i = matrix(runif(n = hidden_dim*hidden_dim, min=-init_weight, max=init_weight), nrow=hidden_dim)
synapse_h_f = matrix(runif(n = hidden_dim*hidden_dim, min=-init_weight, max=init_weight), nrow=hidden_dim)
synapse_h_o = matrix(runif(n = hidden_dim*hidden_dim, min=-init_weight, max=init_weight), nrow=hidden_dim)
synapse_h_c = matrix(runif(n = hidden_dim*hidden_dim, min=-init_weight, max=init_weight), nrow=hidden_dim)
synapse_b_1 = runif(n = output_dim, min=-init_weight, max=init_weight)
synapse_b_i = runif(n = hidden_dim, min=-init_weight, max=init_weight)
synapse_b_f = runif(n = hidden_dim, min=-init_weight, max=init_weight)
synapse_b_o = runif(n = hidden_dim, min=-init_weight, max=init_weight)
synapse_b_c = runif(n = hidden_dim, min=-init_weight, max=init_weight)

# initialise state cell
c_t_m1 = matrix(0, nrow=1, ncol = hidden_dim)
@@ -61,10 +78,14 @@ synapse_h_i_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)
synapse_h_f_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)
synapse_h_o_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)
synapse_h_c_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)
synapse_b_1_update = rep(0, output_dim)
synapse_b_i_update = rep(0, hidden_dim)
synapse_b_f_update = rep(0, hidden_dim)
synapse_b_o_update = rep(0, hidden_dim)
synapse_b_c_update = rep(0, hidden_dim)

# training logic
for (j in 1:length(X1)) {

  # select input variables
  a = X1b[j,]
  b = X2b[j,]
@@ -88,21 +109,21 @@ for (j in 1:length(X1)) {
    y = c[position]

    # hidden layer (input ~+ prev_hidden)
    i_t    = sigmoid((X%*%synapse_0_i) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h_i) + synapse_b_i)
    f_t    = sigmoid((X%*%synapse_0_f) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h_f) + synapse_b_f)
    o_t    = sigmoid((X%*%synapse_0_o) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h_o) + synapse_b_o)
    c_in_t = tanh(   (X%*%synapse_0_c) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h_c) + synapse_b_c)
    c_t    = (f_t * c_t_m1[dim(layer_1_values)[1],]) + (i_t * c_in_t)
    layer_1 = o_t * tanh(c_t)
    c_t_m1  = rbind(c_t_m1, c_t)

    # output layer (new binary representation)
    layer_2 = sigmoid(layer_1 %*% synapse_1 + synapse_b_1)

    # did we miss?... if so, by how much?
    layer_2_error  = y - layer_2
    layer_2_deltas = rbind(layer_2_deltas, layer_2_error * sigmoid_output_to_derivative(layer_2))
    overallError   = overallError + round(abs(layer_2_error))

    # decode estimate so we can print it out
    d[position] = round(layer_2)
@@ -146,37 +167,58 @@ for (j in 1:length(X1)) {
    synapse_0_f_update = synapse_0_f_update + t(X) %*% layer_1_f_delta
    synapse_0_o_update = synapse_0_o_update + t(X) %*% layer_1_o_delta
    synapse_0_c_update = synapse_0_c_update + t(X) %*% layer_1_c_delta
    synapse_b_1_update = synapse_b_1_update + layer_2_delta
    synapse_b_i_update = synapse_b_i_update + layer_1_i_delta
    synapse_b_f_update = synapse_b_f_update + layer_1_f_delta
    synapse_b_o_update = synapse_b_o_update + layer_1_o_delta
    synapse_b_c_update = synapse_b_c_update + layer_1_c_delta

    future_layer_1_i_delta = layer_1_i_delta
    future_layer_1_f_delta = layer_1_f_delta
    future_layer_1_o_delta = layer_1_o_delta
    future_layer_1_c_delta = layer_1_c_delta
  }

  if (j %% batch_size == 0) {
    synapse_0_i = synapse_0_i + ( synapse_0_i_update * alpha )
    synapse_0_f = synapse_0_f + ( synapse_0_f_update * alpha )
    synapse_0_o = synapse_0_o + ( synapse_0_o_update * alpha )
    synapse_0_c = synapse_0_c + ( synapse_0_c_update * alpha )
    synapse_1   = synapse_1   + ( synapse_1_update   * alpha )
    synapse_h_i = synapse_h_i + ( synapse_h_i_update * alpha )
    synapse_h_f = synapse_h_f + ( synapse_h_f_update * alpha )
    synapse_h_o = synapse_h_o + ( synapse_h_o_update * alpha )
    synapse_h_c = synapse_h_c + ( synapse_h_c_update * alpha )
    synapse_b_1 = synapse_b_1 + ( synapse_b_1_update * alpha )
    synapse_b_i = synapse_b_i + ( synapse_b_i_update * alpha )
    synapse_b_f = synapse_b_f + ( synapse_b_f_update * alpha )
    synapse_b_o = synapse_b_o + ( synapse_b_o_update * alpha )
    synapse_b_c = synapse_b_c + ( synapse_b_c_update * alpha )

    alpha = alpha * alpha_decay

    synapse_0_i_update = synapse_0_i_update * momentum
    synapse_0_f_update = synapse_0_f_update * momentum
    synapse_0_o_update = synapse_0_o_update * momentum
    synapse_0_c_update = synapse_0_c_update * momentum
    synapse_1_update   = synapse_1_update   * momentum
    synapse_h_i_update = synapse_h_i_update * momentum
    synapse_h_f_update = synapse_h_f_update * momentum
    synapse_h_o_update = synapse_h_o_update * momentum
    synapse_h_c_update = synapse_h_c_update * momentum
    synapse_b_1_update = synapse_b_1_update * momentum
    synapse_b_i_update = synapse_b_i_update * momentum
    synapse_b_f_update = synapse_b_f_update * momentum
    synapse_b_o_update = synapse_b_o_update * momentum
    synapse_b_c_update = synapse_b_c_update * momentum
  }

  # print out progress
  if (j %% output_size == 0) {
    print(paste("Error:", overallError, " - alpha:", alpha))
    print(paste("A :", paste(a, collapse = " ")))
    print(paste("B :", paste(b, collapse = " ")))
    print(paste("Pred:", paste(d, collapse = " ")))
    print(paste("True:", paste(c, collapse = " ")))
    out = 0
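This revision adds bias vectors (`synapse_b_*`) to every gate and to the output layer, and switches to mini-batch updates (`batch_size = 20`) with momentum and a decaying learning rate. The hunks show only the signature of `int2bin` as diff context; for reference, the conversion helpers as defined in the earlier revisions further down this page (unchanged here):

```r
## convert a single integer to a binary vector (least significant bit first)
i2b <- function(integer, length=8)
  as.numeric(intToBits(integer))[1:length]

## vectorised wrapper: one row of bits per input integer
int2bin <- function(integer, length=8)
  t(sapply(integer, i2b, length=length))
```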
bquast revised this gist
Aug 6, 2016. 1 changed file with 6 additions and 2 deletions.
@@ -108,7 +108,9 @@ for (j in 1:length(X1)) {
    d[position] = round(layer_2)

    # store hidden layer so we can print it out
    layer_1_values = rbind(layer_1_values, layer_1)
  }

  future_layer_1_i_delta = matrix(0, nrow = 1, ncol = hidden_dim)
  future_layer_1_f_delta = matrix(0, nrow = 1, ncol = hidden_dim)
@@ -180,4 +182,6 @@ for (j in 1:length(X1)) {
    out = 0
    for (x in 1:length(d)) { out[x] = rev(d)[x]*2^(x-1) }
    print("----------------")
  }
}
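This revision closes the sequence loop before backpropagation begins and restores the integer-decoding loop at the end. As a standalone sketch of what that loop computes (the helper name `bin2int` is illustrative, not part of the gist), assuming a most-significant-bit-first bit vector:

```r
## invert int2bin for a bit vector given most significant bit first
bin2int <- function(bits)
  sum(rev(bits) * 2^(seq_along(bits) - 1))

bin2int(c(0, 0, 0, 0, 0, 1, 0, 1))  # 5
```

Note that by this point in the history the encoding is least-significant-bit first (the `rev()` was dropped from `i2b` in an earlier revision, further down this page), so decoding `d` itself would instead be `sum(d * 2^(seq_along(d) - 1))`.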
bquast renamed this gist
Jul 22, 2016. 1 changed file with 0 additions and 0 deletions.
File renamed without changes. -
bquast revised this gist
Jul 22, 2016. 1 changed file with 75 additions and 20 deletions.
@@ -37,14 +37,30 @@ binary_dim = 8
largest_number = 2^binary_dim

# initialise neural network weights
synapse_0_i = matrix(runif(n = input_dim *hidden_dim, min=-1, max=1), nrow=input_dim)
synapse_0_f = matrix(runif(n = input_dim *hidden_dim, min=-1, max=1), nrow=input_dim)
synapse_0_o = matrix(runif(n = input_dim *hidden_dim, min=-1, max=1), nrow=input_dim)
synapse_0_c = matrix(runif(n = input_dim *hidden_dim, min=-1, max=1), nrow=input_dim)
synapse_1   = matrix(runif(n = hidden_dim*output_dim, min=-1, max=1), nrow=hidden_dim)
synapse_h_i = matrix(runif(n = hidden_dim*hidden_dim, min=-1, max=1), nrow=hidden_dim)
synapse_h_f = matrix(runif(n = hidden_dim*hidden_dim, min=-1, max=1), nrow=hidden_dim)
synapse_h_o = matrix(runif(n = hidden_dim*hidden_dim, min=-1, max=1), nrow=hidden_dim)
synapse_h_c = matrix(runif(n = hidden_dim*hidden_dim, min=-1, max=1), nrow=hidden_dim)

# initialise state cell
c_t_m1 = matrix(0, nrow=1, ncol = hidden_dim)

# initialise synapse updates
synapse_0_i_update = matrix(0, nrow = input_dim, ncol = hidden_dim)
synapse_0_f_update = matrix(0, nrow = input_dim, ncol = hidden_dim)
synapse_0_o_update = matrix(0, nrow = input_dim, ncol = hidden_dim)
synapse_0_c_update = matrix(0, nrow = input_dim, ncol = hidden_dim)
synapse_1_update   = matrix(0, nrow = hidden_dim, ncol = output_dim)
synapse_h_i_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)
synapse_h_f_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)
synapse_h_o_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)
synapse_h_c_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)

# training logic
for (j in 1:length(X1)) {
@@ -72,7 +88,13 @@ for (j in 1:length(X1)) {
    y = c[position]

    # hidden layer (input ~+ prev_hidden)
    i_t    = sigmoid((X%*%synapse_0_i) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h_i)) # add bias?
    f_t    = sigmoid((X%*%synapse_0_f) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h_f)) # add bias?
    o_t    = sigmoid((X%*%synapse_0_o) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h_o)) # add bias?
    c_in_t = tanh(   (X%*%synapse_0_c) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h_c))
    c_t    = (f_t * c_t_m1[dim(layer_1_values)[1],]) + (i_t * c_in_t)
    layer_1 = o_t * tanh(c_t)
    c_t_m1  = rbind(c_t_m1, c_t)

    # output layer (new binary representation)
    layer_2 = sigmoid(layer_1 %*% synapse_1)
@@ -88,7 +110,10 @@ for (j in 1:length(X1)) {
    # store hidden layer so we can print it out
    layer_1_values = rbind(layer_1_values, layer_1)
  }

  future_layer_1_i_delta = matrix(0, nrow = 1, ncol = hidden_dim)
  future_layer_1_f_delta = matrix(0, nrow = 1, ncol = hidden_dim)
  future_layer_1_o_delta = matrix(0, nrow = 1, ncol = hidden_dim)
  future_layer_1_c_delta = matrix(0, nrow = 1, ncol = hidden_dim)

  for (position in 1:binary_dim) {
@@ -99,23 +124,53 @@ for (j in 1:length(X1)) {
    # error at output layer
    layer_2_delta = layer_2_deltas[dim(layer_2_deltas)[1]-(position-1),]

    # error at hidden layer
    layer_1_i_delta = (future_layer_1_i_delta %*% t(synapse_h_i) + layer_2_delta %*% t(synapse_1)) * sigmoid_output_to_derivative(layer_1)
    layer_1_f_delta = (future_layer_1_f_delta %*% t(synapse_h_f) + layer_2_delta %*% t(synapse_1)) * sigmoid_output_to_derivative(layer_1)
    layer_1_o_delta = (future_layer_1_o_delta %*% t(synapse_h_o) + layer_2_delta %*% t(synapse_1)) * sigmoid_output_to_derivative(layer_1)
    layer_1_c_delta = (future_layer_1_c_delta %*% t(synapse_h_c) + layer_2_delta %*% t(synapse_1)) * sigmoid_output_to_derivative(layer_1)

    # let's update all our weights so we can try again
    synapse_1_update   = synapse_1_update   + matrix(layer_1)      %*% layer_2_delta
    synapse_h_i_update = synapse_h_i_update + matrix(prev_layer_1) %*% layer_1_i_delta
    synapse_h_f_update = synapse_h_f_update + matrix(prev_layer_1) %*% layer_1_f_delta
    synapse_h_o_update = synapse_h_o_update + matrix(prev_layer_1) %*% layer_1_o_delta
    synapse_h_c_update = synapse_h_c_update + matrix(prev_layer_1) %*% layer_1_c_delta
    synapse_0_i_update = synapse_0_i_update + t(X) %*% layer_1_i_delta
    synapse_0_f_update = synapse_0_f_update + t(X) %*% layer_1_f_delta
    synapse_0_o_update = synapse_0_o_update + t(X) %*% layer_1_o_delta
    synapse_0_c_update = synapse_0_c_update + t(X) %*% layer_1_c_delta

    future_layer_1_i_delta = layer_1_i_delta
    future_layer_1_f_delta = layer_1_f_delta
    future_layer_1_o_delta = layer_1_o_delta
    future_layer_1_c_delta = layer_1_c_delta
  }

  synapse_0_i = synapse_0_i + ( synapse_0_i_update * alpha )
  synapse_0_f = synapse_0_f + ( synapse_0_f_update * alpha )
  synapse_0_o = synapse_0_o + ( synapse_0_o_update * alpha )
  synapse_0_c = synapse_0_c + ( synapse_0_c_update * alpha )
  synapse_1   = synapse_1   + ( synapse_1_update   * alpha )
  synapse_h_i = synapse_h_i + ( synapse_h_i_update * alpha )
  synapse_h_f = synapse_h_f + ( synapse_h_f_update * alpha )
  synapse_h_o = synapse_h_o + ( synapse_h_o_update * alpha )
  synapse_h_c = synapse_h_c + ( synapse_h_c_update * alpha )

  synapse_0_i_update = synapse_0_i_update * 0
  synapse_0_f_update = synapse_0_f_update * 0
  synapse_0_o_update = synapse_0_o_update * 0
  synapse_0_c_update = synapse_0_c_update * 0
  synapse_1_update   = synapse_1_update   * 0
  synapse_h_i_update = synapse_h_i_update * 0
  synapse_h_f_update = synapse_h_f_update * 0
  synapse_h_o_update = synapse_h_o_update * 0
  synapse_h_c_update = synapse_h_c_update * 0

  # print out progress
  if (j %% 1000 == 0) {
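In equation form, the forward step this revision implements is a standard LSTM cell without bias terms (writing $x_t$ for the input row `X`, $h_{t-1}$ for the stored previous hidden row, and $W_{0\cdot}$, $W_{h\cdot}$ for the `synapse_0_*` and `synapse_h_*` matrices):

$$
\begin{aligned}
i_t &= \sigma(x_t W_{0i} + h_{t-1} W_{hi}), &
f_t &= \sigma(x_t W_{0f} + h_{t-1} W_{hf}), \\
o_t &= \sigma(x_t W_{0o} + h_{t-1} W_{ho}), &
\tilde{c}_t &= \tanh(x_t W_{0c} + h_{t-1} W_{hc}), \\
c_t &= f_t \odot c_{t-1} + i_t \odot \tilde{c}_t, &
h_t &= o_t \odot \tanh(c_t).
\end{aligned}
$$

Note that the backward pass applies the plain sigmoid delta to `layer_1` for all four gates, which appears to be a simplification of the exact LSTM gradient rather than a full derivation through the cell state.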
bquast revised this gist
Jul 22, 2016. 1 changed file with 2 additions and 2 deletions.
@@ -38,11 +38,11 @@ largest_number = 2^binary_dim

# initialize neural network weights
synapse_0 = matrix(runif(n = input_dim *hidden_dim, min=-1, max=1), nrow=input_dim)
synapse_1 = matrix(runif(n = hidden_dim*output_dim, min=-1, max=1), nrow=hidden_dim)
synapse_h = matrix(runif(n = hidden_dim*hidden_dim, min=-1, max=1), nrow=hidden_dim)

synapse_0_update = matrix(0, nrow = input_dim, ncol = hidden_dim)
synapse_1_update = matrix(0, nrow = hidden_dim, ncol = output_dim)
synapse_h_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)
bquast revised this gist
Jul 22, 2016. 1 changed file with 1 addition and 1 deletion.
@@ -47,7 +47,7 @@ synapse_1_update = matrix(0, nrow = hidden_dim, ncol = output_dim)
synapse_h_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)

# training logic
for (j in 1:length(X1)) {

  # select input variables
  a = X1b[j,]
bquast revised this gist
Jul 22, 2016. 1 changed file with 3 additions and 7 deletions.
@@ -28,17 +28,13 @@ X1b <- int2bin(X1, length=8)
X2b <- int2bin(X2, length=8)
Yb  <- int2bin(Y,  length=8)

# input variables
alpha      = 0.1
input_dim  = 2
hidden_dim = 16
output_dim = 1
binary_dim = 8
largest_number = 2^binary_dim

# initialize neural network weights
@@ -51,7 +47,7 @@ synapse_1_update = matrix(0, nrow = hidden_dim, ncol = output_dim)
synapse_h_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)

# training logic
for (j in 1:length(X1b)) {

  # select input variables
  a = X1b[j,]
bquast revised this gist
Jul 22, 2016. 1 changed file with 1 addition and 1 deletion.
@@ -51,7 +51,7 @@ synapse_1_update = matrix(0, nrow = hidden_dim, ncol = output_dim)
synapse_h_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)

# training logic
for (j in 1:length(X1b) {   # note: unbalanced parenthesis, corrected in the following revision

  # select input variables
  a = X1b[j,]
bquast renamed this gist
Jul 22, 2016. 1 changed file with 0 additions and 0 deletions.
File renamed without changes. -
bquast revised this gist
Jul 22, 2016. 1 changed file with 6 additions and 6 deletions.
@@ -24,9 +24,9 @@ X2 = sample(0:127, 10000, replace=TRUE)
Y <- X1 + X2

# convert to binary
X1b <- int2bin(X1, length=8)
X2b <- int2bin(X2, length=8)
Yb  <- int2bin(Y,  length=8)

# training dataset generation
# int2binary =
@@ -54,11 +54,11 @@ synapse_h_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)
for (j in 1:10000) {

  # select input variables
  a = X1b[j,]
  b = X2b[j,]

  # response variable
  c = Yb[j,]

  # where we'll store our best guess (binary encoded)
  d = matrix(0, nrow = 1, ncol = binary_dim)
bquast revised this gist
Jul 22, 2016. 1 changed file with 5 additions and 5 deletions.
@@ -94,14 +94,14 @@ for (j in 1:10000) {
  future_layer_1_delta = matrix(0, nrow = 1, ncol = hidden_dim)

  for (position in 1:binary_dim) {

    X = cbind(a[binary_dim-(position-1)], b[binary_dim-(position-1)])
    layer_1 = layer_1_values[dim(layer_1_values)[1]-(position-1),]
    prev_layer_1 = layer_1_values[dim(layer_1_values)[1]-position,]

    # error at output layer
    layer_2_delta = layer_2_deltas[dim(layer_2_deltas)[1]-(position-1),]
    # error at hidden layer
    layer_1_delta = (future_layer_1_delta %*% t(synapse_h) + layer_2_delta %*% t(synapse_1)) * sigmoid_output_to_derivative(layer_1)
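The hidden-layer delta stepped backwards through time here is, written out (with $h_t$ the stored `layer_1` row for step $t$, $\delta^y_t$ the output delta, and $\odot$ elementwise multiplication):

$$
\delta^h_t = \left(\delta^h_{t+1} W_h^\top + \delta^y_t W_1^\top\right) \odot h_t \odot (1 - h_t)
$$

where the final factor is `sigmoid_output_to_derivative` applied to the saved activations.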
bquast revised this gist
Jul 22, 2016. 1 changed file with 4 additions and 4 deletions.
@@ -69,11 +69,11 @@ for (j in 1:10000) {
  layer_1_values = matrix(0, nrow=1, ncol = hidden_dim)

  # moving along the positions in the binary encoding
  for (position in 1:binary_dim) {

    # generate input and output
    X = cbind(a[position], b[position])
    y = c[position]

    # hidden layer (input ~+ prev_hidden)
    layer_1 = sigmoid((X%*%synapse_0) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h))
@@ -87,7 +87,7 @@ for (j in 1:10000) {
    overallError = overallError + abs(layer_2_error)

    # decode estimate so we can print it out
    d[position] = round(layer_2)

    # store hidden layer so we can print it out
    layer_1_values = rbind(layer_1_values, layer_1)
  }
bquast revised this gist
Jul 22, 2016. 1 changed file with 5 additions and 6 deletions.
@@ -2,7 +2,7 @@
## convert integer to binary
i2b <- function(integer, length=8)
  as.numeric(intToBits(integer))[1:length]

## apply
int2bin <- function(integer, length=8)
@@ -67,14 +67,13 @@ for (j in 1:10000) {
  layer_2_deltas = matrix(0)
  layer_1_values = matrix(0, nrow=1, ncol = hidden_dim)

  # moving along the positions in the binary encoding
  for (position in 0:(binary_dim-1)) {

    # generate input and output
    X = cbind(a[position+1], b[position+1])
    y = c[position+1]

    # hidden layer (input ~+ prev_hidden)
    layer_1 = sigmoid((X%*%synapse_0) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h))
@@ -88,7 +87,7 @@ for (j in 1:10000) {
    overallError = overallError + abs(layer_2_error)

    # decode estimate so we can print it out
    d[position+1] = round(layer_2)

    # store hidden layer so we can print it out
    layer_1_values = rbind(layer_1_values, layer_1)
  }
@@ -97,7 +96,7 @@ for (position in 0:(binary_dim-1)) {
    X = cbind(a[binary_dim-position], b[binary_dim-position])
    layer_1 = layer_1_values[dim(layer_1_values)[1]-position,]
    prev_layer_1 = layer_1_values[dim(layer_1_values)[1]-(position+1),]
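Dropping the `rev()` from `i2b` flips the encoding to least-significant-bit first, which is why the position indexing in the loops changes in the same revision. A quick check (deterministic, since `intToBits` returns bits least significant first):

```r
## the revised helper, as above
i2b <- function(integer, length=8)
  as.numeric(intToBits(integer))[1:length]

i2b(3)  # 1 1 0 0 0 0 0 0 -- the 2^0 bit now comes first
```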
bquast revised this gist
Jul 22, 2016. 1 changed file with 32 additions and 22 deletions.
@@ -1,23 +1,38 @@
# define some functions
## convert integer to binary
i2b <- function(integer, length=8)
  rev(as.numeric(intToBits(integer))[1:length])

## apply
int2bin <- function(integer, length=8)
  t(sapply(integer, i2b, length=length))

## sigmoid function
sigmoid <- function(x, k=1, x0=0)
  1 / (1+exp( -k*(x-x0) ))

## derivative
sigmoid_output_to_derivative <- function(x)
  x*(1-x)

# create training numbers
X1 = sample(0:127, 10000, replace=TRUE)
X2 = sample(0:127, 10000, replace=TRUE)

# create training response numbers
Y <- X1 + X2

# convert to binary
X1 <- int2bin(X1, length=8)
X2 <- int2bin(X2, length=8)
Y  <- int2bin(Y,  length=8)

# training dataset generation
# int2binary =
binary_dim = 8
largest_number = 2^binary_dim

# input variables
alpha = 0.1
@@ -38,16 +53,12 @@ synapse_h_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)

# training logic
for (j in 1:10000) {

  # select input variables
  a = X1[j,]
  b = X2[j,]

  # response variable
  c = Y[j,]

  # where we'll store our best guess (binary encoded)
  d = matrix(0, nrow = 1, ncol = binary_dim)
@@ -119,5 +130,4 @@ for (j in 1:10000) {
    out = 0
    for (x in 1:length(d)) { out[x] = rev(d)[x]*2^(x-1) }
    print("----------------")
  }
}
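At this point the `rev()` is still in place, so the encoding is most-significant-bit first. As a usage sketch of the new vectorised helpers (the example call and its commented output are illustrative, not part of the gist):

```r
## the helpers exactly as introduced in this revision
i2b <- function(integer, length=8)
  rev(as.numeric(intToBits(integer))[1:length])

int2bin <- function(integer, length=8)
  t(sapply(integer, i2b, length=length))

int2bin(c(3, 5), length=8)
# row 1: 0 0 0 0 0 0 1 1  (3)
# row 2: 0 0 0 0 0 1 0 1  (5)
```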
bquast created this gist
Jul 22, 2016.
@@ -0,0 +1,123 @@
# compute sigmoid nonlinearity
sigmoid = function(x) {
  output = 1 / (1+exp(-x))
  return(output)
}

# convert output of sigmoid function to its derivative
sigmoid_output_to_derivative = function(output) {
  return( output*(1-output) )
}

# training dataset generation
# int2binary =
binary_dim = 8
largest_number = 2^binary_dim

int2binary = function(x) {
  tail(rev(as.integer(intToBits(x))), binary_dim)
}

# for (i in 1:largest_number) {
#   int2binary[i] = binary[i]
# }

# input variables
alpha = 0.1
input_dim = 2
hidden_dim = 16
output_dim = 1

# initialize neural network weights
synapse_0 = matrix(runif(n = input_dim*hidden_dim, min=-1, max=1), nrow=input_dim)
synapse_1 = matrix(runif(n = hidden_dim*output_dim, min=-1, max=1), nrow=hidden_dim)
synapse_h = matrix(runif(n = hidden_dim*hidden_dim, min=-1, max=1), nrow=hidden_dim)

synapse_0_update = matrix(0, nrow = input_dim, ncol = hidden_dim)
synapse_1_update = matrix(0, nrow = hidden_dim, ncol = output_dim)
synapse_h_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)

# training logic
for (j in 1:10000) {

  # generate a simple addition problem (a + b = c)
  a_int = sample(1:(largest_number/2), 1)  # int version
  a = int2binary(a_int)                    # binary encoding
  b_int = sample(1:(largest_number/2), 1)  # int version
  b = int2binary(b_int)

  # true answer
  c_int = a_int + b_int
  c = int2binary(c_int)

  # where we'll store our best guess (binary encoded)
  d = matrix(0, nrow = 1, ncol = binary_dim)

  overallError = 0

  layer_2_deltas = matrix(0)
  layer_1_values = matrix(0, nrow=1, ncol = hidden_dim)
  # layer_1_values = rbind(layer_1_values, matrix(0, nrow=1, ncol=hidden_dim))

  # moving along the positions in the binary encoding
  for (position in 0:(binary_dim-1)) {

    # generate input and output
    X = cbind(a[binary_dim - position], b[binary_dim - position])
    y = c[binary_dim - position]

    # hidden layer (input ~+ prev_hidden)
    layer_1 = sigmoid((X%*%synapse_0) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h))

    # output layer (new binary representation)
    layer_2 = sigmoid(layer_1 %*% synapse_1)

    # did we miss?... if so, by how much?
    layer_2_error = y - layer_2
    layer_2_deltas = rbind(layer_2_deltas, layer_2_error * sigmoid_output_to_derivative(layer_2))
    overallError = overallError + abs(layer_2_error)

    # decode estimate so we can print it out
    d[binary_dim - position] = round(layer_2)

    # store hidden layer so we can print it out
    layer_1_values = rbind(layer_1_values, layer_1)
  }

  future_layer_1_delta = matrix(0, nrow = 1, ncol = hidden_dim)

  for (position in 0:(binary_dim-1)) {

    X = cbind(a[position+1], b[position+1])
    layer_1 = layer_1_values[dim(layer_1_values)[1]-position,]
    prev_layer_1 = layer_1_values[dim(layer_1_values)[1]-(position+1),]

    # error at output layer
    layer_2_delta = layer_2_deltas[dim(layer_2_deltas)[1]-position,]
    # error at hidden layer
    layer_1_delta = (future_layer_1_delta %*% t(synapse_h) + layer_2_delta %*% t(synapse_1)) * sigmoid_output_to_derivative(layer_1)

    # let's update all our weights so we can try again
    synapse_1_update = synapse_1_update + matrix(layer_1) %*% layer_2_delta
    synapse_h_update = synapse_h_update + matrix(prev_layer_1) %*% layer_1_delta
    synapse_0_update = synapse_0_update + t(X) %*% layer_1_delta

    future_layer_1_delta = layer_1_delta
  }

  synapse_0 = synapse_0 + ( synapse_0_update * alpha )
  synapse_1 = synapse_1 + ( synapse_1_update * alpha )
  synapse_h = synapse_h + ( synapse_h_update * alpha )

  synapse_0_update = synapse_0_update * 0
  synapse_1_update = synapse_1_update * 0
  synapse_h_update = synapse_h_update * 0

  # print out progress
  if (j %% 1000 == 0) {
    print(paste("Error:", overallError))
    print(paste("Pred:", paste(d, collapse = " ")))
    print(paste("True:", paste(c, collapse = " ")))
    out = 0
    for (x in 1:length(d)) { out[x] = rev(d)[x]*2^(x-1) }
    print(paste(a_int, "+", b_int, "=", sum(out)))
    print("----------------")
  }
}
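The hidden-layer update in this first version is a plain Elman-style recurrence rather than an LSTM; in equation form (a reading of the code above, with $W_0$ = `synapse_0`, $W_h$ = `synapse_h`, $W_1$ = `synapse_1`):

$$
h_t = \sigma\left(x_t W_0 + h_{t-1} W_h\right), \qquad \hat{y}_t = \sigma\left(h_t W_1\right)
$$

The later revisions above replace this single update with the four-gate LSTM cell.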