@mick001
Forked from bquast/LSTM.R
Created August 8, 2016 19:54

Revisions

  1. @bquast revised this gist Aug 8, 2016. 1 changed file with 88 additions and 46 deletions.
    134 changes: 88 additions & 46 deletions LSTM.R
    @@ -1,3 +1,5 @@
    set.seed(1)

    # define some functions

    ## convert integer to binary
    @@ -12,41 +14,56 @@ int2bin <- function(integer, length=8)
    sigmoid <- function(x, k=1, x0=0)
    1 / (1+exp( -k*(x-x0) ))

    ## derivative
    ## sigmoid derivative
    sigmoid_output_to_derivative <- function(x)
    x*(1-x)

    ## tanh derivative
    tanh_output_to_derivative <- function(x)
    1-x^2
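
    Editor's note: both helpers take the activation's output rather than its input: if s = sigmoid(x) then ds/dx = s(1-s), and if h = tanh(x) then dh/dx = 1 - h^2, so the backward pass can reuse values already computed in the forward pass. A quick finite-difference check, assuming the definitions above:

    eps <- 1e-6
    s   <- sigmoid(0.3)
    fd  <- (sigmoid(0.3 + eps) - sigmoid(0.3 - eps)) / (2 * eps)  # numerical derivative
    abs(sigmoid_output_to_derivative(s) - fd) < 1e-8              # TRUE
    h   <- tanh(0.3)
    fd  <- (tanh(0.3 + eps) - tanh(0.3 - eps)) / (2 * eps)
    abs(tanh_output_to_derivative(h) - fd) < 1e-8                 # TRUE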

    # create training numbers
    X1 = sample(0:127, 10000, replace=TRUE)
    X2 = sample(0:127, 10000, replace=TRUE)
    X1 = sample(0:1023, 100000, replace=TRUE)
    X2 = sample(0:1023, 100000, replace=TRUE)

    # create training response numbers
    Y <- X1 + X2

    # convert to binary
    X1b <- int2bin(X1, length=8)
    X2b <- int2bin(X2, length=8)
    Yb <- int2bin(Y, length=8)
    X1b <- int2bin(X1, length=10)
    X2b <- int2bin(X2, length=10)
    Yb <- int2bin(Y, length=10)
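
    Editor's note: with operands drawn from 0:1023 the sum Y can reach 2046, which needs 11 bits; int2bin(Y, length=10) keeps only the 10 least-significant bits, so any sum of 1024 or more is silently truncated (length=11, with binary_dim=11 below, would avoid this). A sketch of the failure mode, assuming the LSB-first int2bin above; bin2int is a hypothetical decoder, not part of the gist:

    int2bin(1536, length = 10)            # the 2^10 bit of 1536 is dropped
    bin2int <- function(bits)             # decode an LSB-first bit vector
      sum(bits * 2^(seq_along(bits) - 1))
    bin2int(int2bin(1536, length = 10))   # 512, not 1536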

    # input variables
    alpha = 0.1
    alpha_decay = 0.999
    momentum = 0.1
    init_weight = 1
    batch_size = 20
    input_dim = 2
    hidden_dim = 16
    hidden_dim = 8
    output_dim = 1
    binary_dim = 8
    binary_dim = 10
    largest_number = 2^binary_dim
    output_size = 100
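
    Editor's note: this revision switches from per-example updates to minibatches of batch_size = 20, adds momentum, and decays the learning rate by alpha_decay after every batch; output_size only controls how often progress is printed. With these values the step size shrinks smoothly over training, for example:

    n_batches <- 100000 / batch_size      # 5000 batch updates in one pass
    alpha * alpha_decay ^ n_batches       # ~0.00067: step size by the end of training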



    # initialise neural network weights
    synapse_0_i = matrix(runif(n = input_dim *hidden_dim, min=-1, max=1), nrow=input_dim)
    synapse_0_f = matrix(runif(n = input_dim *hidden_dim, min=-1, max=1), nrow=input_dim)
    synapse_0_o = matrix(runif(n = input_dim *hidden_dim, min=-1, max=1), nrow=input_dim)
    synapse_0_c = matrix(runif(n = input_dim *hidden_dim, min=-1, max=1), nrow=input_dim)
    synapse_1 = matrix(runif(n = hidden_dim*output_dim, min=-1, max=1), nrow=hidden_dim)
    synapse_h_i = matrix(runif(n = hidden_dim*hidden_dim, min=-1, max=1), nrow=hidden_dim)
    synapse_h_f = matrix(runif(n = hidden_dim*hidden_dim, min=-1, max=1), nrow=hidden_dim)
    synapse_h_o = matrix(runif(n = hidden_dim*hidden_dim, min=-1, max=1), nrow=hidden_dim)
    synapse_h_c = matrix(runif(n = hidden_dim*hidden_dim, min=-1, max=1), nrow=hidden_dim)
    synapse_0_i = matrix(runif(n = input_dim *hidden_dim, min=-init_weight, max=init_weight), nrow=input_dim)
    synapse_0_f = matrix(runif(n = input_dim *hidden_dim, min=-init_weight, max=init_weight), nrow=input_dim)
    synapse_0_o = matrix(runif(n = input_dim *hidden_dim, min=-init_weight, max=init_weight), nrow=input_dim)
    synapse_0_c = matrix(runif(n = input_dim *hidden_dim, min=-init_weight, max=init_weight), nrow=input_dim)
    synapse_1 = matrix(runif(n = hidden_dim*output_dim, min=-init_weight, max=init_weight), nrow=hidden_dim)
    synapse_h_i = matrix(runif(n = hidden_dim*hidden_dim, min=-init_weight, max=init_weight), nrow=hidden_dim)
    synapse_h_f = matrix(runif(n = hidden_dim*hidden_dim, min=-init_weight, max=init_weight), nrow=hidden_dim)
    synapse_h_o = matrix(runif(n = hidden_dim*hidden_dim, min=-init_weight, max=init_weight), nrow=hidden_dim)
    synapse_h_c = matrix(runif(n = hidden_dim*hidden_dim, min=-init_weight, max=init_weight), nrow=hidden_dim)
    synapse_b_1 = runif(n = output_dim, min=-init_weight, max=init_weight)
    synapse_b_i = runif(n = hidden_dim, min=-init_weight, max=init_weight)
    synapse_b_f = runif(n = hidden_dim, min=-init_weight, max=init_weight)
    synapse_b_o = runif(n = hidden_dim, min=-init_weight, max=init_weight)
    synapse_b_c = runif(n = hidden_dim, min=-init_weight, max=init_weight)
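
    Editor's note: every weight matrix and bias vector is drawn from Uniform(-init_weight, init_weight). The repetition could be factored into a small helper, sketched here (init_mat is hypothetical, not part of the gist):

    init_mat <- function(nr, nc, w = init_weight)
      matrix(runif(nr * nc, min = -w, max = w), nrow = nr)
    synapse_0_i <- init_mat(input_dim, hidden_dim)    # and likewise for the
    synapse_h_i <- init_mat(hidden_dim, hidden_dim)   # other gates and biases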

    # initialise state cell
    c_t_m1 = matrix(0, nrow=1, ncol = hidden_dim)
    @@ -61,10 +78,14 @@ synapse_h_i_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)
    synapse_h_f_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)
    synapse_h_o_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)
    synapse_h_c_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)
    synapse_b_1_update = rep(0, output_dim)
    synapse_b_i_update = rep(0, hidden_dim)
    synapse_b_f_update = rep(0, hidden_dim)
    synapse_b_o_update = rep(0, hidden_dim)
    synapse_b_c_update = rep(0, hidden_dim)

    # training logic
    for (j in 1:length(X1)) {

    # select input variables
    a = X1b[j,]
    b = X2b[j,]
    @@ -88,21 +109,21 @@ for (j in 1:length(X1)) {
    y = c[position]

    # hidden layer (input ~+ prev_hidden)
    i_t = sigmoid((X%*%synapse_0_i) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h_i)) # add bias?
    f_t = sigmoid((X%*%synapse_0_f) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h_f)) # add bias?
    o_t = sigmoid((X%*%synapse_0_o) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h_o)) # add bias?
    c_in_t = tanh( (X%*%synapse_0_c) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h_c))
    i_t = sigmoid((X%*%synapse_0_i) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h_i) + synapse_b_i) # add bias?
    f_t = sigmoid((X%*%synapse_0_f) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h_f) + synapse_b_f) # add bias?
    o_t = sigmoid((X%*%synapse_0_o) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h_o) + synapse_b_o) # add bias?
    c_in_t = tanh( (X%*%synapse_0_c) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h_c) + synapse_b_c)
    c_t = (f_t * c_t_m1[dim(layer_1_values)[1],]) + (i_t * c_in_t)
    layer_1 = o_t * tanh(c_t)
    c_t_m1 = rbind(c_t_m1, c_t)
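
    Editor's note: mapping the gist's names onto the usual notation (synapse_0_* are input weights W, synapse_h_* recurrent weights U, synapse_b_* the biases this revision adds), the cell computed here is a standard LSTM without peephole connections:

    i_t    = sigmoid(x_t W_i + h_{t-1} U_i + b_i)    (input gate)
    f_t    = sigmoid(x_t W_f + h_{t-1} U_f + b_f)    (forget gate)
    o_t    = sigmoid(x_t W_o + h_{t-1} U_o + b_o)    (output gate)
    c_in_t = tanh(x_t W_c + h_{t-1} U_c + b_c)       (candidate state)
    c_t    = f_t * c_{t-1} + i_t * c_in_t            (cell state, * elementwise)
    h_t    = o_t * tanh(c_t)                         (hidden state, layer_1)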

    # output layer (new binary representation)
    layer_2 = sigmoid(layer_1 %*% synapse_1)
    layer_2 = sigmoid(layer_1 %*% synapse_1 + synapse_b_1)

    # did we miss?... if so, by how much?
    layer_2_error = y - layer_2
    layer_2_deltas = rbind(layer_2_deltas, layer_2_error * sigmoid_output_to_derivative(layer_2))
    overallError = overallError + abs(layer_2_error)
    overallError = overallError + round(abs(layer_2_error))

    # decode estimate so we can print it out
    d[position] = round(layer_2)
    @@ -146,37 +167,58 @@ for (j in 1:length(X1)) {
    synapse_0_f_update = synapse_0_f_update + t(X) %*% layer_1_f_delta
    synapse_0_o_update = synapse_0_o_update + t(X) %*% layer_1_o_delta
    synapse_0_c_update = synapse_0_c_update + t(X) %*% layer_1_c_delta
    synapse_b_1_update = synapse_b_1_update + layer_2_delta
    synapse_b_i_update = synapse_b_i_update + layer_1_i_delta
    synapse_b_f_update = synapse_b_f_update + layer_1_f_delta
    synapse_b_o_update = synapse_b_o_update + layer_1_o_delta
    synapse_b_c_update = synapse_b_c_update + layer_1_c_delta

    future_layer_1_i_delta = layer_1_i_delta
    future_layer_1_f_delta = layer_1_f_delta
    future_layer_1_o_delta = layer_1_o_delta
    future_layer_1_c_delta = layer_1_c_delta
    }

    }
    if(j %% batch_size ==0) {
    synapse_0_i = synapse_0_i + ( synapse_0_i_update * alpha )
    synapse_0_f = synapse_0_f + ( synapse_0_f_update * alpha )
    synapse_0_o = synapse_0_o + ( synapse_0_o_update * alpha )
    synapse_0_c = synapse_0_c + ( synapse_0_c_update * alpha )
    synapse_1 = synapse_1 + ( synapse_1_update * alpha )
    synapse_h_i = synapse_h_i + ( synapse_h_i_update * alpha )
    synapse_h_f = synapse_h_f + ( synapse_h_f_update * alpha )
    synapse_h_o = synapse_h_o + ( synapse_h_o_update * alpha )
    synapse_h_c = synapse_h_c + ( synapse_h_c_update * alpha )
    synapse_b_1 = synapse_b_1 + ( synapse_b_1_update * alpha )
    synapse_b_i = synapse_b_i + ( synapse_b_i_update * alpha )
    synapse_b_f = synapse_b_f + ( synapse_b_f_update * alpha )
    synapse_b_o = synapse_b_o + ( synapse_b_o_update * alpha )
    synapse_b_c = synapse_b_c + ( synapse_b_c_update * alpha )

    alpha = alpha * alpha_decay

    synapse_0_i_update = synapse_0_i_update * momentum
    synapse_0_f_update = synapse_0_f_update * momentum
    synapse_0_o_update = synapse_0_o_update * momentum
    synapse_0_c_update = synapse_0_c_update * momentum
    synapse_1_update = synapse_1_update * momentum
    synapse_h_i_update = synapse_h_i_update * momentum
    synapse_h_f_update = synapse_h_f_update * momentum
    synapse_h_o_update = synapse_h_o_update * momentum
    synapse_h_c_update = synapse_h_c_update * momentum
    synapse_b_1_update = synapse_b_1_update * momentum
    synapse_b_i_update = synapse_b_i_update * momentum
    synapse_b_f_update = synapse_b_f_update * momentum
    synapse_b_o_update = synapse_b_o_update * momentum
    synapse_b_c_update = synapse_b_c_update * momentum
    }
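
    Editor's note: inside the batch block every parameter follows the same three-step rule: step by alpha along the accumulated update, keep a momentum fraction of that update for the next batch, then decay alpha. A generic sketch (apply_batch_update is hypothetical; the gist writes each parameter out long-hand):

    apply_batch_update <- function(W, dW, alpha, momentum) {
      W  <- W + dW * alpha    # step along the accumulated error signal
      dW <- dW * momentum     # carry a fraction of the update into the next batch
      list(W = W, dW = dW)    # e.g. res <- apply_batch_update(synapse_1, synapse_1_update, alpha, momentum)
    }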

    synapse_0_i = synapse_0_i + ( synapse_0_i_update * alpha )
    synapse_0_f = synapse_0_f + ( synapse_0_f_update * alpha )
    synapse_0_o = synapse_0_o + ( synapse_0_o_update * alpha )
    synapse_0_c = synapse_0_c + ( synapse_0_c_update * alpha )
    synapse_1 = synapse_1 + ( synapse_1_update * alpha )
    synapse_h_i = synapse_h_i + ( synapse_h_i_update * alpha )
    synapse_h_f = synapse_h_f + ( synapse_h_f_update * alpha )
    synapse_h_o = synapse_h_o + ( synapse_h_o_update * alpha )
    synapse_h_c = synapse_h_c + ( synapse_h_c_update * alpha )

    synapse_0_i_update = synapse_0_i_update * 0
    synapse_0_f_update = synapse_0_f_update * 0
    synapse_0_o_update = synapse_0_o_update * 0
    synapse_0_c_update = synapse_0_c_update * 0
    synapse_1_update = synapse_1_update * 0
    synapse_h_i_update = synapse_h_i_update * 0
    synapse_h_f_update = synapse_h_f_update * 0
    synapse_h_o_update = synapse_h_o_update * 0
    synapse_h_c_update = synapse_h_c_update * 0

    # print out progress
    if(j %% 1000 ==0) {
    print(paste("Error:", overallError))
    if(j %% output_size ==0) {
    print(paste("Error:", overallError," - alpha:",alpha))
    print(paste("A :", paste(a, collapse = " ")))
    print(paste("B :", paste(b, collapse = " ")))
    print(paste("Pred:", paste(d, collapse = " ")))
    print(paste("True:", paste(c, collapse = " ")))
    out = 0
  2. @bquast revised this gist Aug 6, 2016. 1 changed file with 6 additions and 2 deletions.
    8 changes: 6 additions & 2 deletions LSTM.R
    @@ -108,7 +108,9 @@ for (j in 1:length(X1)) {
    d[position] = round(layer_2)

    # store hidden layer so we can print it out
    layer_1_values = rbind(layer_1_values, layer_1) }
    layer_1_values = rbind(layer_1_values, layer_1)

    }

    future_layer_1_i_delta = matrix(0, nrow = 1, ncol = hidden_dim)
    future_layer_1_f_delta = matrix(0, nrow = 1, ncol = hidden_dim)
    @@ -180,4 +182,6 @@ for (j in 1:length(X1)) {
    out = 0
    for (x in 1:length(d)) {
    out[x] = rev(d)[x]*2^(x-1) }
    print("----------------") } }
    print("----------------")
    }
    }
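
    Editor's note: the closing loop turns the predicted bit vector d back into an integer; out[x] = rev(d)[x]*2^(x-1) is equivalent to the sketch below, which treats the last element of d as the least-significant bit. Since the Jul 22 change to i2b stores bits least-significant-first, the unreversed sum(d * 2^(seq_along(d) - 1)) would arguably be the matching decode:

    out <- rev(d) * 2^(seq_along(d) - 1)   # same values the loop builds
    sum(out)                               # the decoded prediction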
  3. @bquast renamed this gist Jul 22, 2016 (RNN.R → LSTM.R). 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  4. @bquast revised this gist Jul 22, 2016. 1 changed file with 75 additions and 20 deletions.
    95 changes: 75 additions & 20 deletions RNN.R
    @@ -37,14 +37,30 @@ binary_dim = 8
    largest_number = 2^binary_dim


    # initialize neural network weights
    synapse_0 = matrix(runif(n = input_dim *hidden_dim, min=-1, max=1), nrow=input_dim)
    synapse_1 = matrix(runif(n = hidden_dim*output_dim, min=-1, max=1), nrow=hidden_dim)
    synapse_h = matrix(runif(n = hidden_dim*hidden_dim, min=-1, max=1), nrow=hidden_dim)
    # initialise neural network weights
    synapse_0_i = matrix(runif(n = input_dim *hidden_dim, min=-1, max=1), nrow=input_dim)
    synapse_0_f = matrix(runif(n = input_dim *hidden_dim, min=-1, max=1), nrow=input_dim)
    synapse_0_o = matrix(runif(n = input_dim *hidden_dim, min=-1, max=1), nrow=input_dim)
    synapse_0_c = matrix(runif(n = input_dim *hidden_dim, min=-1, max=1), nrow=input_dim)
    synapse_1 = matrix(runif(n = hidden_dim*output_dim, min=-1, max=1), nrow=hidden_dim)
    synapse_h_i = matrix(runif(n = hidden_dim*hidden_dim, min=-1, max=1), nrow=hidden_dim)
    synapse_h_f = matrix(runif(n = hidden_dim*hidden_dim, min=-1, max=1), nrow=hidden_dim)
    synapse_h_o = matrix(runif(n = hidden_dim*hidden_dim, min=-1, max=1), nrow=hidden_dim)
    synapse_h_c = matrix(runif(n = hidden_dim*hidden_dim, min=-1, max=1), nrow=hidden_dim)

    synapse_0_update = matrix(0, nrow = input_dim, ncol = hidden_dim)
    synapse_1_update = matrix(0, nrow = hidden_dim, ncol = output_dim)
    synapse_h_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)
    # initialise state cell
    c_t_m1 = matrix(0, nrow=1, ncol = hidden_dim)

    # initialise synapse updates
    synapse_0_i_update = matrix(0, nrow = input_dim, ncol = hidden_dim)
    synapse_0_f_update = matrix(0, nrow = input_dim, ncol = hidden_dim)
    synapse_0_o_update = matrix(0, nrow = input_dim, ncol = hidden_dim)
    synapse_0_c_update = matrix(0, nrow = input_dim, ncol = hidden_dim)
    synapse_1_update = matrix(0, nrow = hidden_dim, ncol = output_dim)
    synapse_h_i_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)
    synapse_h_f_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)
    synapse_h_o_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)
    synapse_h_c_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)

    # training logic
    for (j in 1:length(X1)) {
    @@ -72,7 +88,13 @@ for (j in 1:length(X1)) {
    y = c[position]

    # hidden layer (input ~+ prev_hidden)
    layer_1 = sigmoid((X%*%synapse_0) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h))
    i_t = sigmoid((X%*%synapse_0_i) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h_i)) # add bias?
    f_t = sigmoid((X%*%synapse_0_f) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h_f)) # add bias?
    o_t = sigmoid((X%*%synapse_0_o) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h_o)) # add bias?
    c_in_t = tanh( (X%*%synapse_0_c) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h_c))
    c_t = (f_t * c_t_m1[dim(layer_1_values)[1],]) + (i_t * c_in_t)
    layer_1 = o_t * tanh(c_t)
    c_t_m1 = rbind(c_t_m1, c_t)

    # output layer (new binary representation)
    layer_2 = sigmoid(layer_1 %*% synapse_1)
    @@ -88,7 +110,10 @@ for (j in 1:length(X1)) {
    # store hidden layer so we can print it out
    layer_1_values = rbind(layer_1_values, layer_1) }

    future_layer_1_delta = matrix(0, nrow = 1, ncol = hidden_dim)
    future_layer_1_i_delta = matrix(0, nrow = 1, ncol = hidden_dim)
    future_layer_1_f_delta = matrix(0, nrow = 1, ncol = hidden_dim)
    future_layer_1_o_delta = matrix(0, nrow = 1, ncol = hidden_dim)
    future_layer_1_c_delta = matrix(0, nrow = 1, ncol = hidden_dim)

    for (position in 1:binary_dim) {

    @@ -99,23 +124,53 @@ for (j in 1:length(X1)) {
    # error at output layer
    layer_2_delta = layer_2_deltas[dim(layer_2_deltas)[1]-(position-1),]
    # error at hidden layer
    layer_1_delta = (future_layer_1_delta %*% t(synapse_h) + layer_2_delta %*% t(synapse_1)) * sigmoid_output_to_derivative(layer_1)
    layer_1_i_delta = (future_layer_1_i_delta %*% t(synapse_h_i) + layer_2_delta %*% t(synapse_1)) *
    sigmoid_output_to_derivative(layer_1)
    layer_1_f_delta = (future_layer_1_f_delta %*% t(synapse_h_f) + layer_2_delta %*% t(synapse_1)) *
    sigmoid_output_to_derivative(layer_1)
    layer_1_o_delta = (future_layer_1_o_delta %*% t(synapse_h_o) + layer_2_delta %*% t(synapse_1)) *
    sigmoid_output_to_derivative(layer_1)
    layer_1_c_delta = (future_layer_1_c_delta %*% t(synapse_h_c) + layer_2_delta %*% t(synapse_1)) *
    sigmoid_output_to_derivative(layer_1)
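
    Editor's note: all four gate deltas use the same factor sigmoid_output_to_derivative(layer_1). This is a simplification: layer_1 = o_t * tanh(c_t) is not itself a sigmoid output, and a full LSTM backward pass would route the error through tanh(c_t), the cell state, and each gate's own nonlinearity. In particular the candidate gate is a tanh, so its own factor would look more like the sketch below, using the tanh derivative helper added in the later revision (an assumption about the textbook derivation, not code from the gist):

    layer_1_c_delta <- (future_layer_1_c_delta %*% t(synapse_h_c) +
                        layer_2_delta %*% t(synapse_1)) *
                       tanh_output_to_derivative(c_in_t)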


    # let's update all our weights so we can try again
    synapse_1_update = synapse_1_update + matrix(layer_1) %*% layer_2_delta
    synapse_h_update = synapse_h_update + matrix(prev_layer_1) %*% layer_1_delta
    synapse_0_update = synapse_0_update + t(X) %*% layer_1_delta
    synapse_1_update = synapse_1_update + matrix(layer_1) %*% layer_2_delta
    synapse_h_i_update = synapse_h_i_update + matrix(prev_layer_1) %*% layer_1_i_delta
    synapse_h_f_update = synapse_h_f_update + matrix(prev_layer_1) %*% layer_1_f_delta
    synapse_h_o_update = synapse_h_o_update + matrix(prev_layer_1) %*% layer_1_o_delta
    synapse_h_c_update = synapse_h_c_update + matrix(prev_layer_1) %*% layer_1_c_delta
    synapse_0_i_update = synapse_0_i_update + t(X) %*% layer_1_i_delta
    synapse_0_f_update = synapse_0_f_update + t(X) %*% layer_1_f_delta
    synapse_0_o_update = synapse_0_o_update + t(X) %*% layer_1_o_delta
    synapse_0_c_update = synapse_0_c_update + t(X) %*% layer_1_c_delta

    future_layer_1_delta = layer_1_delta }
    future_layer_1_i_delta = layer_1_i_delta
    future_layer_1_f_delta = layer_1_f_delta
    future_layer_1_o_delta = layer_1_o_delta
    future_layer_1_c_delta = layer_1_c_delta
    }


    synapse_0 = synapse_0 + ( synapse_0_update * alpha )
    synapse_1 = synapse_1 + ( synapse_1_update * alpha )
    synapse_h = synapse_h + ( synapse_h_update * alpha )
    synapse_0_i = synapse_0_i + ( synapse_0_i_update * alpha )
    synapse_0_f = synapse_0_f + ( synapse_0_f_update * alpha )
    synapse_0_o = synapse_0_o + ( synapse_0_o_update * alpha )
    synapse_0_c = synapse_0_c + ( synapse_0_c_update * alpha )
    synapse_1 = synapse_1 + ( synapse_1_update * alpha )
    synapse_h_i = synapse_h_i + ( synapse_h_i_update * alpha )
    synapse_h_f = synapse_h_f + ( synapse_h_f_update * alpha )
    synapse_h_o = synapse_h_o + ( synapse_h_o_update * alpha )
    synapse_h_c = synapse_h_c + ( synapse_h_c_update * alpha )

    synapse_0_update = synapse_0_update * 0
    synapse_1_update = synapse_1_update * 0
    synapse_h_update = synapse_h_update * 0
    synapse_0_i_update = synapse_0_i_update * 0
    synapse_0_f_update = synapse_0_f_update * 0
    synapse_0_o_update = synapse_0_o_update * 0
    synapse_0_c_update = synapse_0_c_update * 0
    synapse_1_update = synapse_1_update * 0
    synapse_h_i_update = synapse_h_i_update * 0
    synapse_h_f_update = synapse_h_f_update * 0
    synapse_h_o_update = synapse_h_o_update * 0
    synapse_h_c_update = synapse_h_c_update * 0

    # print out progress
    if(j %% 1000 ==0) {
  5. @bquast revised this gist Jul 22, 2016. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions RNN.R
    @@ -38,11 +38,11 @@ largest_number = 2^binary_dim


    # initialize neural network weights
    synapse_0 = matrix(runif(n = input_dim*hidden_dim, min=-1, max=1), nrow=input_dim)
    synapse_0 = matrix(runif(n = input_dim *hidden_dim, min=-1, max=1), nrow=input_dim)
    synapse_1 = matrix(runif(n = hidden_dim*output_dim, min=-1, max=1), nrow=hidden_dim)
    synapse_h = matrix(runif(n = hidden_dim*hidden_dim, min=-1, max=1), nrow=hidden_dim)

    synapse_0_update = matrix(0, nrow = input_dim, ncol = hidden_dim)
    synapse_0_update = matrix(0, nrow = input_dim, ncol = hidden_dim)
    synapse_1_update = matrix(0, nrow = hidden_dim, ncol = output_dim)
    synapse_h_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)

  6. @bquast revised this gist Jul 22, 2016. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion RNN.R
    @@ -47,7 +47,7 @@ synapse_1_update = matrix(0, nrow = hidden_dim, ncol = output_dim)
    synapse_h_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)

    # training logic
    for (j in 1:length(X1b)) {
    for (j in 1:length(X1)) {

    # select input variables
    a = X1b[j,]
  7. @bquast revised this gist Jul 22, 2016. 1 changed file with 3 additions and 7 deletions.
    10 changes: 3 additions & 7 deletions RNN.R
    @@ -28,17 +28,13 @@ X1b <- int2bin(X1, length=8)
    X2b <- int2bin(X2, length=8)
    Yb <- int2bin(Y, length=8)

    # training dataset generation
    # int2binary =
    binary_dim = 8

    largest_number = 2^binary_dim

    # input variables
    alpha = 0.1
    input_dim = 2
    hidden_dim = 16
    output_dim = 1
    binary_dim = 8
    largest_number = 2^binary_dim


    # initialize neural network weights
    @@ -51,7 +47,7 @@ synapse_1_update = matrix(0, nrow = hidden_dim, ncol = output_dim)
    synapse_h_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)

    # training logic
    for (j in 1:length(X1b) {
    for (j in 1:length(X1b)) {

    # select input variables
    a = X1b[j,]
  8. @bquast revised this gist Jul 22, 2016. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion RNN.R
    @@ -51,7 +51,7 @@ synapse_1_update = matrix(0, nrow = hidden_dim, ncol = output_dim)
    synapse_h_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)

    # training logic
    for (j in 1:10000) {
    for (j in 1:length(X1b) {

    # select input variables
    a = X1b[j,]
  9. @bquast renamed this gist Jul 22, 2016 (OLD-RNN.R → RNN.R). 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  10. @bquast revised this gist Jul 22, 2016. 1 changed file with 6 additions and 6 deletions.
    12 changes: 6 additions & 6 deletions OLD-RNN.R
    @@ -24,9 +24,9 @@ X2 = sample(0:127, 10000, replace=TRUE)
    Y <- X1 + X2

    # convert to binary
    X1 <- int2bin(X1, length=8)
    X2 <- int2bin(X2, length=8)
    Y <- int2bin(Y, length=8)
    X1b <- int2bin(X1, length=8)
    X2b <- int2bin(X2, length=8)
    Yb <- int2bin(Y, length=8)

    # training dataset generation
    # int2binary =
    @@ -54,11 +54,11 @@ synapse_h_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)
    for (j in 1:10000) {

    # select input variables
    a = X1[j,]
    b = X2[j,]
    a = X1b[j,]
    b = X2b[j,]

    # response variable
    c = Y[j,]
    c = Yb[j,]

    # where we'll store our best guesss (binary encoded)
    d = matrix(0, nrow = 1, ncol = binary_dim)
  11. @bquast revised this gist Jul 22, 2016. 1 changed file with 5 additions and 5 deletions.
    10 changes: 5 additions & 5 deletions OLD-RNN.R
    @@ -94,14 +94,14 @@ for (j in 1:10000) {

    future_layer_1_delta = matrix(0, nrow = 1, ncol = hidden_dim)

    for (position in 0:(binary_dim-1)) {
    for (position in 1:binary_dim) {

    X = cbind(a[binary_dim-position], b[binary_dim-position])
    layer_1 = layer_1_values[dim(layer_1_values)[1]-position,]
    prev_layer_1 = layer_1_values[dim(layer_1_values)[1]-(position+1),]
    X = cbind(a[binary_dim-(position-1)], b[binary_dim-(position-1)])
    layer_1 = layer_1_values[dim(layer_1_values)[1]-(position-1),]
    prev_layer_1 = layer_1_values[dim(layer_1_values)[1]-position,]

    # error at output layer
    layer_2_delta = layer_2_deltas[dim(layer_2_deltas)[1]-position,]
    layer_2_delta = layer_2_deltas[dim(layer_2_deltas)[1]-(position-1),]
    # error at hidden layer
    layer_1_delta = (future_layer_1_delta %*% t(synapse_h) + layer_2_delta %*% t(synapse_1)) * sigmoid_output_to_derivative(layer_1)

  12. @bquast revised this gist Jul 22, 2016. 1 changed file with 4 additions and 4 deletions.
    8 changes: 4 additions & 4 deletions OLD-RNN.R
    @@ -69,11 +69,11 @@ for (j in 1:10000) {
    layer_1_values = matrix(0, nrow=1, ncol = hidden_dim)

    # moving along the positions in the binary encoding
    for (position in 0:(binary_dim-1)) {
    for (position in 1:binary_dim) {

    # generate input and output
    X = cbind(a[position+1],b[position+1])
    y = c[position+1]
    X = cbind(a[position],b[position])
    y = c[position]

    # hidden layer (input ~+ prev_hidden)
    layer_1 = sigmoid((X%*%synapse_0) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h))
    @@ -87,7 +87,7 @@ for (j in 1:10000) {
    overallError = overallError + abs(layer_2_error)

    # decode estimate so we can print it out
    d[position+1] = round(layer_2)
    d[position] = round(layer_2)

    # store hidden layer so we can print it out
    layer_1_values = rbind(layer_1_values, layer_1) }
  13. @bquast revised this gist Jul 22, 2016. 1 changed file with 5 additions and 6 deletions.
    11 changes: 5 additions & 6 deletions OLD-RNN.R
    @@ -2,7 +2,7 @@

    ## convert integer to binary
    i2b <- function(integer, length=8)
    rev(as.numeric(intToBits(integer))[1:length])
    as.numeric(intToBits(integer))[1:length]

    ## apply
    int2bin <- function(integer, length=8)
    @@ -67,14 +67,13 @@ for (j in 1:10000) {

    layer_2_deltas = matrix(0)
    layer_1_values = matrix(0, nrow=1, ncol = hidden_dim)
    # layer_1_values = rbind(layer_1_values, matrix(0, nrow=1, ncol=hidden_dim))

    # moving along the positions in the binary encoding
    for (position in 0:(binary_dim-1)) {

    # generate input and output
    X = cbind(a[binary_dim - position],b[binary_dim - position])
    y = c[binary_dim - position]
    X = cbind(a[position+1],b[position+1])
    y = c[position+1]

    # hidden layer (input ~+ prev_hidden)
    layer_1 = sigmoid((X%*%synapse_0) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h))
    @@ -88,7 +87,7 @@ for (j in 1:10000) {
    overallError = overallError + abs(layer_2_error)

    # decode estimate so we can print it out
    d[binary_dim - position] = round(layer_2)
    d[position+1] = round(layer_2)

    # store hidden layer so we can print it out
    layer_1_values = rbind(layer_1_values, layer_1) }
    @@ -97,7 +96,7 @@ for (j in 1:10000) {

    for (position in 0:(binary_dim-1)) {

    X = cbind(a[position+1], b[position+1])
    X = cbind(a[binary_dim-position], b[binary_dim-position])
    layer_1 = layer_1_values[dim(layer_1_values)[1]-position,]
    prev_layer_1 = layer_1_values[dim(layer_1_values)[1]-(position+1),]
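
    Editor's note: dropping rev() in i2b makes the bit vectors least-significant-bit-first, so position 1 in the training loop is the ones bit and the network sees each addition in carry order, from the LSB upward; the index changes in this same revision follow from that. A quick illustration of the two orders, using base R's intToBits:

    as.numeric(intToBits(11))[1:4]         # 1 1 0 1  (LSB first, new behaviour)
    rev(as.numeric(intToBits(11))[1:4])    # 1 0 1 1  (MSB first, old behaviour)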

  14. @bquast revised this gist Jul 22, 2016. 1 changed file with 32 additions and 22 deletions.
    54 changes: 32 additions & 22 deletions OLD-RNN.R
    @@ -1,23 +1,38 @@
    # compute sigmoid nonlinearity
    sigmoid = function(x) {
    output = 1 / (1+exp(-x))
    return(output) }
    # define some functions

    # convert output of sigmoid function to its derivative
    sigmoid_output_to_derivative = function(output) {
    return( output*(1-output) ) }
    ## convert integer to binary
    i2b <- function(integer, length=8)
    rev(as.numeric(intToBits(integer))[1:length])

    ## apply
    int2bin <- function(integer, length=8)
    t(sapply(integer, i2b, length=length))

    ## sigmoid function
    sigmoid <- function(x, k=1, x0=0)
    1 / (1+exp( -k*(x-x0) ))

    ## derivative
    sigmoid_output_to_derivative <- function(x)
    x*(1-x)

    # create training numbers
    X1 = sample(0:127, 10000, replace=TRUE)
    X2 = sample(0:127, 10000, replace=TRUE)

    # create training response numbers
    Y <- X1 + X2

    # convert to binary
    X1 <- int2bin(X1, length=8)
    X2 <- int2bin(X2, length=8)
    Y <- int2bin(Y, length=8)

    # training dataset generation
    # int2binary =
    binary_dim = 8

    largest_number = 2^binary_dim
    int2binary = function(x) {
    tail(rev(as.integer(intToBits(x))), binary_dim) }
    # for (i in 1:largest_number) {
    # int2binary[i] = binary[i] }


    # input variables
    alpha = 0.1
    @@ -38,16 +53,12 @@ synapse_h_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)
    # training logic
    for (j in 1:10000) {

    # generate a simple addition problem (a + b = c)
    a_int = sample(1:(largest_number/2), 1) # int version
    a = int2binary(a_int) # binary encoding

    b_int = sample(1:(largest_number/2), 1) # int version
    b = int2binary(b_int)
    # select input variables
    a = X1[j,]
    b = X2[j,]

    # true answer
    c_int = a_int + b_int
    c = int2binary(c_int)
    # response variable
    c = Y[j,]

    # where we'll store our best guesss (binary encoded)
    d = matrix(0, nrow = 1, ncol = binary_dim)
    @@ -119,5 +130,4 @@ for (j in 1:10000) {
    out = 0
    for (x in 1:length(d)) {
    out[x] = rev(d)[x]*2^(x-1) }
    print(paste(a_int, "+", b_int, "=", sum(out)))
    print("----------------") } }
  15. @bquast created this gist Jul 22, 2016.
    123 changes: 123 additions & 0 deletions OLD-RNN.R
    @@ -0,0 +1,123 @@
    # compute sigmoid nonlinearity
    sigmoid = function(x) {
    output = 1 / (1+exp(-x))
    return(output) }

    # convert output of sigmoid function to its derivative
    sigmoid_output_to_derivative = function(output) {
    return( output*(1-output) ) }


    # training dataset generation
    # int2binary =
    binary_dim = 8

    largest_number = 2^binary_dim
    int2binary = function(x) {
    tail(rev(as.integer(intToBits(x))), binary_dim) }
    # for (i in 1:largest_number) {
    # int2binary[i] = binary[i] }


    # input variables
    alpha = 0.1
    input_dim = 2
    hidden_dim = 16
    output_dim = 1


    # initialize neural network weights
    synapse_0 = matrix(runif(n = input_dim*hidden_dim, min=-1, max=1), nrow=input_dim)
    synapse_1 = matrix(runif(n = hidden_dim*output_dim, min=-1, max=1), nrow=hidden_dim)
    synapse_h = matrix(runif(n = hidden_dim*hidden_dim, min=-1, max=1), nrow=hidden_dim)

    synapse_0_update = matrix(0, nrow = input_dim, ncol = hidden_dim)
    synapse_1_update = matrix(0, nrow = hidden_dim, ncol = output_dim)
    synapse_h_update = matrix(0, nrow = hidden_dim, ncol = hidden_dim)

    # training logic
    for (j in 1:10000) {

    # generate a simple addition problem (a + b = c)
    a_int = sample(1:(largest_number/2), 1) # int version
    a = int2binary(a_int) # binary encoding

    b_int = sample(1:(largest_number/2), 1) # int version
    b = int2binary(b_int)

    # true answer
    c_int = a_int + b_int
    c = int2binary(c_int)

    # where we'll store our best guesss (binary encoded)
    d = matrix(0, nrow = 1, ncol = binary_dim)

    overallError = 0

    layer_2_deltas = matrix(0)
    layer_1_values = matrix(0, nrow=1, ncol = hidden_dim)
    # layer_1_values = rbind(layer_1_values, matrix(0, nrow=1, ncol=hidden_dim))

    # moving along the positions in the binary encoding
    for (position in 0:(binary_dim-1)) {

    # generate input and output
    X = cbind(a[binary_dim - position],b[binary_dim - position])
    y = c[binary_dim - position]

    # hidden layer (input ~+ prev_hidden)
    layer_1 = sigmoid((X%*%synapse_0) + (layer_1_values[dim(layer_1_values)[1],] %*% synapse_h))

    # output layer (new binary representation)
    layer_2 = sigmoid(layer_1 %*% synapse_1)

    # did we miss?... if so, by how much?
    layer_2_error = y - layer_2
    layer_2_deltas = rbind(layer_2_deltas, layer_2_error * sigmoid_output_to_derivative(layer_2))
    overallError = overallError + abs(layer_2_error)

    # decode estimate so we can print it out
    d[binary_dim - position] = round(layer_2)

    # store hidden layer so we can print it out
    layer_1_values = rbind(layer_1_values, layer_1) }

    future_layer_1_delta = matrix(0, nrow = 1, ncol = hidden_dim)

    for (position in 0:(binary_dim-1)) {

    X = cbind(a[position+1], b[position+1])
    layer_1 = layer_1_values[dim(layer_1_values)[1]-position,]
    prev_layer_1 = layer_1_values[dim(layer_1_values)[1]-(position+1),]

    # error at output layer
    layer_2_delta = layer_2_deltas[dim(layer_2_deltas)[1]-position,]
    # error at hidden layer
    layer_1_delta = (future_layer_1_delta %*% t(synapse_h) + layer_2_delta %*% t(synapse_1)) * sigmoid_output_to_derivative(layer_1)

    # let's update all our weights so we can try again
    synapse_1_update = synapse_1_update + matrix(layer_1) %*% layer_2_delta
    synapse_h_update = synapse_h_update + matrix(prev_layer_1) %*% layer_1_delta
    synapse_0_update = synapse_0_update + t(X) %*% layer_1_delta

    future_layer_1_delta = layer_1_delta }


    synapse_0 = synapse_0 + ( synapse_0_update * alpha )
    synapse_1 = synapse_1 + ( synapse_1_update * alpha )
    synapse_h = synapse_h + ( synapse_h_update * alpha )

    synapse_0_update = synapse_0_update * 0
    synapse_1_update = synapse_1_update * 0
    synapse_h_update = synapse_h_update * 0

    # print out progress
    if(j %% 1000 ==0) {
    print(paste("Error:", overallError))
    print(paste("Pred:", paste(d, collapse = " ")))
    print(paste("True:", paste(c, collapse = " ")))
    out = 0
    for (x in 1:length(d)) {
    out[x] = rev(d)[x]*2^(x-1) }
    print(paste(a_int, "+", b_int, "=", sum(out)))
    print("----------------") } }