# dmlp.py (gist by @mick001, last active March 13, 2019)

import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

    FILE_PATH = '~/Desktop/bank-add/bank_equalized.csv' # Path to .csv dataset
    raw_data = pd.read_csv(FILE_PATH) # Open raw .csv

    print("Raw data loaded successfully...\n")
    #------------------------------------------------------------------------------
    # Variables

    Y_LABEL = 'y' # Name of the variable to be predicted
    KEYS = [i for i in raw_data.keys().tolist() if i != Y_LABEL] # Name of predictors
    N_INSTANCES = raw_data.shape[0] # Number of instances
    N_INPUT = raw_data.shape[1] - 1 # Input size
    N_CLASSES = raw_data[Y_LABEL].unique().shape[0] # Number of classes (output size)
    TEST_SIZE = 0.1 # Test set size (% of dataset)
    TRAIN_SIZE = int(N_INSTANCES * (1 - TEST_SIZE)) # Train size
    LEARNING_RATE = 0.001 # Learning rate
    TRAINING_EPOCHS = 400 # Number of epochs
    BATCH_SIZE = 100 # Batch size
    DISPLAY_STEP = 20 # Display progress each x epochs
HIDDEN_SIZE = 200 # Number of hidden neurons per layer
ACTIVATION_FUNCTION_OUT = tf.nn.tanh # Output layer activation (see note at the loss)
    STDDEV = 0.1 # Standard deviation (for weights random init)
    RANDOM_STATE = 100 # Random state for train_test_split

    print("Variables loaded successfully...\n")
    print("Number of predictors \t%s" %(N_INPUT))
    print("Number of classes \t%s" %(N_CLASSES))
    print("Number of instances \t%s" %(N_INSTANCES))
    print("\n")
    print("Metrics displayed:\tPrecision\n")
    #------------------------------------------------------------------------------
    # Loading data

    # Load data
data = raw_data[KEYS].values # X data
labels = raw_data[Y_LABEL].values # y data

# One-hot encoding of labels (assumes labels are integer-coded 0..N_CLASSES-1)
labels_ = np.zeros((N_INSTANCES, N_CLASSES))
labels_[np.arange(N_INSTANCES), labels] = 1
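
# Equivalent numpy one-liner for the one-hot encoding above (a sketch; same
# assumption of integer-coded labels 0..N_CLASSES-1):
# labels_ = np.eye(N_CLASSES)[labels]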

    # Train-test split
data_train, data_test, labels_train, labels_test = train_test_split(data,
                                                                    labels_,
                                                                    test_size=TEST_SIZE,
                                                                    random_state=RANDOM_STATE)
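
# If class balance were a concern, a stratified split would preserve label
# proportions in both sets (a sketch; the dataset here is pre-equalized, so
# the plain split above is fine):
# data_train, data_test, labels_train, labels_test = train_test_split(
#     data, labels_, test_size=TEST_SIZE, random_state=RANDOM_STATE, stratify=labels)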

    print("Data loaded and splitted successfully...\n")
    #------------------------------------------------------------------------------
    # Neural net construction

    # Net params
n_input = N_INPUT # Input layer size (number of predictors)
n_hidden_1 = HIDDEN_SIZE # 1st hidden layer
n_hidden_2 = HIDDEN_SIZE # 2nd hidden layer
n_hidden_3 = HIDDEN_SIZE # 3rd hidden layer
n_hidden_4 = HIDDEN_SIZE # 4th hidden layer
n_classes = N_CLASSES # Output layer size (number of classes)

    # Tf placeholders
    X = tf.placeholder(tf.float32, [None, n_input])
    y = tf.placeholder(tf.float32, [None, n_classes])
    dropout_keep_prob = tf.placeholder(tf.float32)


def mlp(_X, _weights, _biases, dropout_keep_prob):
    layer1 = tf.nn.dropout(tf.nn.tanh(tf.add(tf.matmul(_X, _weights['h1']), _biases['b1'])), dropout_keep_prob)
    layer2 = tf.nn.dropout(tf.nn.tanh(tf.add(tf.matmul(layer1, _weights['h2']), _biases['b2'])), dropout_keep_prob)
    layer3 = tf.nn.dropout(tf.nn.tanh(tf.add(tf.matmul(layer2, _weights['h3']), _biases['b3'])), dropout_keep_prob)
    layer4 = tf.nn.dropout(tf.nn.tanh(tf.add(tf.matmul(layer3, _weights['h4']), _biases['b4'])), dropout_keep_prob)
    out = ACTIVATION_FUNCTION_OUT(tf.add(tf.matmul(layer4, _weights['out']), _biases['out']))
    return out
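
# An equivalent, more compact builder (a sketch under the same 'h1'..'h4' /
# 'b1'..'b4' naming convention; defined for reference, not used below):
def mlp_compact(_X, _weights, _biases, keep_prob, n_hidden_layers=4):
    layer = _X
    for i in range(1, n_hidden_layers + 1):
        layer = tf.nn.dropout(
            tf.nn.tanh(tf.add(tf.matmul(layer, _weights['h%d' % i]), _biases['b%d' % i])),
            keep_prob)
    return ACTIVATION_FUNCTION_OUT(tf.add(tf.matmul(layer, _weights['out']), _biases['out']))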

weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], stddev=STDDEV)),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2], stddev=STDDEV)),
    'h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3], stddev=STDDEV)),
    'h4': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_4], stddev=STDDEV)),
    'out': tf.Variable(tf.random_normal([n_hidden_4, n_classes], stddev=STDDEV)),
}

biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'b3': tf.Variable(tf.random_normal([n_hidden_3])),
    'b4': tf.Variable(tf.random_normal([n_hidden_4])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

    # Build model
    pred = mlp(X, weights, biases, dropout_keep_prob)

# Loss and optimizer
# Note: softmax_cross_entropy_with_logits expects unscaled logits, so the
# tanh output activation above is unconventional (identity is more typical).
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y)) # softmax loss
optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(cost)

    # Accuracy
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    print("Net built successfully...\n")
    print("Starting training...\n")
    #------------------------------------------------------------------------------
    # Training

# Initialize variables
init_all = tf.global_variables_initializer()

    # Launch session
    sess = tf.Session()
    sess.run(init_all)

    # Training loop
for epoch in range(TRAINING_EPOCHS):
    avg_cost = 0.
    total_batch = int(data_train.shape[0] / BATCH_SIZE)
    # Loop over all batches
    for i in range(total_batch):
        # Sample a random mini-batch (with replacement)
        randidx = np.random.randint(data_train.shape[0], size=BATCH_SIZE)
        batch_xs = data_train[randidx, :]
        batch_ys = labels_train[randidx, :]
        # Fit using batched data
        sess.run(optimizer, feed_dict={X: batch_xs, y: batch_ys, dropout_keep_prob: 0.9})
        # Accumulate average cost (dropout disabled for evaluation)
        avg_cost += sess.run(cost, feed_dict={X: batch_xs, y: batch_ys, dropout_keep_prob: 1.}) / total_batch
    # Display progress
    if epoch % DISPLAY_STEP == 0:
        print("Epoch: %03d/%03d cost: %.9f" % (epoch, TRAINING_EPOCHS, avg_cost))
        train_acc = sess.run(accuracy, feed_dict={X: batch_xs, y: batch_ys, dropout_keep_prob: 1.})
        print("Training accuracy: %.3f" % (train_acc))


    print ("End of training.\n")
    print("Testing...\n")
    #------------------------------------------------------------------------------
    # Testing

test_acc = sess.run(accuracy, feed_dict={X: data_test, y: labels_test, dropout_keep_prob: 1.})
print("Test accuracy: %.3f" % (test_acc))

    sess.close()
    print("Session closed!")