Revisions
- mick001 revised this gist on Sep 11, 2016. No changes.
- mick001 created this gist on Sep 11, 2016.
# Note: written against the TensorFlow 1.x graph API (run under tf.compat.v1 on TF 2.x).
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

FILE_PATH = '~/Desktop/bank-add/bank_equalized.csv'     # Path to .csv dataset

raw_data = pd.read_csv(FILE_PATH)                       # Open raw .csv
print("Raw data loaded successfully...\n")

#------------------------------------------------------------------------------
# Variables

Y_LABEL = 'y'                                           # Name of the variable to be predicted
KEYS = [i for i in raw_data.keys().tolist() if i != Y_LABEL]  # Names of predictors
N_INSTANCES = raw_data.shape[0]                         # Number of instances
N_INPUT = raw_data.shape[1] - 1                         # Input size
N_CLASSES = raw_data[Y_LABEL].unique().shape[0]         # Number of classes (output size)
TEST_SIZE = 0.1                                         # Test set size (% of dataset)
TRAIN_SIZE = int(N_INSTANCES * (1 - TEST_SIZE))         # Train size
LEARNING_RATE = 0.001                                   # Learning rate
TRAINING_EPOCHS = 400                                   # Number of epochs
BATCH_SIZE = 100                                        # Batch size
DISPLAY_STEP = 20                                       # Display progress every x epochs
HIDDEN_SIZE = 200                                       # Number of hidden neurons per layer
ACTIVATION_FUNCTION_OUT = tf.nn.tanh                    # Last-layer activation function
STDDEV = 0.1                                            # Standard deviation (for weights random init)
RANDOM_STATE = 100                                      # Random state for train_test_split

print("Variables loaded successfully...\n")
print("Number of predictors \t%s" % (N_INPUT))
print("Number of classes \t%s" % (N_CLASSES))
print("Number of instances \t%s" % (N_INSTANCES))
print("\n")
print("Metric displayed:\tAccuracy\n")

#------------------------------------------------------------------------------
# Loading data

# Load data (assumes all predictor columns are numeric and the label column
# holds integer class indices 0..N_CLASSES-1)
data = raw_data[KEYS].values                            # X data
labels = raw_data[Y_LABEL].values                       # y data

# One-hot encoding for labels
labels_ = np.zeros((N_INSTANCES, N_CLASSES))
labels_[np.arange(N_INSTANCES), labels] = 1

# Train-test split
data_train, data_test, labels_train, labels_test = train_test_split(
    data, labels_, test_size=TEST_SIZE, random_state=RANDOM_STATE)

print("Data loaded and split successfully...\n")

#------------------------------------------------------------------------------
# Neural net construction

# Net params
n_input = N_INPUT              # Number of input features
n_hidden_1 = HIDDEN_SIZE       # 1st layer
n_hidden_2 = HIDDEN_SIZE       # 2nd layer
n_hidden_3 = HIDDEN_SIZE       # 3rd layer
n_hidden_4 = HIDDEN_SIZE       # 4th layer
n_classes = N_CLASSES          # Number of output classes

# Tf placeholders
X = tf.placeholder(tf.float32, [None, n_input])
y = tf.placeholder(tf.float32, [None, n_classes])
dropout_keep_prob = tf.placeholder(tf.float32)


def mlp(_X, _weights, _biases, dropout_keep_prob):
    # Four tanh hidden layers with dropout, followed by the output layer
    layer1 = tf.nn.dropout(tf.nn.tanh(tf.add(tf.matmul(_X, _weights['h1']), _biases['b1'])), dropout_keep_prob)
    layer2 = tf.nn.dropout(tf.nn.tanh(tf.add(tf.matmul(layer1, _weights['h2']), _biases['b2'])), dropout_keep_prob)
    layer3 = tf.nn.dropout(tf.nn.tanh(tf.add(tf.matmul(layer2, _weights['h3']), _biases['b3'])), dropout_keep_prob)
    layer4 = tf.nn.dropout(tf.nn.tanh(tf.add(tf.matmul(layer3, _weights['h4']), _biases['b4'])), dropout_keep_prob)
    out = ACTIVATION_FUNCTION_OUT(tf.add(tf.matmul(layer4, _weights['out']), _biases['out']))
    return out


weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], stddev=STDDEV)),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2], stddev=STDDEV)),
    'h3': tf.Variable(tf.random_normal([n_hidden_2, n_hidden_3], stddev=STDDEV)),
    'h4': tf.Variable(tf.random_normal([n_hidden_3, n_hidden_4], stddev=STDDEV)),
    'out': tf.Variable(tf.random_normal([n_hidden_4, n_classes], stddev=STDDEV)),
}

biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'b3': tf.Variable(tf.random_normal([n_hidden_3])),
    'b4': tf.Variable(tf.random_normal([n_hidden_4])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}

# Build model
pred = mlp(X, weights, biases, dropout_keep_prob)

# Loss and optimizer (note: the output passes through tanh before the softmax cross-entropy)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))  # softmax loss
optimizer = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(cost)

# Accuracy
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

print("Net built successfully...\n")
print("Starting training...\n")

#------------------------------------------------------------------------------
# Training

# Initialize variables
init_all = tf.global_variables_initializer()

# Launch session
sess = tf.Session()
sess.run(init_all)

# Training loop
for epoch in range(TRAINING_EPOCHS):
    avg_cost = 0.
    total_batch = int(data_train.shape[0] / BATCH_SIZE)
    # Loop over all batches
    for i in range(total_batch):
        # Sample a random mini-batch from the training set
        randidx = np.random.randint(int(TRAIN_SIZE), size=BATCH_SIZE)
        batch_xs = data_train[randidx, :]
        batch_ys = labels_train[randidx, :]
        # Fit using batched data
        sess.run(optimizer, feed_dict={X: batch_xs, y: batch_ys, dropout_keep_prob: 0.9})
        # Calculate average cost
        avg_cost += sess.run(cost, feed_dict={X: batch_xs, y: batch_ys, dropout_keep_prob: 1.}) / total_batch
    # Display progress
    if epoch % DISPLAY_STEP == 0:
        print("Epoch: %03d/%03d cost: %.9f" % (epoch, TRAINING_EPOCHS, avg_cost))
        train_acc = sess.run(accuracy, feed_dict={X: batch_xs, y: batch_ys, dropout_keep_prob: 1.})
        print("Training accuracy: %.3f" % (train_acc))

print("End of training.\n")
print("Testing...\n")

#------------------------------------------------------------------------------
# Testing

test_acc = sess.run(accuracy, feed_dict={X: data_test, y: labels_test, dropout_keep_prob: 1.})
print("Test accuracy: %.3f" % (test_acc))

sess.close()
print("Session closed!")
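The script above only reports aggregate accuracy. If per-instance predictions are needed (for example, to build a confusion matrix or compute precision explicitly), a minimal sketch along the following lines could be run just before sess.close(), reusing the pred, X, and dropout_keep_prob tensors defined above. This snippet is not part of the original gist.

# Hedged addition (not in the original gist): per-instance class predictions on the test set.
# Run before sess.close(), while the session is still open.
predicted_classes = sess.run(tf.argmax(pred, 1),
                             feed_dict={X: data_test, dropout_keep_prob: 1.})
true_classes = np.argmax(labels_test, axis=1)
print("First 10 predicted classes:", predicted_classes[:10])
print("First 10 true classes:    ", true_classes[:10])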