Skip to content

Instantly share code, notes, and snippets.

@pyben
Last active December 22, 2016 08:11
Show Gist options
  • Save pyben/41d12f33c9a4717e3adf to your computer and use it in GitHub Desktop.
Save pyben/41d12f33c9a4717e3adf to your computer and use it in GitHub Desktop.

Revisions

  1. pyben revised this gist Jan 17, 2016. 1 changed file with 40 additions and 34 deletions.
    74 changes: 40 additions & 34 deletions sda.py
    Original file line number Diff line number Diff line change
    @@ -1,38 +1,42 @@
    from __future__ import absolute_import
    from __future__ import print_function
    import numpy as np
    np.random.seed(1337) # for reproducibility

    from keras.datasets import mnist
    from keras.datasets import imdb
    from keras.preprocessing.sequence import pad_sequences
    from keras.models import Sequential
    from keras.layers import containers
    from keras.layers.core import Dense, AutoEncoder
    from keras.layers.noise import GaussianNoise
    from keras.layers.core import Dense, AutoEncoder
    from keras.utils import np_utils
    from sklearn.metrics import (precision_score, recall_score,
    f1_score, accuracy_score)

    np.random.seed(1337)

    max_len = 800
    max_words = 20000
    batch_size = 64
    nb_classes = 10
    nb_epoch = 0
    nb_hidden_layers = [784, 600, 500, 400]
    nb_noise_layers = [0.5, 0.2, 0.1, ]

    # the data, shuffled and split between train and test sets
    (X_train, y_train), (X_test, y_test) = mnist.load_data()
    X_train = X_train.reshape(-1, 784)
    X_test = X_test.reshape(-1, 784)
    X_train = X_train.astype("float32") / 255.0
    X_test = X_test.astype("float32") / 255.0
    print(X_train.shape[0], 'train samples')
    print(X_test.shape[0], 'test samples')

    # convert class vectors to binary class matrices
    nb_classes = 2
    nb_epoch = 2
    nb_hidden_layers = [800, 500, 300, 100]
    nb_noise_layers = [0.6, 0.4, 0.3, ]

    (X_train, y_train), (X_test, y_test) = \
    imdb.load_data(nb_words=max_words, test_split=0.2)

    X_train = pad_sequences(X_train, maxlen=max_len, dtype='float32')
    X_test = pad_sequences(X_test, maxlen=max_len, dtype='float32')
    X_train_tmp = np.copy(X_train)

    y_train = np.asarray(y_train)
    y_test = np.asarray(y_test)

    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)

    # Layer-wise pre-training
    print('Train: {}'.format(X_train.shape))
    print('Test: {}'.format(X_test.shape))

    trained_encoders = []
    X_train_tmp = X_train
    for i, (n_in, n_out) in enumerate(
    zip(nb_hidden_layers[:-1], nb_hidden_layers[1:]), start=1):

    @@ -42,34 +46,36 @@
    ae = Sequential()
    encoder = containers.Sequential([
    GaussianNoise(nb_noise_layers[i - 1], input_shape=(n_in,)),
    Dense(input_dim=n_in, output_dim=n_out,
    init='uniform', activation='sigmoid')
    ])
    decoder = containers.Sequential([
    Dense(input_dim=n_out, output_dim=n_in, activation='sigmoid')
    Dense(input_dim=n_in, output_dim=n_out, activation='sigmoid'),
    ])
    decoder = Dense(input_dim=n_out, output_dim=n_in, activation='sigmoid')
    ae.add(AutoEncoder(encoder=encoder, decoder=decoder,
    output_reconstruction=False))
    ae.compile(loss='mean_squared_error', optimizer='rmsprop')
    ae.fit(X_train_tmp, X_train_tmp, batch_size=batch_size, nb_epoch=nb_epoch)
    # Store trained weights
    trained_encoders.append(ae.layers[0].encoder)
    # Update training data
    X_train_tmp = ae.predict(X_train_tmp)

    # Fine-tuning
    print('Fine-tuning')
    model = Sequential()
    for encoder in trained_encoders:
    model.add(encoder)

    model.add(Dense(input_dim=nb_hidden_layers[-1],
    output_dim=nb_classes, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

    model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,
    show_accuracy=True, validation_data=(X_test, Y_test))
    score = model.evaluate(X_test, Y_test, show_accuracy=True, verbose=0)

    print('Test score:', score[0])
    print('Test accuracy:', score[1])
    y_pred = model.predict_classes(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)

    print('Accuracy: {}'.format(accuracy))
    print('Recall: {}'.format(recall))
    print('Precision: {}'.format(precision))
    print('F1: {}'.format(f1))
  2. pyben created this gist Jan 13, 2016.
    75 changes: 75 additions & 0 deletions sda.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,75 @@
"""Stacked denoising autoencoder (SDA) on MNIST with Keras 0.x.

Greedily pre-trains a stack of denoising autoencoders layer by layer,
then fine-tunes the stacked encoders with a softmax output layer as a
10-class MNIST classifier.

NOTE(review): this targets the pre-1.0 Keras API (`keras.layers.containers`,
`AutoEncoder`, `nb_epoch=`, `show_accuracy=`); it will not run on any
modern Keras release.
"""
from __future__ import absolute_import
from __future__ import print_function
import numpy as np
np.random.seed(1337)  # for reproducibility

from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import containers
from keras.layers.core import Dense, AutoEncoder
from keras.layers.noise import GaussianNoise
from keras.utils import np_utils


batch_size = 64
# NOTE(review): nb_epoch = 0 means every fit() below performs zero training
# passes — looks like a placeholder left in; a later revision of this gist
# sets it to a positive value. Confirm intent before relying on results.
nb_classes = 10
nb_epoch = 0
# Layer widths of the encoder stack: 784 input pixels down to 400 units.
nb_hidden_layers = [784, 600, 500, 400]
# Gaussian-noise stddev applied to the input of each autoencoder
# (one entry per encoder pair, i.e. len(nb_hidden_layers) - 1).
nb_noise_layers = [0.5, 0.2, 0.1, ]

# the data, shuffled and split between train and test sets
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Flatten 28x28 images to 784-vectors and scale pixels to [0, 1].
X_train = X_train.reshape(-1, 784)
X_test = X_test.reshape(-1, 784)
X_train = X_train.astype("float32") / 255.0
X_test = X_test.astype("float32") / 255.0
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')

# convert class vectors to binary class matrices (one-hot encoding)
Y_train = np_utils.to_categorical(y_train, nb_classes)
Y_test = np_utils.to_categorical(y_test, nb_classes)

# Layer-wise pre-training: each autoencoder learns to reconstruct the
# (noisy) output of the previous one; only the encoders are kept.
trained_encoders = []
X_train_tmp = X_train
for i, (n_in, n_out) in enumerate(
        zip(nb_hidden_layers[:-1], nb_hidden_layers[1:]), start=1):

    print('Pre-training the layer: Input {} -> Output {}'
          .format(n_in, n_out))

    ae = Sequential()
    # Denoising encoder: corrupt the input with Gaussian noise, then
    # project n_in -> n_out through a sigmoid layer.
    encoder = containers.Sequential([
        GaussianNoise(nb_noise_layers[i - 1], input_shape=(n_in,)),
        Dense(input_dim=n_in, output_dim=n_out,
              init='uniform', activation='sigmoid')
    ])
    # Decoder mirrors the encoder: n_out back up to n_in.
    decoder = containers.Sequential([
        Dense(input_dim=n_out, output_dim=n_in, activation='sigmoid')
    ])
    # output_reconstruction=False: after training, predict() emits the
    # encoded representation rather than the reconstruction.
    ae.add(AutoEncoder(encoder=encoder, decoder=decoder,
                       output_reconstruction=False))
    ae.compile(loss='mean_squared_error', optimizer='rmsprop')
    ae.fit(X_train_tmp, X_train_tmp, batch_size=batch_size, nb_epoch=nb_epoch)
    # Store trained encoder weights for the fine-tuning stack.
    trained_encoders.append(ae.layers[0].encoder)
    # Update training data: feed encodings to the next autoencoder.
    X_train_tmp = ae.predict(X_train_tmp)

# Fine-tuning: stack the pre-trained encoders, cap with a softmax
# classifier, and train the whole network end to end on the labels.
print('Fine-tuning')
model = Sequential()
for encoder in trained_encoders:
    model.add(encoder)
model.add(Dense(input_dim=nb_hidden_layers[-1],
                output_dim=nb_classes, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch,
          show_accuracy=True, validation_data=(X_test, Y_test))
# score is [loss, accuracy] because show_accuracy=True (Keras 0.x API).
score = model.evaluate(X_test, Y_test, show_accuracy=True, verbose=0)

print('Test score:', score[0])
print('Test accuracy:', score[1])