Revisions

  1. @fchollet fchollet revised this gist Mar 14, 2017. 1 changed file with 33 additions and 84 deletions.
117 changes: 33 additions & 84 deletions classifier_from_little_data_script_2.py

classifier_from_little_data_script_2.py after this revision:

'''This script goes along the blog post
"Building powerful image classification models using very little data"
from blog.keras.io.
It uses data that can be downloaded at:
https://www.kaggle.com/c/dogs-vs-cats/data
In our setup, we:
- created a data/ folder
- created train/ and validation/ subfolders inside data/
- created cats/ and dogs/ subfolders inside train/ and validation/
- put the cat pictures index 0-999 in data/train/cats
- put the cat pictures index 1000-1400 in data/validation/cats
- put the dogs pictures index 12500-13499 in data/train/dogs
- put the dog pictures index 13500-13900 in data/validation/dogs
So that we have 1000 training examples for each class, and 400 validation examples for each class.
In summary, this is our directory structure:
```
data/
    train/
        dogs/
            dog001.jpg
            dog002.jpg
            ...
        cats/
            cat001.jpg
            cat002.jpg
            ...
    validation/
        dogs/
            dog001.jpg
            dog002.jpg
            ...
        cats/
            cat001.jpg
            cat002.jpg
            ...
```
'''
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dropout, Flatten, Dense
from keras import applications

# dimensions of our images.
img_width, img_height = 150, 150

top_model_weights_path = 'bottleneck_fc_model.h5'
train_data_dir = 'data/train'
validation_data_dir = 'data/validation'
nb_train_samples = 2000
nb_validation_samples = 800
epochs = 50
batch_size = 16


def save_bottlebeck_features():
    datagen = ImageDataGenerator(rescale=1. / 255)

    # build the VGG16 network
    model = applications.VGG16(include_top=False, weights='imagenet')

    generator = datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode=None,
        shuffle=False)
    bottleneck_features_train = model.predict_generator(
        generator, nb_train_samples // batch_size)
    np.save(open('bottleneck_features_train.npy', 'w'),
            bottleneck_features_train)

    generator = datagen.flow_from_directory(
        validation_data_dir,
        target_size=(img_width, img_height),
        batch_size=batch_size,
        class_mode=None,
        shuffle=False)
    bottleneck_features_validation = model.predict_generator(
        generator, nb_validation_samples // batch_size)
    np.save(open('bottleneck_features_validation.npy', 'w'),
            bottleneck_features_validation)


def train_top_model():
    train_data = np.load(open('bottleneck_features_train.npy'))
    train_labels = np.array(
        [0] * (nb_train_samples / 2) + [1] * (nb_train_samples / 2))

    validation_data = np.load(open('bottleneck_features_validation.npy'))
    validation_labels = np.array(
        [0] * (nb_validation_samples / 2) + [1] * (nb_validation_samples / 2))

    model = Sequential()
    model.add(Flatten(input_shape=train_data.shape[1:]))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy', metrics=['accuracy'])

    model.fit(train_data, train_labels,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(validation_data, validation_labels))
    model.save_weights(top_model_weights_path)


save_bottlebeck_features()
train_top_model()
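The docstring above describes the expected data/ layout but not how to build it. Below is a minimal sketch of one way to do so; it is not part of the gist. The kaggle_train source directory, the cat.<i>.jpg / dog.<i>.jpg filename pattern, and the copy_range helper are all assumptions for illustration (the docstring's dog indices 12500-13499 suggest the author numbered the files differently).

import os
import shutil

def copy_range(src_dir, dst_dir, pattern, start, end):
    # Hypothetical helper: copy a contiguous range of images into one class folder.
    os.makedirs(dst_dir, exist_ok=True)
    for i in range(start, end):
        name = pattern.format(i)
        shutil.copyfile(os.path.join(src_dir, name), os.path.join(dst_dir, name))

# 1000 training and 400 validation images per class, as in the docstring.
copy_range('kaggle_train', 'data/train/cats', 'cat.{}.jpg', 0, 1000)
copy_range('kaggle_train', 'data/validation/cats', 'cat.{}.jpg', 1000, 1400)
copy_range('kaggle_train', 'data/train/dogs', 'dog.{}.jpg', 0, 1000)
copy_range('kaggle_train', 'data/validation/dogs', 'dog.{}.jpg', 1000, 1400)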

  2. @fchollet fchollet created this gist Jun 6, 2016.
    162 changes: 162 additions & 0 deletions classifier_from_little_data_script_2.py
classifier_from_little_data_script_2.py as originally created:

'''This script goes along the blog post
"Building powerful image classification models using very little data"
from blog.keras.io.
It uses data that can be downloaded at:
https://www.kaggle.com/c/dogs-vs-cats/data
In our setup, we:
- created a data/ folder
- created train/ and validation/ subfolders inside data/
- created cats/ and dogs/ subfolders inside train/ and validation/
- put the cat pictures index 0-999 in data/train/cats
- put the cat pictures index 1000-1400 in data/validation/cats
- put the dogs pictures index 12500-13499 in data/train/dogs
- put the dog pictures index 13500-13900 in data/validation/dogs
So that we have 1000 training examples for each class, and 400 validation examples for each class.
In summary, this is our directory structure:
```
data/
    train/
        dogs/
            dog001.jpg
            dog002.jpg
            ...
        cats/
            cat001.jpg
            cat002.jpg
            ...
    validation/
        dogs/
            dog001.jpg
            dog002.jpg
            ...
        cats/
            cat001.jpg
            cat002.jpg
            ...
```
'''
import os
import h5py
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.layers import Activation, Dropout, Flatten, Dense

# path to the model weights file.
weights_path = '../keras/examples/vgg16_weights.h5'
top_model_weights_path = 'bottleneck_fc_model.h5'
# dimensions of our images.
img_width, img_height = 150, 150

train_data_dir = 'data/train'
validation_data_dir = 'data/validation'
nb_train_samples = 2000
nb_validation_samples = 800
nb_epoch = 50


def save_bottlebeck_features():
    datagen = ImageDataGenerator(rescale=1./255)

    # build the VGG16 network
    model = Sequential()
    model.add(ZeroPadding2D((1, 1), input_shape=(3, img_width, img_height)))

    model.add(Convolution2D(64, 3, 3, activation='relu', name='conv1_1'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(64, 3, 3, activation='relu', name='conv1_2'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(128, 3, 3, activation='relu', name='conv2_1'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(128, 3, 3, activation='relu', name='conv2_2'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_1'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_2'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_3'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_1'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_2'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_3'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_1'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_2'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_3'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    # load the weights of the VGG16 networks
    # (trained on ImageNet, won the ILSVRC competition in 2014)
    # note: when there is a complete match between your model definition
    # and your weight savefile, you can simply call model.load_weights(filename)
    assert os.path.exists(weights_path), 'Model weights not found (see "weights_path" variable in script).'
    f = h5py.File(weights_path)
    for k in range(f.attrs['nb_layers']):
        if k >= len(model.layers):
            # we don't look at the last (fully-connected) layers in the savefile
            break
        g = f['layer_{}'.format(k)]
        weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
        model.layers[k].set_weights(weights)
    f.close()
    print('Model loaded.')

    generator = datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_width, img_height),
        batch_size=32,
        class_mode=None,
        shuffle=False)
    bottleneck_features_train = model.predict_generator(generator, nb_train_samples)
    np.save(open('bottleneck_features_train.npy', 'w'), bottleneck_features_train)

    generator = datagen.flow_from_directory(
        validation_data_dir,
        target_size=(img_width, img_height),
        batch_size=32,
        class_mode=None,
        shuffle=False)
    bottleneck_features_validation = model.predict_generator(generator, nb_validation_samples)
    np.save(open('bottleneck_features_validation.npy', 'w'), bottleneck_features_validation)


def train_top_model():
    train_data = np.load(open('bottleneck_features_train.npy'))
    train_labels = np.array([0] * (nb_train_samples / 2) + [1] * (nb_train_samples / 2))

    validation_data = np.load(open('bottleneck_features_validation.npy'))
    validation_labels = np.array([0] * (nb_validation_samples / 2) + [1] * (nb_validation_samples / 2))

    model = Sequential()
    model.add(Flatten(input_shape=train_data.shape[1:]))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

    model.fit(train_data, train_labels,
              nb_epoch=nb_epoch, batch_size=32,
              validation_data=(validation_data, validation_labels))
    model.save_weights(top_model_weights_path)


save_bottlebeck_features()
train_top_model()
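An aside that is not part of either revision: both versions of the script were written for Python 2. Under Python 3, the text-mode open(..., 'w') handles passed to np.save/np.load fail, and nb_train_samples / 2 yields a float that cannot repeat a list. A minimal sketch of the Python 3-friendly equivalents of the affected lines, assuming nothing beyond NumPy:

import numpy as np

nb_train_samples = 2000  # as in the script above

# Dummy stand-in for the real bottleneck features, only so the snippet runs on its own.
bottleneck_features_train = np.zeros((nb_train_samples, 1))

# np.save / np.load accept filenames directly, avoiding text-mode file handles.
np.save('bottleneck_features_train.npy', bottleneck_features_train)
train_data = np.load('bottleneck_features_train.npy')

# Integer division keeps the repeat counts as ints under Python 3.
train_labels = np.array(
    [0] * (nb_train_samples // 2) + [1] * (nb_train_samples // 2))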