fchollet · February 26, 2025 01:37 · Mar 14, 2017 · Jun 6, 2016
diff --git a/classifier_from_little_data_script_3.py b/classifier_from_little_data_script_3.py
@@ -1,10 +1,8 @@
 '''This script goes along the blog post
 "Building powerful image classification models using very little data"
 from blog.keras.io.
-
 It uses data that can be downloaded at:
 https://www.kaggle.com/c/dogs-vs-cats/data
-
 In our setup, we:
 - created a data/ folder
 - created train/ and validation/ subfolders inside data/
@@ -13,9 +11,7 @@
 - put the cat pictures index 1000-1400 in data/validation/cats
 - put the dogs pictures index 12500-13499 in data/train/dogs
 - put the dog pictures index 13500-13900 in data/validation/dogs
-
 So that we have 1000 training examples for each class, and 400 validation examples for each class.
-
 In summary, this is our directory structure:
 ```
 data/
@@ -40,80 +36,27 @@
 ```
 '''
 
-import os
-import h5py
-import numpy as np
+from keras import applications
 from keras.preprocessing.image import ImageDataGenerator
 from keras import optimizers
 from keras.models import Sequential
-from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D
-from keras.layers import Activation, Dropout, Flatten, Dense
+from keras.layers import Dropout, Flatten, Dense
 
 # path to the model weights files.
 weights_path = '../keras/examples/vgg16_weights.h5'
 top_model_weights_path = 'fc_model.h5'
 # dimensions of our images.
 img_width, img_height = 150, 150
 
-train_data_dir = 'data/train'
-validation_data_dir = 'data/validation'
+train_data_dir = 'cats_and_dogs_small/train'
+validation_data_dir = 'cats_and_dogs_small/validation'
 nb_train_samples = 2000
 nb_validation_samples = 800
-nb_epoch = 50
+epochs = 50
+batch_size = 16
 
 # build the VGG16 network
-model = Sequential()
-model.add(ZeroPadding2D((1, 1), input_shape=(3, img_width, img_height)))
-
-model.add(Convolution2D(64, 3, 3, activation='relu', name='conv1_1'))
-model.add(ZeroPadding2D((1, 1)))
-model.add(Convolution2D(64, 3, 3, activation='relu', name='conv1_2'))
-model.add(MaxPooling2D((2, 2), strides=(2, 2)))
-
-model.add(ZeroPadding2D((1, 1)))
-model.add(Convolution2D(128, 3, 3, activation='relu', name='conv2_1'))
-model.add(ZeroPadding2D((1, 1)))
-model.add(Convolution2D(128, 3, 3, activation='relu', name='conv2_2'))
-model.add(MaxPooling2D((2, 2), strides=(2, 2)))
-
-model.add(ZeroPadding2D((1, 1)))
-model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_1'))
-model.add(ZeroPadding2D((1, 1)))
-model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_2'))
-model.add(ZeroPadding2D((1, 1)))
-model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_3'))
-model.add(MaxPooling2D((2, 2), strides=(2, 2)))
-
-model.add(ZeroPadding2D((1, 1)))
-model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_1'))
-model.add(ZeroPadding2D((1, 1)))
-model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_2'))
-model.add(ZeroPadding2D((1, 1)))
-model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_3'))
-model.add(MaxPooling2D((2, 2), strides=(2, 2)))
-
-model.add(ZeroPadding2D((1, 1)))
-model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_1'))
-model.add(ZeroPadding2D((1, 1)))
-model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_2'))
-model.add(ZeroPadding2D((1, 1)))
-model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_3'))
-model.add(MaxPooling2D((2, 2), strides=(2, 2)))
-
-# load the weights of the VGG16 networks
-# (trained on ImageNet, won the ILSVRC competition in 2014)
-# note: when there is a complete match between your model definition
-# and your weight savefile, you can simply call model.load_weights(filename)
-assert os.path.exists(weights_path), 'Model weights not found (see "weights_path" variable in script).'
-f = h5py.File(weights_path)
-for k in range(f.attrs['nb_layers']):
-    if k >= len(model.layers):
-        # we don't look at the last (fully-connected) layers in the savefile
-        break
-    g = f['layer_{}'.format(k)]
-    weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
-    model.layers[k].set_weights(weights)
-f.close()
+model = applications.VGG16(weights='imagenet', include_top=False, input_shape=(img_height, img_width, 3))  # fixed shape so model.output_shape is fully defined; assumes channels_last
 print('Model loaded.')
 
 # build a classifier model to put on top of the convolutional model
@@ -144,29 +87,29 @@
 
 # prepare data augmentation configuration
 train_datagen = ImageDataGenerator(
-        rescale=1./255,
-        shear_range=0.2,
-        zoom_range=0.2,
-        horizontal_flip=True)
+    rescale=1. / 255,
+    shear_range=0.2,
+    zoom_range=0.2,
+    horizontal_flip=True)
 
-test_datagen = ImageDataGenerator(rescale=1./255)
+test_datagen = ImageDataGenerator(rescale=1. / 255)
 
 train_generator = train_datagen.flow_from_directory(
-        train_data_dir,
-        target_size=(img_height, img_width),
-        batch_size=32,
-        class_mode='binary')
+    train_data_dir,
+    target_size=(img_height, img_width),
+    batch_size=batch_size,
+    class_mode='binary')
 
 validation_generator = test_datagen.flow_from_directory(
-        validation_data_dir,
-        target_size=(img_height, img_width),
-        batch_size=32,
-        class_mode='binary')
+    validation_data_dir,
+    target_size=(img_height, img_width),
+    batch_size=batch_size,
+    class_mode='binary')
 
 # fine-tune the model
 model.fit_generator(
-        train_generator,
-        samples_per_epoch=nb_train_samples,
-        nb_epoch=nb_epoch,
-        validation_data=validation_generator,
-        nb_val_samples=nb_validation_samples)
+    train_generator,
+    steps_per_epoch=nb_train_samples // batch_size,  # Keras 2 counts batches per epoch, not samples
+    epochs=epochs,
+    validation_data=validation_generator,
+    validation_steps=nb_validation_samples // batch_size)  # batches drawn from the validation generator
diff --git a/classifier_from_little_data_script_3.py b/classifier_from_little_data_script_3.py
@@ -0,0 +1,172 @@
+'''This script goes along the blog post
+"Building powerful image classification models using very little data"
+from blog.keras.io.
+
+It uses data that can be downloaded at:
+https://www.kaggle.com/c/dogs-vs-cats/data
+
+In our setup, we:
+- created a data/ folder
+- created train/ and validation/ subfolders inside data/
+- created cats/ and dogs/ subfolders inside train/ and validation/
+- put the cat pictures index 0-999 in data/train/cats
+- put the cat pictures index 1000-1400 in data/validation/cats
+- put the dogs pictures index 12500-13499 in data/train/dogs
+- put the dog pictures index 13500-13900 in data/validation/dogs
+
+So that we have 1000 training examples for each class, and 400 validation examples for each class.
+
+In summary, this is our directory structure:
+```
+data/
+    train/
+        dogs/
+            dog001.jpg
+            dog002.jpg
+            ...
+        cats/
+            cat001.jpg
+            cat002.jpg
+            ...
+    validation/
+        dogs/
+            dog001.jpg
+            dog002.jpg
+            ...
+        cats/
+            cat001.jpg
+            cat002.jpg
+            ...
+```
+'''
+
+import os
+import h5py
+import numpy as np
+from keras.preprocessing.image import ImageDataGenerator
+from keras import optimizers
+from keras.models import Sequential
+from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D
+from keras.layers import Activation, Dropout, Flatten, Dense
+
+# path to the model weights files.
+weights_path = '../keras/examples/vgg16_weights.h5'
+top_model_weights_path = 'fc_model.h5'
+# dimensions of our images.
+img_width, img_height = 150, 150
+
+train_data_dir = 'data/train'
+validation_data_dir = 'data/validation'
+nb_train_samples = 2000
+nb_validation_samples = 800
+nb_epoch = 50
+
+# build the VGG16 network
+model = Sequential()
+model.add(ZeroPadding2D((1, 1), input_shape=(3, img_width, img_height)))
+
+model.add(Convolution2D(64, 3, 3, activation='relu', name='conv1_1'))
+model.add(ZeroPadding2D((1, 1)))
+model.add(Convolution2D(64, 3, 3, activation='relu', name='conv1_2'))
+model.add(MaxPooling2D((2, 2), strides=(2, 2)))
+
+model.add(ZeroPadding2D((1, 1)))
+model.add(Convolution2D(128, 3, 3, activation='relu', name='conv2_1'))
+model.add(ZeroPadding2D((1, 1)))
+model.add(Convolution2D(128, 3, 3, activation='relu', name='conv2_2'))
+model.add(MaxPooling2D((2, 2), strides=(2, 2)))
+
+model.add(ZeroPadding2D((1, 1)))
+model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_1'))
+model.add(ZeroPadding2D((1, 1)))
+model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_2'))
+model.add(ZeroPadding2D((1, 1)))
+model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_3'))
+model.add(MaxPooling2D((2, 2), strides=(2, 2)))
+
+model.add(ZeroPadding2D((1, 1)))
+model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_1'))
+model.add(ZeroPadding2D((1, 1)))
+model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_2'))
+model.add(ZeroPadding2D((1, 1)))
+model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_3'))
+model.add(MaxPooling2D((2, 2), strides=(2, 2)))
+
+model.add(ZeroPadding2D((1, 1)))
+model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_1'))
+model.add(ZeroPadding2D((1, 1)))
+model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_2'))
+model.add(ZeroPadding2D((1, 1)))
+model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_3'))
+model.add(MaxPooling2D((2, 2), strides=(2, 2)))
+
+# load the weights of the VGG16 networks
+# (trained on ImageNet, won the ILSVRC competition in 2014)
+# note: when there is a complete match between your model definition
+# and your weight savefile, you can simply call model.load_weights(filename)
+assert os.path.exists(weights_path), 'Model weights not found (see "weights_path" variable in script).'
+f = h5py.File(weights_path)
+for k in range(f.attrs['nb_layers']):
+    if k >= len(model.layers):
+        # we don't look at the last (fully-connected) layers in the savefile
+        break
+    g = f['layer_{}'.format(k)]
+    weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
+    model.layers[k].set_weights(weights)
+f.close()
+print('Model loaded.')
+
+# build a classifier model to put on top of the convolutional model
+top_model = Sequential()
+top_model.add(Flatten(input_shape=model.output_shape[1:]))
+top_model.add(Dense(256, activation='relu'))
+top_model.add(Dropout(0.5))
+top_model.add(Dense(1, activation='sigmoid'))
+
+# note that it is necessary to start with a fully-trained
+# classifier, including the top classifier,
+# in order to successfully do fine-tuning
+top_model.load_weights(top_model_weights_path)
+
+# add the model on top of the convolutional base
+model.add(top_model)
+
+# set the first 25 layers (up to the last conv block)
+# to non-trainable (weights will not be updated)
+for layer in model.layers[:25]:
+    layer.trainable = False
+
+# compile the model with a SGD/momentum optimizer
+# and a very slow learning rate.
+model.compile(loss='binary_crossentropy',
+              optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
+              metrics=['accuracy'])
+
+# prepare data augmentation configuration
+train_datagen = ImageDataGenerator(
+        rescale=1./255,
+        shear_range=0.2,
+        zoom_range=0.2,
+        horizontal_flip=True)
+
+test_datagen = ImageDataGenerator(rescale=1./255)
+
+train_generator = train_datagen.flow_from_directory(
+        train_data_dir,
+        target_size=(img_height, img_width),
+        batch_size=32,
+        class_mode='binary')
+
+validation_generator = test_datagen.flow_from_directory(
+        validation_data_dir,
+        target_size=(img_height, img_width),
+        batch_size=32,
+        class_mode='binary')
+
+# fine-tune the model
+model.fit_generator(
+        train_generator,
+        samples_per_epoch=nb_train_samples,
+        nb_epoch=nb_epoch,
+        validation_data=validation_generator,
+        nb_val_samples=nb_validation_samples)