'''Script to go with AppNexus blog post.

Taken from and altered from classifier_from_little_data_script_1.py:
    https://gist.github.com/fchollet/0830affa1f7f19fd47b06d4cf89ed44d
which appears at
    https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html
by Francois Chollet.

It uses data that can be downloaded at:
    https://www.kaggle.com/c/dogs-vs-cats/data

The rest of this file was written by
Ryan Woodard | AppNexus | Data Science | 2017
'''
#
# Original code from Francois Chollet, Keras
#
import keras
from keras import backend as K
from keras.layers import Input, Dense, Conv2D, MaxPooling2D, Flatten, Dropout
from keras.models import Model

# Dimensions of our images.
img_width, img_height = 150, 150

train_data_dir = 'data/train'
validation_data_dir = 'data/validation'
nb_train_samples = 2000
nb_validation_samples = 800
epochs = 5  # 50
batch_size = 16

#
# Left branch of model (convnet).
#
if K.image_data_format() == 'channels_first':
    input_shape = (3, img_width, img_height)
else:
    input_shape = (img_width, img_height, 3)

# This returns a tensor.
linput = Input(shape=input_shape)

x = Conv2D(32, (3, 3), padding='same', activation='relu')(linput)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(32, (3, 3), padding='same', activation='relu')(x)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(64, (3, 3), padding='same', activation='relu')(x)
x = MaxPooling2D((2, 2))(x)
loutput = Flatten()(x)

#
# Right branch of model (simple feature data, design matrix).
#
nfeatures = 74  # From akmtdfgen.py test_generator().
rinput = Input(shape=(nfeatures,), name='rinput')

#
# Make the merged model.
#
x = keras.layers.concatenate([loutput, rinput])
x = Dense(64, activation='relu')(x)
x = Dense(64, activation='relu')(x)
x = Dropout(0.5)(x)

# And finally we add the main logistic regression layer.
main_output = Dense(1, activation='sigmoid', name='main_output')(x)

model = Model(inputs=[linput, rinput], outputs=main_output)

model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

from akmtdfgen import get_demo_data
from akmtdfgen import generator_from_df

df_train, df_valid = get_demo_data()

assert {2} == set([df_train.target.nunique(), df_train.label.nunique(),
                   df_valid.target.nunique(), df_valid.label.nunique()])

ntrain, nvalid = df_train.shape[0], df_valid.shape[0]

print("""
Training set:   %d images, 2 classes.
Validation set: %d images, 2 classes.
""" % (ntrain, nvalid))

# Train from generators rather than in-memory arrays (model.fit).
target_size = (img_width, img_height)

train_generator = generator_from_df(df_train, batch_size, target_size,
                                    features="mm_features_train_bc")
validation_generator = generator_from_df(df_valid, batch_size, target_size,
                                         features="mm_features_valid_bc")

nbatches_train, mod = divmod(ntrain, batch_size)
nbatches_valid, mod = divmod(nvalid, batch_size)

nworkers = 10

# Latest Keras 2.0 API:
# fit_generator(self, generator, steps_per_epoch, epochs=1, verbose=1,
#               callbacks=None, validation_data=None, validation_steps=None,
#               class_weight=None, max_queue_size=10, workers=1,
#               use_multiprocessing=False, initial_epoch=0)

model.fit_generator(
    train_generator,
    steps_per_epoch=nbatches_train,
    epochs=epochs,
    verbose=2,
    validation_data=validation_generator,
    validation_steps=nbatches_valid,
    workers=nworkers)

# model.save_weights('mm_mt_df_gen.h5')
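
# -----------------------------------------------------------------------------
# Illustrative note (not part of the original training flow above): because the
# model has two inputs (linput and rinput), any generator that feeds it must
# yield batches of the form ([image_batch, feature_batch], label_batch).
# generator_from_df in akmtdfgen.py is assumed to follow that contract; the
# sketch below only shows the expected batch shapes using random data.  The
# function name and the use of numpy here are illustrative assumptions, not the
# akmtdfgen implementation, and 'channels_last' image ordering is assumed.
# -----------------------------------------------------------------------------
import numpy as np

def _two_input_batch_sketch(batch_size, target_size, nfeatures):
    """Yield dummy ([images, features], labels) batches matching the model's two inputs."""
    while True:
        # One batch of RGB images with shape (batch_size, height, width, 3).
        images = np.random.rand(batch_size, target_size[0], target_size[1], 3)
        # One batch of per-sample feature vectors for the right branch.
        features = np.random.rand(batch_size, nfeatures)
        # Binary labels for the sigmoid main_output layer.
        labels = np.random.randint(0, 2, size=(batch_size, 1))
        yield [images, features], labels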