# -*- coding: utf-8 -*-
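#
# Fine-tunes VGG-16 on the Kaggle State Farm Distracted Driver Detection
# data layout: training images (folders c0..c9) are loaded, cached, and
# normalized with the VGG per-channel means; a pre-trained VGG-16 with its
# final 1000-way layer replaced by a 10-way softmax is trained over several
# folds, and the averaged fold predictions on the test set are written out
# as a submission CSV. The code targets the 2016-era Keras 0.x/1.x API with
# Theano dimension ordering (Convolution2D(n, 3, 3), nb_epoch,
# sklearn.cross_validation, etc.).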
import numpy as np
import os
import glob
import cv2
import math
import pickle
import datetime
import pandas as pd
from sklearn.cross_validation import train_test_split
from sklearn.cross_validation import KFold
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D, \
ZeroPadding2D
# from keras.layers.normalization import BatchNormalization
# from keras.optimizers import Adam
from keras.optimizers import SGD
from keras.utils import np_utils
from keras.models import model_from_json
# from sklearn.metrics import log_loss
from numpy.random import permutation
np.random.seed(2016)

use_cache = 1  # reuse pickled image data from ./cache when available
# color type: 1 - grayscale, 3 - RGB
color_type_global = 3

def get_im(path, img_rows, img_cols, color_type=1):
    # Load as grayscale
    if color_type == 1:
        img = cv2.imread(path, 0)
    elif color_type == 3:
        # Load as color (OpenCV reads BGR)
        img = cv2.imread(path)
    # Reduce size
    resized = cv2.resize(img, (img_cols, img_rows))
    # mean_pixel = [103.939, 116.779, 123.68]
    # resized = resized.astype(np.float32, copy=False)
    # for c in range(3):
    #     resized[:, :, c] = resized[:, :, c] - mean_pixel[c]
    # resized = resized.transpose((2, 0, 1))
    # resized = np.expand_dims(img, axis=0)
    return resized

def get_driver_data():
    # Map each training image filename to its driver id
    dr = dict()
    path = os.path.join('..', 'input', 'driver_imgs_list.csv')
    print('Read drivers data')
    f = open(path, 'r')
    f.readline()  # skip the CSV header
    while 1:
        line = f.readline()
        if line == '':
            break
        arr = line.strip().split(',')
        dr[arr[2]] = arr[0]
    f.close()
    return dr

def load_train(img_rows, img_cols, color_type=1):
    X_train = []
    y_train = []
    driver_id = []

    driver_data = get_driver_data()

    print('Read train images')
    for j in range(10):
        print('Load folder c{}'.format(j))
        path = os.path.join('..', 'input', 'imgs', 'train',
                            'c' + str(j), '*.jpg')
        files = glob.glob(path)
        for fl in files:
            flbase = os.path.basename(fl)
            img = get_im(fl, img_rows, img_cols, color_type)
            X_train.append(img)
            y_train.append(j)
            driver_id.append(driver_data[flbase])

    unique_drivers = sorted(list(set(driver_id)))
    print('Unique drivers: {}'.format(len(unique_drivers)))
    print(unique_drivers)
    return X_train, y_train, driver_id, unique_drivers

def load_test(img_rows, img_cols, color_type=1):
    print('Read test images')
    path = os.path.join('..', 'input', 'imgs', 'test', '*.jpg')
    files = glob.glob(path)
    X_test = []
    X_test_id = []
    total = 0
    # Report progress roughly every 10% of the files
    thr = max(1, math.floor(len(files) / 10))
    for fl in files:
        flbase = os.path.basename(fl)
        img = get_im(fl, img_rows, img_cols, color_type)
        X_test.append(img)
        X_test_id.append(flbase)
        total += 1
        if total % thr == 0:
            print('Read {} images from {}'.format(total, len(files)))
    return X_test, X_test_id

def cache_data(data, path):
    if not os.path.isdir('cache'):
        os.mkdir('cache')
    if os.path.isdir(os.path.dirname(path)):
        file = open(path, 'wb')
        pickle.dump(data, file)
        file.close()
    else:
        print('Directory does not exist')

def restore_data(path):
    data = dict()
    if os.path.isfile(path):
        print('Restore data from pickle........')
        file = open(path, 'rb')
        data = pickle.load(file)
        file.close()
    return data

def save_model(model, index, cross=''):
    json_string = model.to_json()
    if not os.path.isdir('cache'):
        os.mkdir('cache')
    json_name = 'architecture' + str(index) + cross + '.json'
    weight_name = 'model_weights' + str(index) + cross + '.h5'
    open(os.path.join('cache', json_name), 'w').write(json_string)
    model.save_weights(os.path.join('cache', weight_name), overwrite=True)

def read_model(index, cross=''):
    json_name = 'architecture' + str(index) + cross + '.json'
    weight_name = 'model_weights' + str(index) + cross + '.h5'
    model = model_from_json(open(os.path.join('cache', json_name)).read())
    model.load_weights(os.path.join('cache', weight_name))
    return model

def split_validation_set(train, target, test_size):
    random_state = 51
    X_train, X_test, y_train, y_test = \
        train_test_split(train, target,
                         test_size=test_size,
                         random_state=random_state)
    return X_train, X_test, y_train, y_test

def create_submission(predictions, test_id, info):
    result1 = pd.DataFrame(predictions, columns=['c0', 'c1', 'c2', 'c3',
                                                 'c4', 'c5', 'c6', 'c7',
                                                 'c8', 'c9'])
    result1.loc[:, 'img'] = pd.Series(test_id, index=result1.index)
    now = datetime.datetime.now()
    if not os.path.isdir('subm'):
        os.mkdir('subm')
    suffix = info + '_' + str(now.strftime("%Y-%m-%d-%H-%M"))
    sub_file = os.path.join('subm', 'submission_' + suffix + '.csv')
    result1.to_csv(sub_file, index=False)
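
# The resulting CSV pairs each test image filename (img column) with the ten
# class probabilities c0..c9, the format the competition scorer expects.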

def read_and_normalize_and_shuffle_train_data(img_rows, img_cols,
                                              color_type=1):
    cache_path = os.path.join('cache', 'train_r_' + str(img_rows) +
                              '_c_' + str(img_cols) + '_t_' +
                              str(color_type) + '.dat')
    if not os.path.isfile(cache_path) or use_cache == 0:
        train_data, train_target, driver_id, unique_drivers = \
            load_train(img_rows, img_cols, color_type)
        cache_data((train_data, train_target, driver_id, unique_drivers),
                   cache_path)
    else:
        print('Restore train from cache!')
        (train_data, train_target, driver_id, unique_drivers) = \
            restore_data(cache_path)

    train_data = np.array(train_data, dtype=np.uint8)
    train_target = np.array(train_target, dtype=np.uint8)
    if color_type == 1:
        train_data = train_data.reshape(train_data.shape[0], color_type,
                                        img_rows, img_cols)
    else:
        # (n, rows, cols, channels) -> (n, channels, rows, cols), Theano ordering
        train_data = train_data.transpose((0, 3, 1, 2))
    train_target = np_utils.to_categorical(train_target, 10)
    train_data = train_data.astype('float32')
    if color_type == 3:
        # Subtract the VGG-16 per-channel (BGR) means
        mean_pixel = [103.939, 116.779, 123.68]
        for c in range(3):
            train_data[:, c, :, :] -= mean_pixel[c]
    # train_data /= 255
    perm = permutation(len(train_target))
    train_data = train_data[perm]
    train_target = train_target[perm]
    print('Train shape:', train_data.shape)
    print(train_data.shape[0], 'train samples')
    return train_data, train_target, driver_id, unique_drivers

def read_and_normalize_test_data(img_rows=224, img_cols=224, color_type=1):
    cache_path = os.path.join('cache', 'test_r_' + str(img_rows) +
                              '_c_' + str(img_cols) + '_t_' +
                              str(color_type) + '.dat')
    if not os.path.isfile(cache_path) or use_cache == 0:
        test_data, test_id = load_test(img_rows, img_cols, color_type)
        cache_data((test_data, test_id), cache_path)
    else:
        print('Restore test from cache!')
        (test_data, test_id) = restore_data(cache_path)

    test_data = np.array(test_data, dtype=np.uint8)
    if color_type == 1:
        test_data = test_data.reshape(test_data.shape[0], color_type,
                                      img_rows, img_cols)
    else:
        test_data = test_data.transpose((0, 3, 1, 2))
    test_data = test_data.astype('float32')
    if color_type == 3:
        mean_pixel = [103.939, 116.779, 123.68]
        for c in range(3):
            test_data[:, c, :, :] -= mean_pixel[c]
    # test_data /= 255
    print('Test shape:', test_data.shape)
    print(test_data.shape[0], 'test samples')
    return test_data, test_id

def dict_to_list(d):
    return list(d.values())

def merge_several_folds_mean(data, nfolds):
    # Arithmetic mean of the per-fold prediction arrays
    a = np.array(data[0])
    for i in range(1, nfolds):
        a += np.array(data[i])
    a /= nfolds
    return a.tolist()

def merge_several_folds_geom(data, nfolds):
    # Geometric mean of the per-fold prediction arrays
    a = np.array(data[0])
    for i in range(1, nfolds):
        a *= np.array(data[i])
    a = np.power(a, 1.0 / nfolds)  # 1.0 avoids integer division on Python 2
    return a.tolist()

def copy_selected_drivers(train_data, train_target, driver_id, driver_list):
    data = []
    target = []
    index = []
    for i in range(len(driver_id)):
        if driver_id[i] in driver_list:
            data.append(train_data[i])
            target.append(train_target[i])
            index.append(i)
    data = np.array(data, dtype=np.float32)
    target = np.array(target, dtype=np.float32)
    index = np.array(index, dtype=np.uint32)
    return data, target, index
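
# Hypothetical usage of copy_selected_drivers for a driver-held-out split
# (not exercised by run_cross_validation below, which relies on
# validation_split instead; the driver ids shown are examples):
#
#     unique_list_train = ['p002', 'p012']
#     X_train, Y_train, train_index = copy_selected_drivers(
#         train_data, train_target, driver_id, unique_list_train)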

def vgg_std16_model(img_rows, img_cols, color_type=1):
    # Standard VGG-16 stack (Theano channel ordering)
    model = Sequential()
    model.add(ZeroPadding2D((1, 1), input_shape=(color_type,
                                                 img_rows, img_cols)))
    model.add(Convolution2D(64, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(64, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(128, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(128, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(256, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(256, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(256, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(ZeroPadding2D((1, 1)))
    model.add(Convolution2D(512, 3, 3, activation='relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    model.add(Flatten())
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1000, activation='softmax'))

    # Load the pre-trained ImageNet weights, then replace the final
    # 1000-way classifier with a 10-way softmax for this task
    model.load_weights('../input/vgg16_weights.h5')
    model.layers.pop()
    model.add(Dense(10, activation='softmax'))

    # Learning rate is changed to 0.001
    sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd, loss='categorical_crossentropy')
    return model
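
# A quick sanity check of the builder (hypothetical usage; assumes the
# pre-trained weights file ../input/vgg16_weights.h5 is present and stored
# in Theano dimension ordering to match the layer stack above):
#
#     model = vgg_std16_model(224, 224, color_type=3)
#     print(len(model.layers))  # VGG-16 stack plus the replacement softmax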

def run_cross_validation(nfolds=10, nb_epoch=10, split=0.2, modelStr=''):
    # Now it loads color images
    # input image dimensions
    img_rows, img_cols = 224, 224
    batch_size = 64
    random_state = 20

    train_data, train_target, driver_id, unique_drivers = \
        read_and_normalize_and_shuffle_train_data(img_rows, img_cols,
                                                  color_type_global)

    # shuf_train_data = []
    # shuf_train_target = []
    # index_shuf = range(len(train_target))
    # shuffle(index_shuf)
    # for i in index_shuf:
    #     shuf_train_data.append(train_data[i])
    #     shuf_train_target.append(train_target[i])

    # yfull_train = dict()
    num_fold = 0
    kf = KFold(len(unique_drivers), n_folds=nfolds,
               shuffle=True, random_state=random_state)
    # Note: the driver folds are only iterated to count folds here; each
    # fold's model is fit on the full shuffled set with validation_split
    for train_drivers, test_drivers in kf:
        num_fold += 1
        print('Start KFold number {} from {}'.format(num_fold, nfolds))
        # print('Split train: ', len(X_train), len(Y_train))
        # print('Split valid: ', len(X_valid), len(Y_valid))
        # print('Train drivers: ', unique_list_train)
        # print('Test drivers: ', unique_list_valid)

        # model = create_model_v1(img_rows, img_cols, color_type_global)
        # model = vgg_bn_model(img_rows, img_cols, color_type_global)
        model = vgg_std16_model(img_rows, img_cols, color_type_global)
        model.fit(train_data, train_target, batch_size=batch_size,
                  nb_epoch=nb_epoch,
                  show_accuracy=True, verbose=1,
                  validation_split=split, shuffle=True)
        # print('losses: ' + hist.history.losses[-1])
        # print('Score log_loss: ', score[0])
        save_model(model, num_fold, modelStr)
        # predictions_valid = model.predict(X_valid, batch_size=128, verbose=1)
        # score = log_loss(Y_valid, predictions_valid)
        # print('Score log_loss: ', score)
        # Store valid predictions
        # for i in range(len(test_index)):
        #     yfull_train[test_index[i]] = predictions_valid[i]

    print('Start testing............')
    test_data, test_id = read_and_normalize_test_data(img_rows, img_cols,
                                                      color_type_global)
    yfull_test = []
    for index in range(1, num_fold + 1):
        # Reload each saved fold model and store its test predictions
        model = read_model(index, modelStr)
        test_prediction = model.predict(test_data, batch_size=128, verbose=1)
        yfull_test.append(test_prediction)

    info_string = 'loss_' + modelStr \
                  + '_r_' + str(img_rows) \
                  + '_c_' + str(img_cols) \
                  + '_folds_' + str(nfolds) \
                  + '_ep_' + str(nb_epoch)
    test_res = merge_several_folds_mean(yfull_test, nfolds)
    create_submission(test_res, test_id, info_string)

def test_model_and_submit(start=1, end=1, modelStr=''):
    img_rows, img_cols = 224, 224
    # batch_size = 64
    # random_state = 51
    nb_epoch = 15
    print('Start testing............')
    test_data, test_id = read_and_normalize_test_data(img_rows, img_cols,
                                                      color_type_global)
    yfull_test = []
    for index in range(start, end + 1):
        # Store test predictions
        model = read_model(index, modelStr)
        test_prediction = model.predict(test_data, batch_size=128, verbose=1)
        yfull_test.append(test_prediction)

    info_string = 'loss_' + modelStr \
                  + '_r_' + str(img_rows) \
                  + '_c_' + str(img_cols) \
                  + '_folds_' + str(end - start + 1) \
                  + '_ep_' + str(nb_epoch)
    test_res = merge_several_folds_mean(yfull_test, end - start + 1)
    create_submission(test_res, test_id, info_string)

# nfolds, nb_epoch, split
run_cross_validation(2, 20, 0.15, '_vgg_16_2x20')

# nb_epoch, split
# run_one_fold_cross_validation(10, 0.1)

# test_model_and_submit(1, 10, 'high_epoch')
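
# Expected directory layout, inferred from the paths used above:
#   ../input/driver_imgs_list.csv      - maps train images to driver ids
#   ../input/imgs/train/c0..c9/*.jpg   - training images, one folder per class
#   ../input/imgs/test/*.jpg           - test images
#   ../input/vgg16_weights.h5          - pre-trained VGG-16 weights
# Outputs are written to ./cache (pickled data, models) and ./subm (CSVs).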