############################################################
# BEGIN
#
# Siamese network definition.
#
# Two image branches ("image" and "image_p") run through
# identical layer stacks. Every learnable layer in the "_p"
# branch names its param blobs exactly like its counterpart
# in the first branch (e.g. "conv1_w" / "conv1_b"), so both
# branches share one set of weights.
#
# Each branch's fc8 output is concatenated with the "time"
# and "histograms" blobs, projected to 200 dimensions by
# fc_final / fc_final_p, and the pair of projections is
# compared by a ContrastiveLoss driven by "label".
############################################################
name: "ScenesSiamese"

############################################################
# DATA LAYERS
#
# Each phase reads from three independent sources (two image
# lists and one HDF5 list). Nothing in this file ties their
# iteration order together, so the i-th row of each source
# must describe the same pair.
# NOTE(review): this relies on the sources being read in file
# order; do not enable shuffling on one source only.
############################################################
# Train data layers
############################################################
layer {
  name: "data_image"
  type: "ImageData"
  top: "image"
  top: "label_image"
  include { phase: TRAIN }
  image_data_param {
    source: "/raid/lbaraldi/scene/bbc_train_image.txt"
    batch_size: 100
  }
  transform_param {
    mirror: true
    crop_size: 227
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
  }
}
layer {
  name: "data_image_p"
  type: "ImageData"
  top: "image_p"
  top: "label_image_p"
  include { phase: TRAIN }
  image_data_param {
    source: "/raid/lbaraldi/scene/bbc_train_image_p.txt"
    batch_size: 100
  }
  transform_param {
    mirror: true
    crop_size: 227
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
  }
}
layer {
  name: "data_multimodal"
  type: "HDF5Data"
  top: "time"
  top: "time_p"
  top: "shot_id"
  top: "shot_id_p"
  top: "histograms"
  top: "histograms_p"
  top: "label"
  include { phase: TRAIN }
  hdf5_data_param {
    source: "/raid/lbaraldi/scene/bbc_train_h5.txt"
    batch_size: 100
  }
}

############################################################
# Test data layers
#
# Mirroring is disabled here: Caffe's data transformer flips
# at random whenever "mirror" is true, in any phase, which
# would make the test loss differ from run to run and could
# flip the two images of a pair independently.
############################################################
layer {
  name: "data_image"
  type: "ImageData"
  top: "image"
  top: "label_image"
  include { phase: TEST }
  image_data_param {
    source: "/raid/lbaraldi/scene/bbc_test_image.txt"
    batch_size: 100
  }
  transform_param {
    mirror: false
    crop_size: 227
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
  }
}
layer {
  name: "data_image_p"
  type: "ImageData"
  top: "image_p"
  top: "label_image_p"
  include { phase: TEST }
  image_data_param {
    source: "/raid/lbaraldi/scene/bbc_test_image_p.txt"
    batch_size: 100
  }
  transform_param {
    mirror: false
    crop_size: 227
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
  }
}
layer {
  name: "data_multimodal"
  type: "HDF5Data"
  top: "time"
  top: "time_p"
  top: "shot_id"
  top: "shot_id_p"
  top: "histograms"
  top: "histograms_p"
  top: "label"
  include { phase: TEST }
  hdf5_data_param {
    source: "/raid/lbaraldi/scene/bbc_test_h5.txt"
    batch_size: 100
  }
}

############################################################
# FIRST CONVOLUTIONAL NEURAL NETWORK
#
# Consumes "image", produces "fc8" (1183 outputs, rectified
# in place by fc8_relu). All weight/bias blobs are named so
# the second branch can share them.
############################################################
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "image"
  top: "conv1"
  param { name: "conv1_w" lr_mult: 1 decay_mult: 1 }
  param { name: "conv1_b" lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
layer {
  name: "norm1"
  type: "LRN"
  bottom: "pool1"
  top: "norm1"
  lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "norm1"
  top: "conv2"
  param { name: "conv2_w" lr_mult: 1 decay_mult: 1 }
  param { name: "conv2_b" lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256
    pad: 2
    kernel_size: 5
    group: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 1 }
  }
}
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
layer {
  name: "norm2"
  type: "LRN"
  bottom: "pool2"
  top: "norm2"
  lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }
}
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "norm2"
  top: "conv3"
  param { name: "conv3_w" lr_mult: 1 decay_mult: 1 }
  param { name: "conv3_b" lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "conv3"
  top: "conv4"
  param { name: "conv4_w" lr_mult: 1 decay_mult: 1 }
  param { name: "conv4_b" lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 1 }
  }
}
layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "conv4"
}
layer {
  name: "conv5"
  type: "Convolution"
  bottom: "conv4"
  top: "conv5"
  param { name: "conv5_w" lr_mult: 1 decay_mult: 1 }
  param { name: "conv5_b" lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 1 }
  }
}
layer {
  name: "relu5"
  type: "ReLU"
  bottom: "conv5"
  top: "conv5"
}
layer {
  name: "pool5"
  type: "Pooling"
  bottom: "conv5"
  top: "pool5"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
layer {
  name: "fc6"
  type: "InnerProduct"
  bottom: "pool5"
  top: "fc6"
  param { name: "fc6_w" lr_mult: 1 decay_mult: 1 }
  param { name: "fc6_b" lr_mult: 2 decay_mult: 0 }
  inner_product_param {
    num_output: 4096
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 1 }
  }
}
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "fc6"
  top: "fc6"
}
layer {
  name: "drop6"
  type: "Dropout"
  bottom: "fc6"
  top: "fc6"
  dropout_param { dropout_ratio: 0.5 }
}
layer {
  name: "fc7"
  type: "InnerProduct"
  bottom: "fc6"
  top: "fc7"
  param { name: "fc7_w" lr_mult: 1 decay_mult: 1 }
  param { name: "fc7_b" lr_mult: 2 decay_mult: 0 }
  inner_product_param {
    num_output: 4096
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 1 }
  }
}
layer {
  name: "relu7"
  type: "ReLU"
  bottom: "fc7"
  top: "fc7"
}
layer {
  name: "drop7"
  type: "Dropout"
  bottom: "fc7"
  top: "fc7"
  dropout_param { dropout_ratio: 0.5 }
}
layer {
  name: "fc8"
  type: "InnerProduct"
  bottom: "fc7"
  top: "fc8"
  param { name: "fc8_w" lr_mult: 1 decay_mult: 1 }
  param { name: "fc8_b" lr_mult: 2 decay_mult: 0 }
  inner_product_param {
    num_output: 1183
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "fc8_relu"
  type: "ReLU"
  bottom: "fc8"
  top: "fc8"
}

############################################################
# SECOND CONVOLUTIONAL NEURAL NETWORK
#
# Mirror of the first branch for "image_p". Layer and blob
# names carry a "_p" suffix; param names do NOT, which is
# what makes the two branches share their weights.
############################################################
layer {
  name: "conv1_p"
  type: "Convolution"
  bottom: "image_p"
  top: "conv1_p"
  param { name: "conv1_w" lr_mult: 1 decay_mult: 1 }
  param { name: "conv1_b" lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 96
    kernel_size: 11
    stride: 4
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "relu1_p"
  type: "ReLU"
  bottom: "conv1_p"
  top: "conv1_p"
}
layer {
  name: "pool1_p"
  type: "Pooling"
  bottom: "conv1_p"
  top: "pool1_p"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
layer {
  name: "norm1_p"
  type: "LRN"
  bottom: "pool1_p"
  top: "norm1_p"
  lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }
}
layer {
  name: "conv2_p"
  type: "Convolution"
  bottom: "norm1_p"
  top: "conv2_p"
  param { name: "conv2_w" lr_mult: 1 decay_mult: 1 }
  param { name: "conv2_b" lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256
    pad: 2
    kernel_size: 5
    group: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 1 }
  }
}
layer {
  name: "relu2_p"
  type: "ReLU"
  bottom: "conv2_p"
  top: "conv2_p"
}
layer {
  name: "pool2_p"
  type: "Pooling"
  bottom: "conv2_p"
  top: "pool2_p"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
layer {
  name: "norm2_p"
  type: "LRN"
  bottom: "pool2_p"
  top: "norm2_p"
  lrn_param { local_size: 5 alpha: 0.0001 beta: 0.75 }
}
layer {
  name: "conv3_p"
  type: "Convolution"
  bottom: "norm2_p"
  top: "conv3_p"
  param { name: "conv3_w" lr_mult: 1 decay_mult: 1 }
  param { name: "conv3_b" lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "relu3_p"
  type: "ReLU"
  bottom: "conv3_p"
  top: "conv3_p"
}
layer {
  name: "conv4_p"
  type: "Convolution"
  bottom: "conv3_p"
  top: "conv4_p"
  param { name: "conv4_w" lr_mult: 1 decay_mult: 1 }
  param { name: "conv4_b" lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 1 }
  }
}
layer {
  name: "relu4_p"
  type: "ReLU"
  bottom: "conv4_p"
  top: "conv4_p"
}
layer {
  name: "conv5_p"
  type: "Convolution"
  bottom: "conv4_p"
  top: "conv5_p"
  param { name: "conv5_w" lr_mult: 1 decay_mult: 1 }
  param { name: "conv5_b" lr_mult: 2 decay_mult: 0 }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    group: 2
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 1 }
  }
}
layer {
  name: "relu5_p"
  type: "ReLU"
  bottom: "conv5_p"
  top: "conv5_p"
}
layer {
  name: "pool5_p"
  type: "Pooling"
  bottom: "conv5_p"
  top: "pool5_p"
  pooling_param { pool: MAX kernel_size: 3 stride: 2 }
}
layer {
  name: "fc6_p"
  type: "InnerProduct"
  bottom: "pool5_p"
  top: "fc6_p"
  param { name: "fc6_w" lr_mult: 1 decay_mult: 1 }
  param { name: "fc6_b" lr_mult: 2 decay_mult: 0 }
  inner_product_param {
    num_output: 4096
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 1 }
  }
}
layer {
  name: "relu6_p"
  type: "ReLU"
  bottom: "fc6_p"
  top: "fc6_p"
}
layer {
  name: "drop6_p"
  type: "Dropout"
  bottom: "fc6_p"
  top: "fc6_p"
  dropout_param { dropout_ratio: 0.5 }
}
layer {
  name: "fc7_p"
  type: "InnerProduct"
  bottom: "fc6_p"
  top: "fc7_p"
  param { name: "fc7_w" lr_mult: 1 decay_mult: 1 }
  param { name: "fc7_b" lr_mult: 2 decay_mult: 0 }
  inner_product_param {
    num_output: 4096
    weight_filler { type: "gaussian" std: 0.005 }
    bias_filler { type: "constant" value: 1 }
  }
}
layer {
  name: "relu7_p"
  type: "ReLU"
  bottom: "fc7_p"
  top: "fc7_p"
}
layer {
  name: "drop7_p"
  type: "Dropout"
  bottom: "fc7_p"
  top: "fc7_p"
  dropout_param { dropout_ratio: 0.5 }
}
layer {
  name: "fc8_p"
  type: "InnerProduct"
  bottom: "fc7_p"
  top: "fc8_p"
  param { name: "fc8_w" lr_mult: 1 decay_mult: 1 }
  param { name: "fc8_b" lr_mult: 2 decay_mult: 0 }
  inner_product_param {
    num_output: 1183
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0 }
  }
}
layer {
  name: "fc8_p_relu"
  type: "ReLU"
  bottom: "fc8_p"
  top: "fc8_p"
}

############################################################
# MERGE + LAST IP LAYERS
#
# Each branch's CNN output is concatenated with its "time"
# and "histograms" blobs, then projected to 200 dimensions.
# fc_final and fc_final_p share weights through their param
# names, like the CNN branches.
# NOTE(review): Concat uses its default axis here, so the
# HDF5 blobs presumably have the batch on axis 0 and
# features on axis 1 - confirm against the HDF5 files.
############################################################
layer {
  name: "merge"
  type: "Concat"
  bottom: "fc8"
  bottom: "time"
  bottom: "histograms"
  top: "merge"
}
layer {
  name: "merge_p"
  type: "Concat"
  bottom: "fc8_p"
  bottom: "time_p"
  bottom: "histograms_p"
  top: "merge_p"
}
layer {
  name: "fc_final"
  type: "InnerProduct"
  bottom: "merge"
  top: "fc_final"
  # Higher learning rate since this layer is starting from random
  param { name: "fc_final_w" lr_mult: 4 decay_mult: 1 }
  # Higher learning rate since this layer is starting from random
  param { name: "fc_final_b" lr_mult: 4 decay_mult: 0 }
  inner_product_param {
    num_output: 200
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0.01 }
  }
}
layer {
  name: "fc_final_p"
  type: "InnerProduct"
  bottom: "merge_p"
  top: "fc_final_p"
  # Higher learning rate since this layer is starting from random
  param { name: "fc_final_w" lr_mult: 4 decay_mult: 1 }
  # Higher learning rate since this layer is starting from random
  param { name: "fc_final_b" lr_mult: 4 decay_mult: 0 }
  inner_product_param {
    num_output: 200
    weight_filler { type: "gaussian" std: 0.01 }
    bias_filler { type: "constant" value: 0.01 }
  }
}

############################################################
# LOSS/ACCURACY LAYERS
#
# Contrastive loss over the two 200-d projections; "label"
# comes from the HDF5 data layer.
############################################################
layer {
  name: "loss"
  type: "ContrastiveLoss"
  bottom: "fc_final"
  bottom: "fc_final_p"
  bottom: "label"
  top: "loss"
  contrastive_loss_param { margin: 1.0 }
}
############################################################
# UNUSED BLOBS
#
# The data layers emit "shot_id", "shot_id_p", "label_image"
# and "label_image_p", but no other layer in this file reads
# them. Each is fed to a Silence layer so it has a consumer.
############################################################
layer { name: "shot_id_silence"       type: "Silence" bottom: "shot_id" }
layer { name: "shot_id_silence_p"     type: "Silence" bottom: "shot_id_p" }
layer { name: "label_image_silence"   type: "Silence" bottom: "label_image" }
layer { name: "label_image_silence_p" type: "Silence" bottom: "label_image_p" }

############################################################
# END
############################################################