name: "VGG_ILSVRC_16_layers" input: 'data' input_shape { dim: 1 dim: 3 dim: 600 dim: 900 } input: 'im_info' input_shape { dim: 1 dim: 3 } layer { name: "conv1_1" type: "Convolution" bottom: "data" top: "conv1_1" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 64 pad: 1 kernel_size: 3 } } layer { name: "relu1_1" type: "ReLU" bottom: "conv1_1" top: "conv1_1" } layer { name: "conv1_2" type: "Convolution" bottom: "conv1_1" top: "conv1_2" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 64 pad: 1 kernel_size: 3 } } layer { name: "relu1_2" type: "ReLU" bottom: "conv1_2" top: "conv1_2" } layer { name: "pool1" type: "Pooling" bottom: "conv1_2" top: "pool1" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { name: "conv2_1" type: "Convolution" bottom: "pool1" top: "conv2_1" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 128 pad: 1 kernel_size: 3 } } layer { name: "relu2_1" type: "ReLU" bottom: "conv2_1" top: "conv2_1" } layer { name: "conv2_2" type: "Convolution" bottom: "conv2_1" top: "conv2_2" param { lr_mult: 0 decay_mult: 0 } param { lr_mult: 0 decay_mult: 0 } convolution_param { num_output: 128 pad: 1 kernel_size: 3 } } layer { name: "relu2_2" type: "ReLU" bottom: "conv2_2" top: "conv2_2" } layer { name: "pool2" type: "Pooling" bottom: "conv2_2" top: "pool2" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { name: "conv3_1" type: "Convolution" bottom: "pool2" top: "conv3_1" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 } } layer { name: "relu3_1" type: "ReLU" bottom: "conv3_1" top: "conv3_1" } layer { name: "conv3_2" type: "Convolution" bottom: "conv3_1" top: "conv3_2" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 } } layer { name: "relu3_2" type: "ReLU" bottom: "conv3_2" top: "conv3_2" } layer { name: "conv3_3" type: "Convolution" bottom: "conv3_2" top: "conv3_3" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 256 pad: 1 kernel_size: 3 } } layer { name: "relu3_3" type: "ReLU" bottom: "conv3_3" top: "conv3_3" } layer { name: "pool3" type: "Pooling" bottom: "conv3_3" top: "pool3" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { name: "conv4_1" type: "Convolution" bottom: "pool3" top: "conv4_1" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 } } layer { name: "relu4_1" type: "ReLU" bottom: "conv4_1" top: "conv4_1" } layer { name: "conv4_2" type: "Convolution" bottom: "conv4_1" top: "conv4_2" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 } } layer { name: "relu4_2" type: "ReLU" bottom: "conv4_2" top: "conv4_2" } layer { name: "conv4_3" type: "Convolution" bottom: "conv4_2" top: "conv4_3" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 } } layer { name: "relu4_3" type: "ReLU" bottom: "conv4_3" top: "conv4_3" } layer { name: "pool4" type: "Pooling" bottom: "conv4_3" top: "pool4" pooling_param { pool: MAX kernel_size: 2 stride: 2 } } layer { name: "conv5_1" type: "Convolution" bottom: "pool4" top: "conv5_1" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 } } layer { name: "relu5_1" type: "ReLU" bottom: "conv5_1" top: "conv5_1" } layer { name: "conv5_2" type: "Convolution" bottom: "conv5_1" top: "conv5_2" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 } } layer { name: "relu5_2" type: "ReLU" bottom: "conv5_2" top: "conv5_2" } layer { name: "conv5_3" type: "Convolution" bottom: "conv5_2" top: "conv5_3" param { lr_mult: 1 } param { lr_mult: 2 } convolution_param { num_output: 512 pad: 1 kernel_size: 3 } } layer { name: "relu5_3" type: "ReLU" bottom: "conv5_3" top: "conv5_3" } #========= RPN ============ # prepare lstm inputs layer { name: "im2col" bottom: "conv5_3" top: "im2col" type: "Im2col" convolution_param { pad: 1 kernel_size: 3 } } layer { name: "im2col_transpose" top: "im2col_transpose" bottom: "im2col" type: "Transpose" transpose_param { dim: 3 dim: 2 dim: 0 dim: 1 } } layer { name: "lstm_input" type: "Reshape" bottom: "im2col_transpose" top: "lstm_input" reshape_param { shape { dim: -1 } axis: 1 num_axes: 2 } } layer { name: "lstm" type: "Lstm" bottom: "lstm_input" top: "lstm" lstm_param { num_output: 128 weight_filler { type: "gaussian" std: 0.01 } bias_filler { type: "constant" } clipping_threshold: 1 } } # ===================== rlstm =================== layer { name: "lstm-reverse1" type: "Reverse" bottom: "lstm_input" top: "rlstm_input" reverse_param { axis: 0 } } layer { name: "rlstm" type: "Lstm" bottom: "rlstm_input" top: "rlstm-output" lstm_param { num_output: 128 } } layer { name: "lstm-reverse2" type: "Reverse" bottom: "rlstm-output" top: "rlstm" reverse_param { axis: 0 } } # merge lstm and rlstm layer { name: "merge_lstm_rlstm" type: "Concat" bottom: "lstm" bottom: "rlstm" top: "merge_lstm_rlstm" concat_param { axis: 2 } } layer { name: "lstm_output_reshape" type: "Reshape" bottom: "merge_lstm_rlstm" top: "lstm_output_reshape" reshape_param { shape { dim: -1 dim: 1 } axis: 1 num_axes: 1 } } # transpose size of output as (N, C, H, W) layer { name: "lstm_output" type: "Transpose" bottom: "lstm_output_reshape" top: "lstm_output" transpose_param { dim: 2 dim: 3 dim: 1 dim: 0 } } layer { name: "fc" bottom: "lstm_output" top: "fc" type: "Convolution" convolution_param { num_output: 512 kernel_size: 1 } } layer { name: "relu_fc" type: "ReLU" bottom: "fc" top: "fc" } layer { name: "rpn_cls_score" type: "Convolution" bottom: "fc" top: "rpn_cls_score" param { lr_mult: 1.0 } param { lr_mult: 2.0 } convolution_param { num_output: 20 kernel_size: 1 pad: 0 stride: 1 } } layer { bottom: "rpn_cls_score" top: "rpn_cls_score_reshape" name: "rpn_cls_score_reshape" type: "Reshape" reshape_param { shape { dim: 0 dim: 2 dim: -1 dim: 0 } } } layer { name: "rpn_bbox_pred" type: "Convolution" bottom: "fc" top: "rpn_bbox_pred" param { lr_mult: 1.0 } param { lr_mult: 2.0 } convolution_param { num_output: 20 kernel_size: 1 pad: 0 stride: 1 } } layer { name: "rpn_cls_prob" type: "Softmax" bottom: "rpn_cls_score_reshape" top: "rpn_cls_prob" } layer { name: 'rpn_cls_prob_reshape' type: 'Reshape' bottom: 'rpn_cls_prob' top: 'rpn_cls_prob_reshape' reshape_param { shape { dim: 0 dim: 20 dim: -1 dim: 0 } } } layer { name: 'proposal' type: 'Python' bottom: 'rpn_cls_prob_reshape' bottom: 'rpn_bbox_pred' bottom: 'im_info' top: 'rois' top: 'scores' python_param { module: 'layers.text_proposal_layer' layer: 'ProposalLayer' param_str: "'feat_stride': 16" } }