yardstick17 · November 25, 2023 23:46 · Sep 29, 2017 · Sep 27, 2017 · Sep 27, 2017
diff --git a/spp_deep_network.py b/spp_deep_network.py
@@ -1,4 +1,113 @@
 CUSTOM_OUTPUT_CATEGORIES = 2
+import keras.backend as K
+from keras.engine.topology import Layer
+
+
+class SpatialPyramidPooling(Layer):
+    '''Spatial pyramid pooling layer for 2D inputs.
+
+    Max-pools a feature map of any spatial size over a fixed set of grids, so
+    the output length depends only on the channel count and `pool_list`.
+    See Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition,
+    K. He, X. Zhang, S. Ren, J. Sun
+    # Arguments
+        pool_list: list of int
+            Pyramid levels. Each int n adds one level that splits the feature map
+            into an n x n grid and max-pools every cell. For example [1, 2, 4]
+            gives 1 + 4 + 16 = 21 outputs per feature map.
+    # Input shape
+        4D tensor with shape:
+        `(samples, channels, rows, cols)` if dim_ordering='th'
+        or 4D tensor with shape:
+        `(samples, rows, cols, channels)` if dim_ordering='tf'.
+    # Output shape
+        2D tensor with shape:
+        `(samples, channels * sum([i * i for i in pool_list]))`
+    '''
+
+    def __init__(self, pool_list, **kwargs):
+        # The data layout is read once from the backend configuration.
+        self.dim_ordering = K.image_dim_ordering()
+        assert self.dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
+
+        self.pool_list = pool_list
+        # An n x n level contributes n * n pooled values per channel.
+        self.num_outputs_per_channel = sum(i * i for i in pool_list)
+
+        super(SpatialPyramidPooling, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+        '''Record the channel count from the static input shape.'''
+        if self.dim_ordering == 'th':
+            self.nb_channels = input_shape[1]
+        elif self.dim_ordering == 'tf':
+            self.nb_channels = input_shape[3]
+
+    def get_output_shape_for(self, input_shape):
+        '''Return the static output shape (Keras 1 hook).'''
+        return (input_shape[0], self.nb_channels * self.num_outputs_per_channel)
+
+    def compute_output_shape(self, input_shape):
+        '''Return the static output shape (Keras 2 hook).
+
+        Keras 2 does not call `get_output_shape_for`; without this override the
+        base-class default would report the input shape as the output shape.
+        '''
+        return self.get_output_shape_for(input_shape)
+
+    def get_config(self):
+        '''Return the base layer config extended with `pool_list`.'''
+        config = {'pool_list': self.pool_list}
+        base_config = super(SpatialPyramidPooling, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+
+    def call(self, x, mask=None):
+        '''Max-pool `x` over every cell of every pyramid level.
+
+        Returns the per-channel maxima of all cells concatenated along the last
+        axis, ordered by level, then grid row, then grid column.
+        '''
+        input_shape = K.shape(x)
+
+        if self.dim_ordering == 'th':
+            num_rows, num_cols = input_shape[2], input_shape[3]
+        elif self.dim_ordering == 'tf':
+            num_rows, num_cols = input_shape[1], input_shape[2]
+
+        outputs = []
+        for num_pool_regions in self.pool_list:
+            # Cell sizes stay fractional; bounds are rounded per cell.
+            row_length = K.cast(num_rows, 'float32') / num_pool_regions
+            col_length = K.cast(num_cols, 'float32') / num_pool_regions
+            for jy in range(num_pool_regions):
+                y1, y2 = self._cell_bounds(jy, row_length)
+                for ix in range(num_pool_regions):
+                    x1, x2 = self._cell_bounds(ix, col_length)
+                    outputs.append(self._pool_cell(x, input_shape, y1, y2, x1, x2))
+
+        return K.concatenate(outputs)
+
+    @staticmethod
+    def _cell_bounds(index, length):
+        '''Return the rounded int32 (start, stop) bounds of grid cell `index`.'''
+        start = K.cast(K.round(index * length), 'int32')
+        stop = K.cast(K.round(index * length + length), 'int32')
+        return start, stop
+
+    def _pool_cell(self, x, input_shape, y1, y2, x1, x2):
+        '''Return the per-channel maximum of `x` over rows y1:y2, cols x1:x2.'''
+        if self.dim_ordering == 'th':
+            new_shape = [input_shape[0], input_shape[1], y2 - y1, x2 - x1]
+            x_crop = x[:, :, y1:y2, x1:x2]
+            spatial_axes = (2, 3)
+        else:
+            new_shape = [input_shape[0], y2 - y1, x2 - x1, input_shape[3]]
+            x_crop = x[:, y1:y2, x1:x2, :]
+            spatial_axes = (1, 2)
+        # The reshape restates the crop's own shape; kept from the original
+        # implementation (presumably for backend shape handling -- TODO confirm).
+        xm = K.reshape(x_crop, new_shape)
+        return K.max(xm, axis=spatial_axes)
 
 def Spp():
 

diff --git a/spp_deep_network.py b/spp_deep_network.py
@@ -0,0 +1,43 @@
+CUSTOM_OUTPUT_CATEGORIES = 2
+
+def Spp():
+    '''Build the convolutional network with a spatial pyramid pooling head.
+
+    Four Convolution2D/MaxPooling2D stages feed a SpatialPyramidPooling([1, 2, 4])
+    layer, followed by two 4096-unit dense layers with dropout and a softmax
+    over CUSTOM_OUTPUT_CATEGORIES classes.
+
+    # Returns
+        The assembled `Sequential` model (not compiled here).
+    '''
+    # This revision of the module has no top-level Keras model imports, so the
+    # names are imported here to keep the function callable.
+    # SpatialPyramidPooling must still be provided by the enclosing module.
+    from keras.layers import Activation, Convolution2D, Dense, Dropout, MaxPooling2D
+    from keras.models import Sequential
+
+    # Uses theano (channels-first) ordering. The image size is left as None to
+    # allow multiple image sizes.
+    model = Sequential()
+
+    model.add(Convolution2D(96, 11, 11, border_mode='same', input_shape=(3, None, None), activation='relu'))
+    model.add(MaxPooling2D(pool_size=(2, 2)))
+
+    model.add(Convolution2D(32, 3, 3, activation='relu'))
+    model.add(MaxPooling2D(pool_size=(2, 2)))
+
+    model.add(Convolution2D(64, 3, 3, border_mode='same', activation='relu'))
+    model.add(MaxPooling2D(pool_size=(2, 2)))
+
+    model.add(Convolution2D(64, 3, 3, activation='relu'))
+    model.add(MaxPooling2D(pool_size=(2, 2)))
+
+    model.add(SpatialPyramidPooling([1, 2, 4]))
+
+    model.add(Dense(4096, activation='relu', name='dense_1'))
+    model.add(Dropout(0.5))
+    model.add(Dense(4096, activation='relu', name='dense_2'))
+    model.add(Dropout(0.5))
+    model.add(Dense(CUSTOM_OUTPUT_CATEGORIES, name='dense_3'))
+    model.add(Activation('softmax'))
+    return model