Bartzi · June 12, 2017 08:57 · Jun 12, 2017
diff --git a/bilinear_interpolation.py b/bilinear_interpolation.py
@@ -0,0 +1,285 @@
+from chainer import Function
+from chainer import cuda
+from chainer.utils import type_check, force_array
+
+
+def cupy_place(arr, mask, val):
+    _place_kernel = cuda.cupy.ElementwiseKernel(
+        'T a, S mask, T val',
+        'T out',
+        'out = mask ? val : a',
+        'cupy_place')
+
+    val = cuda.cupy.array(val, dtype=arr.dtype)
+    out = cuda.cupy.empty_like(arr)
+    return _place_kernel(arr, mask, val, out)
+
+
+def apply_with_mask(arr, mask, elementwise_func, *args):
+    value = elementwise_func(arr, *args)
+    return cupy_place(arr, mask, value)
+
+
+class BilinearInterpolation(Function):
+
+    def __init__(self, out_height, out_width):
+        self.out_height = out_height
+        self.out_width = out_width
+        self.x_max = None
+        self.y_max = None
+        self.x_min = None
+        self.y_min = None
+
+    def check_type_forward(self, in_types):
+        type_check.expect(in_types.size() == 2)
+        x_type, points_type = in_types
+        type_check.expect(
+            x_type.ndim == 3,
+            x_type.dtype.kind == 'f',
+            points_type.ndim == 3,
+            points_type.dtype.kind == 'f',
+            points_type.shape[-1] == self.out_width * self.out_height,
+        )
+
+    def forward_cpu(self, inputs):
+        xp = cuda.get_array_module(*inputs)
+        U, points = inputs
+        batch_size, height, width = U.shape
+
+        # resize grid to be in image dimensions
+        points = (points + 1) / 2
+        points[:, 0] *= width
+        points[:, 1] *= height
+
+        x_s = points[:, 0]
+        y_s = points[:, 1]
+
+        # determine min indices
+        self.x_min = xp.clip(xp.floor(x_s * 0.999999), 0, width - 1).astype(xp.int32)
+        self.y_min = xp.clip(xp.floor(y_s * 0.999999), 0, width - 1).astype(xp.int32)
+
+        # determine max indices
+        self.x_max = xp.clip((self.x_min + 1), 0, width - 1).astype(xp.int32)
+        self.y_max = xp.clip((self.y_min + 1), 0, height - 1).astype(xp.int32)
+
+        # remove points outside of image
+        x_invalid = xp.logical_or(x_s < 0, (width - 1) < x_s)
+        y_invalid = xp.logical_or(y_s < 0, (height - 1) < y_s)
+        self.invalid = xp.logical_or(x_invalid, y_invalid)
+
+        self.left_weight = 1.0 - (x_s - self.x_min).astype(U.dtype)
+        self.top_weight = 1.0 - (y_s - self.y_min).astype(U.dtype)
+        self.right_weight = 1.0 - self.left_weight.astype(U.dtype)
+        self.bottom_weight = 1.0 - self.top_weight.astype(U.dtype)
+
+        self.left_weight[self.invalid] = 0
+        self.top_weight[self.invalid] = 0
+        self.right_weight[self.invalid] = 0
+        self.bottom_weight[self.invalid] = 0
+
+        batch_axis = xp.expand_dims(xp.arange(batch_size), 1)
+        top_interpolated = self.left_weight * U[batch_axis, self.y_min, self.x_min] + self.right_weight * U[batch_axis, self.y_min, self.x_max]
+        bottom_interpolated = self.left_weight * U[batch_axis, self.y_max, self.x_min] + self.right_weight * U[batch_axis, self.y_max, self.x_max]
+
+        V = self.top_weight * top_interpolated + self.bottom_weight * bottom_interpolated
+        V = V.reshape((batch_size, self.out_height, self.out_width))
+
+        return force_array(V, dtype=U.dtype),
+
+    def forward_gpu(self, inputs):
+        xp = cuda.get_array_module(*inputs)
+        U, points = inputs
+        batch_size, height, width = U.shape
+
+        # resize grid to be in image dimensions
+        points = (points + 1) / 2
+        points[:, 0] *= width
+        points[:, 1] *= height
+
+        x_s = points[:, 0]
+        y_s = points[:, 1]
+
+        # move points on boundary a little bit inward to ensure correct computation
+        # apply masking twice in order to avoid race conditions
+        x_s_copy = x_s.copy()
+        y_s_copy = y_s.copy()
+
+        on_boundary = (x_s == 0)
+        replacement = apply_with_mask(x_s_copy, on_boundary, xp.nextafter, xp.array(1.0).astype(x_s.dtype))
+        x_s = cupy_place(x_s_copy, on_boundary, replacement)
+
+        on_boundary = (x_s == width - 1)
+        replacement = apply_with_mask(x_s_copy, on_boundary, xp.nextafter, xp.array(0.0).astype(x_s.dtype))
+        x_s = cupy_place(x_s_copy, on_boundary, replacement)
+
+        on_boundary = (y_s == 0)
+        replacement = apply_with_mask(y_s_copy, on_boundary, xp.nextafter, xp.array(1.0).astype(y_s.dtype))
+        y_s = cupy_place(y_s_copy, on_boundary, replacement)
+
+        on_boundary = (y_s == width - 1)
+        replacement = apply_with_mask(y_s_copy, on_boundary, xp.nextafter, xp.array(0.0).astype(y_s.dtype))
+        y_s = cupy_place(y_s_copy, on_boundary, replacement)
+
+        # determine min indices
+        self.x_min = xp.clip(xp.floor(x_s), 0, width - 1).astype(xp.int32)
+        self.y_min = xp.clip(xp.floor(y_s), 0, width - 1).astype(xp.int32)
+
+        # determine max indices
+        self.x_max = xp.clip((self.x_min + 1), 0, width - 1).astype(xp.int32)
+        self.y_max = xp.clip((self.y_min + 1), 0, height - 1).astype(xp.int32)
+
+        # remove points outside of image
+        x_invalid = xp.logical_or(x_s < 0, (width - 1) < x_s)
+        y_invalid = xp.logical_or(y_s < 0, (height - 1) < y_s)
+        self.invalid = xp.logical_or(x_invalid, y_invalid)
+
+        self.left_weight = 1.0 - (x_s - self.x_min).astype(U.dtype)
+        self.top_weight = 1.0 - (y_s - self.y_min).astype(U.dtype)
+        self.right_weight = 1.0 - self.left_weight.astype(U.dtype)
+        self.bottom_weight = 1.0 - self.top_weight.astype(U.dtype)
+
+        replacement = xp.zeros_like(self.invalid, dtype=U.dtype)
+        self.left_weight = cupy_place(self.left_weight.copy(), self.invalid, replacement.copy())
+        self.top_weight = cupy_place(self.top_weight.copy(), self.invalid, replacement.copy())
+        self.right_weight = cupy_place(self.right_weight.copy(), self.invalid, replacement.copy())
+        self.bottom_weight = cupy_place(self.bottom_weight.copy(), self.invalid, replacement.copy())
+
+        interpolate_kernel = cuda.cupy.ElementwiseKernel(
+            'raw T U, T left, T right, T top, T bottom, int32 x_min, int32 y_min, int32 x_max, int32 y_max, int32 pixels_per_batch, int32 width, int32 height',
+            'T V',
+            '''
+                int batchNum = i / pixels_per_batch;
+                int batchOffset = batchNum * width * height;
+
+                T top_left = left * U[batchOffset + y_min * width + x_min];
+                T top_right = right * U[batchOffset + y_min * width + x_max];
+                T bottom_left = left * U[batchOffset + y_max * width + x_min];
+                T bottom_right = right * U[batchOffset + y_max * width + x_max];
+
+                V = top * (top_left + top_right) + bottom * (bottom_left + bottom_right);
+            ''',
+            'interpolation_kernel'
+        )
+
+        V = interpolate_kernel(
+            xp.ravel(U),
+            self.left_weight,
+            self.right_weight,
+            self.top_weight,
+            self.bottom_weight,
+            self.x_min,
+            self.y_min,
+            self.x_max,
+            self.y_max,
+            self.out_height * self.out_width,
+            width,
+            height,
+        )
+
+        V = V.reshape((batch_size, self.out_height, self.out_width))
+        return force_array(V, dtype=U.dtype),
+
+    def backward_cpu(self, inputs, gradients):
+        xp = cuda.get_array_module(*inputs)
+        U, points = inputs
+        gV = gradients[0]
+
+        batch_size, height, width = U.shape
+        flattened_gV = xp.reshape(xp.ravel(gV), (batch_size, -1))
+
+        gU = xp.zeros((batch_size, height * width), dtype=U.dtype)
+
+        batch_axis = xp.expand_dims(xp.arange(batch_size), 1)
+
+        U_tl = U[batch_axis, self.y_min, self.x_min]
+        U_tr = U[batch_axis, self.y_min, self.x_max]
+        U_bl = U[batch_axis, self.y_max, self.x_min]
+        U_br = U[batch_axis, self.y_max, self.x_max]
+
+        gx = self.top_weight * (U_tr - U_tl) + self.bottom_weight * (U_br - U_bl)
+        gy = self.left_weight * (U_bl - U_tl) + self.right_weight * (U_br - U_tr)
+        gx = gx * flattened_gV
+        gy = gy * flattened_gV
+
+        gx = xp.expand_dims(gx, 1)
+        gy = xp.expand_dims(gy, 1)
+        gpoints = xp.hstack((gx, gy))
+
+        top_left_indices = width * self.y_min + self.x_min
+        top_right_indices = width * self.y_min + self.x_max
+        bottom_left_indices = width * self.y_max + self.x_min
+        bottom_right_indices = width * self.y_max + self.x_max
+        indices = xp.hstack((top_left_indices, top_right_indices, bottom_left_indices, bottom_right_indices))
+
+        top_left_weight = self.top_weight * self.left_weight * flattened_gV
+        top_right_weight = self.top_weight * self.right_weight * flattened_gV
+        bottom_left_weight = self.bottom_weight * self.left_weight * flattened_gV
+        bottom_right_weight = self.bottom_weight * self.right_weight * flattened_gV
+        weights = xp.hstack((top_left_weight, top_right_weight, bottom_left_weight, bottom_right_weight))
+
+        for b in range(batch_size):
+            gU[b] = xp.bincount(indices[b], weights=weights[b], minlength=height * width)
+
+        return force_array(gU.reshape(*U.shape), dtype=gV.dtype), force_array(gpoints.reshape(*points.shape), dtype=points.dtype),
+
+    def backward_gpu(self, inputs, gradients):
+        xp = cuda.get_array_module(*inputs)
+        U, points = inputs
+        batch_size, height, width = U.shape
+
+        gV, = gradients
+        flattened_gV = xp.reshape(xp.ravel(gV), (batch_size, -1))
+
+        batch_indices = xp.asarray(xp.repeat(xp.arange(batch_size, dtype=xp.int32), points.shape[-1]))
+        top_left_indices = batch_indices * (width * height) + self.y_min.ravel() * width + self.x_min.ravel()
+        top_right_indices = batch_indices * (width * height) + self.y_min.ravel() * width + self.x_max.ravel()
+        bottom_left_indices = batch_indices * (width * height) + self.y_max.ravel() * width + self.x_min.ravel()
+        bottom_right_indices = batch_indices * (width * height) + self.y_max.ravel() * width + self.x_max.ravel()
+
+        U_tl = xp.take(U, top_left_indices).reshape((batch_size, -1))
+        U_tr = xp.take(U, top_right_indices).reshape((batch_size, -1))
+        U_bl = xp.take(U, bottom_left_indices).reshape((batch_size, -1))
+        U_br = xp.take(U, bottom_right_indices).reshape((batch_size, -1))
+
+        gx = self.top_weight * (U_tr - U_tl) + self.bottom_weight * (U_br - U_bl)
+        gy = self.left_weight * (U_bl - U_tl) + self.right_weight * (U_br - U_tr)
+
+        gx = xp.expand_dims(gx * flattened_gV, 1)
+        gy = xp.expand_dims(gy * flattened_gV, 1)
+        gpoints = xp.hstack((gx, gy))
+
+        top_left_weight = self.top_weight * self.left_weight * flattened_gV
+        top_right_weight = self.top_weight * self.right_weight * flattened_gV
+        bottom_left_weight = self.bottom_weight * self.left_weight * flattened_gV
+        bottom_right_weight = self.bottom_weight * self.right_weight * flattened_gV
+
+        gU_kernel = cuda.cupy.ElementwiseKernel(
+            'int32 elements_per_batch, int32 width, int32 height, T top_left_weight, T top_right_weight, T bottom_left_weight, T bottom_right_weight, int32 x_min, int32 x_max, int32 y_min, int32 y_max',
+            'raw T gU',
+            '''
+                int batchNum = i / elements_per_batch;
+                int batchOffset = batchNum * width * height;
+
+                int index = (batchOffset + y_min * width + x_min);
+                atomicAdd(&gU[index], top_left_weight);
+
+                index = (batchOffset + y_min * width + x_max);
+                atomicAdd(&gU[index], top_right_weight);
+                
+                index = (batchOffset + y_max * width + x_min);
+                atomicAdd(&gU[index], bottom_left_weight);
+
+                index = (batchOffset + y_max * width + x_max);
+                atomicAdd(&gU[index], bottom_right_weight);
+            ''',
+            'compute_gU_bilinear_interpolation',
+        )
+
+        gU = xp.zeros_like(U, dtype=U.dtype)
+        gU = gU_kernel(self.out_width * self.out_height, width, height, top_left_weight, top_right_weight, bottom_left_weight, bottom_right_weight, self.x_min, self.x_max, self.y_min, self.y_max, gU)
+
+        return force_array(gU.reshape(*U.shape), dtype=U.dtype), force_array(gpoints, dtype=points.dtype)
+
+
+def bilinear_interpolation(batch, points_s, out_height, out_width):
+    return BilinearInterpolation(out_height, out_width)(batch, points_s)
No results found