Created
June 12, 2017 08:57
-
-
Save Bartzi/d21426edfca1d2ce70d91fc918d41eea to your computer and use it in GitHub Desktop.
Bilinear Interpolation with Chainer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from chainer import Function | |
| from chainer import cuda | |
| from chainer.utils import type_check, force_array | |
| def cupy_place(arr, mask, val): | |
| _place_kernel = cuda.cupy.ElementwiseKernel( | |
| 'T a, S mask, T val', | |
| 'T out', | |
| 'out = mask ? val : a', | |
| 'cupy_place') | |
| val = cuda.cupy.array(val, dtype=arr.dtype) | |
| out = cuda.cupy.empty_like(arr) | |
| return _place_kernel(arr, mask, val, out) | |
| def apply_with_mask(arr, mask, elementwise_func, *args): | |
| value = elementwise_func(arr, *args) | |
| return cupy_place(arr, mask, value) | |
| class BilinearInterpolation(Function): | |
| def __init__(self, out_height, out_width): | |
| self.out_height = out_height | |
| self.out_width = out_width | |
| self.x_max = None | |
| self.y_max = None | |
| self.x_min = None | |
| self.y_min = None | |
| def check_type_forward(self, in_types): | |
| type_check.expect(in_types.size() == 2) | |
| x_type, points_type = in_types | |
| type_check.expect( | |
| x_type.ndim == 3, | |
| x_type.dtype.kind == 'f', | |
| points_type.ndim == 3, | |
| points_type.dtype.kind == 'f', | |
| points_type.shape[-1] == self.out_width * self.out_height, | |
| ) | |
| def forward_cpu(self, inputs): | |
| xp = cuda.get_array_module(*inputs) | |
| U, points = inputs | |
| batch_size, height, width = U.shape | |
| # resize grid to be in image dimensions | |
| points = (points + 1) / 2 | |
| points[:, 0] *= width | |
| points[:, 1] *= height | |
| x_s = points[:, 0] | |
| y_s = points[:, 1] | |
| # determine min indices | |
| self.x_min = xp.clip(xp.floor(x_s * 0.999999), 0, width - 1).astype(xp.int32) | |
| self.y_min = xp.clip(xp.floor(y_s * 0.999999), 0, width - 1).astype(xp.int32) | |
| # determine max indices | |
| self.x_max = xp.clip((self.x_min + 1), 0, width - 1).astype(xp.int32) | |
| self.y_max = xp.clip((self.y_min + 1), 0, height - 1).astype(xp.int32) | |
| # remove points outside of image | |
| x_invalid = xp.logical_or(x_s < 0, (width - 1) < x_s) | |
| y_invalid = xp.logical_or(y_s < 0, (height - 1) < y_s) | |
| self.invalid = xp.logical_or(x_invalid, y_invalid) | |
| self.left_weight = 1.0 - (x_s - self.x_min).astype(U.dtype) | |
| self.top_weight = 1.0 - (y_s - self.y_min).astype(U.dtype) | |
| self.right_weight = 1.0 - self.left_weight.astype(U.dtype) | |
| self.bottom_weight = 1.0 - self.top_weight.astype(U.dtype) | |
| self.left_weight[self.invalid] = 0 | |
| self.top_weight[self.invalid] = 0 | |
| self.right_weight[self.invalid] = 0 | |
| self.bottom_weight[self.invalid] = 0 | |
| batch_axis = xp.expand_dims(xp.arange(batch_size), 1) | |
| top_interpolated = self.left_weight * U[batch_axis, self.y_min, self.x_min] + self.right_weight * U[batch_axis, self.y_min, self.x_max] | |
| bottom_interpolated = self.left_weight * U[batch_axis, self.y_max, self.x_min] + self.right_weight * U[batch_axis, self.y_max, self.x_max] | |
| V = self.top_weight * top_interpolated + self.bottom_weight * bottom_interpolated | |
| V = V.reshape((batch_size, self.out_height, self.out_width)) | |
| return force_array(V, dtype=U.dtype), | |
| def forward_gpu(self, inputs): | |
| xp = cuda.get_array_module(*inputs) | |
| U, points = inputs | |
| batch_size, height, width = U.shape | |
| # resize grid to be in image dimensions | |
| points = (points + 1) / 2 | |
| points[:, 0] *= width | |
| points[:, 1] *= height | |
| x_s = points[:, 0] | |
| y_s = points[:, 1] | |
| # move points on boundary a little bit inward to ensure correct computation | |
| # apply masking twice in order to avoid race conditions | |
| x_s_copy = x_s.copy() | |
| y_s_copy = y_s.copy() | |
| on_boundary = (x_s == 0) | |
| replacement = apply_with_mask(x_s_copy, on_boundary, xp.nextafter, xp.array(1.0).astype(x_s.dtype)) | |
| x_s = cupy_place(x_s_copy, on_boundary, replacement) | |
| on_boundary = (x_s == width - 1) | |
| replacement = apply_with_mask(x_s_copy, on_boundary, xp.nextafter, xp.array(0.0).astype(x_s.dtype)) | |
| x_s = cupy_place(x_s_copy, on_boundary, replacement) | |
| on_boundary = (y_s == 0) | |
| replacement = apply_with_mask(y_s_copy, on_boundary, xp.nextafter, xp.array(1.0).astype(y_s.dtype)) | |
| y_s = cupy_place(y_s_copy, on_boundary, replacement) | |
| on_boundary = (y_s == width - 1) | |
| replacement = apply_with_mask(y_s_copy, on_boundary, xp.nextafter, xp.array(0.0).astype(y_s.dtype)) | |
| y_s = cupy_place(y_s_copy, on_boundary, replacement) | |
| # determine min indices | |
| self.x_min = xp.clip(xp.floor(x_s), 0, width - 1).astype(xp.int32) | |
| self.y_min = xp.clip(xp.floor(y_s), 0, width - 1).astype(xp.int32) | |
| # determine max indices | |
| self.x_max = xp.clip((self.x_min + 1), 0, width - 1).astype(xp.int32) | |
| self.y_max = xp.clip((self.y_min + 1), 0, height - 1).astype(xp.int32) | |
| # remove points outside of image | |
| x_invalid = xp.logical_or(x_s < 0, (width - 1) < x_s) | |
| y_invalid = xp.logical_or(y_s < 0, (height - 1) < y_s) | |
| self.invalid = xp.logical_or(x_invalid, y_invalid) | |
| self.left_weight = 1.0 - (x_s - self.x_min).astype(U.dtype) | |
| self.top_weight = 1.0 - (y_s - self.y_min).astype(U.dtype) | |
| self.right_weight = 1.0 - self.left_weight.astype(U.dtype) | |
| self.bottom_weight = 1.0 - self.top_weight.astype(U.dtype) | |
| replacement = xp.zeros_like(self.invalid, dtype=U.dtype) | |
| self.left_weight = cupy_place(self.left_weight.copy(), self.invalid, replacement.copy()) | |
| self.top_weight = cupy_place(self.top_weight.copy(), self.invalid, replacement.copy()) | |
| self.right_weight = cupy_place(self.right_weight.copy(), self.invalid, replacement.copy()) | |
| self.bottom_weight = cupy_place(self.bottom_weight.copy(), self.invalid, replacement.copy()) | |
| interpolate_kernel = cuda.cupy.ElementwiseKernel( | |
| 'raw T U, T left, T right, T top, T bottom, int32 x_min, int32 y_min, int32 x_max, int32 y_max, int32 pixels_per_batch, int32 width, int32 height', | |
| 'T V', | |
| ''' | |
| int batchNum = i / pixels_per_batch; | |
| int batchOffset = batchNum * width * height; | |
| T top_left = left * U[batchOffset + y_min * width + x_min]; | |
| T top_right = right * U[batchOffset + y_min * width + x_max]; | |
| T bottom_left = left * U[batchOffset + y_max * width + x_min]; | |
| T bottom_right = right * U[batchOffset + y_max * width + x_max]; | |
| V = top * (top_left + top_right) + bottom * (bottom_left + bottom_right); | |
| ''', | |
| 'interpolation_kernel' | |
| ) | |
| V = interpolate_kernel( | |
| xp.ravel(U), | |
| self.left_weight, | |
| self.right_weight, | |
| self.top_weight, | |
| self.bottom_weight, | |
| self.x_min, | |
| self.y_min, | |
| self.x_max, | |
| self.y_max, | |
| self.out_height * self.out_width, | |
| width, | |
| height, | |
| ) | |
| V = V.reshape((batch_size, self.out_height, self.out_width)) | |
| return force_array(V, dtype=U.dtype), | |
| def backward_cpu(self, inputs, gradients): | |
| xp = cuda.get_array_module(*inputs) | |
| U, points = inputs | |
| gV = gradients[0] | |
| batch_size, height, width = U.shape | |
| flattened_gV = xp.reshape(xp.ravel(gV), (batch_size, -1)) | |
| gU = xp.zeros((batch_size, height * width), dtype=U.dtype) | |
| batch_axis = xp.expand_dims(xp.arange(batch_size), 1) | |
| U_tl = U[batch_axis, self.y_min, self.x_min] | |
| U_tr = U[batch_axis, self.y_min, self.x_max] | |
| U_bl = U[batch_axis, self.y_max, self.x_min] | |
| U_br = U[batch_axis, self.y_max, self.x_max] | |
| gx = self.top_weight * (U_tr - U_tl) + self.bottom_weight * (U_br - U_bl) | |
| gy = self.left_weight * (U_bl - U_tl) + self.right_weight * (U_br - U_tr) | |
| gx = gx * flattened_gV | |
| gy = gy * flattened_gV | |
| gx = xp.expand_dims(gx, 1) | |
| gy = xp.expand_dims(gy, 1) | |
| gpoints = xp.hstack((gx, gy)) | |
| top_left_indices = width * self.y_min + self.x_min | |
| top_right_indices = width * self.y_min + self.x_max | |
| bottom_left_indices = width * self.y_max + self.x_min | |
| bottom_right_indices = width * self.y_max + self.x_max | |
| indices = xp.hstack((top_left_indices, top_right_indices, bottom_left_indices, bottom_right_indices)) | |
| top_left_weight = self.top_weight * self.left_weight * flattened_gV | |
| top_right_weight = self.top_weight * self.right_weight * flattened_gV | |
| bottom_left_weight = self.bottom_weight * self.left_weight * flattened_gV | |
| bottom_right_weight = self.bottom_weight * self.right_weight * flattened_gV | |
| weights = xp.hstack((top_left_weight, top_right_weight, bottom_left_weight, bottom_right_weight)) | |
| for b in range(batch_size): | |
| gU[b] = xp.bincount(indices[b], weights=weights[b], minlength=height * width) | |
| return force_array(gU.reshape(*U.shape), dtype=gV.dtype), force_array(gpoints.reshape(*points.shape), dtype=points.dtype), | |
| def backward_gpu(self, inputs, gradients): | |
| xp = cuda.get_array_module(*inputs) | |
| U, points = inputs | |
| batch_size, height, width = U.shape | |
| gV, = gradients | |
| flattened_gV = xp.reshape(xp.ravel(gV), (batch_size, -1)) | |
| batch_indices = xp.asarray(xp.repeat(xp.arange(batch_size, dtype=xp.int32), points.shape[-1])) | |
| top_left_indices = batch_indices * (width * height) + self.y_min.ravel() * width + self.x_min.ravel() | |
| top_right_indices = batch_indices * (width * height) + self.y_min.ravel() * width + self.x_max.ravel() | |
| bottom_left_indices = batch_indices * (width * height) + self.y_max.ravel() * width + self.x_min.ravel() | |
| bottom_right_indices = batch_indices * (width * height) + self.y_max.ravel() * width + self.x_max.ravel() | |
| U_tl = xp.take(U, top_left_indices).reshape((batch_size, -1)) | |
| U_tr = xp.take(U, top_right_indices).reshape((batch_size, -1)) | |
| U_bl = xp.take(U, bottom_left_indices).reshape((batch_size, -1)) | |
| U_br = xp.take(U, bottom_right_indices).reshape((batch_size, -1)) | |
| gx = self.top_weight * (U_tr - U_tl) + self.bottom_weight * (U_br - U_bl) | |
| gy = self.left_weight * (U_bl - U_tl) + self.right_weight * (U_br - U_tr) | |
| gx = xp.expand_dims(gx * flattened_gV, 1) | |
| gy = xp.expand_dims(gy * flattened_gV, 1) | |
| gpoints = xp.hstack((gx, gy)) | |
| top_left_weight = self.top_weight * self.left_weight * flattened_gV | |
| top_right_weight = self.top_weight * self.right_weight * flattened_gV | |
| bottom_left_weight = self.bottom_weight * self.left_weight * flattened_gV | |
| bottom_right_weight = self.bottom_weight * self.right_weight * flattened_gV | |
| gU_kernel = cuda.cupy.ElementwiseKernel( | |
| 'int32 elements_per_batch, int32 width, int32 height, T top_left_weight, T top_right_weight, T bottom_left_weight, T bottom_right_weight, int32 x_min, int32 x_max, int32 y_min, int32 y_max', | |
| 'raw T gU', | |
| ''' | |
| int batchNum = i / elements_per_batch; | |
| int batchOffset = batchNum * width * height; | |
| int index = (batchOffset + y_min * width + x_min); | |
| atomicAdd(&gU[index], top_left_weight); | |
| index = (batchOffset + y_min * width + x_max); | |
| atomicAdd(&gU[index], top_right_weight); | |
| index = (batchOffset + y_max * width + x_min); | |
| atomicAdd(&gU[index], bottom_left_weight); | |
| index = (batchOffset + y_max * width + x_max); | |
| atomicAdd(&gU[index], bottom_right_weight); | |
| ''', | |
| 'compute_gU_bilinear_interpolation', | |
| ) | |
| gU = xp.zeros_like(U, dtype=U.dtype) | |
| gU = gU_kernel(self.out_width * self.out_height, width, height, top_left_weight, top_right_weight, bottom_left_weight, bottom_right_weight, self.x_min, self.x_max, self.y_min, self.y_max, gU) | |
| return force_array(gU.reshape(*U.shape), dtype=U.dtype), force_array(gpoints, dtype=points.dtype) | |
| def bilinear_interpolation(batch, points_s, out_height, out_width): | |
| return BilinearInterpolation(out_height, out_width)(batch, points_s) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment