Skip to content

Instantly share code, notes, and snippets.

@Bartzi
Created June 12, 2017 08:57
Show Gist options
  • Select an option

  • Save Bartzi/d21426edfca1d2ce70d91fc918d41eea to your computer and use it in GitHub Desktop.

Select an option

Save Bartzi/d21426edfca1d2ce70d91fc918d41eea to your computer and use it in GitHub Desktop.
Bilinear Interpolation with Chainer
from chainer import Function
from chainer import cuda
from chainer.utils import type_check, force_array
def cupy_place(arr, mask, val):
_place_kernel = cuda.cupy.ElementwiseKernel(
'T a, S mask, T val',
'T out',
'out = mask ? val : a',
'cupy_place')
val = cuda.cupy.array(val, dtype=arr.dtype)
out = cuda.cupy.empty_like(arr)
return _place_kernel(arr, mask, val, out)
def apply_with_mask(arr, mask, elementwise_func, *args):
value = elementwise_func(arr, *args)
return cupy_place(arr, mask, value)
class BilinearInterpolation(Function):
def __init__(self, out_height, out_width):
self.out_height = out_height
self.out_width = out_width
self.x_max = None
self.y_max = None
self.x_min = None
self.y_min = None
def check_type_forward(self, in_types):
type_check.expect(in_types.size() == 2)
x_type, points_type = in_types
type_check.expect(
x_type.ndim == 3,
x_type.dtype.kind == 'f',
points_type.ndim == 3,
points_type.dtype.kind == 'f',
points_type.shape[-1] == self.out_width * self.out_height,
)
def forward_cpu(self, inputs):
xp = cuda.get_array_module(*inputs)
U, points = inputs
batch_size, height, width = U.shape
# resize grid to be in image dimensions
points = (points + 1) / 2
points[:, 0] *= width
points[:, 1] *= height
x_s = points[:, 0]
y_s = points[:, 1]
# determine min indices
self.x_min = xp.clip(xp.floor(x_s * 0.999999), 0, width - 1).astype(xp.int32)
self.y_min = xp.clip(xp.floor(y_s * 0.999999), 0, width - 1).astype(xp.int32)
# determine max indices
self.x_max = xp.clip((self.x_min + 1), 0, width - 1).astype(xp.int32)
self.y_max = xp.clip((self.y_min + 1), 0, height - 1).astype(xp.int32)
# remove points outside of image
x_invalid = xp.logical_or(x_s < 0, (width - 1) < x_s)
y_invalid = xp.logical_or(y_s < 0, (height - 1) < y_s)
self.invalid = xp.logical_or(x_invalid, y_invalid)
self.left_weight = 1.0 - (x_s - self.x_min).astype(U.dtype)
self.top_weight = 1.0 - (y_s - self.y_min).astype(U.dtype)
self.right_weight = 1.0 - self.left_weight.astype(U.dtype)
self.bottom_weight = 1.0 - self.top_weight.astype(U.dtype)
self.left_weight[self.invalid] = 0
self.top_weight[self.invalid] = 0
self.right_weight[self.invalid] = 0
self.bottom_weight[self.invalid] = 0
batch_axis = xp.expand_dims(xp.arange(batch_size), 1)
top_interpolated = self.left_weight * U[batch_axis, self.y_min, self.x_min] + self.right_weight * U[batch_axis, self.y_min, self.x_max]
bottom_interpolated = self.left_weight * U[batch_axis, self.y_max, self.x_min] + self.right_weight * U[batch_axis, self.y_max, self.x_max]
V = self.top_weight * top_interpolated + self.bottom_weight * bottom_interpolated
V = V.reshape((batch_size, self.out_height, self.out_width))
return force_array(V, dtype=U.dtype),
def forward_gpu(self, inputs):
xp = cuda.get_array_module(*inputs)
U, points = inputs
batch_size, height, width = U.shape
# resize grid to be in image dimensions
points = (points + 1) / 2
points[:, 0] *= width
points[:, 1] *= height
x_s = points[:, 0]
y_s = points[:, 1]
# move points on boundary a little bit inward to ensure correct computation
# apply masking twice in order to avoid race conditions
x_s_copy = x_s.copy()
y_s_copy = y_s.copy()
on_boundary = (x_s == 0)
replacement = apply_with_mask(x_s_copy, on_boundary, xp.nextafter, xp.array(1.0).astype(x_s.dtype))
x_s = cupy_place(x_s_copy, on_boundary, replacement)
on_boundary = (x_s == width - 1)
replacement = apply_with_mask(x_s_copy, on_boundary, xp.nextafter, xp.array(0.0).astype(x_s.dtype))
x_s = cupy_place(x_s_copy, on_boundary, replacement)
on_boundary = (y_s == 0)
replacement = apply_with_mask(y_s_copy, on_boundary, xp.nextafter, xp.array(1.0).astype(y_s.dtype))
y_s = cupy_place(y_s_copy, on_boundary, replacement)
on_boundary = (y_s == width - 1)
replacement = apply_with_mask(y_s_copy, on_boundary, xp.nextafter, xp.array(0.0).astype(y_s.dtype))
y_s = cupy_place(y_s_copy, on_boundary, replacement)
# determine min indices
self.x_min = xp.clip(xp.floor(x_s), 0, width - 1).astype(xp.int32)
self.y_min = xp.clip(xp.floor(y_s), 0, width - 1).astype(xp.int32)
# determine max indices
self.x_max = xp.clip((self.x_min + 1), 0, width - 1).astype(xp.int32)
self.y_max = xp.clip((self.y_min + 1), 0, height - 1).astype(xp.int32)
# remove points outside of image
x_invalid = xp.logical_or(x_s < 0, (width - 1) < x_s)
y_invalid = xp.logical_or(y_s < 0, (height - 1) < y_s)
self.invalid = xp.logical_or(x_invalid, y_invalid)
self.left_weight = 1.0 - (x_s - self.x_min).astype(U.dtype)
self.top_weight = 1.0 - (y_s - self.y_min).astype(U.dtype)
self.right_weight = 1.0 - self.left_weight.astype(U.dtype)
self.bottom_weight = 1.0 - self.top_weight.astype(U.dtype)
replacement = xp.zeros_like(self.invalid, dtype=U.dtype)
self.left_weight = cupy_place(self.left_weight.copy(), self.invalid, replacement.copy())
self.top_weight = cupy_place(self.top_weight.copy(), self.invalid, replacement.copy())
self.right_weight = cupy_place(self.right_weight.copy(), self.invalid, replacement.copy())
self.bottom_weight = cupy_place(self.bottom_weight.copy(), self.invalid, replacement.copy())
interpolate_kernel = cuda.cupy.ElementwiseKernel(
'raw T U, T left, T right, T top, T bottom, int32 x_min, int32 y_min, int32 x_max, int32 y_max, int32 pixels_per_batch, int32 width, int32 height',
'T V',
'''
int batchNum = i / pixels_per_batch;
int batchOffset = batchNum * width * height;
T top_left = left * U[batchOffset + y_min * width + x_min];
T top_right = right * U[batchOffset + y_min * width + x_max];
T bottom_left = left * U[batchOffset + y_max * width + x_min];
T bottom_right = right * U[batchOffset + y_max * width + x_max];
V = top * (top_left + top_right) + bottom * (bottom_left + bottom_right);
''',
'interpolation_kernel'
)
V = interpolate_kernel(
xp.ravel(U),
self.left_weight,
self.right_weight,
self.top_weight,
self.bottom_weight,
self.x_min,
self.y_min,
self.x_max,
self.y_max,
self.out_height * self.out_width,
width,
height,
)
V = V.reshape((batch_size, self.out_height, self.out_width))
return force_array(V, dtype=U.dtype),
def backward_cpu(self, inputs, gradients):
xp = cuda.get_array_module(*inputs)
U, points = inputs
gV = gradients[0]
batch_size, height, width = U.shape
flattened_gV = xp.reshape(xp.ravel(gV), (batch_size, -1))
gU = xp.zeros((batch_size, height * width), dtype=U.dtype)
batch_axis = xp.expand_dims(xp.arange(batch_size), 1)
U_tl = U[batch_axis, self.y_min, self.x_min]
U_tr = U[batch_axis, self.y_min, self.x_max]
U_bl = U[batch_axis, self.y_max, self.x_min]
U_br = U[batch_axis, self.y_max, self.x_max]
gx = self.top_weight * (U_tr - U_tl) + self.bottom_weight * (U_br - U_bl)
gy = self.left_weight * (U_bl - U_tl) + self.right_weight * (U_br - U_tr)
gx = gx * flattened_gV
gy = gy * flattened_gV
gx = xp.expand_dims(gx, 1)
gy = xp.expand_dims(gy, 1)
gpoints = xp.hstack((gx, gy))
top_left_indices = width * self.y_min + self.x_min
top_right_indices = width * self.y_min + self.x_max
bottom_left_indices = width * self.y_max + self.x_min
bottom_right_indices = width * self.y_max + self.x_max
indices = xp.hstack((top_left_indices, top_right_indices, bottom_left_indices, bottom_right_indices))
top_left_weight = self.top_weight * self.left_weight * flattened_gV
top_right_weight = self.top_weight * self.right_weight * flattened_gV
bottom_left_weight = self.bottom_weight * self.left_weight * flattened_gV
bottom_right_weight = self.bottom_weight * self.right_weight * flattened_gV
weights = xp.hstack((top_left_weight, top_right_weight, bottom_left_weight, bottom_right_weight))
for b in range(batch_size):
gU[b] = xp.bincount(indices[b], weights=weights[b], minlength=height * width)
return force_array(gU.reshape(*U.shape), dtype=gV.dtype), force_array(gpoints.reshape(*points.shape), dtype=points.dtype),
def backward_gpu(self, inputs, gradients):
xp = cuda.get_array_module(*inputs)
U, points = inputs
batch_size, height, width = U.shape
gV, = gradients
flattened_gV = xp.reshape(xp.ravel(gV), (batch_size, -1))
batch_indices = xp.asarray(xp.repeat(xp.arange(batch_size, dtype=xp.int32), points.shape[-1]))
top_left_indices = batch_indices * (width * height) + self.y_min.ravel() * width + self.x_min.ravel()
top_right_indices = batch_indices * (width * height) + self.y_min.ravel() * width + self.x_max.ravel()
bottom_left_indices = batch_indices * (width * height) + self.y_max.ravel() * width + self.x_min.ravel()
bottom_right_indices = batch_indices * (width * height) + self.y_max.ravel() * width + self.x_max.ravel()
U_tl = xp.take(U, top_left_indices).reshape((batch_size, -1))
U_tr = xp.take(U, top_right_indices).reshape((batch_size, -1))
U_bl = xp.take(U, bottom_left_indices).reshape((batch_size, -1))
U_br = xp.take(U, bottom_right_indices).reshape((batch_size, -1))
gx = self.top_weight * (U_tr - U_tl) + self.bottom_weight * (U_br - U_bl)
gy = self.left_weight * (U_bl - U_tl) + self.right_weight * (U_br - U_tr)
gx = xp.expand_dims(gx * flattened_gV, 1)
gy = xp.expand_dims(gy * flattened_gV, 1)
gpoints = xp.hstack((gx, gy))
top_left_weight = self.top_weight * self.left_weight * flattened_gV
top_right_weight = self.top_weight * self.right_weight * flattened_gV
bottom_left_weight = self.bottom_weight * self.left_weight * flattened_gV
bottom_right_weight = self.bottom_weight * self.right_weight * flattened_gV
gU_kernel = cuda.cupy.ElementwiseKernel(
'int32 elements_per_batch, int32 width, int32 height, T top_left_weight, T top_right_weight, T bottom_left_weight, T bottom_right_weight, int32 x_min, int32 x_max, int32 y_min, int32 y_max',
'raw T gU',
'''
int batchNum = i / elements_per_batch;
int batchOffset = batchNum * width * height;
int index = (batchOffset + y_min * width + x_min);
atomicAdd(&gU[index], top_left_weight);
index = (batchOffset + y_min * width + x_max);
atomicAdd(&gU[index], top_right_weight);
index = (batchOffset + y_max * width + x_min);
atomicAdd(&gU[index], bottom_left_weight);
index = (batchOffset + y_max * width + x_max);
atomicAdd(&gU[index], bottom_right_weight);
''',
'compute_gU_bilinear_interpolation',
)
gU = xp.zeros_like(U, dtype=U.dtype)
gU = gU_kernel(self.out_width * self.out_height, width, height, top_left_weight, top_right_weight, bottom_left_weight, bottom_right_weight, self.x_min, self.x_max, self.y_min, self.y_max, gU)
return force_array(gU.reshape(*U.shape), dtype=U.dtype), force_array(gpoints, dtype=points.dtype)
def bilinear_interpolation(batch, points_s, out_height, out_width):
return BilinearInterpolation(out_height, out_width)(batch, points_s)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment