Skip to content

Instantly share code, notes, and snippets.

@Bartzi
Created June 12, 2017 08:57
Show Gist options
  • Select an option

  • Save Bartzi/d21426edfca1d2ce70d91fc918d41eea to your computer and use it in GitHub Desktop.

Select an option

Save Bartzi/d21426edfca1d2ce70d91fc918d41eea to your computer and use it in GitHub Desktop.

Revisions

  1. Bartzi created this gist Jun 12, 2017.
    285 changes: 285 additions & 0 deletions bilinear_interpolation.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,285 @@
    from chainer import Function
    from chainer import cuda
    from chainer.utils import type_check, force_array


    def cupy_place(arr, mask, val):
    _place_kernel = cuda.cupy.ElementwiseKernel(
    'T a, S mask, T val',
    'T out',
    'out = mask ? val : a',
    'cupy_place')

    val = cuda.cupy.array(val, dtype=arr.dtype)
    out = cuda.cupy.empty_like(arr)
    return _place_kernel(arr, mask, val, out)


    def apply_with_mask(arr, mask, elementwise_func, *args):
    value = elementwise_func(arr, *args)
    return cupy_place(arr, mask, value)


    class BilinearInterpolation(Function):

    def __init__(self, out_height, out_width):
    self.out_height = out_height
    self.out_width = out_width
    self.x_max = None
    self.y_max = None
    self.x_min = None
    self.y_min = None

    def check_type_forward(self, in_types):
    type_check.expect(in_types.size() == 2)
    x_type, points_type = in_types
    type_check.expect(
    x_type.ndim == 3,
    x_type.dtype.kind == 'f',
    points_type.ndim == 3,
    points_type.dtype.kind == 'f',
    points_type.shape[-1] == self.out_width * self.out_height,
    )

    def forward_cpu(self, inputs):
    xp = cuda.get_array_module(*inputs)
    U, points = inputs
    batch_size, height, width = U.shape

    # resize grid to be in image dimensions
    points = (points + 1) / 2
    points[:, 0] *= width
    points[:, 1] *= height

    x_s = points[:, 0]
    y_s = points[:, 1]

    # determine min indices
    self.x_min = xp.clip(xp.floor(x_s * 0.999999), 0, width - 1).astype(xp.int32)
    self.y_min = xp.clip(xp.floor(y_s * 0.999999), 0, width - 1).astype(xp.int32)

    # determine max indices
    self.x_max = xp.clip((self.x_min + 1), 0, width - 1).astype(xp.int32)
    self.y_max = xp.clip((self.y_min + 1), 0, height - 1).astype(xp.int32)

    # remove points outside of image
    x_invalid = xp.logical_or(x_s < 0, (width - 1) < x_s)
    y_invalid = xp.logical_or(y_s < 0, (height - 1) < y_s)
    self.invalid = xp.logical_or(x_invalid, y_invalid)

    self.left_weight = 1.0 - (x_s - self.x_min).astype(U.dtype)
    self.top_weight = 1.0 - (y_s - self.y_min).astype(U.dtype)
    self.right_weight = 1.0 - self.left_weight.astype(U.dtype)
    self.bottom_weight = 1.0 - self.top_weight.astype(U.dtype)

    self.left_weight[self.invalid] = 0
    self.top_weight[self.invalid] = 0
    self.right_weight[self.invalid] = 0
    self.bottom_weight[self.invalid] = 0

    batch_axis = xp.expand_dims(xp.arange(batch_size), 1)
    top_interpolated = self.left_weight * U[batch_axis, self.y_min, self.x_min] + self.right_weight * U[batch_axis, self.y_min, self.x_max]
    bottom_interpolated = self.left_weight * U[batch_axis, self.y_max, self.x_min] + self.right_weight * U[batch_axis, self.y_max, self.x_max]

    V = self.top_weight * top_interpolated + self.bottom_weight * bottom_interpolated
    V = V.reshape((batch_size, self.out_height, self.out_width))

    return force_array(V, dtype=U.dtype),

    def forward_gpu(self, inputs):
    xp = cuda.get_array_module(*inputs)
    U, points = inputs
    batch_size, height, width = U.shape

    # resize grid to be in image dimensions
    points = (points + 1) / 2
    points[:, 0] *= width
    points[:, 1] *= height

    x_s = points[:, 0]
    y_s = points[:, 1]

    # move points on boundary a little bit inward to ensure correct computation
    # apply masking twice in order to avoid race conditions
    x_s_copy = x_s.copy()
    y_s_copy = y_s.copy()

    on_boundary = (x_s == 0)
    replacement = apply_with_mask(x_s_copy, on_boundary, xp.nextafter, xp.array(1.0).astype(x_s.dtype))
    x_s = cupy_place(x_s_copy, on_boundary, replacement)

    on_boundary = (x_s == width - 1)
    replacement = apply_with_mask(x_s_copy, on_boundary, xp.nextafter, xp.array(0.0).astype(x_s.dtype))
    x_s = cupy_place(x_s_copy, on_boundary, replacement)

    on_boundary = (y_s == 0)
    replacement = apply_with_mask(y_s_copy, on_boundary, xp.nextafter, xp.array(1.0).astype(y_s.dtype))
    y_s = cupy_place(y_s_copy, on_boundary, replacement)

    on_boundary = (y_s == width - 1)
    replacement = apply_with_mask(y_s_copy, on_boundary, xp.nextafter, xp.array(0.0).astype(y_s.dtype))
    y_s = cupy_place(y_s_copy, on_boundary, replacement)

    # determine min indices
    self.x_min = xp.clip(xp.floor(x_s), 0, width - 1).astype(xp.int32)
    self.y_min = xp.clip(xp.floor(y_s), 0, width - 1).astype(xp.int32)

    # determine max indices
    self.x_max = xp.clip((self.x_min + 1), 0, width - 1).astype(xp.int32)
    self.y_max = xp.clip((self.y_min + 1), 0, height - 1).astype(xp.int32)

    # remove points outside of image
    x_invalid = xp.logical_or(x_s < 0, (width - 1) < x_s)
    y_invalid = xp.logical_or(y_s < 0, (height - 1) < y_s)
    self.invalid = xp.logical_or(x_invalid, y_invalid)

    self.left_weight = 1.0 - (x_s - self.x_min).astype(U.dtype)
    self.top_weight = 1.0 - (y_s - self.y_min).astype(U.dtype)
    self.right_weight = 1.0 - self.left_weight.astype(U.dtype)
    self.bottom_weight = 1.0 - self.top_weight.astype(U.dtype)

    replacement = xp.zeros_like(self.invalid, dtype=U.dtype)
    self.left_weight = cupy_place(self.left_weight.copy(), self.invalid, replacement.copy())
    self.top_weight = cupy_place(self.top_weight.copy(), self.invalid, replacement.copy())
    self.right_weight = cupy_place(self.right_weight.copy(), self.invalid, replacement.copy())
    self.bottom_weight = cupy_place(self.bottom_weight.copy(), self.invalid, replacement.copy())

    interpolate_kernel = cuda.cupy.ElementwiseKernel(
    'raw T U, T left, T right, T top, T bottom, int32 x_min, int32 y_min, int32 x_max, int32 y_max, int32 pixels_per_batch, int32 width, int32 height',
    'T V',
    '''
    int batchNum = i / pixels_per_batch;
    int batchOffset = batchNum * width * height;
    T top_left = left * U[batchOffset + y_min * width + x_min];
    T top_right = right * U[batchOffset + y_min * width + x_max];
    T bottom_left = left * U[batchOffset + y_max * width + x_min];
    T bottom_right = right * U[batchOffset + y_max * width + x_max];
    V = top * (top_left + top_right) + bottom * (bottom_left + bottom_right);
    ''',
    'interpolation_kernel'
    )

    V = interpolate_kernel(
    xp.ravel(U),
    self.left_weight,
    self.right_weight,
    self.top_weight,
    self.bottom_weight,
    self.x_min,
    self.y_min,
    self.x_max,
    self.y_max,
    self.out_height * self.out_width,
    width,
    height,
    )

    V = V.reshape((batch_size, self.out_height, self.out_width))
    return force_array(V, dtype=U.dtype),

    def backward_cpu(self, inputs, gradients):
    xp = cuda.get_array_module(*inputs)
    U, points = inputs
    gV = gradients[0]

    batch_size, height, width = U.shape
    flattened_gV = xp.reshape(xp.ravel(gV), (batch_size, -1))

    gU = xp.zeros((batch_size, height * width), dtype=U.dtype)

    batch_axis = xp.expand_dims(xp.arange(batch_size), 1)

    U_tl = U[batch_axis, self.y_min, self.x_min]
    U_tr = U[batch_axis, self.y_min, self.x_max]
    U_bl = U[batch_axis, self.y_max, self.x_min]
    U_br = U[batch_axis, self.y_max, self.x_max]

    gx = self.top_weight * (U_tr - U_tl) + self.bottom_weight * (U_br - U_bl)
    gy = self.left_weight * (U_bl - U_tl) + self.right_weight * (U_br - U_tr)
    gx = gx * flattened_gV
    gy = gy * flattened_gV

    gx = xp.expand_dims(gx, 1)
    gy = xp.expand_dims(gy, 1)
    gpoints = xp.hstack((gx, gy))

    top_left_indices = width * self.y_min + self.x_min
    top_right_indices = width * self.y_min + self.x_max
    bottom_left_indices = width * self.y_max + self.x_min
    bottom_right_indices = width * self.y_max + self.x_max
    indices = xp.hstack((top_left_indices, top_right_indices, bottom_left_indices, bottom_right_indices))

    top_left_weight = self.top_weight * self.left_weight * flattened_gV
    top_right_weight = self.top_weight * self.right_weight * flattened_gV
    bottom_left_weight = self.bottom_weight * self.left_weight * flattened_gV
    bottom_right_weight = self.bottom_weight * self.right_weight * flattened_gV
    weights = xp.hstack((top_left_weight, top_right_weight, bottom_left_weight, bottom_right_weight))

    for b in range(batch_size):
    gU[b] = xp.bincount(indices[b], weights=weights[b], minlength=height * width)

    return force_array(gU.reshape(*U.shape), dtype=gV.dtype), force_array(gpoints.reshape(*points.shape), dtype=points.dtype),

    def backward_gpu(self, inputs, gradients):
    xp = cuda.get_array_module(*inputs)
    U, points = inputs
    batch_size, height, width = U.shape

    gV, = gradients
    flattened_gV = xp.reshape(xp.ravel(gV), (batch_size, -1))

    batch_indices = xp.asarray(xp.repeat(xp.arange(batch_size, dtype=xp.int32), points.shape[-1]))
    top_left_indices = batch_indices * (width * height) + self.y_min.ravel() * width + self.x_min.ravel()
    top_right_indices = batch_indices * (width * height) + self.y_min.ravel() * width + self.x_max.ravel()
    bottom_left_indices = batch_indices * (width * height) + self.y_max.ravel() * width + self.x_min.ravel()
    bottom_right_indices = batch_indices * (width * height) + self.y_max.ravel() * width + self.x_max.ravel()

    U_tl = xp.take(U, top_left_indices).reshape((batch_size, -1))
    U_tr = xp.take(U, top_right_indices).reshape((batch_size, -1))
    U_bl = xp.take(U, bottom_left_indices).reshape((batch_size, -1))
    U_br = xp.take(U, bottom_right_indices).reshape((batch_size, -1))

    gx = self.top_weight * (U_tr - U_tl) + self.bottom_weight * (U_br - U_bl)
    gy = self.left_weight * (U_bl - U_tl) + self.right_weight * (U_br - U_tr)

    gx = xp.expand_dims(gx * flattened_gV, 1)
    gy = xp.expand_dims(gy * flattened_gV, 1)
    gpoints = xp.hstack((gx, gy))

    top_left_weight = self.top_weight * self.left_weight * flattened_gV
    top_right_weight = self.top_weight * self.right_weight * flattened_gV
    bottom_left_weight = self.bottom_weight * self.left_weight * flattened_gV
    bottom_right_weight = self.bottom_weight * self.right_weight * flattened_gV

    gU_kernel = cuda.cupy.ElementwiseKernel(
    'int32 elements_per_batch, int32 width, int32 height, T top_left_weight, T top_right_weight, T bottom_left_weight, T bottom_right_weight, int32 x_min, int32 x_max, int32 y_min, int32 y_max',
    'raw T gU',
    '''
    int batchNum = i / elements_per_batch;
    int batchOffset = batchNum * width * height;
    int index = (batchOffset + y_min * width + x_min);
    atomicAdd(&gU[index], top_left_weight);
    index = (batchOffset + y_min * width + x_max);
    atomicAdd(&gU[index], top_right_weight);
    index = (batchOffset + y_max * width + x_min);
    atomicAdd(&gU[index], bottom_left_weight);
    index = (batchOffset + y_max * width + x_max);
    atomicAdd(&gU[index], bottom_right_weight);
    ''',
    'compute_gU_bilinear_interpolation',
    )

    gU = xp.zeros_like(U, dtype=U.dtype)
    gU = gU_kernel(self.out_width * self.out_height, width, height, top_left_weight, top_right_weight, bottom_left_weight, bottom_right_weight, self.x_min, self.x_max, self.y_min, self.y_max, gU)

    return force_array(gU.reshape(*U.shape), dtype=U.dtype), force_array(gpoints, dtype=points.dtype)


    def bilinear_interpolation(batch, points_s, out_height, out_width):
    return BilinearInterpolation(out_height, out_width)(batch, points_s)