axionbuster · September 26, 2025 17:41
diff --git a/smawk.zig b/smawk.zig
 // The SMAWK algorithm for finding row minima of a totally monotone matrix,
 // based on the notes:
 // - "Advanced Dynamic Programming," https://courses.grainger.illinois.edu/cs473/sp2016/notes/06-sparsedynprog.pdf
 // - Larmore, L.L.; and UNLV, "The SMAWK Algorithm."
 //
 // COPYRIGHT (c) 2025 by YuJin Kim <[email protected]>.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are met:
 //
 // 1. Redistributions of source code must retain the above copyright notice,
 //    this list of conditions and the following disclaimer.
 //
 // 2. Redistributions in binary form must reproduce the above copyright notice,
 //    this list of conditions and the following disclaimer in the documentation
 //    and/or other materials provided with the distribution.
 //
 // 3. Neither the name of the copyright holder nor the names of its contributors
 //    may be used to endorse or promote products derived from this software
 //    without specific prior written permission.
 //
 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 // POSSIBILITY OF SUCH DAMAGE.

 const std = @import("std");

 /// find the row minima of a totally monotone matrix using the SMAWK algorithm.
 pub fn smawkRowMinima(
    alloc: std.mem.Allocator,
    width: usize,
    height: usize,
    value: fn (usize, usize) i32,
 ) ![]usize {
    if (width <= 0 or height <= 0) {
        return try alloc.alloc(usize, 0);
    }
    if (width == 1) {
        const minima = try alloc.alloc(usize, height);
        errdefer alloc.free(minima);
        @memset(minima, 0);
        return minima;
    }
    if (height == 1) {
        const minima = try alloc.alloc(usize, 1);
        errdefer alloc.free(minima);
        // find the minimum of the single row
        var j: usize = 0;
        var minj: usize = 0;
        var minv = value(0, 0);
        while (j + 1 < width) : (j += 1) {
            const v = value(0, j + 1);
            if (v < minv) {
                minv = v;
                minj = j + 1;
            }
        }
        minima[0] = minj;
        return minima;
    }
    const minima = try alloc.alloc(usize, height);
    errdefer alloc.free(minima);
    var go = Go{
        .alloc = alloc,
        .offset = 0,
        .width = width,
        .height = height,
        .scale = 1,
        .minima = minima,
        .columns = null,
        .value_ = value,
    };
    try go.reduce();
    return minima;
 }

 // shared closure for the mutually recursive interpolate and reduce subroutines.
 const Go = struct {
    // actual row index begins at offset, and goes up by scale
    offset: usize,
    scale: usize,
    // width and height are measured in actual indices
    width: usize,
    height: usize,
    // locations of the minima of rows, real indices
    minima: []usize,
    // SMAWK columns (maps virtual columns to to real column indices).
    // if null, means identity mapping (basic optimization).
    columns: ?[]usize,
    // 0-based indexing
    value_: *const fn (usize, usize) i32,
    alloc: std.mem.Allocator,
    // by convention, i is the row index in the reduced matrix
    // and j is the column index in the original matrix
    fn rowOf(this: *const Go, i: usize) usize {
        return i * this.scale + this.offset;
    }
    fn rowInBounds(this: *const Go, i: usize) bool {
        return this.rowOf(i) < this.height;
    }
    fn virtualToRealColumn(this: *const Go, j: usize) usize {
        if (this.columns) |cols| {
            return cols[j];
        } else {
            return j;
        }
    }
    fn virtualWidth(this: *const Go) usize {
        return if (this.columns) |cs| cs.len else this.width;
    }
    fn value(this: *const Go, i: usize, j: usize) i32 {
        return this.value_(this.rowOf(i), this.virtualToRealColumn(j));
    }
    // inclusively end the search here
    fn minimumIxOfReal(this: *const Go, i: usize) usize {
        if (this.rowInBounds(i)) {
            return this.minima[this.rowOf(i)];
        } else if (this.columns) |cs| {
            if (cs.len == 0) {
                std.debug.panic("minimumIxOfReal: no columns", .{});
            } else {
                return cs[cs.len - 1]; // out of bounds
            }
        } else {
            return this.width - 1; // out of bounds
        }
    }
    fn setMinimumIxOf(this: *Go, i: usize, j: usize) void {
        if (this.rowInBounds(i)) {
            this.minima[this.rowOf(i)] = this.virtualToRealColumn(j);
        } else {
            std.debug.panic(
                "setMinimumIxOf: row ({d} -> {d}) is outside of the bounds",
                .{ i, this.rowOf(i) },
            );
        }
    }
    fn virtualHeight(this: *const Go) usize {
        // ceiling division of (height - offset) by scale.
        return (this.height - this.offset + this.scale - 1) / this.scale;
    }
    // INTERPOLATE is used if the matrix is either narrow or square.
    // the procedure is actually standalone [*], and itself actually works
    // for any monotone matrix, not just totally monotone ones.
    fn interpolate(this: *Go) std.mem.Allocator.Error!void {
        if (this.rowInBounds(1)) {
            // recursively find the minima of the odd rows.
            // somewhat surprisingly, this line of code is all we need.
            var go2 = this.*;
            go2.offset = this.offset + this.scale;
            go2.scale = this.scale * 2;
            // [*]: INTERPOLATE is standalone if we call interpolate again
            // here, instead of reduce. in that case, we don't even need
            // the virtual columns.
            try go2.reduce();
        }
        // now the minima of the even rows, by interpolating the odd rows
        // (hence the name of the procedure).
        var i: usize = 0;
        var j: usize = 0;
        // note: width >= 1.
        while (this.rowInBounds(i)) {
            const limitReal = this.minimumIxOfReal(i + 1);
            var minv = this.value(i, j);
            var minj = j;
            while (j < this.virtualWidth() and this.virtualToRealColumn(j) < limitReal) {
                j += 1;
                const v = this.value(i, j);
                // since we're finding the leftmost minimum, use '<', not '<='.
                if (v < minv) {
                    minv = v;
                    minj = j;
                }
            }
            this.setMinimumIxOf(i, minj);
            j = minj; // start from the last minimum
            i += 2;
        }
    }
    // REDUCE is used if the matrix is wide.
    // after this procedure, the matrix becomes much narrower. it might
    // become square but often it becomes very narrow.
    fn reduce(this: *Go) std.mem.Allocator.Error!void {
        const width = this.virtualWidth();
        const height = this.virtualHeight();
        // REDUCE being used as an entry point, we must check the base case.
        if (width <= height or height <= 1) {
            return this.interpolate();
        }
        // implicit stack: cols, t.
        // cols stores the virtual columns
        const cols = try this.alloc.alloc(usize, height);
        defer this.alloc.free(cols);
        var t: usize = 0; // stack size
        // real iteration
        var k: usize = 0;
        while (k < width) : (k += 1) {
            while (t > 0 and this.value(t - 1, cols[t - 1]) > this.value(t - 1, k)) {
                t -= 1;
            }
            if (t < height) {
                cols[t] = k;
                t += 1;
            }
        }
        // map to real indices.
        // cols now stores the real column indices.
        var i: usize = 0;
        while (i < t) : (i += 1) {
            cols[i] = this.virtualToRealColumn(cols[i]);
        }
        var go2 = this.*;
        go2.columns = cols[0..t];
        try go2.interpolate();
    }
 };

 test "interpolate 1 (smawk)" {
    const allocator = std.testing.allocator;

    const M = [5][5]i32{
        .{ 12, 21, 38, 76, 27 },
        .{ 74, 14, 14, 29, 60 },
        .{ 21, 8, 25, 10, 71 },
        .{ 68, 45, 29, 15, 76 },
        .{ 97, 8, 12, 2, 6 },
    };
    const Go2 = struct {
        fn value(i: usize, j: usize) i32 {
            return M[i][j];
        }
    };
    const minima = try smawkRowMinima(allocator, 5, 5, Go2.value);
    defer allocator.free(minima);

    try std.testing.expectEqual(0, minima[0]);
    try std.testing.expectEqual(1, minima[1]);
    try std.testing.expectEqual(1, minima[2]);
    try std.testing.expectEqual(3, minima[3]);
    try std.testing.expectEqual(3, minima[4]);
 }

 test "interpolate 2 (smawk)" {
    const allocator = std.testing.allocator;

    const M = [9][18]i32{
        .{ 25, 21, 13, 10, 20, 13, 19, 35, 37, 41, 58, 66, 82, 99, 124, 133, 156, 178 },
        .{ 42, 35, 26, 20, 29, 21, 25, 37, 36, 39, 56, 64, 76, 91, 116, 125, 146, 164 },
        .{ 57, 48, 35, 28, 33, 24, 28, 40, 37, 37, 54, 61, 72, 83, 107, 113, 131, 146 },
        .{ 78, 65, 51, 42, 44, 35, 38, 48, 42, 42, 55, 61, 70, 80, 100, 106, 120, 135 },
        .{ 90, 76, 58, 48, 49, 39, 42, 48, 39, 35, 47, 51, 56, 63, 80, 86, 97, 110 },
        .{ 103, 85, 67, 56, 55, 44, 44, 49, 39, 33, 41, 44, 49, 56, 71, 75, 84, 96 },
        .{ 123, 105, 86, 75, 73, 59, 57, 62, 51, 44, 50, 52, 55, 59, 72, 74, 80, 92 },
        .{ 142, 123, 100, 86, 82, 65, 61, 62, 50, 43, 47, 45, 46, 46, 58, 59, 65, 73 },
        .{ 151, 130, 104, 88, 80, 59, 52, 49, 37, 29, 29, 24, 23, 20, 28, 25, 31, 39 },
    };
    const Go2 = struct {
        fn value(i: usize, j: usize) i32 {
            return M[i][j];
        }
    };
    const minima = try smawkRowMinima(allocator, 18, 9, Go2.value);
    try std.testing.expectEqual(9, minima.len);
    defer allocator.free(minima);
    const expect = [_]usize{ 3, 3, 5, 5, 9, 9, 9, 9, 13 };
    var i: usize = 0;
    while (i < minima.len) : (i += 1) {
        try std.testing.expectEqual(expect[i], minima[i]);
    }
 }

 // find the row minima of any matrix by brute force
 // only for testing.
 fn naive_rowmins(
    alloc: std.mem.Allocator,
    width: usize,
    height: usize,
    value: fn (usize, usize) i32,
 ) ![]usize {
    if (width <= 0 or height <= 0) {
        return try alloc.alloc(usize, 0);
    }
    const minima = try alloc.alloc(usize, height);
    var i: usize = 0;
    while (i < height) : (i += 1) {
        var j: usize = 0;
        var minj: usize = 0;
        var minv = value(i, 0);
        while (j + 1 < width) : (j += 1) {
            const v = value(i, j + 1);
            if (v < minv) {
                minv = v;
                minj = j + 1;
            }
        }
        minima[i] = minj;
    }
    return minima;
 }

 // A randomized Monge generator composed from the lemma
 // (from "Advanced Dynamic Programming"):
 // - (a) row-constant component A[i,j] += r[i]
 // - (b) column-constant component A[i,j] += c[j]
 // - (c) upper-right rectangle of 1s (with weight) added
 // - (d) lower-left rectangle of 1s (with weight) added
 // - (e) positive multiple (mult >= 1)
 // - (f) sum (we sum all chosen components)
 // - (g) transpose (by swapping i/j in evaluation)
 //
 // We evaluate on-the-fly (no full matrix allocation).

 const MongeCase = struct {
    // final matrix dimensions
    width: usize,
    height: usize,

    // base dimensions (if transposed, base dims are swapped)
    base_width: usize,
    base_height: usize,

    // components
    row_vals: []i32, // length base_height, or empty slice if unused
    col_vals: []i32, // length base_width, or empty slice if unused

    // upper-right rectangle in base coords: rows [0 .. ur_top-1], cols [base_width-ur_right .. base_width-1]
    has_ur: bool,
    ur_top: usize,
    ur_right: usize,
    ur_weight: i32,

    // lower-left rectangle in base coords: rows [base_height-ll_bottom .. base_height-1], cols [0 .. ll_left-1]
    has_ll: bool,
    ll_bottom: usize,
    ll_left: usize,
    ll_weight: i32,

    // positive scaling
    mult: i32,

    // whether final matrix is the transpose of the base expression
    transposed: bool,
 };

 // Global pointer to the current case for the value function.
 var g_case: ?*const MongeCase = null;

 // Evaluate MongeCase at (i, j) in final matrix coords.
 fn monge_value(i: usize, j: usize) i32 {
    const c = g_case orelse std.debug.panic("monge_value: g_case not set", .{});

    // Convert to base coords depending on transposition.
    const bi: usize = if (c.transposed) j else i;
    const bj: usize = if (c.transposed) i else j;

    var acc: i32 = 0;

    if (c.row_vals.len != 0) {
        acc += c.row_vals[bi];
    }
    if (c.col_vals.len != 0) {
        acc += c.col_vals[bj];
    }
    if (c.has_ur) {
        if (bi < c.ur_top and bj >= c.base_width - c.ur_right) {
            acc += c.ur_weight;
        }
    }
    if (c.has_ll) {
        if (bi >= c.base_height - c.ll_bottom and bj < c.ll_left) {
            acc += c.ll_weight;
        }
    }

    // Positive multiple
    return acc * c.mult;
 }

 // Helpers for RNG
 fn rnd_between(r: *std.Random, lo: i32, hi_inclusive: i32) i32 {
    const span_u: u32 = @intCast(hi_inclusive - lo + 1);
    const v: u32 = r.int(u32) % span_u;
    return @as(i32, @intCast(v)) + lo;
 }

 fn rnd_bool(r: *std.Random) bool {
    return (r.int(u1) & 1) == 1;
 }

 test "interpolate randomized monge (fixed-seed)" {
    const allocator = std.testing.allocator;
    const rand = std.Random;
    var prng = rand.DefaultPrng.init(0xD00DFEEDCAFEBABE);
    var rng = prng.random();

    const trials: usize = 200;
    var t: usize = 0;
    while (t < trials) : (t += 1) {
        // Final dims
        const width: usize = @intCast(rnd_between(&rng, 1, 25));
        const height: usize = @intCast(rnd_between(&rng, 1, 25));

        // Randomly decide if we apply transposition to the base expression
        const transposed = rnd_bool(&rng);

        const base_width: usize = if (transposed) height else width;
        const base_height: usize = if (transposed) width else height;

        // Build components
        // Decide to include row-const and/or col-const components
        const use_row = rnd_bool(&rng);
        const use_col = rnd_bool(&rng);

        var row_vals: []i32 = &[_]i32{};
        var col_vals: []i32 = &[_]i32{};

        if (use_row) {
            row_vals = try allocator.alloc(i32, base_height);
            var i: usize = 0;
            while (i < base_height) : (i += 1) {
                // small values to avoid overflow
                row_vals[@intCast(i)] = rnd_between(&rng, -3, 7);
            }
        }

        if (use_col) {
            col_vals = try allocator.alloc(i32, base_width);
            var j: usize = 0;
            while (j < base_width) : (j += 1) {
                col_vals[@intCast(j)] = rnd_between(&rng, -3, 7);
            }
        }

        // Upper-right rectangle
        const has_ur = rnd_bool(&rng);
        var ur_top: usize = 0;
        var ur_right: usize = 0;
        var ur_weight: i32 = 0;
        if (has_ur) {
            ur_top = @intCast(rnd_between(&rng, 1, @intCast(base_height)));
            ur_right = @intCast(rnd_between(&rng, 1, @intCast(base_width)));
            ur_weight = @intCast(rnd_between(&rng, 0, 5)); // nonnegative
        }

        // Lower-left rectangle
        const has_ll = rnd_bool(&rng);
        var ll_bottom: usize = 0;
        var ll_left: usize = 0;
        var ll_weight: i32 = 0;
        if (has_ll) {
            ll_bottom = @intCast(rnd_between(&rng, 1, @intCast(base_height)));
            ll_left = @intCast(rnd_between(&rng, 1, @intCast(base_width)));
            ll_weight = rnd_between(&rng, 0, 5); // nonnegative
        }

        // positive multiple
        const mult: i32 = rnd_between(&rng, 1, 5);

        var case = MongeCase{
            .width = width,
            .height = height,
            .base_width = base_width,
            .base_height = base_height,
            .row_vals = row_vals,
            .col_vals = col_vals,
            .has_ur = has_ur,
            .ur_top = ur_top,
            .ur_right = ur_right,
            .ur_weight = ur_weight,
            .has_ll = has_ll,
            .ll_bottom = ll_bottom,
            .ll_left = ll_left,
            .ll_weight = ll_weight,
            .mult = mult,
            .transposed = transposed,
        };

        // Run both algorithms on this case
        g_case = &case;

        const minima_interp = try smawkRowMinima(allocator, width, height, monge_value);
        defer allocator.free(minima_interp);

        const minima_naive = try naive_rowmins(allocator, width, height, monge_value);
        defer allocator.free(minima_naive);

        // Check equality
        var i: usize = 0;
        while (i < height) : (i += 1) {
            try std.testing.expectEqual(
                minima_naive[i],
                minima_interp[i],
            );
        }

        // Check monotonicity of row minima (nondecreasing)
        if (height > 1) {
            var r: usize = 1;
            while (r < height) : (r += 1) {
                try std.testing.expect(
                    minima_interp[@intCast(r - 1)] <= minima_interp[r],
                );
            }
        }

        // cleanup
        if (row_vals.len != 0) allocator.free(row_vals);
        if (col_vals.len != 0) allocator.free(col_vals);
        g_case = null;
    }
 }
	// The SMAWK algorithm for finding row minima of a totally monotone matrix,
	// based on the notes:
	// - "Advanced Dynamic Programming," https://courses.grainger.illinois.edu/cs473/sp2016/notes/06-sparsedynprog.pdf
	// - Larmore, L.L.; and UNLV, "The SMAWK Algorithm."
	//
	// COPYRIGHT (c) 2025 by YuJin Kim <[email protected]>.
	//
	// Redistribution and use in source and binary forms, with or without
	// modification, are permitted provided that the following conditions are met:
	//
	// 1. Redistributions of source code must retain the above copyright notice,
	// this list of conditions and the following disclaimer.
	//
	// 2. Redistributions in binary form must reproduce the above copyright notice,
	// this list of conditions and the following disclaimer in the documentation
	// and/or other materials provided with the distribution.
	//
	// 3. Neither the name of the copyright holder nor the names of its contributors
	// may be used to endorse or promote products derived from this software
	// without specific prior written permission.
	//
	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS”
	// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
	// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
	// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
	// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
	// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
	// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
	// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
	// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
	// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
	// POSSIBILITY OF SUCH DAMAGE.

	const std = @import("std");

	/// find the row minima of a totally monotone matrix using the SMAWK algorithm.
	pub fn smawkRowMinima(
	alloc: std.mem.Allocator,
	width: usize,
	height: usize,
	value: fn (usize, usize) i32,
	) ![]usize {
	if (width <= 0 or height <= 0) {
	return try alloc.alloc(usize, 0);
	}
	if (width == 1) {
	const minima = try alloc.alloc(usize, height);
	errdefer alloc.free(minima);
	@memset(minima, 0);
	return minima;
	}
	if (height == 1) {
	const minima = try alloc.alloc(usize, 1);
	errdefer alloc.free(minima);
	// find the minimum of the single row
	var j: usize = 0;
	var minj: usize = 0;
	var minv = value(0, 0);
	while (j + 1 < width) : (j += 1) {
	const v = value(0, j + 1);
	if (v < minv) {
	minv = v;
	minj = j + 1;
	}
	}
	minima[0] = minj;
	return minima;
	}
	const minima = try alloc.alloc(usize, height);
	errdefer alloc.free(minima);
	var go = Go{
	.alloc = alloc,
	.offset = 0,
	.width = width,
	.height = height,
	.scale = 1,
	.minima = minima,
	.columns = null,
	.value_ = value,
	};
	try go.reduce();
	return minima;
	}

	// shared closure for the mutually recursive interpolate and reduce subroutines.
	const Go = struct {
	// actual row index begins at offset, and goes up by scale
	offset: usize,
	scale: usize,
	// width and height are measured in actual indices
	width: usize,
	height: usize,
	// locations of the minima of rows, real indices
	minima: []usize,
	// SMAWK columns (maps virtual columns to to real column indices).
	// if null, means identity mapping (basic optimization).
	columns: ?[]usize,
	// 0-based indexing
	value_: *const fn (usize, usize) i32,
	alloc: std.mem.Allocator,
	// by convention, i is the row index in the reduced matrix
	// and j is the column index in the original matrix
	fn rowOf(this: *const Go, i: usize) usize {
	return i * this.scale + this.offset;
	}
	fn rowInBounds(this: *const Go, i: usize) bool {
	return this.rowOf(i) < this.height;
	}
	fn virtualToRealColumn(this: *const Go, j: usize) usize {
	if (this.columns) \|cols\| {
	return cols[j];
	} else {
	return j;
	}
	}
	fn virtualWidth(this: *const Go) usize {
	return if (this.columns) \|cs\| cs.len else this.width;
	}
	fn value(this: *const Go, i: usize, j: usize) i32 {
	return this.value_(this.rowOf(i), this.virtualToRealColumn(j));
	}
	// inclusively end the search here
	fn minimumIxOfReal(this: *const Go, i: usize) usize {
	if (this.rowInBounds(i)) {
	return this.minima[this.rowOf(i)];
	} else if (this.columns) \|cs\| {
	if (cs.len == 0) {
	std.debug.panic("minimumIxOfReal: no columns", .{});
	} else {
	return cs[cs.len - 1]; // out of bounds
	}
	} else {
	return this.width - 1; // out of bounds
	}
	}
	fn setMinimumIxOf(this: *Go, i: usize, j: usize) void {
	if (this.rowInBounds(i)) {
	this.minima[this.rowOf(i)] = this.virtualToRealColumn(j);
	} else {
	std.debug.panic(
	"setMinimumIxOf: row ({d} -> {d}) is outside of the bounds",
	.{ i, this.rowOf(i) },
	);
	}
	}
	fn virtualHeight(this: *const Go) usize {
	// ceiling division of (height - offset) by scale.
	return (this.height - this.offset + this.scale - 1) / this.scale;
	}
	// INTERPOLATE is used if the matrix is either narrow or square.
	// the procedure is actually standalone [*], and itself actually works
	// for any monotone matrix, not just totally monotone ones.
	fn interpolate(this: *Go) std.mem.Allocator.Error!void {
	if (this.rowInBounds(1)) {
	// recursively find the minima of the odd rows.
	// somewhat surprisingly, this line of code is all we need.
	var go2 = this.*;
	go2.offset = this.offset + this.scale;
	go2.scale = this.scale * 2;
	// [*]: INTERPOLATE is standalone if we call interpolate again
	// here, instead of reduce. in that case, we don't even need
	// the virtual columns.
	try go2.reduce();
	}
	// now the minima of the even rows, by interpolating the odd rows
	// (hence the name of the procedure).
	var i: usize = 0;
	var j: usize = 0;
	// note: width >= 1.
	while (this.rowInBounds(i)) {
	const limitReal = this.minimumIxOfReal(i + 1);
	var minv = this.value(i, j);
	var minj = j;
	while (j < this.virtualWidth() and this.virtualToRealColumn(j) < limitReal) {
	j += 1;
	const v = this.value(i, j);
	// since we're finding the leftmost minimum, use '<', not '<='.
	if (v < minv) {
	minv = v;
	minj = j;
	}
	}
	this.setMinimumIxOf(i, minj);
	j = minj; // start from the last minimum
	i += 2;
	}
	}
	// REDUCE is used if the matrix is wide.
	// after this procedure, the matrix becomes much narrower. it might
	// become square but often it becomes very narrow.
	fn reduce(this: *Go) std.mem.Allocator.Error!void {
	const width = this.virtualWidth();
	const height = this.virtualHeight();
	// REDUCE being used as an entry point, we must check the base case.
	if (width <= height or height <= 1) {
	return this.interpolate();
	}
	// implicit stack: cols, t.
	// cols stores the virtual columns
	const cols = try this.alloc.alloc(usize, height);
	defer this.alloc.free(cols);
	var t: usize = 0; // stack size
	// real iteration
	var k: usize = 0;
	while (k < width) : (k += 1) {
	while (t > 0 and this.value(t - 1, cols[t - 1]) > this.value(t - 1, k)) {
	t -= 1;
	}
	if (t < height) {
	cols[t] = k;
	t += 1;
	}
	}
	// map to real indices.
	// cols now stores the real column indices.
	var i: usize = 0;
	while (i < t) : (i += 1) {
	cols[i] = this.virtualToRealColumn(cols[i]);
	}
	var go2 = this.*;
	go2.columns = cols[0..t];
	try go2.interpolate();
	}
	};

	test "interpolate 1 (smawk)" {
	const allocator = std.testing.allocator;

	const M = [5][5]i32{
	.{ 12, 21, 38, 76, 27 },
	.{ 74, 14, 14, 29, 60 },
	.{ 21, 8, 25, 10, 71 },
	.{ 68, 45, 29, 15, 76 },
	.{ 97, 8, 12, 2, 6 },
	};
	const Go2 = struct {
	fn value(i: usize, j: usize) i32 {
	return M[i][j];
	}
	};
	const minima = try smawkRowMinima(allocator, 5, 5, Go2.value);
	defer allocator.free(minima);

	try std.testing.expectEqual(0, minima[0]);
	try std.testing.expectEqual(1, minima[1]);
	try std.testing.expectEqual(1, minima[2]);
	try std.testing.expectEqual(3, minima[3]);
	try std.testing.expectEqual(3, minima[4]);
	}

	test "interpolate 2 (smawk)" {
	const allocator = std.testing.allocator;

	const M = [9][18]i32{
	.{ 25, 21, 13, 10, 20, 13, 19, 35, 37, 41, 58, 66, 82, 99, 124, 133, 156, 178 },
	.{ 42, 35, 26, 20, 29, 21, 25, 37, 36, 39, 56, 64, 76, 91, 116, 125, 146, 164 },
	.{ 57, 48, 35, 28, 33, 24, 28, 40, 37, 37, 54, 61, 72, 83, 107, 113, 131, 146 },
	.{ 78, 65, 51, 42, 44, 35, 38, 48, 42, 42, 55, 61, 70, 80, 100, 106, 120, 135 },
	.{ 90, 76, 58, 48, 49, 39, 42, 48, 39, 35, 47, 51, 56, 63, 80, 86, 97, 110 },
	.{ 103, 85, 67, 56, 55, 44, 44, 49, 39, 33, 41, 44, 49, 56, 71, 75, 84, 96 },
	.{ 123, 105, 86, 75, 73, 59, 57, 62, 51, 44, 50, 52, 55, 59, 72, 74, 80, 92 },
	.{ 142, 123, 100, 86, 82, 65, 61, 62, 50, 43, 47, 45, 46, 46, 58, 59, 65, 73 },
	.{ 151, 130, 104, 88, 80, 59, 52, 49, 37, 29, 29, 24, 23, 20, 28, 25, 31, 39 },
	};
	const Go2 = struct {
	fn value(i: usize, j: usize) i32 {
	return M[i][j];
	}
	};
	const minima = try smawkRowMinima(allocator, 18, 9, Go2.value);
	try std.testing.expectEqual(9, minima.len);
	defer allocator.free(minima);
	const expect = [_]usize{ 3, 3, 5, 5, 9, 9, 9, 9, 13 };
	var i: usize = 0;
	while (i < minima.len) : (i += 1) {
	try std.testing.expectEqual(expect[i], minima[i]);
	}
	}

	// find the row minima of any matrix by brute force
	// only for testing.
	fn naive_rowmins(
	alloc: std.mem.Allocator,
	width: usize,
	height: usize,
	value: fn (usize, usize) i32,
	) ![]usize {
	if (width <= 0 or height <= 0) {
	return try alloc.alloc(usize, 0);
	}
	const minima = try alloc.alloc(usize, height);
	var i: usize = 0;
	while (i < height) : (i += 1) {
	var j: usize = 0;
	var minj: usize = 0;
	var minv = value(i, 0);
	while (j + 1 < width) : (j += 1) {
	const v = value(i, j + 1);
	if (v < minv) {
	minv = v;
	minj = j + 1;
	}
	}
	minima[i] = minj;
	}
	return minima;
	}

	// A randomized Monge generator composed from the lemma
	// (from "Advanced Dynamic Programming"):
	// - (a) row-constant component A[i,j] += r[i]
	// - (b) column-constant component A[i,j] += c[j]
	// - (c) upper-right rectangle of 1s (with weight) added
	// - (d) lower-left rectangle of 1s (with weight) added
	// - (e) positive multiple (mult >= 1)
	// - (f) sum (we sum all chosen components)
	// - (g) transpose (by swapping i/j in evaluation)
	//
	// We evaluate on-the-fly (no full matrix allocation).

	const MongeCase = struct {
	// final matrix dimensions
	width: usize,
	height: usize,

	// base dimensions (if transposed, base dims are swapped)
	base_width: usize,
	base_height: usize,

	// components
	row_vals: []i32, // length base_height, or empty slice if unused
	col_vals: []i32, // length base_width, or empty slice if unused

	// upper-right rectangle in base coords: rows [0 .. ur_top-1], cols [base_width-ur_right .. base_width-1]
	has_ur: bool,
	ur_top: usize,
	ur_right: usize,
	ur_weight: i32,

	// lower-left rectangle in base coords: rows [base_height-ll_bottom .. base_height-1], cols [0 .. ll_left-1]
	has_ll: bool,
	ll_bottom: usize,
	ll_left: usize,
	ll_weight: i32,

	// positive scaling
	mult: i32,

	// whether final matrix is the transpose of the base expression
	transposed: bool,
	};

	// Global pointer to the current case for the value function.
	var g_case: ?*const MongeCase = null;

	// Evaluate MongeCase at (i, j) in final matrix coords.
	fn monge_value(i: usize, j: usize) i32 {
	const c = g_case orelse std.debug.panic("monge_value: g_case not set", .{});

	// Convert to base coords depending on transposition.
	const bi: usize = if (c.transposed) j else i;
	const bj: usize = if (c.transposed) i else j;

	var acc: i32 = 0;

	if (c.row_vals.len != 0) {
	acc += c.row_vals[bi];
	}
	if (c.col_vals.len != 0) {
	acc += c.col_vals[bj];
	}
	if (c.has_ur) {
	if (bi < c.ur_top and bj >= c.base_width - c.ur_right) {
	acc += c.ur_weight;
	}
	}
	if (c.has_ll) {
	if (bi >= c.base_height - c.ll_bottom and bj < c.ll_left) {
	acc += c.ll_weight;
	}
	}

	// Positive multiple
	return acc * c.mult;
	}

	// Helpers for RNG
	fn rnd_between(r: *std.Random, lo: i32, hi_inclusive: i32) i32 {
	const span_u: u32 = @intCast(hi_inclusive - lo + 1);
	const v: u32 = r.int(u32) % span_u;
	return @as(i32, @intCast(v)) + lo;
	}

	fn rnd_bool(r: *std.Random) bool {
	return (r.int(u1) & 1) == 1;
	}

	test "interpolate randomized monge (fixed-seed)" {
	const allocator = std.testing.allocator;
	const rand = std.Random;
	var prng = rand.DefaultPrng.init(0xD00DFEEDCAFEBABE);
	var rng = prng.random();

	const trials: usize = 200;
	var t: usize = 0;
	while (t < trials) : (t += 1) {
	// Final dims
	const width: usize = @intCast(rnd_between(&rng, 1, 25));
	const height: usize = @intCast(rnd_between(&rng, 1, 25));

	// Randomly decide if we apply transposition to the base expression
	const transposed = rnd_bool(&rng);

	const base_width: usize = if (transposed) height else width;
	const base_height: usize = if (transposed) width else height;

	// Build components
	// Decide to include row-const and/or col-const components
	const use_row = rnd_bool(&rng);
	const use_col = rnd_bool(&rng);

	var row_vals: []i32 = &[_]i32{};
	var col_vals: []i32 = &[_]i32{};

	if (use_row) {
	row_vals = try allocator.alloc(i32, base_height);
	var i: usize = 0;
	while (i < base_height) : (i += 1) {
	// small values to avoid overflow
	row_vals[@intCast(i)] = rnd_between(&rng, -3, 7);
	}
	}

	if (use_col) {
	col_vals = try allocator.alloc(i32, base_width);
	var j: usize = 0;
	while (j < base_width) : (j += 1) {
	col_vals[@intCast(j)] = rnd_between(&rng, -3, 7);
	}
	}

	// Upper-right rectangle
	const has_ur = rnd_bool(&rng);
	var ur_top: usize = 0;
	var ur_right: usize = 0;
	var ur_weight: i32 = 0;
	if (has_ur) {
	ur_top = @intCast(rnd_between(&rng, 1, @intCast(base_height)));
	ur_right = @intCast(rnd_between(&rng, 1, @intCast(base_width)));
	ur_weight = @intCast(rnd_between(&rng, 0, 5)); // nonnegative
	}

	// Lower-left rectangle
	const has_ll = rnd_bool(&rng);
	var ll_bottom: usize = 0;
	var ll_left: usize = 0;
	var ll_weight: i32 = 0;
	if (has_ll) {
	ll_bottom = @intCast(rnd_between(&rng, 1, @intCast(base_height)));
	ll_left = @intCast(rnd_between(&rng, 1, @intCast(base_width)));
	ll_weight = rnd_between(&rng, 0, 5); // nonnegative
	}

	// positive multiple
	const mult: i32 = rnd_between(&rng, 1, 5);

	var case = MongeCase{
	.width = width,
	.height = height,
	.base_width = base_width,
	.base_height = base_height,
	.row_vals = row_vals,
	.col_vals = col_vals,
	.has_ur = has_ur,
	.ur_top = ur_top,
	.ur_right = ur_right,
	.ur_weight = ur_weight,
	.has_ll = has_ll,
	.ll_bottom = ll_bottom,
	.ll_left = ll_left,
	.ll_weight = ll_weight,
	.mult = mult,
	.transposed = transposed,
	};

	// Run both algorithms on this case
	g_case = &case;

	const minima_interp = try smawkRowMinima(allocator, width, height, monge_value);
	defer allocator.free(minima_interp);

	const minima_naive = try naive_rowmins(allocator, width, height, monge_value);
	defer allocator.free(minima_naive);

	// Check equality
	var i: usize = 0;
	while (i < height) : (i += 1) {
	try std.testing.expectEqual(
	minima_naive[i],
	minima_interp[i],
	);
	}

	// Check monotonicity of row minima (nondecreasing)
	if (height > 1) {
	var r: usize = 1;
	while (r < height) : (r += 1) {
	try std.testing.expect(
	minima_interp[@intCast(r - 1)] <= minima_interp[r],
	);
	}
	}

	// cleanup
	if (row_vals.len != 0) allocator.free(row_vals);
	if (col_vals.len != 0) allocator.free(col_vals);
	g_case = null;
	}
	}