Skip to content

Instantly share code, notes, and snippets.

@lithdew
Created June 26, 2022 09:26
Show Gist options
  • Select an option

  • Save lithdew/c6e96c3819747a85b9a7284afeb67cf4 to your computer and use it in GitHub Desktop.

Select an option

Save lithdew/c6e96c3819747a85b9a7284afeb67cf4 to your computer and use it in GitHub Desktop.

Revisions

  1. lithdew created this gist Jun 26, 2022.
    385 changes: 385 additions & 0 deletions sparse.zig
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,385 @@
    const std = @import("std");

    const sparse = @This();

    /// This is an implementation of a paged sparse set that stores the payload in
    /// the sparse array.
    //
    /// A sparse set has a dense and a sparse array. The sparse array is directly
    /// indexed by a 64 bit identifier. The sparse element is linked with a dense
    /// element, which allows for liveliness checking. The liveliness check itself
    /// can be performed by doing (psuedo code):
    /// dense[sparse[sparse_id].dense] == sparse_id
    //
    /// To ensure that the sparse array doesn't have to grow to a large size when
    /// using large sparse_id's, the sparse set uses paging. This cuts up the array
    /// into several pages of 4096 elements. When an element is set, the sparse set
    /// ensures that the corresponding page is created. The page associated with an
    /// id is determined by shifting a bit 12 bits to the right.
    //
    /// The sparse set keeps track of a generation count per id, which is increased
    /// each time an id is deleted. The generation is encoded in the returned id.
    //
    /// This sparse set implementation stores payload in the sparse array, which is
    /// not typical. The reason for this is to guarantee that (in combination with
    /// paging) the returned payload pointers are stable. This allows for various
    /// optimizations in the parts of the framework that uses the sparse set.
    //
    /// The sparse set has been designed so that new ids can be generated in bulk, in
    /// an O(1) operation. The way this works is that once a dense-sparse pair is
    /// created, it is never unpaired. Instead it is moved to the end of the dense
    /// array.
    pub fn Set(comptime T: type) type {
    return struct {
    /// A sparse page comprised of 4096 elements. Indices into the dense array
    /// of handles within the set are 1-indexed, as the 0 index indicates a
    /// null index.
    pub const Page = struct {
    indices: [4096]u32 = [_]u32{0} ** 4096,
    items: [4096]T = undefined,
    };

    const Self = @This();

    /// Dense array of handles.
    handles: [*]Handle = undefined,
    num_handles: u32 = 0,
    num_handles_available: u32 = 0,

    /// Sparse array of pages.
    sparse_pages: [*]?*Page = undefined,
    num_sparse_pages: u32 = 0,

    /// Number of elements stored in the set.
    len: u32 = 0,

    /// Deinitialize and free all memory of the set.
    pub fn deinit(self: *Self, gpa: std.mem.Allocator) void {
    gpa.free(self.handles[0..self.num_handles_available]);
    for (self.sparse_pages[0..self.num_sparse_pages]) |page| {
    gpa.destroy(page orelse continue);
    }
    gpa.free(self.sparse_pages[0..self.num_sparse_pages]);
    }

    /// Invalidates all handles in the set while retaining all allocated
    /// memory used to store handles and sparse pages in the set. All
    /// invalidated handles can be recycled through recycle().
    pub fn clearRetainingCapacity(self: *Self) void {
    self.len = 0;
    }

    /// Check whether or not the given handle is alive and associated with
    /// an item T.
    pub fn contains(self: *Self, handle: Handle) bool {
    return self.getPtr(handle) != null;
    }

    /// Gets an item T associated with the given handle if the handle is still
    /// considered to be alive. Returns null otherwise.
    pub fn get(self: *Self, handle: Handle) ?T {
    const ptr = self.getPtr(handle) orelse return null;
    return ptr.*;
    }

    /// Gets a pointer to the item T associated with the given handle if the handle is still
    /// considered to be alive. Returns null otherwise.
    pub fn getPtr(self: *Self, handle: Handle) ?*const T {
    const page_index = handle.getPageIndex();
    if (page_index >= self.num_sparse_pages) {
    return null;
    }
    const page = self.sparse_pages[page_index] orelse return null;
    if (page.indices[handle.getPageOffset()] == 0 or page.indices[handle.getPageOffset()] - 1 >= self.len) {
    return null;
    }
    if (self.handles[page.indices[handle.getPageOffset()] - 1].data.managed.generation != handle.data.managed.generation) {
    return null;
    }
    return &page.items[handle.getPageOffset()];
    }

    /// Provides handles that may be reused for the insertion of new items into the set. A
    /// handle is considered to be reusable if it was previously deleted from the set.
    pub fn recycle(self: *Self) ?Handle {
    if (self.len >= self.num_handles) {
    return null;
    }
    return self.handles[self.len];
    }

    /// Associates an item T to the given handle in the set. Panics if the handle is
    /// no longer alive, which is determined based on the generation count encoded in
    /// the handle, and the generation count that is tracked for the handle in the
    /// set.
    pub fn put(self: *Self, gpa: std.mem.Allocator, handle: Handle, item: T) !void {
    const page_index = handle.getPageIndex();
    const page_offset = handle.getPageOffset();

    try self.ensureSparsePageIndexAvailable(gpa, page_index);

    const page = self.sparse_pages[page_index] orelse page: {
    const page = try gpa.create(Page);
    page.* = .{};
    self.sparse_pages[page_index] = page;
    break :page page;
    };

    if (page.indices[page_offset] != 0) {
    if (page.indices[page_offset] - 1 == self.len) {
    self.len += 1;
    } else if (page.indices[page_offset] - 1 > self.len) {
    const unused_handle = self.handles[self.len];
    const unused_page = self.sparse_pages[unused_handle.getPageIndex()] orelse unreachable;

    self.handles[self.len] = self.handles[page.indices[page_offset] - 1];
    self.handles[page.indices[page_offset] - 1] = unused_handle;

    unused_page.indices[unused_handle.getPageOffset()] = page.indices[page_offset];
    page.indices[page_offset] = self.len + 1;

    self.len += 1;
    }

    if (self.handles[page.indices[page_offset] - 1].data.managed.generation != handle.data.managed.generation) {
    std.debug.panic("BUG: Expected generation {d} when inserting ID {d} with item {any}, but got generation {d}.", .{
    self.handles[page.indices[page_offset] - 1].data.managed.generation,
    handle.id,
    item,
    handle.data.managed.generation,
    });
    }
    } else {
    try self.ensureNumHandlesAvailable(gpa, self.num_handles + 1);

    if (self.len < self.num_handles) {
    const unused_handle = self.handles[self.len];
    const unused_page = self.sparse_pages[unused_handle.getPageIndex()] orelse unreachable;

    self.handles[self.num_handles] = unused_handle;
    unused_page.indices[unused_handle.getPageOffset()] = self.num_handles + 1;
    }

    self.handles[self.len] = handle;
    page.indices[page_offset] = self.len + 1;

    self.num_handles += 1;
    self.len += 1;
    }

    page.items[page_offset] = item;
    }

    /// Ensures that enough memory is allocated to store the given number of handles.
    fn ensureNumHandlesAvailable(self: *Self, gpa: std.mem.Allocator, num_handles: u32) !void {
    var better_capacity = self.num_handles_available;
    if (better_capacity >= num_handles) {
    return;
    }
    while (true) {
    better_capacity += better_capacity / 2 + 8;
    if (better_capacity >= num_handles) {
    break;
    }
    }
    const handles = try gpa.reallocAtLeast(self.handles[0..self.num_handles_available], better_capacity);
    self.handles = handles.ptr;
    self.num_handles_available = @intCast(u32, handles.len);
    }

    /// Ensures that the sparse page at the given index is available.
    fn ensureSparsePageIndexAvailable(self: *Self, gpa: std.mem.Allocator, page_index: u32) !void {
    if (page_index + 1 < self.num_sparse_pages) {
    return;
    }
    const sparse_pages = try gpa.reallocAtLeast(self.sparse_pages[0..self.num_sparse_pages], page_index + 1);
    for (sparse_pages[self.num_sparse_pages..]) |*page| {
    page.* = null;
    }
    self.sparse_pages = sparse_pages.ptr;
    self.num_sparse_pages = @intCast(u32, sparse_pages.len);
    }

    /// Remove an item associated with the given handle from the set. Returns true
    /// if the item was successfully removed.
    pub fn remove(self: *Self, handle: Handle) bool {
    return self.fetchRemove(handle) != null;
    }

    /// Fetch an item T by its handle and remove it from the set. Returns
    /// null if the handle is no longer alive.
    pub fn fetchRemove(self: *Self, handle: Handle) ?T {
    if (self.len == 0) {
    return null;
    }
    const page_index = handle.getPageIndex();
    const page_offset = handle.getPageOffset();
    const page = self.sparse_pages[page_index] orelse return null;
    if (page.indices[page_offset] == 0) {
    return null;
    }
    if (self.handles[page.indices[page_offset] - 1].data.managed.generation != handle.data.managed.generation) {
    return null;
    }
    self.handles[page.indices[page_offset] - 1].data.managed.generation +%= 1;
    if (page.indices[page_offset] - 1 == self.len - 1) {
    self.len -= 1;
    } else if (page.indices[page_offset] - 1 < self.len - 1) {
    const used_handle = self.handles[self.len - 1];
    const used_page = self.sparse_pages[used_handle.getPageIndex()] orelse unreachable;

    self.handles[self.len - 1] = self.handles[page.indices[page_offset] - 1];
    self.handles[page.indices[page_offset] - 1] = used_handle;

    used_page.indices[used_handle.getPageOffset()] = page.indices[page_offset];
    page.indices[page_offset] = (self.len - 1) + 1;

    self.len -= 1;
    } else {
    return null;
    }
    return page.items[page_offset];
    }

    /// Manually override the generation count of a handle stored in the set.
    pub fn update(self: *Self, handle: Handle) void {
    const page_index = handle.getPageIndex();
    const page_offset = handle.getPageOffset();
    const page = self.sparse_pages[page_index] orelse return;
    if (page.indices[page_offset] == 0) {
    return;
    }
    self.handles[page.indices[page_offset] - 1].data.managed.generation = handle.data.managed.generation;
    }
    };
    }

    /// A 64-bit handle which may be associated with data in a sparse set. The
    /// lower 32 bits of a handle may be used to store miscellaneous metadata
    /// such as the generation count of the handle itself.
    pub const Handle = packed struct {
    id: u32,
    data: packed union {
    unmanaged: u32,
    managed: packed struct {
    generation: u16 = 0,
    unmanaged: u16 = 0,
    },
    },

    pub inline fn init(id: u32) Handle {
    return .{ .id = id, .data = .{ .managed = .{} } };
    }

    pub inline fn initWithData(id: u32, data: u32) Handle {
    return .{ .id = id, .data = .{ .unmanaged = data } };
    }

    pub inline fn initWithGeneration(id: u32, generation: u16) Handle {
    return .{ .id = id, .data = .{ .managed = .{ .generation = generation } } };
    }

    pub inline fn getPageIndex(self: Handle) u20 {
    return @intCast(u20, self.id >> 12);
    }

    pub inline fn getPageOffset(self: Handle) u12 {
    return @intCast(u12, self.id & 0xfff);
    }

    pub inline fn getGeneration(self: Handle) u16 {
    return self.data.managed.generation;
    }

    pub inline fn equals(self: Handle, other: Handle) bool {
    return self.id == other.id and self.data.unmanaged == other.data.unmanaged;
    }

    comptime {
    if (@bitSizeOf(Handle) != @bitSizeOf(u64)) {
    @compileError("Expected handles to be 64 bits.");
    }
    }
    };

    test "sparse.Set: strings example" {
    var set: sparse.Set([]const u8) = .{};
    defer set.deinit(std.testing.allocator);

    const Strings = struct {
    set: *sparse.Set([]const u8),
    id: u32 = 0,

    pub fn create(self: *@This()) sparse.Handle {
    if (self.set.recycle()) |handle| {
    return handle;
    }
    const handle = Handle.init(self.id);
    self.id +%= 1;
    return handle;
    }

    pub fn append(self: *@This(), gpa: std.mem.Allocator, item: []const u8) !Handle {
    const handle = self.create();
    try self.set.put(gpa, handle, item);
    return handle;
    }
    };

    var strings: Strings = .{ .set = &set };

    var handle_a = strings.create();
    try std.testing.expect(sparse.Handle.init(0).equals(handle_a));

    var handle_b = strings.create();
    try std.testing.expect(sparse.Handle.init(1).equals(handle_b));

    try set.put(std.testing.allocator, handle_a, "xxx");
    try set.put(std.testing.allocator, handle_b, "yyy");
    try std.testing.expect(set.contains(handle_a));
    try std.testing.expect(set.contains(handle_b));
    try std.testing.expectEqual(@as(u32, 2), set.len);

    try std.testing.expectEqualStrings("xxx", set.get(handle_a) orelse unreachable);
    try std.testing.expectEqualStrings("yyy", set.get(handle_b) orelse unreachable);
    try std.testing.expectEqualStrings("yyy", set.fetchRemove(handle_b) orelse unreachable);
    try std.testing.expectEqualStrings("xxx", set.fetchRemove(handle_a) orelse unreachable);
    try std.testing.expect(!set.contains(handle_a));
    try std.testing.expect(!set.contains(handle_b));
    try std.testing.expectEqual(@as(u32, 0), set.len);

    try std.testing.expectEqual(@as(?[]const u8, null), set.get(handle_a));
    try std.testing.expectEqual(@as(?[]const u8, null), set.get(handle_b));
    try std.testing.expectEqual(@as(?[]const u8, null), set.fetchRemove(handle_a));
    try std.testing.expectEqual(@as(?[]const u8, null), set.fetchRemove(handle_b));
    try std.testing.expect(!set.contains(handle_a));
    try std.testing.expect(!set.contains(handle_b));
    try std.testing.expectEqual(@as(u32, 0), set.len);

    set.clearRetainingCapacity();

    handle_a = strings.create();
    try std.testing.expect(sparse.Handle.initWithGeneration(0, 1).equals(handle_a));
    try set.put(std.testing.allocator, handle_a, "xxx");
    try std.testing.expect(set.contains(handle_a));
    try std.testing.expect(!set.contains(handle_b));

    set.update(sparse.Handle.initWithGeneration(1, 999));

    handle_b = strings.create();
    try std.testing.expect(sparse.Handle.initWithGeneration(1, 999).equals(handle_b));
    try set.put(std.testing.allocator, handle_b, "yyy");
    try std.testing.expect(set.contains(handle_a));
    try std.testing.expect(set.contains(handle_b));

    try std.testing.expect(set.recycle() == null);
    try std.testing.expectEqual(@as(u32, 2), set.len);

    try std.testing.expectEqualStrings("xxx", set.get(handle_a) orelse unreachable);
    try std.testing.expectEqualStrings("yyy", set.get(handle_b) orelse unreachable);
    try std.testing.expectEqualStrings("xxx", set.fetchRemove(handle_a) orelse unreachable);
    try std.testing.expectEqualStrings("yyy", set.fetchRemove(handle_b) orelse unreachable);
    try std.testing.expect(!set.contains(handle_a));
    try std.testing.expect(!set.contains(handle_b));
    try std.testing.expectEqual(@as(u32, 0), set.len);
    }