Last active
December 19, 2023 05:27
-
-
Save RealNeGate/8077c99a2e7ad4ba3cd6d02e131de218 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // This is a PoC for incremental compilation for Zig, the idea is that we | |
| // can break down all of type checking into a simple term rewriting system | |
| // which has one step Church-Rosser. | |
| // | |
| // Church-Rosser (i'll shorten to CR): | |
| // CR states that given our reductions move towards their normal form and eventually | |
| // reach it (thus terminating), the order of reduction is irrelevant, more importantly | |
| // we can say that if we reduce to our normal form in one step then our total reductions | |
| // have linear time complexity. | |
| // | |
| // Each term maps to some chunk of ZIR (decl or certain expressions) and the reduction | |
| // is the type checking step, for the purposes of this "granularity", the terms are | |
| // pure and their results only dependent on it's direct inputs (necessary for our CR) | |
| // | |
| // Time complexity: | |
| // At most a term is visited twice, once where it's not ready (inputs are dirty) and once | |
| // where it's ready so worse case performance is O(2n) where n is the number of possible | |
| // affected nodes from a rewrite. Of course, Big O isn't performance so to talk perf, each | |
| // node is only type checked once in that setup and that's the actually expensive piece, the | |
| // fact that we skim over them without doing must might cost some cache which is annoying but | |
| // not necessarily much actual time. | |
| // | |
| const std = @import("std"); | |
| const print = std.debug.print; | |
| const assert = std.debug.assert; | |
| const Allocator = std.mem.Allocator; | |
| const Worklist = struct { | |
| visited: std.DynamicBitSet, | |
| items: std.ArrayList(*Term), | |
| pub fn alloc(allocator: Allocator) !Worklist { | |
| return .{ | |
| .visited = try std.DynamicBitSet.initEmpty(allocator, 1024), | |
| .items = try std.ArrayList(*Term).initCapacity(allocator, 1024) | |
| }; | |
| } | |
| pub fn push(self: *Worklist, t: *Term) !void { | |
| if (self.visited.isSet(t.id)) { | |
| return; | |
| } | |
| self.visited.set(t.id); | |
| try self.items.append(t); | |
| } | |
| pub fn pop(self: *Worklist) ?*Term { | |
| return self.popWithBase(0); | |
| } | |
| // if it reaches base, it'll stop | |
| pub fn popWithBase(self: *Worklist, base: usize) ?*Term { | |
| if (self.items.items.len == base) { | |
| return null; | |
| } else { | |
| var t = self.items.items[self.items.items.len - 1]; | |
| self.items.items.len -= 1; | |
| self.visited.unset(t.id); | |
| return t; | |
| } | |
| } | |
| pub fn count(self: *Worklist) usize { | |
| return self.items.items.len; | |
| } | |
| }; | |
| // 0 is used for the magic Term in the sky first_compile | |
| var id_counter: u32 = 1; | |
| const User = struct { | |
| // we might wanna store a tag with the user, to | |
| // denote different kinds of dependencies. | |
| t: *Term, | |
| index: u32, | |
| }; | |
| const Term = struct { | |
| id: u32, | |
| // indirectly dirty inputs, once this reaches | |
| // 0 the term can be processed. | |
| dirty_ins: u16 = 0, | |
| // direct changes hold a reference to their | |
| // previous form, if there is none we can assume | |
| // the node is either new or hasn't changed. | |
| old: ?*Term = null, | |
| ins: []*Term, | |
| users: std.ArrayList(User), | |
| // insert term-specific data here i guess | |
| // ... | |
| pub fn alloc(allocator: Allocator) !*Term { | |
| return try allocWithInputs(allocator, &[_]*Term{}); | |
| } | |
| pub fn allocWithInputs(allocator: Allocator, ins: []*Term) !*Term { | |
| var t = try allocator.create(Term); | |
| t.* = Term{ | |
| // give each node a unique ID | |
| .id = id_counter, | |
| .ins = try allocator.alloc(*Term, ins.len), | |
| .users = try std.ArrayList(User).initCapacity(allocator, 4) | |
| }; | |
| id_counter += 1; | |
| // add our lovely inputs | |
| std.mem.copy(*Term, t.ins, ins); | |
| // fill user lists | |
| for (ins, 0..) |in, i| { | |
| print(" set node {}'s slot {} with {}\n", .{ t.id, i, in.id }); | |
| try in.users.append(User{ .t = t, .index = @truncate(u32, i) }); | |
| } | |
| return t; | |
| } | |
| pub fn set_in(t: *Term, in: *Term, i: usize) !void { | |
| // remove old user | |
| var old = t.ins[i]; | |
| for (old.users.items, 0..) |u, user_i| { | |
| if (u.index == i and u.t == t) { | |
| _ = old.users.swapRemove(user_i); | |
| break; | |
| } | |
| } | |
| // add new user | |
| print(" set node {}'s slot {} with {}\n", .{ t.id, i, in.id }); | |
| try in.users.append(User{ .t = t, .index = @truncate(u32, i) }); | |
| t.ins[i] = in; | |
| } | |
| // converts a term into it's "normal form" (type checked | |
| // for the new IR), this function is called once for any | |
| // node because of the one step Church-Rosser property we | |
| // go over above. | |
| // | |
| // returns true if it makes changes which affect other terms. | |
| pub fn progress(t: *Term) bool { | |
| // this isn't necessary, just making an example where | |
| // changes propagate only if they're from leaf nodes. | |
| // | |
| // an example of a realistic return is that changing a | |
| // non-comptime function body will not affect the deps. | |
| // only changing it's prototype. | |
| return t.ins.len == 0; | |
| } | |
| }; | |
| // we start with only the changed nodes on the worklist | |
| pub fn type_check(ws: *Worklist) !void { | |
| //////////////////////////////// | |
| // Pass 1: mark indirect dirty | |
| //////////////////////////////// | |
| var initial = ws.count(); | |
| // i don't wanna allocate two worklists so i'll just do the work on the | |
| // same worklist :p | |
| var n: usize = 0; | |
| for (0..initial) |i| { | |
| var root_change = ws.items.items[i]; | |
| try ws.items.append(root_change); | |
| while (ws.popWithBase(initial)) |t| { | |
| n += 1; | |
| for (t.users.items) |u| { | |
| // user is now a lil dirty | |
| if (u.t.old == null) { | |
| u.t.dirty_ins += 1; | |
| } | |
| try ws.push(u.t); | |
| } | |
| } | |
| // we're back to normal, set the visited flag on this node | |
| assert(ws.count() == initial); | |
| ws.visited.set(root_change.id); | |
| } | |
| //////////////////////////////// | |
| // Pass 2: actually type check | |
| //////////////////////////////// | |
| var delays: usize = 0; | |
| var progress: usize = 0; | |
| while (ws.pop()) |t| { | |
| print(" walk node {}?\n", .{t.id}); | |
| if (t.dirty_ins != 0) { | |
| print(" * delay node {}?\n", .{t.id}); | |
| delays += 1; | |
| continue; | |
| } | |
| print(" * progress on node {}\n", .{t.id}); | |
| // dirty -> clean | |
| var fwd = t.progress(); | |
| for (t.users.items) |u| { | |
| assert(u.t.dirty_ins > 0); | |
| u.t.dirty_ins -= 1; | |
| } | |
| if (fwd) { | |
| // push any users which are ready to process now | |
| for (t.users.items) |u| { | |
| if (u.t.dirty_ins == 0) { | |
| try ws.push(u.t); | |
| } | |
| } | |
| } | |
| progress += 1; | |
| } | |
| print("\n SUMMARY:\n", .{}); | |
| print(" {} possible reductions\n", .{ n }); | |
| print(" {} max bound\n", .{ n*2 }); | |
| print(" {} delays + {} typed => O({})\n\n\n", .{ delays, progress, delays + progress }); | |
| } | |
| pub fn main() !void { | |
| var gpa = std.heap.GeneralPurposeAllocator(.{}){}; | |
| const allocator = gpa.allocator(); | |
| var ws = try Worklist.alloc(allocator); | |
| // each time we add a node, we need to push it to | |
| // the worklist, once we've got something in the cache | |
| // we can avoid this process for some nodes. | |
| print("=== parse ===\n", .{}); | |
| // abcd | |
| // |||| | |
| // plus e | |
| // | / | |
| // jean | |
| var a = try Term.alloc(allocator); | |
| var b = try Term.alloc(allocator); | |
| var c = try Term.alloc(allocator); | |
| var d = try Term.alloc(allocator); | |
| var plus = try Term.allocWithInputs(allocator, &[_]*Term{ a, b, c, d }); | |
| var e = try Term.alloc(allocator); | |
| var jean = try Term.allocWithInputs(allocator, &[_]*Term{ plus, e }); | |
| try ws.push(a); | |
| try ws.push(b); | |
| try ws.push(c); | |
| try ws.push(d); | |
| try ws.push(plus); | |
| try ws.push(e); | |
| try ws.push(jean); | |
| print("=== type check ===\n", .{}); | |
| try type_check(&ws); | |
| print("=== parse 2 ===\n", .{}); | |
| var f = try Term.alloc(allocator); | |
| try ws.push(f); | |
| try plus.set_in(f, 1); | |
| var g = try Term.alloc(allocator); | |
| try ws.push(g); | |
| try plus.set_in(g, 3); | |
| try ws.push(plus); | |
| print("=== type check 2 ===\n", .{}); | |
| try type_check(&ws); | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment