Last active
December 19, 2023 05:27
-
-
Save RealNeGate/8077c99a2e7ad4ba3cd6d02e131de218 to your computer and use it in GitHub Desktop.
Revisions
-
RealNeGate revised this gist
Dec 19, 2023 . 1 changed file with 4 additions and 7 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -14,13 +14,10 @@ // necessary for our CR. // // Time complexity: // Once a node is evaluated it'll be done "forever" so the maximum number of // evaluation steps scales linearly with the number of nodes, really with the // number of rewritten nodes (which is whatever is affected by the changes directly // or indirectly) // const std = @import("std"); const print = std.debug.print; -
RealNeGate revised this gist
Nov 27, 2023 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -5,7 +5,7 @@ // Church-Rosser (i'll shorten to CR): // CR states that given our reductions terminate, the order of reduction is irrelevant, more // importantly we can say that if we reduce to our normal form in one step per term then our // total reductions are linear with the size of the IR. // // Term? // Each term will map to some chunk of ZIR (decl or certain expressions) that has an associated -
RealNeGate revised this gist
Nov 27, 2023 . 1 changed file with 3 additions and 4 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -3,10 +3,9 @@ // which has one step Church-Rosser. // // Church-Rosser (i'll shorten to CR): // CR states that given our reductions terminate, the order of reduction is irrelevant, more // importantly we can say that if we reduce to our normal form in one step per term then our // total reductions have linear time complexity. // // Term? // Each term will map to some chunk of ZIR (decl or certain expressions) that has an associated -
RealNeGate revised this gist
Oct 25, 2023 . 1 changed file with 2 additions and 7 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -73,18 +73,13 @@ const Worklist = struct { // 0 is used for the magic Term in the sky first_compile var id_counter: u32 = 1; // these are currently useless but they'll gain purpose soon :p const TermTag = enum { // this term merely extracts a value from a parent term proj, decl, call, literal, }; -
RealNeGate revised this gist
Oct 25, 2023 . 1 changed file with 57 additions and 38 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -73,16 +73,31 @@ const Worklist = struct { // 0 is used for the magic Term in the sky first_compile var id_counter: u32 = 1; const TermTag = enum { // this term merely extracts a value from a parent term proj, // function declarations might change bodies without changing // ther prototypes which means only the body projection is dirtied. decl, // call, // for now we'll have two types of terms, literals and addition. literal, }; const User = struct { t: *Term, index: u32, }; const Term = struct { tag: TermTag, id: u32, needs_retype: bool = false, // indirectly dirty inputs, once this reaches // 0 the term can be processed. dirty_ins: u16 = 0, @@ -97,15 +112,16 @@ const Term = struct { // insert term-specific data here i guess // ... pub fn alloc(allocator: Allocator, tag: TermTag) !*Term { return try allocWithInputs(allocator, tag, &[_]*Term{}); } pub fn allocWithInputs(allocator: Allocator, tag: TermTag, ins: []*Term) !*Term { var t = try allocator.create(Term); t.* = Term{ // give each node a unique ID .tag = tag, .id = id_counter, .ins = try allocator.alloc(*Term, ins.len), .users = try std.ArrayList(User).initCapacity(allocator, 4) @@ -141,15 +157,23 @@ const Term = struct { t.ins[i] = in; } pub fn markUsers(t: *Term, ws: *Worklist, fwd: bool) !void { for (t.users.items) |u| { print(" * mark users {}\n", .{u.t.id}); assert(u.t.dirty_ins > 0); u.t.dirty_ins -= 1; // make sure the children are marked to retype if there's any changes above, // if possible to propagate only to clean nodes which don't actually change // in which case we still walk the graph but don't spend time retyping (the // actually expensive piece) if (fwd) { u.t.needs_retype = true; } if (u.t.dirty_ins == 0) { // push any users which are ready to process now try ws.push(u.t); } } } @@ -178,6 +202,7 @@ pub fn type_check(ws: *Worklist) !void { var n: usize = 0; for (0..initial) |i| { var root_change = ws.items.items[i]; root_change.needs_retype = true; try ws.items.append(root_change); while (ws.popWithBase(initial)) |t| { @@ -212,32 +237,25 @@ pub fn type_check(ws: *Worklist) !void { continue; } if (t.needs_retype) { print(" * progress on node {}\n", .{t.id}); // dirty -> clean var p = t.progress(); try t.markUsers(ws, p); // reset for the next time we might try to run type_check t.needs_retype = false; progress += 1; } else { print(" * cleaned but not retyped\n", .{}); } } print("\n SUMMARY:\n", .{}); print(" {:3} delays + {:3} typed => O({:3})\n", .{ delays, progress, delays + progress }); print(" {:3} max + {:3} max => O({:3})\n\n\n", .{ initial - 1, n, n + initial - 1 }); } pub fn main() !void { @@ -256,13 +274,13 @@ pub fn main() !void { // plus e // | / // jean var a = try Term.alloc(allocator, .literal); var b = try Term.alloc(allocator, .literal); var c = try Term.alloc(allocator, .literal); var d = try Term.alloc(allocator, .literal); var plus = try Term.allocWithInputs(allocator, .decl, &[_]*Term{ a, b, c, d }); var e = try Term.alloc(allocator, .literal); var jean = try Term.allocWithInputs(allocator, .decl, &[_]*Term{ plus, e }); try ws.push(a); try ws.push(b); @@ -277,14 +295,15 @@ pub fn main() !void { print("=== parse 2 ===\n", .{}); var f = try Term.alloc(allocator, .literal); try ws.push(f); try plus.set_in(f, 1); var g = try Term.alloc(allocator, .literal); try ws.push(g); try plus.set_in(g, 3); try ws.push(plus); print("=== type check 2 ===\n", .{}); try type_check(&ws); } -
RealNeGate revised this gist
Oct 23, 2023 . 1 changed file with 3 additions and 6 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -143,6 +143,8 @@ const Term = struct { pub fn undirtyUsers(t: *Term) void { for (t.users.items) |u| { print(" * implicit clean node {}\n", .{u.t.id}); assert(u.t.dirty_ins > 0); u.t.dirty_ins -= 1; @@ -152,11 +154,6 @@ const Term = struct { } } // returns true if it makes changes which affect other terms. pub fn progress(t: *Term) bool { // this isn't necessary, just making an example where @@ -229,7 +226,7 @@ pub fn type_check(ws: *Worklist) !void { } } } else { // don't push the nodes, just propagate the cleanliness // the last dirty. t.undirtyUsers(); } -
RealNeGate revised this gist
Oct 23, 2023 . 1 changed file with 29 additions and 17 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -16,11 +16,12 @@ // // Time complexity: // At most a term is visited twice, once where it's not ready (inputs are dirty) and once // where it's ready, only some nodes can make their way onto the worklist without being ready // which are the initial worklist items, so worse case performance is O(n+k) where n is the // number of possible affected nodes from a rewrite and k is the initial worklist size. Of course, // Big O isn't performance so to talk perf, each node is only type checked once in that setup and // that's the actually expensive piece, the fact that we skim over them without doing must might // cost some cache which is annoying but not necessarily much actual time. // const std = @import("std"); const print = std.debug.print; @@ -140,6 +141,17 @@ const Term = struct { t.ins[i] = in; } pub fn undirtyUsers(t: *Term) void { for (t.users.items) |u| { assert(u.t.dirty_ins > 0); u.t.dirty_ins -= 1; if (u.t.dirty_ins == 0) { u.t.undirtyUsers(); } } } // converts a term into it's "normal form" (type checked // for the new IR), this function is called once for any // node because of the one step Church-Rosser property we @@ -198,36 +210,36 @@ pub fn type_check(ws: *Worklist) !void { print(" walk node {}?\n", .{t.id}); if (t.dirty_ins != 0) { print(" * delay node {}?\n", .{t.id}); delays += 1; continue; } print(" * progress on node {}\n", .{t.id}); // dirty -> clean if (t.progress()) { // push any users which are ready to process now for (t.users.items) |u| { assert(u.t.dirty_ins > 0); u.t.dirty_ins -= 1; if (u.t.dirty_ins == 0) { try ws.push(u.t); } } } else { // don't push the nodes, just propagate the cleanly // the last dirty. t.undirtyUsers(); } progress += 1; } print("\n SUMMARY:\n", .{}); print(" {} possible reductions\n", .{ n }); print(" {} max bound aka O(n+k-1)\n", .{ n+initial-1 }); print(" {} delays + {} typed => O({})\n\n\n", .{ delays, progress, delays + progress }); } @@ -278,4 +290,4 @@ pub fn main() !void { print("=== type check 2 ===\n", .{}); try type_check(&ws); } -
RealNeGate revised this gist
Oct 22, 2023 . 1 changed file with 5 additions and 3 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -8,9 +8,11 @@ // we can say that if we reduce to our normal form in one step then our total reductions // have linear time complexity. // // Term? // Each term will map to some chunk of ZIR (decl or certain expressions) that has an associated // hash (meaning it can be dirtied) and the "reduction" is the type checking step, for the purposes // of this IR the terms are pure and their results only dependent on it's direct inputs. This is // necessary for our CR. // // Time complexity: // At most a term is visited twice, once where it's not ready (inputs are dirty) and once -
RealNeGate revised this gist
Oct 22, 2023 . 1 changed file with 5 additions and 4 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -2,10 +2,11 @@ // can break down all of type checking into a simple term rewriting system // which has one step Church-Rosser. // // Church-Rosser (i'll shorten to CR): // CR states that given our reductions move towards their normal form and eventually // reach it (thus terminating), the order of reduction is irrelevant, more importantly // we can say that if we reduce to our normal form in one step then our total reductions // have linear time complexity. // // Each term maps to some chunk of ZIR (decl or certain expressions) and the reduction // is the type checking step, for the purposes of this "granularity", the terms are -
RealNeGate revised this gist
Oct 22, 2023 . 1 changed file with 9 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -10,6 +10,15 @@ // Each term maps to some chunk of ZIR (decl or certain expressions) and the reduction // is the type checking step, for the purposes of this "granularity", the terms are // pure and their results only dependent on it's direct inputs (necessary for our CR) // // Time complexity: // At most a term is visited twice, once where it's not ready (inputs are dirty) and once // where it's ready so worse case performance is O(2n) where n is the number of possible // affected nodes from a rewrite. Of course, Big O isn't performance so to talk perf, each // node is only type checked once in that setup and that's the actually expensive piece, the // fact that we skim over them without doing must might cost some cache which is annoying but // not necessarily much actual time. // const std = @import("std"); const print = std.debug.print; const assert = std.debug.assert; -
RealNeGate revised this gist
Oct 22, 2023 . 1 changed file with 2 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -191,10 +191,11 @@ pub fn type_check(ws: *Worklist) !void { continue; } print(" * progress on node {}\n", .{t.id}); // dirty -> clean var fwd = t.progress(); for (t.users.items) |u| { assert(u.t.dirty_ins > 0); u.t.dirty_ins -= 1; -
RealNeGate revised this gist
Oct 22, 2023 . 1 changed file with 7 additions and 4 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -194,12 +194,15 @@ pub fn type_check(ws: *Worklist) !void { // dirty -> clean print(" * progress on node {}\n", .{t.id}); var fwd = t.progress(); for (t.users.items) |u| { assert(u.t.dirty_ins > 0); u.t.dirty_ins -= 1; } if (fwd) { // push any users which are ready to process now for (t.users.items) |u| { if (u.t.dirty_ins == 0) { try ws.push(u.t); } -
RealNeGate created this gist
Oct 22, 2023 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,265 @@ // This is a PoC for incremental compilation for Zig, the idea is that we // can break down all of type checking into a simple term rewriting system // which has one step Church-Rosser. // // Church-Rosser (i'll shorten to CR) itself states that given our reductions move // towards their normal form and eventually reach it (thus terminating), the order // is irrelevant, more importantly we can say that if we reduce to our normal form // in one step then our total reductions have linear time complexity. // // Each term maps to some chunk of ZIR (decl or certain expressions) and the reduction // is the type checking step, for the purposes of this "granularity", the terms are // pure and their results only dependent on it's direct inputs (necessary for our CR) const std = @import("std"); const print = std.debug.print; const assert = std.debug.assert; const Allocator = std.mem.Allocator; const Worklist = struct { visited: std.DynamicBitSet, items: std.ArrayList(*Term), pub fn alloc(allocator: Allocator) !Worklist { return .{ .visited = try std.DynamicBitSet.initEmpty(allocator, 1024), .items = try std.ArrayList(*Term).initCapacity(allocator, 1024) }; } pub fn push(self: *Worklist, t: *Term) !void { if (self.visited.isSet(t.id)) { return; } self.visited.set(t.id); try self.items.append(t); } pub fn pop(self: *Worklist) ?*Term { return self.popWithBase(0); } // if it reaches base, it'll stop pub fn popWithBase(self: *Worklist, base: usize) ?*Term { if (self.items.items.len == base) { return null; } else { var t = self.items.items[self.items.items.len - 1]; self.items.items.len -= 1; self.visited.unset(t.id); return t; } } pub fn count(self: *Worklist) usize { return self.items.items.len; } }; // 0 is used for the magic Term in the sky first_compile var id_counter: u32 = 1; const User = struct { // we might wanna store a tag with the user, to // denote different kinds of dependencies. t: *Term, index: u32, }; const Term = struct { id: u32, // indirectly dirty inputs, once this reaches // 0 the term can be processed. dirty_ins: u16 = 0, // direct changes hold a reference to their // previous form, if there is none we can assume // the node is either new or hasn't changed. old: ?*Term = null, ins: []*Term, users: std.ArrayList(User), // insert term-specific data here i guess // ... pub fn alloc(allocator: Allocator) !*Term { return try allocWithInputs(allocator, &[_]*Term{}); } pub fn allocWithInputs(allocator: Allocator, ins: []*Term) !*Term { var t = try allocator.create(Term); t.* = Term{ // give each node a unique ID .id = id_counter, .ins = try allocator.alloc(*Term, ins.len), .users = try std.ArrayList(User).initCapacity(allocator, 4) }; id_counter += 1; // add our lovely inputs std.mem.copy(*Term, t.ins, ins); // fill user lists for (ins, 0..) |in, i| { print(" set node {}'s slot {} with {}\n", .{ t.id, i, in.id }); try in.users.append(User{ .t = t, .index = @truncate(u32, i) }); } return t; } pub fn set_in(t: *Term, in: *Term, i: usize) !void { // remove old user var old = t.ins[i]; for (old.users.items, 0..) |u, user_i| { if (u.index == i and u.t == t) { _ = old.users.swapRemove(user_i); break; } } // add new user print(" set node {}'s slot {} with {}\n", .{ t.id, i, in.id }); try in.users.append(User{ .t = t, .index = @truncate(u32, i) }); t.ins[i] = in; } // converts a term into it's "normal form" (type checked // for the new IR), this function is called once for any // node because of the one step Church-Rosser property we // go over above. // // returns true if it makes changes which affect other terms. pub fn progress(t: *Term) bool { // this isn't necessary, just making an example where // changes propagate only if they're from leaf nodes. // // an example of a realistic return is that changing a // non-comptime function body will not affect the deps. // only changing it's prototype. return t.ins.len == 0; } }; // we start with only the changed nodes on the worklist pub fn type_check(ws: *Worklist) !void { //////////////////////////////// // Pass 1: mark indirect dirty //////////////////////////////// var initial = ws.count(); // i don't wanna allocate two worklists so i'll just do the work on the // same worklist :p var n: usize = 0; for (0..initial) |i| { var root_change = ws.items.items[i]; try ws.items.append(root_change); while (ws.popWithBase(initial)) |t| { n += 1; for (t.users.items) |u| { // user is now a lil dirty if (u.t.old == null) { u.t.dirty_ins += 1; } try ws.push(u.t); } } // we're back to normal, set the visited flag on this node assert(ws.count() == initial); ws.visited.set(root_change.id); } //////////////////////////////// // Pass 2: actually type check //////////////////////////////// var delays: usize = 0; var progress: usize = 0; while (ws.pop()) |t| { print(" walk node {}?\n", .{t.id}); if (t.dirty_ins != 0) { print(" * delay node {}?\n", .{t.id}); delays += 1; continue; } // dirty -> clean print(" * progress on node {}\n", .{t.id}); if (t.progress()) { // push any users which are ready to process now for (t.users.items) |u| { assert(u.t.dirty_ins > 0); u.t.dirty_ins -= 1; if (u.t.dirty_ins == 0) { try ws.push(u.t); } } } progress += 1; } print("\n SUMMARY:\n", .{}); print(" {} possible reductions\n", .{ n }); print(" {} max bound\n", .{ n*2 }); print(" {} delays + {} typed => O({})\n\n\n", .{ delays, progress, delays + progress }); } pub fn main() !void { var gpa = std.heap.GeneralPurposeAllocator(.{}){}; const allocator = gpa.allocator(); var ws = try Worklist.alloc(allocator); // each time we add a node, we need to push it to // the worklist, once we've got something in the cache // we can avoid this process for some nodes. print("=== parse ===\n", .{}); // abcd // |||| // plus e // | / // jean var a = try Term.alloc(allocator); var b = try Term.alloc(allocator); var c = try Term.alloc(allocator); var d = try Term.alloc(allocator); var plus = try Term.allocWithInputs(allocator, &[_]*Term{ a, b, c, d }); var e = try Term.alloc(allocator); var jean = try Term.allocWithInputs(allocator, &[_]*Term{ plus, e }); try ws.push(a); try ws.push(b); try ws.push(c); try ws.push(d); try ws.push(plus); try ws.push(e); try ws.push(jean); print("=== type check ===\n", .{}); try type_check(&ws); print("=== parse 2 ===\n", .{}); var f = try Term.alloc(allocator); try ws.push(f); try plus.set_in(f, 1); var g = try Term.alloc(allocator); try ws.push(g); try plus.set_in(g, 3); try ws.push(plus); print("=== type check 2 ===\n", .{}); try type_check(&ws); }