p4bl0- · January 2, 2025 09:03 · Aug 21, 2021 · Aug 21, 2021 · Aug 20, 2021 · Aug 20, 2021
diff --git a/00_readme.md b/00_readme.md
@@ -52,8 +52,10 @@ constructs (stack and register manipulation).
 
 We can also see that the approach we use will generate non-optimal code,
 because our compiler will always treat `nand` gates in the same way without
-regard to the context. And additional optimization pass would be quite easy
-to make. I could also provide it if asked to.
+regard to the context (e.g. we would manually translate "!1" into "thn" but
+our compiler will generate "tsthln"). An additional optimization pass that
+look for such patterns and replace them with manually optimized ones would
+be quite easy to make. I could also provide it if asked to.
 
 The VM is implemented in C.
 

diff --git a/00_readme.md b/00_readme.md
@@ -27,7 +27,7 @@ The bytecode consists in 6 instructions:
 - `h`: copies the value of the acc register into the tmp register
 - `n`: sets the acc register to the value of acc nand tmp
 
-This is suffisient because the nand gate is functionally complete:
+This is sufficient because the nand gate is functionally complete:
 https://en.wikipedia.org/wiki/Functional_completeness
 
 After lexing and parsing, we do not need any semantic analysis or type checking
@@ -47,7 +47,8 @@ bytecode instructions.
 
 This is were we can see actual compilation work: the paradigm shift from
 functional "high level" to imperative "low level" forces us to implement
-our language's abstractions using more primitive constructs.
+our language's abstractions (here, function call) using more primitive
+constructs (stack and register manipulation).
 
 We can also see that the approach we use will generate non-optimal code,
 because our compiler will always treat `nand` gates in the same way without

diff --git a/02_parser.mly b/02_parser.mly
@@ -3,7 +3,7 @@
 %}
 
 %token <bool> Lbool
-%token Lnot Land Lor Lxor Lopar Lcpar Leof 
+%token Lnot Land Lor Lxor Lopar Lcpar Leof
 
 %left Land Lor Lxor
 %right Lnot

diff --git a/00_readme.md b/00_readme.md
@@ -46,13 +46,13 @@ In a second pass, this (functional) expression is compiled down to (imperative)
 bytecode instructions.
 
 This is were we can see actual compilation work: the paradigm shift from
-functional to imperative forces us to implement our language's abstractions at
-a lower level.
+functional "high level" to imperative "low level" forces us to implement
+our language's abstractions using more primitive constructs.
 
-We can also see that the approach we use will generate non-optimal code, because
-our compiler will always treat `nand` gates in the same way without regard to
-the context. And additional optimization pass would be quite easy to make.
-I could also provide it if asked to.
+We can also see that the approach we use will generate non-optimal code,
+because our compiler will always treat `nand` gates in the same way without
+regard to the context. And additional optimization pass would be quite easy
+to make. I could also provide it if asked to.
 
 The VM is implemented in C.
 

diff --git a/00_readme.md b/00_readme.md
@@ -1,7 +1,7 @@
 This project is a tiny compiler for a very simple language consisting of boolean expression.
 
-The language has two constants: 1 for true and 0 for false, and 4 logic gates:
-! (not), & (and), | (or), and ^ (xor).
+The language has two constants: `1` for true and `0` for false, and 4 logic gates:
+`!` (not), `&` (and), `|` (or), and `^` (xor).
 
 It can also use parentheses to manage priorities.
 

diff --git a/00_readme.md b/00_readme.md
@@ -20,12 +20,12 @@ The virual machine has two registers (`acc` and `tmp`) and memory stack.
 
 The bytecode consists in 6 instructions:
 
-- t: sets the acc register to 1
-- f: sets the acc register to 0
-- s: pushes the value of the acc register onto the stack
-- l: pops the top value of the stack and write it to the acc register
-- h: copies the value of the acc register into the tmp register
-- n: sets the acc register to the value of acc nand tmp
+- `t`: sets the acc register to 1
+- `f`: sets the acc register to 0
+- `s`: pushes the value of the acc register onto the stack
+- `l`: pops the top value of the stack and write it to the acc register
+- `h`: copies the value of the acc register into the tmp register
+- `n`: sets the acc register to the value of acc nand tmp
 
 This is suffisient because the nand gate is functionally complete:
 https://en.wikipedia.org/wiki/Functional_completeness

diff --git a/00_readme.md b/00_readme.md
@@ -1,19 +1,25 @@
 This project is a tiny compiler for a very simple language consisting of boolean expression.
+
 The language has two constants: 1 for true and 0 for false, and 4 logic gates:
 ! (not), & (and), | (or), and ^ (xor).
+
 It can also use parentheses to manage priorities.
+
 Here is its grammar in BNF format:
 
-expr ::= "0" | "1"
-       | "!" expr
-       | expr "&" expr
-       | expr "|" expr
-       | expr "^" expr
-       | "(" expr ")"
+    expr ::= "0" | "1"
+           | "!" expr
+           | expr "&" expr
+           | expr "|" expr
+           | expr "^" expr
+           | "(" expr ")"
 
 The goal is to compile it to a virtual machine bytecode.
+
 The virual machine has two registers (`acc` and `tmp`) and memory stack.
+
 The bytecode consists in 6 instructions:
+
 - t: sets the acc register to 1
 - f: sets the acc register to 0
 - s: pushes the value of the acc register onto the stack
@@ -26,6 +32,7 @@ https://en.wikipedia.org/wiki/Functional_completeness
 
 After lexing and parsing, we do not need any semantic analysis or type checking
 because there are no names (e.g., no variables) and a single type.
+
 This is why we do not keep source location information in the parser's output.
 In a more complex language (e.g., adding support for assignment to variables),
 we would need an additional front-end pass after the parser.
@@ -34,27 +41,34 @@ This is not a lot to add, I could make it if asked to.
 The compilation strategy is to first use known formulas to translate the parsed
 boolean expression into an equivalent expression that only uses `nand` gates.
 (See https://en.wikipedia.org/wiki/NAND_logic)
+
 In a second pass, this (functional) expression is compiled down to (imperative)
 bytecode instructions.
+
 This is were we can see actual compilation work: the paradigm shift from
 functional to imperative forces us to implement our language's abstractions at
 a lower level.
+
 We can also see that the approach we use will generate non-optimal code, because
 our compiler will always treat `nand` gates in the same way without regard to
 the context. And additional optimization pass would be quite easy to make.
 I could also provide it if asked to.
 
 The VM is implemented in C.
+
 The compiler is implemented in OCaml.
+
 The lexer uses ocamllex, the parser uses menhir.
 
 Files need to be renamed without the numbers and underscore at the beginging of
 their names, which are here so that they appear in the desired order in this gist.
 
 To compile the compiler: `ocamlbuild -use-menhir main.byte`.
+
 To compile the vm: `gcc nandvm.c -o nandvm`.
 
 Then you can for example run:
-`./main.byte <<<"1" | ./nandvm`
-`./main.byte <<<"!(0 ^ 1)" | ./nandvm`
-`./main.byte <<<"0 ^ (0 | 1) & !(1 ^ 1)" | ./nandvm`
+
+- `./main.byte <<<"1" | ./nandvm`
+- `./main.byte <<<"!(0 ^ 1)" | ./nandvm`
+- `./main.byte <<<"0 ^ (0 | 1) & !(1 ^ 1)" | ./nandvm`
diff --git a/00_readme.md b/00_readme.md
@@ -0,0 +1,60 @@
+This project is a tiny compiler for a very simple language consisting of boolean expression.
+The language has two constants: 1 for true and 0 for false, and 4 logic gates:
+! (not), & (and), | (or), and ^ (xor).
+It can also use parentheses to manage priorities.
+Here is its grammar in BNF format:
+
+expr ::= "0" | "1"
+       | "!" expr
+       | expr "&" expr
+       | expr "|" expr
+       | expr "^" expr
+       | "(" expr ")"
+
+The goal is to compile it to a virtual machine bytecode.
+The virual machine has two registers (`acc` and `tmp`) and memory stack.
+The bytecode consists in 6 instructions:
+- t: sets the acc register to 1
+- f: sets the acc register to 0
+- s: pushes the value of the acc register onto the stack
+- l: pops the top value of the stack and write it to the acc register
+- h: copies the value of the acc register into the tmp register
+- n: sets the acc register to the value of acc nand tmp
+
+This is suffisient because the nand gate is functionally complete:
+https://en.wikipedia.org/wiki/Functional_completeness
+
+After lexing and parsing, we do not need any semantic analysis or type checking
+because there are no names (e.g., no variables) and a single type.
+This is why we do not keep source location information in the parser's output.
+In a more complex language (e.g., adding support for assignment to variables),
+we would need an additional front-end pass after the parser.
+This is not a lot to add, I could make it if asked to.
+
+The compilation strategy is to first use known formulas to translate the parsed
+boolean expression into an equivalent expression that only uses `nand` gates.
+(See https://en.wikipedia.org/wiki/NAND_logic)
+In a second pass, this (functional) expression is compiled down to (imperative)
+bytecode instructions.
+This is were we can see actual compilation work: the paradigm shift from
+functional to imperative forces us to implement our language's abstractions at
+a lower level.
+We can also see that the approach we use will generate non-optimal code, because
+our compiler will always treat `nand` gates in the same way without regard to
+the context. And additional optimization pass would be quite easy to make.
+I could also provide it if asked to.
+
+The VM is implemented in C.
+The compiler is implemented in OCaml.
+The lexer uses ocamllex, the parser uses menhir.
+
+Files need to be renamed without the numbers and underscore at the beginging of
+their names, which are here so that they appear in the desired order in this gist.
+
+To compile the compiler: `ocamlbuild -use-menhir main.byte`.
+To compile the vm: `gcc nandvm.c -o nandvm`.
+
+Then you can for example run:
+`./main.byte <<<"1" | ./nandvm`
+`./main.byte <<<"!(0 ^ 1)" | ./nandvm`
+`./main.byte <<<"0 ^ (0 | 1) & !(1 ^ 1)" | ./nandvm`
diff --git a/01_lexer.mll b/01_lexer.mll
@@ -0,0 +1,17 @@
+{
+  open Parser
+  exception Error of char
+}
+
+rule token = parse
+| eof             { Leof }
+| [' ' '\t' '\n'] { token lexbuf }
+| "0"             { Lbool (false) }
+| "1"             { Lbool (true) }
+| "!"             { Lnot }
+| "&"             { Land }
+| "|"             { Lor }
+| "^"             { Lxor }
+| "("             { Lopar }
+| ")"             { Lcpar }
+| _ as c          { raise (Error c) }
diff --git a/02_parser.mly b/02_parser.mly
@@ -0,0 +1,28 @@
+%{
+  open Ast.Bool
+%}
+
+%token <bool> Lbool
+%token Lnot Land Lor Lxor Lopar Lcpar Leof 
+
+%left Land Lor Lxor
+%right Lnot
+
+%start prog
+
+%type <Ast.Bool.expr> prog
+
+%%
+
+prog:
+| e = expr; Leof { e }
+;
+
+expr:
+| b = Lbool                { Bool (b) }
+| Lnot; e = expr           { Not (e) }
+| Lopar; e = expr; Lcpar   { e }
+| a = expr; Land; b = expr { And (a, b) }
+| a = expr; Lor;  b = expr { Or (a, b) }
+| a = expr; Lxor; b = expr { Xor (a, b) }
+; 
diff --git a/03_ast.ml b/03_ast.ml
@@ -0,0 +1,25 @@
+module Bool = struct
+  type expr =
+  | Bool of bool
+  | Not  of expr
+  | And  of expr * expr
+  | Or   of expr * expr
+  | Xor  of expr * expr
+end
+
+module Nand = struct
+  type expr =
+  | B of bool
+  | N of expr * expr
+end
+
+module Byte = struct
+  type instr =
+    | False
+    | True
+    | Push
+    | Pop
+    | Hold
+    | Nand
+  and prog = instr list
+end
diff --git a/04_bool.ml b/04_bool.ml
@@ -0,0 +1,21 @@
+open Ast.Bool
+open Ast.Nand
+
+(* formulas taken from https://en.wikipedia.org/wiki/NAND_logic *)
+let rec compile = function
+  | Bool b     -> B b
+  | Not e      -> let ce = compile e in N (ce, ce)
+  | And (a, b) -> let ca, cb = compile a, compile b in
+                  N (N (ca, cb), N (ca, cb))
+  | Or (a, b)  -> let ca, cb = compile a, compile b in
+                  N (N (ca, ca), N (cb, cb))
+  | Xor (a, b) -> let ca, cb = compile a, compile b in
+                  N (N (ca, N (ca, cb)), N (cb, N (ca, cb)))
+
+(* bonus: eval, useless in our compiler except to make some tests *)
+let rec eval = function
+  | Bool b     -> b
+  | Not e      -> not (eval e)
+  | And (a, b) -> (eval a) && (eval b)
+  | Or (a, b)  -> (eval a) || (eval b)
+  | Xor (a, b) -> (eval a) <> (eval b)
diff --git a/05_nand.ml b/05_nand.ml
@@ -0,0 +1,12 @@
+open Ast.Nand
+open Ast.Byte
+
+let rec compile = function
+  | B false  -> [ False ]
+  | B true   -> [ True ]
+  | N (a, b) -> (compile a) @ [ Push ] @ (compile b) @ [ Hold ; Pop ; Nand ]
+
+(* bonus: eval, useless in our compiler except to make some tests *)
+let rec eval = function
+  | B e      -> e
+  | N (a, b) -> not ((eval a) && (eval b))
diff --git a/06_byte.ml b/06_byte.ml
@@ -0,0 +1,25 @@
+open Ast.Byte
+
+let emit prog =
+  let bc = List.map (function
+               | False -> 'f'
+               | True  -> 't'
+               | Push  -> 's'
+               | Pop   -> 'l'
+               | Hold  -> 'h'
+               | Nand  -> 'n')
+             prog in
+  List.iter print_char bc
+
+(* bonus: run bytecode (functionnal style vm runtime implementation) *)
+let run code =
+  let rec exec prog acc tmp stack =
+    match prog with
+    | []         -> acc
+    | False :: p -> exec p false tmp stack
+    | True  :: p -> exec p true tmp stack
+    | Push  :: p -> exec p acc tmp (acc :: stack)
+    | Pop   :: p -> exec p (List.hd stack) tmp (List.tl stack)
+    | Hold  :: p -> exec p acc acc stack
+    | Nand  :: p -> exec p (not (acc && tmp)) tmp stack
+  in exec code false false []
diff --git a/07_main.ml b/07_main.ml
@@ -0,0 +1,17 @@
+let err msg pos =
+  Lexing.(Printf.eprintf "Error on line %d col %d: %s.\n"
+            pos.pos_lnum (pos.pos_cnum - pos.pos_bol) msg) ;
+  exit 1
+
+let () =
+  let src = Lexing.from_channel Stdlib.stdin in
+  try
+    let boolexpr = Parser.prog Lexer.token src in
+    let nandexpr = Bool.compile boolexpr in
+    let bytecode = Nand.compile nandexpr in
+    Byte.emit bytecode
+  with
+  | Lexer.Error c ->
+     err (Printf.sprintf "unrecognized char '%c'" c) (Lexing.lexeme_start_p src)
+  | Parser.Error ->
+     err "syntax error" (Lexing.lexeme_start_p src)
diff --git a/08_nandvm.c b/08_nandvm.c
@@ -0,0 +1,24 @@
+#include <stdio.h>
+
+int
+main (int argc, char *argv[])
+{
+  char acc = 0, tmp = 0; // registers
+  char mem[16384];       // memory
+  int sp = 0;            // stack pointer
+  char instr;
+
+  while ((instr = getchar()) != EOF) {
+    switch (instr) {
+    case 'f' /* False */ : acc = 0;             break;
+    case 't' /* True  */ : acc = 1;             break;
+    case 's' /* Push  */ : mem[sp++] = acc;     break; // to avoid "p" conflict, "s" as in "store"
+    case 'l' /* Pop   */ : acc = mem[--sp];     break; // to avoid "p" conflict, "l" as in "load"
+    case 'h' /* Hold  */ : tmp = acc;           break;
+    case 'n' /* Nand  */ : acc = !(acc && tmp); break;
+    }
+  }
+
+  printf("result = %d\n", acc);
+  return 0;
+}