Skip to content

Instantly share code, notes, and snippets.

@dannypsnl
Forked from neel-krishnaswami/pcomb.ml
Created August 7, 2023 15:29
Show Gist options
  • Select an option

  • Save dannypsnl/b08e5a195582ec3b26542de77245596a to your computer and use it in GitHub Desktop.

Select an option

Save dannypsnl/b08e5a195582ec3b26542de77245596a to your computer and use it in GitHub Desktop.

Revisions

  1. @neel-krishnaswami neel-krishnaswami renamed this gist Jul 21, 2023. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  2. @neel-krishnaswami neel-krishnaswami created this gist Jul 20, 2023.
    437 changes: 437 additions & 0 deletions gistfile1.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,437 @@
    module C : sig
    type t
    val empty : t
    val one : char -> t
    val union : t -> t -> t
    val inter : t -> t -> t
    val top : t
    val mem : char -> t -> bool
    val make : (char -> bool) -> t
    val equal : t -> t -> bool
    val negate : t -> t
    val is_empty : t -> bool
    val disjoint : t -> t -> bool
    val fold : (char -> 'a -> 'a) -> 'a -> t -> 'a
    val of_string : string -> t
    end = struct
    type t = bytes

    let make f =
    let open Int in
    let open Char in
    Bytes.init 32 (fun i ->
    let b0 = shift_left (Bool.to_int (f (chr (i * 8 + 0)))) 0 in
    let b1 = shift_left (Bool.to_int (f (chr (i * 8 + 1)))) 1 in
    let b2 = shift_left (Bool.to_int (f (chr (i * 8 + 2)))) 2 in
    let b3 = shift_left (Bool.to_int (f (chr (i * 8 + 3)))) 3 in
    let b4 = shift_left (Bool.to_int (f (chr (i * 8 + 4)))) 4 in
    let b5 = shift_left (Bool.to_int (f (chr (i * 8 + 5)))) 5 in
    let b6 = shift_left (Bool.to_int (f (chr (i * 8 + 6)))) 6 in
    let b7 = shift_left (Bool.to_int (f (chr (i * 8 + 7)))) 7 in
    let (||) = logor in
    Char.chr (b7 || b6 || b5 || b4 || b3 || b2 || b1 || b0))

    let mem c s =
    let b = (Char.code c) / 8 in
    let i = (Char.code c) mod 8 in
    let w = Char.code (Bytes.get s b) in
    Int.logand (Int.shift_left 1 i) w > 0

    let empty = make (fun c -> false)
    let top = make (fun c -> true)
    let one c = make (fun c' -> c = c')
    let union s1 s2 = make (fun c -> mem c s1 || mem c s2)
    let inter s1 s2 = make (fun c -> mem c s1 && mem c s2)
    let negate s = make (fun c -> not (mem c s))

    let equal s1 s2 =
    let rec loop i acc =
    if i = 32 then
    acc
    else
    loop (i+1) (acc && (Bytes.get s1 i = Bytes.get s2 i))
    in
    loop 0 true

    let is_empty s = equal s empty

    let disjoint s1 s2 = is_empty (inter s1 s2)

    let fold f init s =
    let rec loop i acc =
    if i > 255 then
    acc
    else
    let c = Char.chr i in
    if mem c s then
    loop (i+1) (f c acc)
    else
    loop (i+1) acc
    in
    loop 0 init

    let of_string str =
    let p c = String.contains str c in
    make p
    end

    module Tp : sig
    type t = { null : bool; first : C.t; follow : C.t }
    exception TypeError of string
    val char : char -> t
    val eps : t
    val seq : t -> t -> t
    val charset : C.t -> t
    val string : string -> t
    val alt : t -> t -> t
    val bot : t
    val equal : t -> t -> bool
    val fix : (t -> t) -> t
    val print : Format.formatter -> t -> unit
    end = struct
    type t = {
    null : bool;
    first : C.t;
    follow : C.t;
    }

    exception TypeError of string

    let char c = {
    null = false;
    first = C.one c;
    follow = C.empty;
    }

    let eps = {
    null = true;
    first = C.empty;
    follow = C.empty;
    }

    let seq t1 t2 =
    let separate t1 t2 =
    not t1.null
    &&
    C.disjoint t1.follow t2.first
    in
    if separate t1 t2 then
    { null = false;
    first = t1.first;
    follow = C.union t2.follow (if t2.null then t1.follow else C.empty);
    }
    else
    raise (TypeError "ambiguous sequencing")

    let string s =
    if String.length s = 0 then
    eps
    else
    char s.[0]

    let alt t1 t2 =
    let nonoverlapping t1 t2 =
    not (t1.null && t2.null)
    &&
    C.disjoint t1.first t2.first
    in
    if nonoverlapping t1 t2 then
    {
    null = t1.null || t2.null;
    first = C.union t1.first t2.first;
    follow = C.union t1.follow t2.follow;
    }
    else
    raise (TypeError "ambiguous alternation")

    let bot = {
    null = false;
    first = C.empty;
    follow = C.empty;
    }

    let charset cs =
    if C.is_empty cs then
    bot
    else
    { null = false;
    first = cs;
    follow = C.empty;
    }

    let equal t1 t2 =
    t1.null = t2.null
    && C.equal t1.first t2.first
    && C.equal t1.follow t2.follow

    let fix f =
    let rec loop t =
    let t' = f t in
    if equal t t' then
    t'
    else
    loop t'
    in
    loop bot

    let print out t =
    let p fmt = Format.fprintf out fmt in
    let print_set cs =
    C.fold (fun c () -> p "%c" c) () cs
    in
    let print_bool = function
    | true -> p "true"
    | false -> p "false"
    in
    p "{\n";
    p " null = "; print_bool t.null; p ";\n";
    p " first = C.of_string \""; print_set t.first; p "\";\n";
    p " follow = C.of_string \""; print_set t.follow; p "\";\n";
    p "}\n"
    end

    module Parser: sig
    type 'a t
    exception ParseFailure of int

    val char : char -> unit t
    val charset : C.t -> char t
    val string : string -> unit t

    val map : ('a -> 'b) -> 'a t -> 'b t
    val (let+) : 'a t -> ('a -> 'b) -> 'b t

    val seq : 'a t -> 'b t -> ('a * 'b) t
    val (and+) : 'a t -> 'b t -> ('a * 'b) t

    val eps : unit t
    val return : 'a -> 'a t
    val fail : 'a t
    val any : 'a t list -> 'a t
    val fix : ('a t -> 'a t) -> 'a t

    val parse : 'a t -> string -> int -> (int * 'a)
    end = struct
    type 'a t = { tp : Tp.t; parse : string -> int -> int * 'a }
    exception ParseFailure of int

    let char c =
    let p s i =
    if i < String.length s && s.[i] = c then
    (i+1, ())
    else
    raise (ParseFailure i)
    in
    { tp = Tp.char c; parse = p }

    let (let+) p f =
    let p' s i =
    let (i, v) = p.parse s i in
    (i, f v)
    in
    {tp = p.tp; parse = p'}

    let map f p = let+ x = p in f x

    let (and+) p1 p2 =
    let p' s i =
    let (i, a) = p1.parse s i in
    let (i, b) = p2.parse s i in
    (i, (a,b))
    in
    { tp = Tp.seq p1.tp p2.tp; parse = p' }

    let seq = (and+)

    let eps = { tp = Tp.eps; parse = fun s i -> (i, ()) }

    let return x =
    let+ () = eps in x

    let charset cs =
    let p s i =
    if i < String.length s && C.mem s.[i] cs then
    (i+1, s.[i])
    else
    raise (ParseFailure i)
    in
    {tp = Tp.charset cs; parse = p }

    let string str =
    let p s i =
    if i + String.length str < String.length s then
    let rec loop j =
    if j < String.length str then
    if str.[j] = s.[i + j] then
    loop (j+1)
    else
    raise (ParseFailure (i+j))
    else
    (i+j, ())
    in
    loop 0
    else
    raise (ParseFailure i)
    in
    {tp = Tp.string str; parse = p}

    let fail =
    { tp = Tp.bot;
    parse = fun s i -> raise (ParseFailure i) }

    let (||) p1 p2 =
    let p' s i =
    if i < String.length s then
    if C.mem s.[i] p1.tp.Tp.first then
    p1.parse s i
    else if C.mem s.[i] p2.tp.Tp.first then
    p2.parse s i
    else if p1.tp.Tp.null then
    p1.parse s i
    else if p2.tp.Tp.null then
    p2.parse s i
    else
    raise (ParseFailure i)
    else if p1.tp.Tp.null then
    p1.parse s i
    else if p2.tp.Tp.null then
    p2.parse s i
    else
    raise (ParseFailure i)
    in
    {tp = Tp.alt p1.tp p2.tp; parse = p' }

    let any ps = List.fold_left (||) fail ps


    let fix f =
    let g t = (f {fail with tp = t}).tp in
    let r = ref (fail.parse) in
    let p = f {tp = (Tp.fix g); parse = fun s i -> !r s i} in
    r := p.parse;
    p

    let parse p = p.parse
    end

    module Sexp = struct
    open Parser

    let letter = charset (C.of_string "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")

    let digit = charset (C.of_string "0123456789")
    let whitespace = charset (C.of_string " \t\n")

    let (==>) p f = let+ x = p in f x

    let (>>) p1 p2 =
    let+ x = p1
    and+ _ = p2 in
    x

    let star p =
    fix (fun r ->
    any [ eps ==> (fun () -> []);
    seq p r ==> (fun (x, xs) -> x :: xs)
    ])

    let starskip p =
    fix (fun r ->
    any [ eps ==> (fun _ -> ());
    seq p r ==> (fun _ -> ())
    ])


    let symbol =
    let+ c = letter
    and+ cs = star letter
    and+ _ = starskip whitespace
    in
    let b = Buffer.create 0 in
    List.iter (Buffer.add_char b) (c :: cs);
    Buffer.contents b

    type sexp =
    | Sym of string
    | Seq of sexp list


    let rec generate_list g (fuel : int) =
    if (fuel = 0) then
    []
    else if fuel = 1 then
    [ g fuel ]
    else
    let i = Random.int fuel in (* Divide the fuel *)
    let x = g i in
    let xs = generate_list g (fuel - i) in
    x :: xs

    let paren p =
    let+ () = char '(' >> starskip whitespace
    and+ x = p
    and+ () = char ')' >> starskip whitespace
    in
    x

    let sexp =
    fix (fun r ->
    any [ symbol ==> (fun s -> Sym s);
    paren (star r) ==> (fun xs -> Seq xs)
    ])

    end

    module Test = struct
    open Sexp

    (* This module randomly generates some huge s-expressions, and
    then tries to parse them *)


    let generate_symbol fuel =
    Char.(escaped (chr (65 + Random.int 26)))

    let rec generate_sexp fuel =
    if fuel = 0 then
    Seq []
    else if fuel = 1 then
    Sym (generate_symbol fuel)
    else
    Seq (generate_list generate_sexp fuel)

    let rec print_sexp out = function
    | Sym s -> Format.fprintf out "%s" s
    | Seq xs -> Format.fprintf out "(%a)" print_sexps xs
    and print_sexps out = function
    | [] -> ()
    | [s] -> print_sexp out s
    | x :: xs -> Format.fprintf out "%a %a" print_sexp x print_sexps xs

    let string_of_sexp sexp =
    let b = Buffer.create 0 in
    let out = Format.formatter_of_buffer b in
    let () = print_sexp out sexp in
    Buffer.contents b


    let time f x =
    let t = Sys.time () in
    let _ = f x in
    (Sys.time () -. t)

    let s1000k = string_of_sexp (generate_sexp 1000000)
    let s10M = string_of_sexp (generate_sexp 10000000)

    let test str =
    let len = String.length str in
    let t = time (fun s -> Parser.parse sexp s 0) str in
    let rate = (float_of_int len /. t) in begin
    Printf.printf "String length: %d bytes\n" len;
    Printf.printf "Parser elapsed time: %.3f sec\n" t;
    Printf.printf "Parsing rate: %.3g bytes/sec\n\n" rate;
    end

    let _ = test s1000k
    let _ = test s10M
    end