Skip to content

Instantly share code, notes, and snippets.

@olebedev
Forked from bouk/parse-yarn-lock.nix
Created May 22, 2023 05:57
Show Gist options
  • Save olebedev/63319bc7831a596ec4b39b93c4bd6a5d to your computer and use it in GitHub Desktop.
Save olebedev/63319bc7831a596ec4b39b93c4bd6a5d to your computer and use it in GitHub Desktop.

Revisions

  1. @bouk bouk revised this gist Dec 15, 2022. No changes.
  2. @bouk bouk created this gist Dec 15, 2022.
    129 changes: 129 additions & 0 deletions parse-yarn-lock.nix
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,129 @@
    # Parse a yarn.lock file using pure Nix
    # yarn.lock v1 files are basically YAML with support for having multiple keys for a single value in a map and without array support.
    # Inspired by https://github.com/yarnpkg/yarn/blob/158d96dce95313d9a00218302631cd263877d164/src/lockfile/parse.js

    with builtins;
    let
    # Pair every element of a list with its position.
    # Returns a list of `{ i, e }` sets, where `i` is the zero-based index and
    # `e` is the element found at that index.
    enumerate = list:
      let
        count = length list;
        entryAt = idx: { i = idx; e = elemAt list idx; };
      in
      genList entryAt count;
    # Build a token record: `type` is the token kind, `value` its payload
    # (callers pass null when the kind carries no payload).
    mkToken = type: value: {
      type = type;
      value = value;
    };
    parseLockfile = str: let
    # A regex that tokenizes a yarn lockfile.
    # It is split up into one sub-expression per token type; each sub-expression
    # contributes one capture group (stringRe contributes two), and the position
    # of the non-null group tells which token type matched.
    newlineRe = "(\r?\n)";
    # A comment runs to the end of the line. `*` rather than `+` so that a bare
    # "#" with nothing after it is still recognised as a comment.
    commentRe = "#([^\n]*)";
    # Used for any kind of whitespace and also indentation in an object
    indentRe = "( +)";
    # Note that this contains a group for repetition, so the next group is offset.
    # This is a regex that matches JSON strings, which is the format used.
    # `*` rather than `+` so that the empty string "" is a valid token.
    stringRe = "(\"([^\"\\\\]|\\\\[\\\"\\\\/bfnrt]|\\\\u[0-9a-f]{4})*\")";
    numberRe = "([0-9]+)";
    booleanRe = "(true|false)";
    colonRe = "(:)";
    commaRe = "(,)";
    # A symbol is a string without quotes.
    # `*` rather than `+` so that one-character symbols (for example a
    # dependency called `q`) are tokenized instead of being rejected.
    symbolRe = "([a-zA-Z\\/.-][^: \n\r,]*)";
    tokenizeRe = "${newlineRe}|${commentRe}|${indentRe}|${stringRe}|${numberRe}|${booleanRe}|${colonRe}|${commaRe}|${symbolRe}";

    # Split the input on every token match. The result alternates between
    # strings (text the regex did not match) and lists of capture groups.
    tokenize = input: split tokenizeRe input;
    # Turn one element of the `split` result into a token.
    #
    # A plain string is text that no token regex matched, which is a syntax
    # error. Otherwise the element is the list of capture groups of tokenizeRe,
    # in which the non-null entry identifies the token type:
    #   0 newline, 1 comment text, 2 indent, 3 quoted string
    #   (4 is the inner repetition group of stringRe and is ignored),
    #   5 number, 6 boolean, 7 colon, 8 comma, 9 unquoted symbol.
    convert = token:
      let
        group = elemAt token;
      in
      if isString token then abort "Invalid token ${token}"
      else if (group 0) != null then
        mkToken "newline" null
      else if (group 1) != null then
        mkToken "comment" (group 1)
      else if (group 2) != null then
        # The payload of an indent token is its width in spaces
        mkToken "indent" (stringLength (group 2))
      else if (group 3) != null then
        # Quoted strings use JSON syntax, so let fromJSON handle the escapes
        mkToken "string" (fromJSON (group 3))
      else if (group 5) != null then
        mkToken "number" (fromJSON (group 5))
      else if (group 6) != null then
        # The comparison must be parenthesised: function application binds
        # tighter than `==`, so without the parentheses the whole token would
        # be compared to "true" and the result would be the bare value `false`.
        mkToken "boolean" ((group 6) == "true")
      else if (group 7) != null then
        mkToken "colon" null
      else if (group 8) != null then
        mkToken "comma" null
      else if (group 9) != null then
        # Unquoted symbols are treated exactly like strings from here on
        mkToken "string" (group 9)
      else abort "unreachable";
    # Tokenize the whole input, discard the empty strings found in the split
    # result, and convert every remaining piece into a token.
    unprocessedTokens =
      let
        pieces = tokenize str;
        nonEmpty = filter (piece: piece != "") pieces;
      in
      map convert nonEmpty;

    # The token stream the parser works on: comments are dropped, and so is any
    # run of spaces that is not directly preceded by a newline (only leading
    # whitespace is significant).
    tokens =
      let
        anyVersionRe = "[[:space:]]*yarn lockfile v[0-9]+[[:space:]]*";
        supportedVersionRe = "[[:space:]]*yarn lockfile v1[[:space:]]*";
        keep = { i, e }:
          if e.type == "comment" then
            # A comment is never kept, but a version header announcing
            # anything other than v1 aborts the evaluation.
            if (match anyVersionRe e.value) != null && (match supportedVersionRe e.value) == null
            then abort "Unsupported lockfile: ${e.value}"
            else false
          else if e.type != "indent" then
            true
          else
            # Spaces only count as indentation right after a newline
            (elemAt unprocessedTokens (i - 1)).type == "newline";
      in
      map (entry: entry.e) (filter keep (enumerate unprocessedTokens));

    # Token at `index`, or a synthetic `eof` token once the index is at or past
    # the end of the token list.
    get = index:
      if index >= length tokens
      then { type = "eof"; }
      else elemAt tokens index;

    # Collect the key at `index` plus every further key that is attached to it
    # through a `comma string` pair, returning the key values as a list.
    takeKeys = index:
      let
        separator = get (index + 1);
        following = get (index + 2);
        hasMore = separator.type == "comma" && following.type == "string";
        rest = if hasMore then takeKeys (index + 2) else [ ];
      in
      [ (get index).value ] ++ rest;

    # Consume the tokens of a single object.
    # `start` is the index of the first token to look at and `indent` is the
    # indentation width of the object's entries.
    # Returns `value`, the parsed attribute set, and `index`, the position of
    # the last token that was visited.
    parse = start: indent:
      let
        # A node of the closure: `key` is a token index, `values` holds the
        # name/value pairs produced on the way to that index.
        node = key: values: [ { inherit key values; } ];

        # Decide where to go from the token at `key`. Returning an empty list
        # ends the walk.
        step = { key, ... }:
          let
            current = get key;
            kind = current.type;
            lookahead = get (key + 1);
            stop = [ ];
            proceed = node (key + 1) [ ];
          in
          if kind == "eof" then stop
          else if kind == "newline" then
            # At the top level every line belongs to the object; deeper down
            # the next line must be indented by exactly `indent`.
            if indent == 0 || (lookahead.type == "indent" && lookahead.value == indent)
            then proceed
            else stop
          else if kind == "indent" then
            if current.value == indent then proceed else stop
          # String means this is a key value pair
          else if kind == "string" then
            let
              names = takeKeys key;
              # One token for the first key, two (comma + key) for each extra one
              width = 2 * (length names) - 1;
              after = get (key + width);
              # Give every key of the entry the same value
              bind = value: map (name: { inherit name value; }) names;
            in
            # If the key is followed by a colon then this is a nested object
            if after.type == "colon" then
              let
                nested = parse (key + width + 1) (indent + 2);
              in
              node nested.index (bind nested.value)
            # The only valid values
            else if elem after.type [ "string" "number" "boolean" ] then
              node (key + width + 1) (bind after.value)
            else abort "Invalid token ${after.type}"
          else abort "Invalid token ${kind}";

        # genericClosure walks the tokens without deep recursion; plain
        # recursion over every token would be too slow in Nix. A fold does not
        # fit either, because nested maps are parsed recursively and the tokens
        # they consumed have to be skipped.
        visited = genericClosure {
          startSet = node start [ ];
          operator = step;
        };
        pairs = concatLists (map (entry: entry.values) visited);
      in
      {
        value = listToAttrs pairs;
        index = (elemAt visited ((length visited) - 1)).key;
      };
    in
    (parse 0 0).value;
    in
    parseLockfile (readFile ./yarn.lock)