|
|
@@ -0,0 +1,129 @@ |
|
|
# Parse a yarn.lock file using pure Nix |
|
|
# yarn.lock v1 files are basically YAML with support for having multiple keys for a single value in a map and without array support. |
|
|
# Inspired by https://github.com/yarnpkg/yarn/blob/158d96dce95313d9a00218302631cd263877d164/src/lockfile/parse.js |
|
|
|
|
|
with builtins; |
|
|
let |
|
|
# Pair every element of `list` with its zero-based position.
# Returns a list of `{ i, e }` sets: `i` is the index, `e` the element.
enumerate = list:
  let
    withIndex = idx: { i = idx; e = elemAt list idx; };
  in
    genList withIndex (length list);
|
|
# Build a token record: `type` names the kind of token, `value` is its payload.
mkToken = type: value: { type = type; value = value; };
|
|
parseLockfile = str: let |
|
|
# The tokenizer is a single alternation, written here as one regex per token
# type. Each alternative contributes one top-level capture group, so the
# position of the group that matched tells which kind of token was found.

# Line break, with an optional carriage return.
newlineRe = "(\r?\n)";

# Comment: everything after '#' up to the end of the line. The text may be
# empty, so a line consisting of a lone '#' is still a valid comment.
commentRe = "#([^\n]*)";

# Used for any kind of whitespace and also indentation in an object
indentRe = "( +)";

# JSON string syntax, which is the format used for quoted strings.
# Note that this contains an inner group for the repetition, so every group
# that follows it is offset by one.
stringRe = "(\"([^\"\\\\]|\\\\[\\\"\\\\/bfnrt]|\\\\u[0-9a-f]{4})+\")";

numberRe = "([0-9]+)";

booleanRe = "(true|false)";

colonRe = "(:)";

commaRe = "(,)";

# A symbol is a string without quotes: one leading character from a
# restricted set, followed by any number (possibly zero) of characters up to
# the next colon, space, line break or comma. Allowing zero trailing
# characters lets one-character names (e.g. the package `q`) tokenize.
symbolRe = "([a-zA-Z\\/.-][^: \n\r,]*)";

tokenizeRe = "${newlineRe}|${commentRe}|${indentRe}|${stringRe}|${numberRe}|${booleanRe}|${colonRe}|${commaRe}|${symbolRe}";
|
|
|
|
|
# Cut the input apart with the tokenizer regex (see builtins.split for the
# shape of the resulting list).
tokenize = input: split tokenizeRe input;
|
|
# Turn one element of the tokenizer output into a token record.
#
# A plain string is rejected with "Invalid token"; anything else is treated
# as the list of capture groups of one regex match. The first group that is
# not null decides the token type. Group 4 is the inner repetition group of
# the string regex and is therefore never inspected.
convert = token:
  let
    # Capture group `n` of the match; only evaluated for list tokens.
    group = elemAt token;
  in
    if isString token then abort "Invalid token ${token}"
    else if group 0 != null then mkToken "newline" null
    else if group 1 != null then mkToken "comment" (group 1)
    # The value of an indent token is the number of spaces.
    else if group 2 != null then mkToken "indent" (stringLength (group 2))
    # Quoted strings use JSON syntax, so fromJSON handles the unescaping.
    else if group 3 != null then mkToken "string" (fromJSON (group 3))
    else if group 5 != null then mkToken "number" (fromJSON (group 5))
    # The comparison must be parenthesized: function application binds tighter
    # than `==`, so without the parentheses the whole token would be compared
    # to "true" and a bare `false` would be returned instead of a token.
    else if group 6 != null then mkToken "boolean" (group 6 == "true")
    else if group 7 != null then mkToken "colon" null
    else if group 8 != null then mkToken "comma" null
    # Unquoted symbols are plain strings as far as the parser is concerned.
    else if group 9 != null then mkToken "string" (group 9)
    else abort "unreachable";
|
|
# Every token of the input, before comments and insignificant spaces are
# removed. Empty strings in the tokenizer output are discarded up front.
unprocessedTokens =
  let
    pieces = tokenize str;
    nonEmpty = filter (piece: piece != "") pieces;
  in
    map convert nonEmpty;
|
|
|
|
|
# The token stream the parser works on: comments are removed, and so is any
# run of spaces that does not directly follow a newline.
# While comments are being dropped, the one naming the lockfile version is
# checked: a comment declaring any version other than v1 aborts evaluation.
tokens =
  let
    anyVersionRe = "[[:space:]]*yarn lockfile v[0-9]+[[:space:]]*";
    supportedRe = "[[:space:]]*yarn lockfile v1[[:space:]]*";

    keep = { i, e }:
      if e.type == "comment" then
        # Check if this is the right version lockfile
        if match anyVersionRe e.value != null && match supportedRe e.value == null
        then abort "Unsupported lockfile: ${e.value}"
        else false
      else if e.type == "indent" then
        # Spaces only count as indentation directly after a newline. The
        # `i > 0` guard keeps elemAt from being asked for index -1 when the
        # input starts with spaces; such leading spaces are dropped like any
        # other spaces that do not follow a newline.
        i > 0 && (elemAt unprocessedTokens (i - 1)).type == "newline"
      else true;
  in
    map ({ i, e }: e) (filter keep (enumerate unprocessedTokens));
|
|
|
|
|
# Token at `index`, or a synthetic `{ type = "eof"; }` once `index` is at or
# past the end of the token list.
get = index:
  if index >= length tokens
  then { type = "eof"; }
  else elemAt tokens index;
|
|
|
|
|
# Collect the values of a comma-separated list of keys starting at `index`.
# The token at `index` is always taken; the list continues for as long as the
# next two tokens are a comma followed by a string.
takeKeys = index:
  let
    separator = get (index + 1);
    candidate = get (index + 2);
    hasMore = separator.type == "comma" && candidate.type == "string";
    rest = if hasMore then takeKeys (index + 2) else [ ];
  in
    [ (get index).value ] ++ rest;
|
|
|
|
|
# Parse a single object, i.e. a run of key/value lines with the same
# indentation.
#
# `start` is the index of the first token to look at and `indent` the number
# of spaces each line of this object is indented by (0 for the top level).
# Returns `value`, the attribute set built from the key/value pairs, and
# `index`, the `key` of the last item genericClosure produced, which the
# caller uses as the position to resume from.
parse = start: indent:
  let
    # genericClosure drives the iteration here, because walking the tokens
    # with plain recursion would be too slow in the Nix language. Each item's
    # `key` is a token index and the operator returns the item to visit next
    # (or nothing, to stop). A fold does not fit: nested maps have to be
    # recursed into and the tokens they consumed skipped afterwards.
    steps = genericClosure {
      startSet = [ { key = start; values = [ ]; } ];
      operator = { key, ... }:
        let
          current = get key;
          following = get (key + 1);
          stop = [ ];
          advance = [ { key = key + 1; values = [ ]; } ];

          # The bindings below are only evaluated when `current` is a string,
          # i.e. the first key of a key/value pair.
          names = takeKeys key;
          # Number of tokens taken up by the keys and the commas between them.
          width = 1 + (length names - 1) * 2;
          afterKeys = get (key + width);
          nested = parse (key + width + 1) (indent + 2);
          # Every key of a multi-key entry is given the same value.
          pairWith = value: map (name: { inherit name value; }) names;
        in
          if current.type == "eof" then stop
          else if current.type == "newline" then
            # At the top level every newline is skipped; inside a nested
            # object the next line has to carry exactly this object's
            # indentation, otherwise the object ends here.
            if indent == 0 || (following.type == "indent" && following.value == indent)
            then advance
            else stop
          else if current.type == "indent" then
            if current.value == indent then advance else stop
          # String means this is a key value pair
          else if current.type == "string" then
            # Keys followed by a colon introduce a nested object, which is
            # parsed with an indentation two spaces deeper.
            if afterKeys.type == "colon" then
              [ { key = nested.index; values = pairWith nested.value; } ]
            # The only valid values
            else if afterKeys.type == "string" || afterKeys.type == "number" || afterKeys.type == "boolean" then
              [ { key = key + width + 1; values = pairWith afterKeys.value; } ]
            else abort "Invalid token ${afterKeys.type}"
          else abort "Invalid token ${current.type}";
    };

    pairs = concatLists (map (step: step.values) steps);
    lastStep = elemAt steps (length steps - 1);
  in
    {
      value = listToAttrs pairs;
      index = lastStep.key;
    };
|
|
in |
|
|
(parse 0 0).value; |
|
|
in |
|
|
parseLockfile (readFile ./yarn.lock) |