Skip to content

Instantly share code, notes, and snippets.

@blueset
Last active April 17, 2022 04:48
Show Gist options
  • Save blueset/78cc54d6da052c74ff105ff80bde025d to your computer and use it in GitHub Desktop.
Save blueset/78cc54d6da052c74ff105ff80bde025d to your computer and use it in GitHub Desktop.

Revisions

  1. blueset revised this gist Apr 17, 2022. 1 changed file with 5 additions and 5 deletions.
    10 changes: 5 additions & 5 deletions jsonparser.py
    Original file line number Diff line number Diff line change
    @@ -12,7 +12,7 @@

    whitespace_re = re.compile("^[ \t\n\r]+")
    float_re = re.compile(r"-?\d+(?:\.\d+)?(?:[Ee][+-]?\d+)?")
    single_char_escape = {"\\\\": "\\", "\\/": "/", "\\\"": "\"", "\\b": "\b", "\\f": "\f", "\\n": "\n", "\\r": "\r", "\\t": "\t"}
    single_char_escape = {"\\\\": "\\", "\\/": "/", '\\"': '"', "\\b": "\b", "\\f": "\f", "\\n": "\n", "\\r": "\r", "\\t": "\t"}
    plain_str_content_re = re.compile(r"([^\\\"]|\n\r\t)+")

    def leading_whitespaces(data: str) -> int:
    @@ -22,7 +22,7 @@ def leading_whitespaces(data: str) -> int:
    return len(match[0])

    def parse_string(data: str) -> Tuple[str, int]:
    assert data[0] == "\""
    assert data[0] == '"'
    result = ""
    ptr = 1
    while ptr < len(data):
    @@ -39,7 +39,7 @@ def parse_string(data: str) -> Tuple[str, int]:
    result += single_char_escape[i]
    ptr += 2
    break
    elif data[ptr] == "\"":
    elif data[ptr] == '"':
    # End of string
    ptr += 1
    break
    @@ -120,7 +120,7 @@ def parse_json(data: str) -> Tuple[Any, int]:
    elif data[ptr] == "[":
    val, proc_chr = parse_array(data[ptr:])
    return val, ptr + proc_chr
    elif data[ptr] == "\"":
    elif data[ptr] == '"':
    val, proc_chr = parse_string(data[ptr:])
    return val, ptr + proc_chr
    elif data[ptr:].startswith("true"):
    @@ -133,4 +133,4 @@ def parse_json(data: str) -> Tuple[Any, int]:
    val, proc_chr = pares_number(data[ptr:])
    return val, ptr + proc_chr
    else:
    raise ValueError(f"Unexpected character: {data[ptr:]}")
    raise ValueError(f"Unexpected character: {data[ptr:]}")
  2. blueset revised this gist Apr 17, 2022. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions jsonparser.py
    Original file line number Diff line number Diff line change
    @@ -6,8 +6,8 @@
    Number parsing is handled by Python.
    Usage:
    data, _ = parse_json('{"key": ["value", -1e20, true, false, null]}')
    assert data == {"key": ["value", -1e20, True, False, None]}
    >>> data, _ = parse_json('{"key": ["value", -1e20, true, false, null]}')
    >>> assert data == {"key": ["value", -1e20, True, False, None]}
    """

    whitespace_re = re.compile("^[ \t\n\r]+")
  3. blueset created this gist Apr 17, 2022.
    136 changes: 136 additions & 0 deletions jsonparser.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,136 @@
    import re
    from typing import Any, Tuple, Union

    """A very rough JSON parser.
    Implements the JSON grammar described at https://www.json.org/json-en.html
    Numeric literals are converted by Python's built-in int() and float().
    Usage:
    data, _ = parse_json('{"key": ["value", -1e20, true, false, null]}')
    assert data == {"key": ["value", -1e20, True, False, None]}
    """

    # Run of JSON whitespace (space, tab, LF, CR) anchored at the start of the input.
    whitespace_re = re.compile("^[ \t\n\r]+")
    # Number literal: optional minus, integer digits, optional fraction and exponent.
    float_re = re.compile(r"-?\d+(?:\.\d+)?(?:[Ee][+-]?\d+)?")
    # Two-character escape sequence (backslash included) -> the character it denotes.
    single_char_escape = {
        "\\\\": "\\",
        "\\/": "/",
        "\\\"": "\"",
        "\\b": "\b",
        "\\f": "\f",
        "\\n": "\n",
        "\\r": "\r",
        "\\t": "\t",
    }
    # Longest run of characters that are neither a backslash nor a double quote.
    # A plain character class replaces the former per-character capturing group,
    # whose extra alternative could never match anything the class did not.
    plain_str_content_re = re.compile(r'[^\\"]+')

    def leading_whitespaces(data: str) -> int:
        """Return how many characters at the start of *data* match ``whitespace_re``.

        The result is 0 when *data* does not begin with such a run.
        """
        prefix = whitespace_re.match(data)
        return len(prefix[0]) if prefix else 0

    _HEX_DIGITS = frozenset("0123456789abcdefABCDEF")


    def _read_unicode_escape(data: str, ptr: int) -> int:
        """Return the 16-bit value of the backslash-u escape starting at data[ptr]."""
        digits = data[ptr + 2:ptr + 6]
        # int() on its own would also accept signs, whitespace and underscores.
        if len(digits) != 4 or not _HEX_DIGITS.issuperset(digits):
            raise ValueError(f"Invalid unicode escape: {data[ptr:ptr + 6]}")
        return int(digits, 16)


    def parse_string(data: str) -> Tuple[str, int]:
        """Parse the JSON string literal at the start of *data*.

        Args:
            data: Text whose first character is the opening double quote.

        Returns:
            The decoded string and the number of characters consumed,
            counting both the opening and the closing quote.

        Raises:
            ValueError: If *data* does not start with a double quote, the
                closing quote is missing, or an escape sequence is invalid
                or truncated.
        """
        if not data.startswith('"'):
            raise ValueError(f"Expected string: {data}")
        # Collect fragments and join once instead of repeated concatenation.
        pieces = []
        ptr = 1
        while ptr < len(data):
            char = data[ptr]
            if char == '"':
                # End of string
                return "".join(pieces), ptr + 1
            if char != "\\":
                # Plain run: cannot fail, the current character is neither
                # a quote nor a backslash.
                run = plain_str_content_re.match(data, ptr)
                pieces.append(run[0])
                ptr = run.end()
            elif data.startswith("\\u", ptr):
                # Unicode escape
                code = _read_unicode_escape(data, ptr)
                ptr += 6
                if 0xD800 <= code <= 0xDBFF and data.startswith("\\u", ptr):
                    # A high surrogate followed by a low surrogate encodes a
                    # single code point outside the Basic Multilingual Plane.
                    low = _read_unicode_escape(data, ptr)
                    if 0xDC00 <= low <= 0xDFFF:
                        code = 0x10000 + ((code - 0xD800) << 10) + (low - 0xDC00)
                        ptr += 6
                pieces.append(chr(code))
            else:
                # Single character escape; an unknown one must be rejected,
                # otherwise ptr would never advance.
                escape = data[ptr:ptr + 2]
                if escape not in single_char_escape:
                    raise ValueError(f"Invalid escape sequence: {escape}")
                pieces.append(single_char_escape[escape])
                ptr += 2
        raise ValueError(f"Unterminated string: {data}")


    def pares_number(s: str) -> Tuple[Union[float, int], int]:
        """Parse the number literal at the start of *s*.

        The function name is a misspelling of "parse_number"; it is kept
        because parse_json calls it under this name.

        Args:
            s: Text expected to begin with a literal matched by ``float_re``.

        Returns:
            The value and the number of characters consumed. The value is an
            int when the literal has neither a fraction nor an exponent, and
            a float otherwise.

        Raises:
            ValueError: If *s* does not start with a number literal.
        """
        match = float_re.match(s)
        if match is None:
            # Subscripting None here used to surface as a TypeError.
            raise ValueError(f"Invalid number: {s}")
        literal = match[0]
        if any(marker in literal for marker in ".eE"):
            return float(literal), len(literal)
        return int(literal), len(literal)


    def parse_object(data: str) -> Tuple[dict, int]:
        """Parse the JSON object at the start of *data*.

        A comma directly before the closing brace is tolerated. When a key
        occurs more than once, the last value wins.

        Args:
            data: Text whose first character is the opening brace.

        Returns:
            The parsed dict and the number of characters consumed, counting
            both braces.

        Raises:
            ValueError: If *data* does not start with an opening brace, the
                input ends before the closing brace, or a key, colon, value
                or separator is missing.
        """
        if not data.startswith("{"):
            raise ValueError(f"Expected object: {data}")
        result = {}
        ptr = 1
        while True:
            ptr += leading_whitespaces(data[ptr:])
            # One-character slices yield "" at end of input instead of
            # raising IndexError.
            head = data[ptr:ptr + 1]
            if head == "}":
                # Empty object, or a trailing comma before the brace.
                return result, ptr + 1
            if not head:
                raise ValueError("Unterminated object")
            if head != '"':
                raise ValueError(f"Expected object key: {data[ptr:]}")

            key, proc_len = parse_string(data[ptr:])
            ptr += proc_len
            ptr += leading_whitespaces(data[ptr:])
            if data[ptr:ptr + 1] != ":":
                raise ValueError(f"Expected ':': {data[ptr:]}")
            ptr += 1

            ptr += leading_whitespaces(data[ptr:])
            if ptr >= len(data):
                # parse_json returns None for blank input, which would be
                # indistinguishable from a literal null value.
                raise ValueError("Unterminated object: missing value")
            value, proc_len = parse_json(data[ptr:])
            ptr += proc_len
            result[key] = value

            ptr += leading_whitespaces(data[ptr:])
            closer = data[ptr:ptr + 1]
            if closer == "}":
                return result, ptr + 1
            if not closer:
                raise ValueError("Unterminated object")
            if closer != ",":
                raise ValueError(f"Expected ',' or '}}': {data[ptr:]}")
            ptr += 1


    def parse_array(data: str) -> Tuple[list, int]:
        """Parse the JSON array at the start of *data*.

        A comma directly before the closing bracket is tolerated.

        Args:
            data: Text whose first character is the opening bracket.

        Returns:
            The parsed list and the number of characters consumed, counting
            both brackets.

        Raises:
            ValueError: If *data* does not start with an opening bracket, the
                input ends before the closing bracket, or two elements are
                not separated by a comma.
        """
        if not data.startswith("["):
            raise ValueError(f"Expected array: {data}")
        result = []
        ptr = 1
        while True:
            ptr += leading_whitespaces(data[ptr:])
            if ptr >= len(data):
                # Without this check parse_json would report the missing
                # element as None.
                raise ValueError("Unterminated array")
            if data[ptr] == "]":
                # Empty array, or a trailing comma before the bracket.
                return result, ptr + 1

            value, proc_len = parse_json(data[ptr:])
            ptr += proc_len
            result.append(value)

            ptr += leading_whitespaces(data[ptr:])
            # A one-character slice yields "" at end of input instead of
            # raising IndexError.
            closer = data[ptr:ptr + 1]
            if closer == "]":
                return result, ptr + 1
            if not closer:
                raise ValueError("Unterminated array")
            if closer != ",":
                raise ValueError(f"Expected ',' or ']': {data[ptr:]}")
            ptr += 1


    def parse_json(data: str) -> Tuple[Any, int]:
        """Parse the first JSON value found in *data*.

        Leading whitespace is skipped. Anything after the value is left
        unread; the returned count tells the caller where parsing stopped.

        Args:
            data: JSON text.

        Returns:
            The parsed value and the number of characters consumed, including
            the skipped leading whitespace. Empty or whitespace-only input
            yields ``(None, len(data))``.

        Raises:
            ValueError: If the first non-whitespace character cannot start a
                JSON value.
        """
        ptr = leading_whitespaces(data)
        if ptr == len(data):
            return None, ptr

        lead = data[ptr]
        if lead == "{":
            val, proc_chr = parse_object(data[ptr:])
            return val, ptr + proc_chr
        if lead == "[":
            val, proc_chr = parse_array(data[ptr:])
            return val, ptr + proc_chr
        if lead == '"':
            val, proc_chr = parse_string(data[ptr:])
            return val, ptr + proc_chr

        # startswith with an offset avoids copying the tail for every keyword.
        for keyword, constant in (("true", True), ("false", False), ("null", None)):
            if data.startswith(keyword, ptr):
                return constant, ptr + len(keyword)

        # JSON numbers may start with a minus sign or a digit, never a plus.
        if lead in "-0123456789":
            val, proc_chr = pares_number(data[ptr:])
            return val, ptr + proc_chr
        raise ValueError(f"Unexpected character: {data[ptr:]}")