Last active
April 17, 2022 04:48
-
-
Save blueset/78cc54d6da052c74ff105ff80bde025d to your computer and use it in GitHub Desktop.
Revisions
-
blueset revised this gist
Apr 17, 2022 . 1 changed file with 5 additions and 5 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -12,7 +12,7 @@ whitespace_re = re.compile("^[ \t\n\r]+") float_re = re.compile(r"-?\d+(?:\.\d+)?(?:[Ee][+-]?\d+)?") single_char_escape = {"\\\\": "\\", "\\/": "/", '\\"': '"', "\\b": "\b", "\\f": "\f", "\\n": "\n", "\\r": "\r", "\\t": "\t"} plain_str_content_re = re.compile(r"([^\\\"]|\n\r\t)+") def leading_whitespaces(data: str) -> int: @@ -22,7 +22,7 @@ def leading_whitespaces(data: str) -> int: return len(match[0]) def parse_string(data: str) -> Tuple[str, int]: assert data[0] == '"' result = "" ptr = 1 while ptr < len(data): @@ -39,7 +39,7 @@ def parse_string(data: str) -> Tuple[str, int]: result += single_char_escape[i] ptr += 2 break elif data[ptr] == '"': # End of string ptr += 1 break @@ -120,7 +120,7 @@ def parse_json(data: str) -> Tuple[Any, int]: elif data[ptr] == "[": val, proc_chr = parse_array(data[ptr:]) return val, ptr + proc_chr elif data[ptr] == '"': val, proc_chr = parse_string(data[ptr:]) return val, ptr + proc_chr elif data[ptr:].startswith("true"): @@ -133,4 +133,4 @@ def parse_json(data: str) -> Tuple[Any, int]: val, proc_chr = pares_number(data[ptr:]) return val, ptr + proc_chr else: raise ValueError(f"Unexpected character: {data[ptr:]}") -
blueset revised this gist
Apr 17, 2022 . 1 changed file with 2 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -6,8 +6,8 @@ Number parsing is handled by Python. Usage: >>> data, _ = parse_json('{"key": ["value", -1e20, true, false, null]}') >>> assert data == {"key": ["value", -1e20, True, False, None]} """ whitespace_re = re.compile("^[ \t\n\r]+") -
blueset created this gist
Apr 17, 2022 .There are no files selected for viewing
"""A very rough JSON parser.

Implementing the standard outlined in https://www.json.org/json-en.html
Number conversion is delegated to Python's ``int`` and ``float``.

Usage:

>>> data, _ = parse_json('{"key": ["value", -1e20, true, false, null]}')
>>> assert data == {"key": ["value", -1e20, True, False, None]}

Every ``parse_*`` function returns ``(value, consumed)`` where ``consumed`` is
the number of characters of the argument that were read, and raises
``ValueError`` on malformed or truncated input.

Known leniencies (kept for backward compatibility):

* trailing commas in arrays and objects are accepted;
* numbers with leading zeros are accepted;
* raw control characters inside strings are accepted;
* ``true`` / ``false`` / ``null`` are matched by prefix only;
* blank input to ``parse_json`` yields ``(None, consumed)``.
"""

import re
from typing import Any, Tuple, Union

# Public patterns/tables, kept under their original names.
whitespace_re = re.compile("^[ \t\n\r]+")
float_re = re.compile(r"-?\d+(?:\.\d+)?(?:[Ee][+-]?\d+)?")
single_char_escape = {
    "\\\\": "\\",
    "\\/": "/",
    '\\"': '"',
    "\\b": "\b",
    "\\f": "\f",
    "\\n": "\n",
    "\\r": "\r",
    "\\t": "\t",
}
# The second alternative is redundant (the negated class already matches
# newline, carriage return and tab); the pattern is left unchanged so that
# anything importing it sees the same groups.
plain_str_content_re = re.compile(r"([^\\\"]|\n\r\t)+")

# Unanchored whitespace run: usable with ``match(data, pos)``, unlike
# ``whitespace_re`` whose ``^`` only matches at the real start of the string.
_WS_RUN_RE = re.compile(r"[ \t\n\r]*")
_HEX4_RE = re.compile(r"[0-9a-fA-F]{4}")
_LITERALS = (("true", True), ("false", False), ("null", None))


def _skip_ws(data: str, pos: int) -> int:
    """Return the index of the first non-whitespace character at or after pos."""
    return _WS_RUN_RE.match(data, pos).end()


def _scan_unicode_escape(data: str, pos: int) -> Tuple[str, int]:
    """Decode the ``\\uXXXX`` escape starting at ``data[pos]``.

    A high surrogate immediately followed by a low-surrogate escape is
    combined into a single code point. Returns the decoded text and the index
    just past the escape(s).
    """
    digits = _HEX4_RE.match(data, pos + 2)
    if digits is None:
        raise ValueError(f"Invalid \\u escape at index {pos}")
    code = int(digits[0], 16)
    pos += 6
    if 0xD800 <= code <= 0xDBFF and data.startswith("\\u", pos):
        low_digits = _HEX4_RE.match(data, pos + 2)
        if low_digits is not None:
            low = int(low_digits[0], 16)
            if 0xDC00 <= low <= 0xDFFF:
                code = 0x10000 + ((code - 0xD800) << 10) + (low - 0xDC00)
                pos += 6
    return chr(code), pos


def _scan_string(data: str, start: int) -> Tuple[str, int]:
    """Parse the string literal opening at ``data[start]``; return (text, end)."""
    if not data.startswith('"', start):
        raise ValueError(f"Expected '\"' at index {start}")
    pieces = []
    pos = start + 1
    end = len(data)
    while pos < end:
        char = data[pos]
        if char == '"':
            return "".join(pieces), pos + 1
        if char != "\\":
            # Consume the whole run of ordinary characters in one step.
            run = plain_str_content_re.match(data, pos)
            pieces.append(run[0])
            pos = run.end()
        elif data.startswith("u", pos + 1):
            text, pos = _scan_unicode_escape(data, pos)
            pieces.append(text)
        else:
            pair = data[pos:pos + 2]
            if pair not in single_char_escape:
                # Without this check an unknown escape never advanced ``pos``.
                raise ValueError(
                    f"Invalid escape sequence {pair!r} at index {pos}"
                )
            pieces.append(single_char_escape[pair])
            pos += 2
    raise ValueError(f"Unterminated string starting at index {start}")


def _scan_number(data: str, start: int) -> Tuple[Union[float, int], int]:
    """Parse the number starting at ``data[start]``; return (value, end)."""
    found = float_re.match(data, start)
    if found is None:
        raise ValueError(f"Invalid number at index {start}")
    text = found[0]
    if "." in text or "e" in text or "E" in text:
        return float(text), found.end()
    return int(text), found.end()


def _scan_object(data: str, start: int) -> Tuple[dict, int]:
    """Parse the object opening at ``data[start]``; return (dict, end)."""
    if not data.startswith("{", start):
        raise ValueError(f"Expected '{{' at index {start}")
    members = {}
    pos = start + 1
    end = len(data)
    while True:
        pos = _skip_ws(data, pos)
        if pos >= end:
            raise ValueError(f"Unterminated object starting at index {start}")
        if data[pos] == "}":  # empty object, or close after a trailing comma
            return members, pos + 1
        key, pos = _scan_string(data, pos)
        pos = _skip_ws(data, pos)
        if not data.startswith(":", pos):
            raise ValueError(f"Expected ':' at index {pos}")
        value, pos = _scan_value(data, pos + 1)
        pos = _skip_ws(data, pos)
        members[key] = value
        if pos >= end:
            raise ValueError(f"Unterminated object starting at index {start}")
        if data[pos] == "}":
            return members, pos + 1
        if data[pos] != ",":
            raise ValueError(f"Expected ',' or '}}' at index {pos}")
        pos += 1


def _scan_array(data: str, start: int) -> Tuple[list, int]:
    """Parse the array opening at ``data[start]``; return (list, end)."""
    if not data.startswith("[", start):
        raise ValueError(f"Expected '[' at index {start}")
    items = []
    pos = start + 1
    end = len(data)
    while True:
        pos = _skip_ws(data, pos)
        if pos >= end:
            raise ValueError(f"Unterminated array starting at index {start}")
        if data[pos] == "]":  # empty array, or close after a trailing comma
            return items, pos + 1
        value, pos = _scan_value(data, pos)
        items.append(value)
        pos = _skip_ws(data, pos)
        if pos >= end:
            raise ValueError(f"Unterminated array starting at index {start}")
        if data[pos] == "]":
            return items, pos + 1
        if data[pos] != ",":
            raise ValueError(f"Expected ',' or ']' at index {pos}")
        pos += 1


def _scan_value(data: str, start: int) -> Tuple[Any, int]:
    """Parse any JSON value at or after ``data[start]``; return (value, end)."""
    pos = _skip_ws(data, start)
    if pos == len(data):
        # Blank input historically parses as None; callers rely on the tuple.
        return None, pos
    head = data[pos]
    if head == "{":
        return _scan_object(data, pos)
    if head == "[":
        return _scan_array(data, pos)
    if head == '"':
        return _scan_string(data, pos)
    for literal, value in _LITERALS:
        if data.startswith(literal, pos):
            return value, pos + len(literal)
    if head in "-0123456789":
        return _scan_number(data, pos)
    raise ValueError(f"Unexpected character: {data[pos:]}")


def leading_whitespaces(data: str) -> int:
    """Return how many JSON whitespace characters ``data`` starts with."""
    match = whitespace_re.match(data)
    return len(match[0]) if match else 0


def parse_string(data: str) -> Tuple[str, int]:
    """Parse a JSON string literal at the start of ``data``.

    Returns:
        The decoded text and the number of characters consumed, including
        both quotes.

    Raises:
        ValueError: if ``data`` does not start with a double quote, contains
            an invalid escape sequence, or the string is never closed.
    """
    return _scan_string(data, 0)


def pares_number(s: str) -> Tuple[Union[float, int], int]:
    """Parse a JSON number at the start of ``s``.

    The misspelled name is kept for existing callers; ``parse_number`` is an
    alias.

    Returns:
        An ``int`` when the literal has no fraction or exponent, otherwise a
        ``float``, together with the number of characters consumed.

    Raises:
        ValueError: if ``s`` does not start with a number.
    """
    return _scan_number(s, 0)


parse_number = pares_number


def parse_object(data: str) -> Tuple[dict, int]:
    """Parse a JSON object at the start of ``data``.

    Returns:
        The resulting ``dict`` and the number of characters consumed.

    Raises:
        ValueError: if ``data`` does not start with ``{`` or the object is
            malformed or truncated.
    """
    return _scan_object(data, 0)


def parse_array(data: str) -> Tuple[list, int]:
    """Parse a JSON array at the start of ``data``.

    Returns:
        The resulting ``list`` and the number of characters consumed.

    Raises:
        ValueError: if ``data`` does not start with ``[`` or the array is
            malformed or truncated.
    """
    return _scan_array(data, 0)


def parse_json(data: str) -> Tuple[Any, int]:
    """Parse the first JSON value in ``data``, skipping leading whitespace.

    Returns:
        The parsed value and the number of characters consumed. Text after
        the value is left unread. Blank input yields ``(None, len(data))``.

    Raises:
        ValueError: if the input is malformed or truncated.
    """
    return _scan_value(data, 0)