from __future__ import annotations import re from typing import Any, Never class JSONParseError(Exception): pass ESCAPE_CHARS = { '"': '"', "\\": "\\", "b": "\b", "f": "\f", "n": "\n", "r": "\r", "t": "\t", } class JSONParser: def __init__(self, text: str) -> None: self.text = text self.pos = 0 def _expect(self, text: str) -> bool: if self.text[self.pos : self.pos + len(text)] == text: self.pos += len(text) return True return False def _expect_re(self, pattern: str) -> str | None: match = re.compile(pattern).match(self.text, self.pos) if match: self.pos = match.end() return match.group() return None def _parse_error(self, message: str = "", pos: int | None = None) -> Never: if pos is None: pos = self.pos if not message: message = f"Unexpected character '{self.text[pos]}'" context = f"{self.text[pos-10:pos+5]}\n{' ' * min(10, pos)}^\n" raise JSONParseError(f"at pos {pos}: {message}\n{context}") def _skip_whitespace(self) -> None: while self.pos < len(self.text) and self.text[self.pos].isspace(): self.pos += 1 def _parse_bool(self) -> bool: if self._expect("true"): return True if self._expect("false"): return False self._parse_error() def _parse_null(self) -> None: if self._expect("null"): return self._parse_error() def _parse_number(self) -> int | float: pattern = r"-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][+-]?[0-9]+)?" number = self._expect_re(pattern) if number is None: self._parse_error() return int(number) if number.isdigit() else float(number) def _parse_string(self) -> str: pos = self.pos text = self.text if text[pos] != '"': self._parse_error() pos += 1 buffer = "" while pos < len(text): if text[pos] == "\\": pos += 1 for escape_char in ESCAPE_CHARS: if text[pos] == escape_char: buffer += ESCAPE_CHARS[escape_char] pos += 1 break else: if text[pos] == "u": pos += 1 if pos + 4 >= len(text): self._parse_error( f"Truncated unicode escape sequence {text[pos - 2:pos + 4]}", pos, ) else: try: buffer += chr(int(text[pos : pos + 4], 16)) pos += 4 except ValueError: self._parse_error( f"Invalid unicode escape sequence {text[pos - 2:pos + 4]}", pos, ) else: self._parse_error("Invalid escape sequence", pos) elif text[pos] == '"': pos += 1 break else: buffer += text[pos] pos += 1 else: self._parse_error("Unterminated string", pos) self.pos = pos return buffer def _parse_array(self) -> list[Any]: if not self._expect("["): self._parse_error() elements: list[Any] = [] last_comma = False while self.pos < len(self.text): self._skip_whitespace() if self._expect("]"): if last_comma: self._parse_error(pos=self.pos - 1) return elements if elements and not last_comma: self._parse_error() elements.append(self.parse_value()) self._skip_whitespace() last_comma = self._expect(",") self._parse_error("Unclosed array") def _parse_object(self) -> dict[str, Any]: if not self._expect("{"): self._parse_error() obj: dict[str, Any] = {} last_comma = False while self.pos < len(self.text): self._skip_whitespace() if self._expect("}"): if last_comma: self._parse_error(pos=self.pos - 1) return obj if obj and not last_comma: self._parse_error() key = self._parse_string() self._skip_whitespace() if not self._expect(":"): self._parse_error() self._skip_whitespace() obj[key] = self.parse_value() self._skip_whitespace() last_comma = self._expect(",") self._parse_error("Unclosed object") def parse_value(self) -> Any: self._skip_whitespace() if self.pos >= len(self.text): self._parse_error("Unexpected end of input") c = self.text[self.pos] if c == '"': return self._parse_string() elif c == "[": return self._parse_array() elif c == "{": return self._parse_object() elif c in "tf": return self._parse_bool() elif c == "n": self._parse_null() return None else: return self._parse_number() def loads(text: str) -> Any: return JSONParser(text).parse_value()