/* Language: Python Description: Python is an interpreted, object-oriented, high-level programming language with dynamic semantics. Website: https://www.python.org Category: common */ var module = module ? module : {}; // shim for browser use function hljsDefinePython(hljs) { const regex = hljs.regex; const IDENT_RE = /[\p{XID_Start}_]\p{XID_Continue}*/u; const RESERVED_WORDS = [ "and", "as", "assert", "async", "await", "break", "case", "class", "continue", "def", "del", "elif", "else", "except", "finally", "for", "from", "global", "if", "import", "in", "is", "lambda", "match", "nonlocal|10", "not", "or", "pass", "raise", "return", "try", "while", "with", "yield", ]; const BUILT_INS = [ "__import__", "abs", "all", "any", "ascii", "bin", "bool", "breakpoint", "bytearray", "bytes", "callable", "chr", "classmethod", "compile", "complex", "delattr", "dict", "dir", "divmod", "enumerate", "eval", "exec", "filter", "float", "format", "frozenset", "getattr", "globals", "hasattr", "hash", "help", "hex", "id", "input", "int", "isinstance", "issubclass", "iter", "len", "list", "locals", "map", "max", "memoryview", "min", "next", "object", "oct", "open", "ord", "pow", "print", "property", "range", "repr", "reversed", "round", "set", "setattr", "slice", "sorted", "staticmethod", "str", "sum", "super", "tuple", "type", "vars", "zip", ]; const LITERALS = [ "__debug__", "Ellipsis", "False", "None", "NotImplemented", "True", ]; // https://docs.python.org/3/library/typing.html // TODO: Could these be supplemented by a CamelCase matcher in certain // contexts, leaving these remaining only for relevance hinting? const TYPES = [ "Any", "Callable", "Coroutine", "Dict", "List", "Literal", "Generic", "Optional", "Sequence", "Set", "Tuple", "Type", "Union", ]; const KEYWORDS = { $pattern: /[A-Za-z]\w+|__\w+__/, keyword: RESERVED_WORDS, built_in: BUILT_INS, literal: LITERALS, type: TYPES, }; const PROMPT = { className: "meta", begin: /^(>>>|\.\.\.) /, }; const SUBST = { className: "subst", begin: /\{/, end: /\}/, keywords: KEYWORDS, illegal: /#/, }; const LITERAL_BRACKET = { begin: /\{\{/, relevance: 0, }; const STRING = { className: "string", contains: [hljs.BACKSLASH_ESCAPE], variants: [ { begin: /([uU]|[bB]|[rR]|[bB][rR]|[rR][bB])?'''/, end: /'''/, contains: [hljs.BACKSLASH_ESCAPE, PROMPT], relevance: 10, }, { begin: /([uU]|[bB]|[rR]|[bB][rR]|[rR][bB])?"""/, end: /"""/, contains: [hljs.BACKSLASH_ESCAPE, PROMPT], relevance: 10, }, { begin: /([fF][rR]|[rR][fF]|[fF])'''/, end: /'''/, contains: [hljs.BACKSLASH_ESCAPE, PROMPT, LITERAL_BRACKET, SUBST], }, { begin: /([fF][rR]|[rR][fF]|[fF])"""/, end: /"""/, contains: [hljs.BACKSLASH_ESCAPE, PROMPT, LITERAL_BRACKET, SUBST], }, { begin: /([uU]|[rR])'/, end: /'/, relevance: 10, }, { begin: /([uU]|[rR])"/, end: /"/, relevance: 10, }, { begin: /([bB]|[bB][rR]|[rR][bB])'/, end: /'/, }, { begin: /([bB]|[bB][rR]|[rR][bB])"/, end: /"/, }, { begin: /([fF][rR]|[rR][fF]|[fF])'/, end: /'/, contains: [hljs.BACKSLASH_ESCAPE, LITERAL_BRACKET, SUBST], }, { begin: /([fF][rR]|[rR][fF]|[fF])"/, end: /"/, contains: [hljs.BACKSLASH_ESCAPE, LITERAL_BRACKET, SUBST], }, hljs.APOS_STRING_MODE, hljs.QUOTE_STRING_MODE, ], }; // https://docs.python.org/3.9/reference/lexical_analysis.html#numeric-literals const digitpart = "[0-9](_?[0-9])*"; const pointfloat = `(\\b(${digitpart}))?\\.(${digitpart})|\\b(${digitpart})\\.`; // Whitespace after a number (or any lexical token) is needed only if its absence // would change the tokenization // https://docs.python.org/3.9/reference/lexical_analysis.html#whitespace-between-tokens // We deviate slightly, requiring a word boundary or a keyword // to avoid accidentally recognizing *prefixes* (e.g., `0` in `0x41` or `08` or `0__1`) const lookahead = `\\b|${RESERVED_WORDS.join("|")}`; const NUMBER = { className: "number", relevance: 0, variants: [ // exponentfloat, pointfloat // https://docs.python.org/3.9/reference/lexical_analysis.html#floating-point-literals // optionally imaginary // https://docs.python.org/3.9/reference/lexical_analysis.html#imaginary-literals // Note: no leading \b because floats can start with a decimal point // and we don't want to mishandle e.g. `fn(.5)`, // no trailing \b for pointfloat because it can end with a decimal point // and we don't want to mishandle e.g. `0..hex()`; this should be safe // because both MUST contain a decimal point and so cannot be confused with // the interior part of an identifier { begin: `(\\b(${digitpart})|(${pointfloat}))[eE][+-]?(${digitpart})[jJ]?(?=${lookahead})`, }, { begin: `(${pointfloat})[jJ]?`, }, // decinteger, bininteger, octinteger, hexinteger // https://docs.python.org/3.9/reference/lexical_analysis.html#integer-literals // optionally "long" in Python 2 // https://docs.python.org/2.7/reference/lexical_analysis.html#integer-and-long-integer-literals // decinteger is optionally imaginary // https://docs.python.org/3.9/reference/lexical_analysis.html#imaginary-literals { begin: `\\b([1-9](_?[0-9])*|0+(_?0)*)[lLjJ]?(?=${lookahead})`, }, { begin: `\\b0[bB](_?[01])+[lL]?(?=${lookahead})`, }, { begin: `\\b0[oO](_?[0-7])+[lL]?(?=${lookahead})`, }, { begin: `\\b0[xX](_?[0-9a-fA-F])+[lL]?(?=${lookahead})`, }, // imagnumber (digitpart-based) // https://docs.python.org/3.9/reference/lexical_analysis.html#imaginary-literals { begin: `\\b(${digitpart})[jJ](?=${lookahead})`, }, ], }; const COMMENT_TYPE = { className: "comment", begin: regex.lookahead(/# type:/), end: /$/, keywords: KEYWORDS, contains: [ { // prevent keywords from coloring `type` begin: /# type:/, }, // comment within a datatype comment includes no keywords { begin: /#/, end: /\b\B/, endsWithParent: true, }, ], }; const PARAMS = { className: "params", variants: [ // Exclude params in functions without params { className: "", begin: /\(\s*\)/, skip: true, }, { begin: /\(/, end: /\)/, excludeBegin: true, excludeEnd: true, keywords: KEYWORDS, contains: ["self", PROMPT, NUMBER, STRING, hljs.HASH_COMMENT_MODE], }, ], }; SUBST.contains = [STRING, NUMBER, PROMPT]; return { name: "Python", aliases: ["py", "gyp", "ipython"], unicodeRegex: true, keywords: KEYWORDS, illegal: /(<\/|\?)|=>/, contains: [ PROMPT, NUMBER, { // very common convention scope: "variable.language", match: /\bself\b/, }, { // eat "if" prior to string so that it won't accidentally be // labeled as an f-string beginKeywords: "if", relevance: 0, }, { match: /\bor\b/, scope: "keyword" }, STRING, COMMENT_TYPE, hljs.HASH_COMMENT_MODE, { match: [/\bdef/, /\s+/, IDENT_RE], scope: { 1: "keyword", 3: "title.function", }, contains: [PARAMS], }, { variants: [ { match: [ /\bclass/, /\s+/, IDENT_RE, /\s*/, /\(\s*/, IDENT_RE, /\s*\)/, ], }, { match: [/\bclass/, /\s+/, IDENT_RE], }, ], scope: { 1: "keyword", 3: "title.class", 6: "title.class.inherited", }, }, { className: "meta", begin: /^[\t ]*@/, end: /(?=#)|$/, contains: [NUMBER, PARAMS, STRING], }, ], }; } module.exports = function (hljs) { hljs.registerLanguage("py", hljsDefinePython); }; module.exports.definer = hljsDefinePython;