Skip to content

Instantly share code, notes, and snippets.

@vsemozhetbyt
Created March 5, 2018 02:01
Show Gist options
  • Select an option

  • Save vsemozhetbyt/893044264a088b96025b2d51ed17aec3 to your computer and use it in GitHub Desktop.

Select an option

Save vsemozhetbyt/893044264a088b96025b2d51ed17aec3 to your computer and use it in GitHub Desktop.

Revisions

  1. vsemozhetbyt created this gist Mar 5, 2018.
    267 changes: 267 additions & 0 deletions re-unicode-properties.js
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,267 @@
    'use strict';

    // Long-form General_Category values. Each group starts with a major class
    // name and is followed by the related, more specific values.
    const generalCategoryValues = [
      'Letter', 'Cased_Letter', 'Uppercase_Letter', 'Lowercase_Letter',
      'Titlecase_Letter', 'Modifier_Letter', 'Other_Letter',

      'Mark', 'Nonspacing_Mark', 'Spacing_Mark', 'Enclosing_Mark',

      'Number', 'Decimal_Number', 'Letter_Number', 'Other_Number',

      'Symbol', 'Math_Symbol', 'Currency_Symbol', 'Modifier_Symbol',
      'Other_Symbol',

      'Punctuation', 'Connector_Punctuation', 'Dash_Punctuation',
      'Open_Punctuation', 'Close_Punctuation', 'Initial_Punctuation',
      'Final_Punctuation', 'Other_Punctuation',

      'Separator', 'Space_Separator', 'Line_Separator', 'Paragraph_Separator',

      'Other', 'Control', 'Format', 'Surrogate', 'Private_Use', 'Unassigned',
    ];

    // Script values in alphabetical order; rows are split by initial letter.
    const scriptValues = [
      'Adlam', 'Ahom', 'Anatolian_Hieroglyphs', 'Arabic', 'Armenian', 'Avestan',
      'Balinese', 'Bamum', 'Bassa_Vah', 'Batak', 'Bengali', 'Bhaiksuki',
      'Bopomofo', 'Brahmi', 'Braille', 'Buginese', 'Buhid',
      'Canadian_Aboriginal', 'Carian', 'Caucasian_Albanian', 'Chakma', 'Cham',
      'Cherokee', 'Common', 'Coptic', 'Cuneiform', 'Cypriot', 'Cyrillic',
      'Deseret', 'Devanagari', 'Duployan',
      'Egyptian_Hieroglyphs', 'Elbasan', 'Ethiopic',
      'Georgian', 'Glagolitic', 'Gothic', 'Grantha', 'Greek', 'Gujarati',
      'Gurmukhi',
      'Han', 'Hangul', 'Hanunoo', 'Hatran', 'Hebrew', 'Hiragana',
      'Imperial_Aramaic', 'Inherited', 'Inscriptional_Pahlavi',
      'Inscriptional_Parthian',
      'Javanese',
      'Kaithi', 'Kannada', 'Katakana', 'Kayah_Li', 'Kharoshthi', 'Khmer',
      'Khojki', 'Khudawadi',
      'Lao', 'Latin', 'Lepcha', 'Limbu', 'Linear_A', 'Linear_B', 'Lisu',
      'Lycian', 'Lydian',
      'Mahajani', 'Malayalam', 'Mandaic', 'Manichaean', 'Marchen',
      'Masaram_Gondi', 'Meetei_Mayek', 'Mende_Kikakui', 'Meroitic_Cursive',
      'Meroitic_Hieroglyphs', 'Miao', 'Modi', 'Mongolian', 'Mro', 'Multani',
      'Myanmar',
      'Nabataean', 'New_Tai_Lue', 'Newa', 'Nko', 'Nushu',
      'Ogham', 'Ol_Chiki', 'Old_Hungarian', 'Old_Italic', 'Old_North_Arabian',
      'Old_Permic', 'Old_Persian', 'Old_South_Arabian', 'Old_Turkic', 'Oriya',
      'Osage', 'Osmanya',
      'Pahawh_Hmong', 'Palmyrene', 'Pau_Cin_Hau', 'Phags_Pa', 'Phoenician',
      'Psalter_Pahlavi',
      'Rejang', 'Runic',
      'Samaritan', 'Saurashtra', 'Sharada', 'Shavian', 'Siddham', 'SignWriting',
      'Sinhala', 'Sora_Sompeng', 'Soyombo', 'Sundanese', 'Syloti_Nagri',
      'Syriac',
      'Tagalog', 'Tagbanwa', 'Tai_Le', 'Tai_Tham', 'Tai_Viet', 'Takri', 'Tamil',
      'Tangut', 'Telugu', 'Thaana', 'Thai', 'Tibetan', 'Tifinagh', 'Tirhuta',
      'Ugaritic',
      'Vai',
      'Warang_Citi',
      'Yi',
      'Zanabazar_Square',
    ];

    // Binary property names, i.e. those written as \p{Name} with no "=value".
    const binaryPropertyNames = [
      'ASCII', 'ASCII_Hex_Digit', 'Alphabetic', 'Any', 'Assigned',
      'Bidi_Control', 'Bidi_Mirrored',
      'Case_Ignorable', 'Cased', 'Changes_When_Casefolded',
      'Changes_When_Casemapped', 'Changes_When_Lowercased',
      'Changes_When_NFKC_Casefolded', 'Changes_When_Titlecased',
      'Changes_When_Uppercased',
      'Dash', 'Default_Ignorable_Code_Point', 'Deprecated', 'Diacritic',
      'Emoji', 'Emoji_Component', 'Emoji_Modifier', 'Emoji_Modifier_Base',
      'Emoji_Presentation', 'Extender',
      'Grapheme_Base', 'Grapheme_Extend',
      'Hex_Digit',
      'IDS_Binary_Operator', 'IDS_Trinary_Operator', 'ID_Continue', 'ID_Start',
      'Ideographic',
      'Join_Control',
      'Logical_Order_Exception', 'Lowercase',
      'Math',
      'Noncharacter_Code_Point',
      'Pattern_Syntax', 'Pattern_White_Space',
      'Quotation_Mark',
      'Radical', 'Regional_Indicator',
      'Sentence_Terminal', 'Soft_Dotted',
      'Terminal_Punctuation',
      'Unified_Ideograph', 'Uppercase',
      'Variation_Selector',
      'White_Space',
      'XID_Continue', 'XID_Start',
    ];

    /**
     * Catalogue of the Unicode property names and values this file turns into
     * `\p{...}` regular expressions.
     *
     * - `nonBinaryNames` maps a property name to the list of values it accepts.
     * - `binaryNames` lists properties that are used without a value.
     */
    const unicodeProperties = {
      nonBinaryNames: {
        General_Category: generalCategoryValues,
        Script: scriptValues,
        // Script_Extensions has no list of its own: it resolves to whatever
        // array `Script` currently holds, so the two can never drift apart.
        get Script_Extensions() {
          return this.Script;
        },
      },
      binaryNames: binaryPropertyNames,
    };


    module.exports = [
    ...unicodeProperties.nonBinaryNames.General_Category
    .map(value => RegExp(`\\p{gc=${value}}`, 'u')),

    ...unicodeProperties.nonBinaryNames.Script
    .map(value => RegExp(`\\p{sc=${value}}`, 'u')),

    ...unicodeProperties.nonBinaryNames.Script_Extensions
    .map(value => RegExp(`\\p{scx=${value}}`, 'u')),

    ...unicodeProperties.binaryNames
    .map(binaryName => RegExp(`\\p{${binaryName}}`, 'u')),
    ];