Skip to content

Instantly share code, notes, and snippets.

@abhishekbhardwaj
Forked from ravisorg/test-hashtag-regexp.php
Created December 13, 2018 05:50
Show Gist options
  • Select an option

  • Save abhishekbhardwaj/97fc32e8040e9d5ded354c939dbe3000 to your computer and use it in GitHub Desktop.

Select an option

Save abhishekbhardwaj/97fc32e8040e9d5ded354c939dbe3000 to your computer and use it in GitHub Desktop.

Revisions

  1. @ravisorg ravisorg revised this gist Apr 1, 2017. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion test-hashtag-regexp.php
    Original file line number Diff line number Diff line change
    @@ -13,7 +13,7 @@
    // and is the equivalent (because PHP doesn't support these character classes) of:
    // (?:\p{Emoji_Modifier_Base}\p{Emoji_Modifier}?|\p{Emoji_Presentation}|\p{Emoji}\x{FE0F}?)
    // Generated using https://gist.github.com/ravisorg/23edafbfcbd45de9875adec5310fca76
    $emojiRegexp = '(?:(?:\x{261D}|\x{26F9}|[\x{270A}-\x{270B}]|[\x{270C}-\x{270D}]|\x{1F385}|[\x{1F3C2}-\x{1F3C4}]|\x{1F3C7}|\x{1F3CA}|[\x{1F3CB}-\x{1F3CC}]|[\x{1F442}-\x{1F443}]|[\x{1F446}-\x{1F450}]|[\x{1F466}-\x{1F469}]|\x{1F46E}|[\x{1F470}-\x{1F478}]|\x{1F47C}|[\x{1F481}-\x{1F483}]|[\x{1F485}-\x{1F487}]|\x{1F4AA}|[\x{1F574}-\x{1F575}]|\x{1F57A}|\x{1F590}|[\x{1F595}-\x{1F596}]|[\x{1F645}-\x{1F647}]|[\x{1F64B}-\x{1F64F}]|\x{1F6A3}|[\x{1F6B4}-\x{1F6B6}]|\x{1F6C0}|\x{1F6CC}|\x{1F918}|[\x{1F919}-\x{1F91C}]|\x{1F91E}|\x{1F91F}|\x{1F926}|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F939}]|[\x{1F93D}-\x{1F93E}]|[\x{1F9D1}-\x{1F9DD}])(?:[\x{1F3FB}-\x{1F3FF}])?|(?:[\x{231A}-\x{231B}]|[\x{23E9}-\x{23EC}]|\x{23F0}|\x{23F3}|[\x{25FD}-\x{25FE}]|[\x{2614}-\x{2615}]|[\x{2648}-\x{2653}]|\x{267F}|\x{2693}|\x{26A1}|[\x{26AA}-\x{26AB}]|[\x{26BD}-\x{26BE}]|[\x{26C4}-\x{26C5}]|\x{26CE}|\x{26D4}|\x{26EA}|[\x{26F2}-\x{26F3}]|\x{26F5}|\x{26FA}|\x{26FD}|\x{2705}|[\x{270A}-\x{270B}]|\x{2728}|\x{274C}|\x{274E}|[\x{2753}-\x{2755}]|\x{2757}|[\x{2795}-\x{2797}]|\x{27B0}|\x{27BF}|[\x{2B1B}-\x{2B1C}]|\x{2B50}|\x{2B55}|\x{1F004}|\x{1F0CF}|\x{1F18E}|[\x{1F191}-\x{1F19A}]|[\x{1F1E6}-\x{1F1FF}]|\x{1F201}|\x{1F21A}|\x{1F22F}|[\x{1F232}-\x{1F236}]|[\x{1F238}-\x{1F23A}]|[\x{1F250}-\x{1F251}]|[\x{1F300}-\x{1F320}]|[\x{1F32D}-\x{1F32F}]|[\x{1F330}-\x{1F335}]|[\x{1F337}-\x{1F37C}]|[\x{1F37E}-\x{1F37F}]|[\x{1F380}-\x{1F393}]|[\x{1F3A0}-\x{1F3C4}]|\x{1F3C5}|[\x{1F3C6}-\x{1F3CA}]|[\x{1F3CF}-\x{1F3D3}]|[\x{1F3E0}-\x{1F3F0}]|\x{1F3F4}|[\x{1F3F8}-\x{1F3FF}]|[\x{1F400}-\x{1F43E}]|\x{1F440}|[\x{1F442}-\x{1F4F7}]|\x{1F4F8}|[\x{1F4F9}-\x{1F4FC}]|\x{1F4FF}|[\x{1F500}-\x{1F53D}]|[\x{1F54B}-\x{1F54E}]|[\x{1F550}-\x{1F567}]|\x{1F57A}|[\x{1F595}-\x{1F596}]|\x{1F5A4}|[\x{1F5FB}-\x{1F5FF}]|\x{1F600}|[\x{1F601}-\x{1F610}]|\x{1F611}|[\x{1F612}-\x{1F614}]|\x{1F615}|\x{1F616}|\x{1F617}|\x{1F618}|\x{1F619}|\x{1F61A}|\x{1F61B}|[\x{1F61C}-\x{1F61E}]|\x{1F61F}|[\x{1F620}-\x{1F625}]|[\x{1F626}-\x{1F627}]|[\x{1F628}-\x{1F62B}]|
\x{1F62C}|\x{1F62D}|[\x{1F62E}-\x{1F62F}]|[\x{1F630}-\x{1F633}]|\x{1F634}|[\x{1F635}-\x{1F640}]|[\x{1F641}-\x{1F642}]|[\x{1F643}-\x{1F644}]|[\x{1F645}-\x{1F64F}]|[\x{1F680}-\x{1F6C5}]|\x{1F6CC}|\x{1F6D0}|[\x{1F6D1}-\x{1F6D2}]|[\x{1F6EB}-\x{1F6EC}]|[\x{1F6F4}-\x{1F6F6}]|[\x{1F6F7}-\x{1F6F8}]|[\x{1F910}-\x{1F918}]|[\x{1F919}-\x{1F91E}]|\x{1F91F}|[\x{1F920}-\x{1F927}]|[\x{1F928}-\x{1F92F}]|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F93A}]|[\x{1F93C}-\x{1F93E}]|[\x{1F940}-\x{1F945}]|[\x{1F947}-\x{1F94B}]|\x{1F94C}|[\x{1F950}-\x{1F95E}]|[\x{1F95F}-\x{1F96B}]|[\x{1F980}-\x{1F984}]|[\x{1F985}-\x{1F991}]|[\x{1F992}-\x{1F997}]|\x{1F9C0}|[\x{1F9D0}-\x{1F9E6}])|(?:\x{0023}|\x{002A}|[\x{0030}-\x{0039}]|\x{00A9}|\x{00AE}|\x{203C}|\x{2049}|\x{2122}|\x{2139}|[\x{2194}-\x{2199}]|[\x{21A9}-\x{21AA}]|[\x{231A}-\x{231B}]|\x{2328}|\x{23CF}|[\x{23E9}-\x{23F3}]|[\x{23F8}-\x{23FA}]|\x{24C2}|[\x{25AA}-\x{25AB}]|\x{25B6}|\x{25C0}|[\x{25FB}-\x{25FE}]|[\x{2600}-\x{2604}]|\x{260E}|\x{2611}|[\x{2614}-\x{2615}]|\x{2618}|\x{261D}|\x{2620}|[\x{2622}-\x{2623}]|\x{2626}|\x{262A}|[\x{262E}-\x{262F}]|[\x{2638}-\x{263A}]|\x{2640}|\x{2642}|[\x{2648}-\x{2653}]|\x{2660}|\x{2663}|[\x{2665}-\x{2666}]|\x{2668}|\x{267B}|\x{267F}|[\x{2692}-\x{2697}]|\x{2699}|[\x{269B}-\x{269C}]|[\x{26A0}-\x{26A1}]|[\x{26AA}-\x{26AB}]|[\x{26B0}-\x{26B1}]|[\x{26BD}-\x{26BE}]|[\x{26C4}-\x{26C5}]|\x{26C8}|\x{26CE}|\x{26CF}|\x{26D1}|[\x{26D3}-\x{26D4}]|[\x{26E9}-\x{26EA}]|[\x{26F0}-\x{26F5}]|[\x{26F7}-\x{26FA}]|\x{26FD}|\x{2702}|\x{2705}|[\x{2708}-\x{2709}]|[\x{270A}-\x{270B}]|[\x{270C}-\x{270D}]|\x{270F}|\x{2712}|\x{2714}|\x{2716}|\x{271D}|\x{2721}|\x{2728}|[\x{2733}-\x{2734}]|\x{2744}|\x{2747}|\x{274C}|\x{274E}|[\x{2753}-\x{2755}]|\x{2757}|[\x{2763}-\x{2764}]|[\x{2795}-\x{2797}]|\x{27A1}|\x{27B0}|\x{27BF}|[\x{2934}-\x{2935}]|[\x{2B05}-\x{2B07}]|[\x{2B1B}-\x{2B1C}]|\x{2B50}|\x{2B55}|\x{3030}|\x{303D}|\x{3297}|\x{3299}|\x{1F004}|\x{1F0CF}|[\x{1F170}-\x{1F171}]|\x{1F17E}|\x{1F17F}|\x{1F18E}|[\x{1F191}-\x{1F19A}]|[\x{1F1E6}-\x{1F
1FF}]|[\x{1F201}-\x{1F202}]|\x{1F21A}|\x{1F22F}|[\x{1F232}-\x{1F23A}]|[\x{1F250}-\x{1F251}]|[\x{1F300}-\x{1F320}]|\x{1F321}|[\x{1F324}-\x{1F32C}]|[\x{1F32D}-\x{1F32F}]|[\x{1F330}-\x{1F335}]|\x{1F336}|[\x{1F337}-\x{1F37C}]|\x{1F37D}|[\x{1F37E}-\x{1F37F}]|[\x{1F380}-\x{1F393}]|[\x{1F396}-\x{1F397}]|[\x{1F399}-\x{1F39B}]|[\x{1F39E}-\x{1F39F}]|[\x{1F3A0}-\x{1F3C4}]|\x{1F3C5}|[\x{1F3C6}-\x{1F3CA}]|[\x{1F3CB}-\x{1F3CE}]|[\x{1F3CF}-\x{1F3D3}]|[\x{1F3D4}-\x{1F3DF}]|[\x{1F3E0}-\x{1F3F0}]|[\x{1F3F3}-\x{1F3F5}]|\x{1F3F7}|[\x{1F3F8}-\x{1F3FF}]|[\x{1F400}-\x{1F43E}]|\x{1F43F}|\x{1F440}|\x{1F441}|[\x{1F442}-\x{1F4F7}]|\x{1F4F8}|[\x{1F4F9}-\x{1F4FC}]|\x{1F4FD}|\x{1F4FF}|[\x{1F500}-\x{1F53D}]|[\x{1F549}-\x{1F54A}]|[\x{1F54B}-\x{1F54E}]|[\x{1F550}-\x{1F567}]|[\x{1F56F}-\x{1F570}]|[\x{1F573}-\x{1F579}]|\x{1F57A}|\x{1F587}|[\x{1F58A}-\x{1F58D}]|\x{1F590}|[\x{1F595}-\x{1F596}]|\x{1F5A4}|\x{1F5A5}|\x{1F5A8}|[\x{1F5B1}-\x{1F5B2}]|\x{1F5BC}|[\x{1F5C2}-\x{1F5C4}]|[\x{1F5D1}-\x{1F5D3}]|[\x{1F5DC}-\x{1F5DE}]|\x{1F5E1}|\x{1F5E3}|\x{1F5E8}|\x{1F5EF}|\x{1F5F3}|\x{1F5FA}|[\x{1F5FB}-\x{1F5FF}]|\x{1F600}|[\x{1F601}-\x{1F610}]|\x{1F611}|[\x{1F612}-\x{1F614}]|\x{1F615}|\x{1F616}|\x{1F617}|\x{1F618}|\x{1F619}|\x{1F61A}|\x{1F61B}|[\x{1F61C}-\x{1F61E}]|\x{1F61F}|[\x{1F620}-\x{1F625}]|[\x{1F626}-\x{1F627}]|[\x{1F628}-\x{1F62B}]|\x{1F62C}|\x{1F62D}|[\x{1F62E}-\x{1F62F}]|[\x{1F630}-\x{1F633}]|\x{1F634}|[\x{1F635}-\x{1F640}]|[\x{1F641}-\x{1F642}]|[\x{1F643}-\x{1F644}]|[\x{1F645}-\x{1F64F}]|[\x{1F680}-\x{1F6C5}]|[\x{1F6CB}-\x{1F6CF}]|\x{1F6D0}|[\x{1F6D1}-\x{1F6D2}]|[\x{1F6E0}-\x{1F6E5}]|\x{1F6E9}|[\x{1F6EB}-\x{1F6EC}]|\x{1F6F0}|\x{1F6F3}|[\x{1F6F4}-\x{1F6F6}]|[\x{1F6F7}-\x{1F6F8}]|[\x{1F910}-\x{1F918}]|[\x{1F919}-\x{1F91E}]|\x{1F91F}|[\x{1F920}-\x{1F927}]|[\x{1F928}-\x{1F92F}]|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F93A}]|[\x{1F93C}-\x{1F93E}]|[\x{1F940}-\x{1F945}]|[\x{1F947}-\x{1F94B}]|\x{1F94C}|[\x{1F950}-\x{1F95E}]|[\x{1F95F}-\x{1F96B}]|[\x{1F980}-\x{1F984}]|[\x{1F985}-\x{1F991}]|[\x{1F992}-\x{1F
997}]|\x{1F9C0}|[\x{1F9D0}-\x{1F9E6}])\x{FE0F}?)';
    $emojiRegexp = '(?:(?:\x{261D}|\x{26F9}|[\x{270A}-\x{270B}]|[\x{270C}-\x{270D}]|\x{1F385}|[\x{1F3C2}-\x{1F3C4}]|\x{1F3C7}|\x{1F3CA}|[\x{1F3CB}-\x{1F3CC}]|[\x{1F442}-\x{1F443}]|[\x{1F446}-\x{1F450}]|[\x{1F466}-\x{1F469}]|\x{1F46E}|[\x{1F470}-\x{1F478}]|\x{1F47C}|[\x{1F481}-\x{1F483}]|[\x{1F485}-\x{1F487}]|\x{1F4AA}|[\x{1F574}-\x{1F575}]|\x{1F57A}|\x{1F590}|[\x{1F595}-\x{1F596}]|[\x{1F645}-\x{1F647}]|[\x{1F64B}-\x{1F64F}]|\x{1F6A3}|[\x{1F6B4}-\x{1F6B6}]|\x{1F6C0}|\x{1F6CC}|\x{1F918}|[\x{1F919}-\x{1F91C}]|\x{1F91E}|\x{1F91F}|\x{1F926}|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F939}]|[\x{1F93D}-\x{1F93E}]|[\x{1F9D1}-\x{1F9DD}])(?:[\x{1F3FB}-\x{1F3FF}])?|(?:[\x{231A}-\x{231B}]|[\x{23E9}-\x{23EC}]|\x{23F0}|\x{23F3}|[\x{25FD}-\x{25FE}]|[\x{2614}-\x{2615}]|[\x{2648}-\x{2653}]|\x{267F}|\x{2693}|\x{26A1}|[\x{26AA}-\x{26AB}]|[\x{26BD}-\x{26BE}]|[\x{26C4}-\x{26C5}]|\x{26CE}|\x{26D4}|\x{26EA}|[\x{26F2}-\x{26F3}]|\x{26F5}|\x{26FA}|\x{26FD}|\x{2705}|[\x{270A}-\x{270B}]|\x{2728}|\x{274C}|\x{274E}|[\x{2753}-\x{2755}]|\x{2757}|[\x{2795}-\x{2797}]|\x{27B0}|\x{27BF}|[\x{2B1B}-\x{2B1C}]|\x{2B50}|\x{2B55}|\x{1F004}|\x{1F0CF}|\x{1F18E}|[\x{1F191}-\x{1F19A}]|[\x{1F1E6}-\x{1F1FF}]|\x{1F201}|\x{1F21A}|\x{1F22F}|[\x{1F232}-\x{1F236}]|[\x{1F238}-\x{1F23A}]|[\x{1F250}-\x{1F251}]|[\x{1F300}-\x{1F320}]|[\x{1F32D}-\x{1F32F}]|[\x{1F330}-\x{1F335}]|[\x{1F337}-\x{1F37C}]|[\x{1F37E}-\x{1F37F}]|[\x{1F380}-\x{1F393}]|[\x{1F3A0}-\x{1F3C4}]|\x{1F3C5}|[\x{1F3C6}-\x{1F3CA}]|[\x{1F3CF}-\x{1F3D3}]|[\x{1F3E0}-\x{1F3F0}]|\x{1F3F4}|[\x{1F3F8}-\x{1F3FF}]|[\x{1F400}-\x{1F43E}]|\x{1F440}|[\x{1F442}-\x{1F4F7}]|\x{1F4F8}|[\x{1F4F9}-\x{1F4FC}]|\x{1F4FF}|[\x{1F500}-\x{1F53D}]|[\x{1F54B}-\x{1F54E}]|[\x{1F550}-\x{1F567}]|\x{1F57A}|[\x{1F595}-\x{1F596}]|\x{1F5A4}|[\x{1F5FB}-\x{1F5FF}]|\x{1F600}|[\x{1F601}-\x{1F610}]|\x{1F611}|[\x{1F612}-\x{1F614}]|\x{1F615}|\x{1F616}|\x{1F617}|\x{1F618}|\x{1F619}|\x{1F61A}|\x{1F61B}|[\x{1F61C}-\x{1F61E}]|\x{1F61F}|[\x{1F620}-\x{1F625}]|[\x{1F626}-\x{1F627}]|[\x{1F628}-\x{1F62B}]|
\x{1F62C}|\x{1F62D}|[\x{1F62E}-\x{1F62F}]|[\x{1F630}-\x{1F633}]|\x{1F634}|[\x{1F635}-\x{1F640}]|[\x{1F641}-\x{1F642}]|[\x{1F643}-\x{1F644}]|[\x{1F645}-\x{1F64F}]|[\x{1F680}-\x{1F6C5}]|\x{1F6CC}|\x{1F6D0}|[\x{1F6D1}-\x{1F6D2}]|[\x{1F6EB}-\x{1F6EC}]|[\x{1F6F4}-\x{1F6F6}]|[\x{1F6F7}-\x{1F6F8}]|[\x{1F910}-\x{1F918}]|[\x{1F919}-\x{1F91E}]|\x{1F91F}|[\x{1F920}-\x{1F927}]|[\x{1F928}-\x{1F92F}]|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F93A}]|[\x{1F93C}-\x{1F93E}]|[\x{1F940}-\x{1F945}]|[\x{1F947}-\x{1F94B}]|\x{1F94C}|[\x{1F950}-\x{1F95E}]|[\x{1F95F}-\x{1F96B}]|[\x{1F980}-\x{1F984}]|[\x{1F985}-\x{1F991}]|[\x{1F992}-\x{1F997}]|\x{1F9C0}|[\x{1F9D0}-\x{1F9E6}])|(?:\x{0023}|\x{002A}|[\x{0030}-\x{0039}]|\x{00A9}|\x{00AE}|\x{203C}|\x{2049}|\x{2122}|\x{2139}|[\x{2194}-\x{2199}]|[\x{21A9}-\x{21AA}]|[\x{231A}-\x{231B}]|\x{2328}|\x{23CF}|[\x{23E9}-\x{23F3}]|[\x{23F8}-\x{23FA}]|\x{24C2}|[\x{25AA}-\x{25AB}]|\x{25B6}|\x{25C0}|[\x{25FB}-\x{25FE}]|[\x{2600}-\x{2604}]|\x{260E}|\x{2611}|[\x{2614}-\x{2615}]|\x{2618}|\x{261D}|\x{2620}|[\x{2622}-\x{2623}]|\x{2626}|\x{262A}|[\x{262E}-\x{262F}]|[\x{2638}-\x{263A}]|\x{2640}|\x{2642}|[\x{2648}-\x{2653}]|\x{2660}|\x{2663}|[\x{2665}-\x{2666}]|\x{2668}|\x{267B}|\x{267F}|[\x{2692}-\x{2697}]|\x{2699}|[\x{269B}-\x{269C}]|[\x{26A0}-\x{26A1}]|[\x{26AA}-\x{26AB}]|[\x{26B0}-\x{26B1}]|[\x{26BD}-\x{26BE}]|[\x{26C4}-\x{26C5}]|\x{26C8}|\x{26CE}|\x{26CF}|\x{26D1}|[\x{26D3}-\x{26D4}]|[\x{26E9}-\x{26EA}]|[\x{26F0}-\x{26F5}]|[\x{26F7}-\x{26FA}]|\x{26FD}|\x{2702}|\x{2705}|[\x{2708}-\x{2709}]|[\x{270A}-\x{270B}]|[\x{270C}-\x{270D}]|\x{270F}|\x{2712}|\x{2714}|\x{2716}|\x{271D}|\x{2721}|\x{2728}|[\x{2733}-\x{2734}]|\x{2744}|\x{2747}|\x{274C}|\x{274E}|[\x{2753}-\x{2755}]|\x{2757}|[\x{2763}-\x{2764}]|[\x{2795}-\x{2797}]|\x{27A1}|\x{27B0}|\x{27BF}|[\x{2934}-\x{2935}]|[\x{2B05}-\x{2B07}]|[\x{2B1B}-\x{2B1C}]|\x{2B50}|\x{2B55}|\x{3030}|\x{303D}|\x{3297}|\x{3299}|\x{1F004}|\x{1F0CF}|[\x{1F170}-\x{1F171}]|\x{1F17E}|\x{1F17F}|\x{1F18E}|[\x{1F191}-\x{1F19A}]|[\x{1F1E6}-\x{1F
1FF}]|[\x{1F201}-\x{1F202}]|\x{1F21A}|\x{1F22F}|[\x{1F232}-\x{1F23A}]|[\x{1F250}-\x{1F251}]|[\x{1F300}-\x{1F320}]|\x{1F321}|[\x{1F324}-\x{1F32C}]|[\x{1F32D}-\x{1F32F}]|[\x{1F330}-\x{1F335}]|\x{1F336}|[\x{1F337}-\x{1F37C}]|\x{1F37D}|[\x{1F37E}-\x{1F37F}]|[\x{1F380}-\x{1F393}]|[\x{1F396}-\x{1F397}]|[\x{1F399}-\x{1F39B}]|[\x{1F39E}-\x{1F39F}]|[\x{1F3A0}-\x{1F3C4}]|\x{1F3C5}|[\x{1F3C6}-\x{1F3CA}]|[\x{1F3CB}-\x{1F3CE}]|[\x{1F3CF}-\x{1F3D3}]|[\x{1F3D4}-\x{1F3DF}]|[\x{1F3E0}-\x{1F3F0}]|[\x{1F3F3}-\x{1F3F5}]|\x{1F3F7}|[\x{1F3F8}-\x{1F3FF}]|[\x{1F400}-\x{1F43E}]|\x{1F43F}|\x{1F440}|\x{1F441}|[\x{1F442}-\x{1F4F7}]|\x{1F4F8}|[\x{1F4F9}-\x{1F4FC}]|\x{1F4FD}|\x{1F4FF}|[\x{1F500}-\x{1F53D}]|[\x{1F549}-\x{1F54A}]|[\x{1F54B}-\x{1F54E}]|[\x{1F550}-\x{1F567}]|[\x{1F56F}-\x{1F570}]|[\x{1F573}-\x{1F579}]|\x{1F57A}|\x{1F587}|[\x{1F58A}-\x{1F58D}]|\x{1F590}|[\x{1F595}-\x{1F596}]|\x{1F5A4}|\x{1F5A5}|\x{1F5A8}|[\x{1F5B1}-\x{1F5B2}]|\x{1F5BC}|[\x{1F5C2}-\x{1F5C4}]|[\x{1F5D1}-\x{1F5D3}]|[\x{1F5DC}-\x{1F5DE}]|\x{1F5E1}|\x{1F5E3}|\x{1F5E8}|\x{1F5EF}|\x{1F5F3}|\x{1F5FA}|[\x{1F5FB}-\x{1F5FF}]|\x{1F600}|[\x{1F601}-\x{1F610}]|\x{1F611}|[\x{1F612}-\x{1F614}]|\x{1F615}|\x{1F616}|\x{1F617}|\x{1F618}|\x{1F619}|\x{1F61A}|\x{1F61B}|[\x{1F61C}-\x{1F61E}]|\x{1F61F}|[\x{1F620}-\x{1F625}]|[\x{1F626}-\x{1F627}]|[\x{1F628}-\x{1F62B}]|\x{1F62C}|\x{1F62D}|[\x{1F62E}-\x{1F62F}]|[\x{1F630}-\x{1F633}]|\x{1F634}|[\x{1F635}-\x{1F640}]|[\x{1F641}-\x{1F642}]|[\x{1F643}-\x{1F644}]|[\x{1F645}-\x{1F64F}]|[\x{1F680}-\x{1F6C5}]|[\x{1F6CB}-\x{1F6CF}]|\x{1F6D0}|[\x{1F6D1}-\x{1F6D2}]|[\x{1F6E0}-\x{1F6E5}]|\x{1F6E9}|[\x{1F6EB}-\x{1F6EC}]|\x{1F6F0}|\x{1F6F3}|[\x{1F6F4}-\x{1F6F6}]|[\x{1F6F7}-\x{1F6F8}]|[\x{1F910}-\x{1F918}]|[\x{1F919}-\x{1F91E}]|\x{1F91F}|[\x{1F920}-\x{1F927}]|[\x{1F928}-\x{1F92F}]|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F93A}]|[\x{1F93C}-\x{1F93E}]|[\x{1F940}-\x{1F945}]|[\x{1F947}-\x{1F94B}]|\x{1F94C}|[\x{1F950}-\x{1F95E}]|[\x{1F95F}-\x{1F96B}]|[\x{1F980}-\x{1F984}]|[\x{1F985}-\x{1F991}]|[\x{1F992}-\x{1F
997}]|\x{1F9C0}|[\x{1F9D0}-\x{1F9E6}])\x{FE0F})';

    $hashtagRegexp = '/'.

  2. @ravisorg ravisorg revised this gist Apr 1, 2017. No changes.
  3. @ravisorg ravisorg revised this gist Apr 1, 2017. 1 changed file with 3 additions and 3 deletions.
    6 changes: 3 additions & 3 deletions test-hashtag-regexp.php
    Original file line number Diff line number Diff line change
    @@ -37,12 +37,12 @@
    '_'. // Underscore
    ')+'.

    // Which can (optionally) be followed by more numbers
    '\d*'.

    // Repeat this group at least once (we need at least one character for a hashtag)
    ')+'.

    // Which can (optionally) be followed by more numbers
    '\d*'.

    // end the actual hashtag capture group
    ')'.

  4. @ravisorg ravisorg revised this gist Apr 1, 2017. 1 changed file with 20 additions and 16 deletions.
    36 changes: 20 additions & 16 deletions test-hashtag-regexp.php
    Original file line number Diff line number Diff line change
    @@ -20,26 +20,30 @@
    // Start with a pound sign (or a unicode variant of that)
    '[##]'.

    // Make sure the entire hashtag isn't just numbers (eg: #1)
    '(?!\d+\b)'.

    // capture the entire hashtag
    // Capture the entire hashtag
    '('.

    // Note that twitter requires the first character to be a non-numeric one (so #2 would not be a
    // hashtag, for example). ADN DID allow #2 as a hashtag. So I'm supporting the easier/more efficient
    // version (#2 is a valid hashtag). That can be easily changed if desired.
    // Defines a group of (possible) digits and (required) valid non digit characters
    '(?:'.

    // We can optionally start with one or more numbers, so long as the number is...
    '\d*'.

    // Followed by one or more valid hashtag characters that aren't numbers...
    '(?:'.
    '\p{L}|'. // Any language letter
    '\p{M}|'. // Any language letter modifier
    $emojiRegexp.'|'. // Any valid emoji
    '_'. // Underscore
    ')+'.

    // Which can (optionally) be followed by more numbers
    '\d*'.

    // Followed by one or more valid hashtag characters...
    '(?:'.
    '\p{L}|'. // Any language letter
    '\p{M}|'. // Any language letter modifier
    $emojiRegexp.'|'. // Any valid emoji
    '\d|'. // Any number
    '_'. // Underscore
    ')+'.
    // Repeat this group at least once (we need at least one character for a hashtag)
    ')+'.

    // end capturing the hashtag
    // end the actual hashtag capture group
    ')'.

    // use unicode modifiers / unicode strings
  5. @ravisorg ravisorg revised this gist Apr 1, 2017. 1 changed file with 3 additions and 0 deletions.
    3 changes: 3 additions & 0 deletions test-hashtag-regexp.php
    Original file line number Diff line number Diff line change
    @@ -20,6 +20,9 @@
    // Start with a pound sign (or a unicode variant of that)
    '[##]'.

    // Make sure the entire hashtag isn't just numbers (eg: #1)
    '(?!\d+\b)'.

    // capture the entire hashtag
    '('.

  6. @ravisorg ravisorg revised this gist Mar 30, 2017. 1 changed file with 2 additions and 1 deletion.
    3 changes: 2 additions & 1 deletion test-hashtag-regexp.php
    Original file line number Diff line number Diff line change
    @@ -9,9 +9,10 @@


    // Emoji are spread throughout the Unicode codespace and can contain modifiers. This regexp
    // was automatically generated from http://ftp.unicode.org/Public/emoji/5.0/emoji-data.txt
    // was automatically generated from http://ftp.unicode.org/Public/emoji/1.0/emoji-data.txt
    // and is the equivalent (because PHP doesn't support these character classes) of:
    // (?:\p{Emoji_Modifier_Base}\p{Emoji_Modifier}?|\p{Emoji_Presentation}|\p{Emoji}\x{FE0F}?)
    // Generated using https://gist.github.com/ravisorg/23edafbfcbd45de9875adec5310fca76
    $emojiRegexp = '(?:(?:\x{261D}|\x{26F9}|[\x{270A}-\x{270B}]|[\x{270C}-\x{270D}]|\x{1F385}|[\x{1F3C2}-\x{1F3C4}]|\x{1F3C7}|\x{1F3CA}|[\x{1F3CB}-\x{1F3CC}]|[\x{1F442}-\x{1F443}]|[\x{1F446}-\x{1F450}]|[\x{1F466}-\x{1F469}]|\x{1F46E}|[\x{1F470}-\x{1F478}]|\x{1F47C}|[\x{1F481}-\x{1F483}]|[\x{1F485}-\x{1F487}]|\x{1F4AA}|[\x{1F574}-\x{1F575}]|\x{1F57A}|\x{1F590}|[\x{1F595}-\x{1F596}]|[\x{1F645}-\x{1F647}]|[\x{1F64B}-\x{1F64F}]|\x{1F6A3}|[\x{1F6B4}-\x{1F6B6}]|\x{1F6C0}|\x{1F6CC}|\x{1F918}|[\x{1F919}-\x{1F91C}]|\x{1F91E}|\x{1F91F}|\x{1F926}|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F939}]|[\x{1F93D}-\x{1F93E}]|[\x{1F9D1}-\x{1F9DD}])(?:[\x{1F3FB}-\x{1F3FF}])?|(?:[\x{231A}-\x{231B}]|[\x{23E9}-\x{23EC}]|\x{23F0}|\x{23F3}|[\x{25FD}-\x{25FE}]|[\x{2614}-\x{2615}]|[\x{2648}-\x{2653}]|\x{267F}|\x{2693}|\x{26A1}|[\x{26AA}-\x{26AB}]|[\x{26BD}-\x{26BE}]|[\x{26C4}-\x{26C5}]|\x{26CE}|\x{26D4}|\x{26EA}|[\x{26F2}-\x{26F3}]|\x{26F5}|\x{26FA}|\x{26FD}|\x{2705}|[\x{270A}-\x{270B}]|\x{2728}|\x{274C}|\x{274E}|[\x{2753}-\x{2755}]|\x{2757}|[\x{2795}-\x{2797}]|\x{27B0}|\x{27BF}|[\x{2B1B}-\x{2B1C}]|\x{2B50}|\x{2B55}|\x{1F004}|\x{1F0CF}|\x{1F18E}|[\x{1F191}-\x{1F19A}]|[\x{1F1E6}-\x{1F1FF}]|\x{1F201}|\x{1F21A}|\x{1F22F}|[\x{1F232}-\x{1F236}]|[\x{1F238}-\x{1F23A}]|[\x{1F250}-\x{1F251}]|[\x{1F300}-\x{1F320}]|[\x{1F32D}-\x{1F32F}]|[\x{1F330}-\x{1F335}]|[\x{1F337}-\x{1F37C}]|[\x{1F37E}-\x{1F37F}]|[\x{1F380}-\x{1F393}]|[\x{1F3A0}-\x{1F3C4}]|\x{1F3C5}|[\x{1F3C6}-\x{1F3CA}]|[\x{1F3CF}-\x{1F3D3}]|[\x{1F3E0}-\x{1F3F0}]|\x{1F3F4}|[\x{1F3F8}-\x{1F3FF}]|[\x{1F400}-\x{1F43E}]|\x{1F440}|[\x{1F442}-\x{1F4F7}]|\x{1F4F8}|[\x{1F4F9}-\x{1F4FC}]|\x{1F4FF}|[\x{1F500}-\x{1F53D}]|[\x{1F54B}-\x{1F54E}]|[\x{1F550}-\x{1F567}]|\x{1F57A}|[\x{1F595}-\x{1F596}]|\x{1F5A4}|[\x{1F5FB}-\x{1F5FF}]|\x{1F600}|[\x{1F601}-\x{1F610}]|\x{1F611}|[\x{1F612}-\x{1F614}]|\x{1F615}|\x{1F616}|\x{1F617}|\x{1F618}|\x{1F619}|\x{1F61A}|\x{1F61B}|[\x{1F61C}-\x{1F61E}]|\x{1F61F}|[\x{1F620}-\x{1F625}]|[\x{1F626}-\x{1F627}]|[\x{1F628}-\x{1F62B}]|
\x{1F62C}|\x{1F62D}|[\x{1F62E}-\x{1F62F}]|[\x{1F630}-\x{1F633}]|\x{1F634}|[\x{1F635}-\x{1F640}]|[\x{1F641}-\x{1F642}]|[\x{1F643}-\x{1F644}]|[\x{1F645}-\x{1F64F}]|[\x{1F680}-\x{1F6C5}]|\x{1F6CC}|\x{1F6D0}|[\x{1F6D1}-\x{1F6D2}]|[\x{1F6EB}-\x{1F6EC}]|[\x{1F6F4}-\x{1F6F6}]|[\x{1F6F7}-\x{1F6F8}]|[\x{1F910}-\x{1F918}]|[\x{1F919}-\x{1F91E}]|\x{1F91F}|[\x{1F920}-\x{1F927}]|[\x{1F928}-\x{1F92F}]|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F93A}]|[\x{1F93C}-\x{1F93E}]|[\x{1F940}-\x{1F945}]|[\x{1F947}-\x{1F94B}]|\x{1F94C}|[\x{1F950}-\x{1F95E}]|[\x{1F95F}-\x{1F96B}]|[\x{1F980}-\x{1F984}]|[\x{1F985}-\x{1F991}]|[\x{1F992}-\x{1F997}]|\x{1F9C0}|[\x{1F9D0}-\x{1F9E6}])|(?:\x{0023}|\x{002A}|[\x{0030}-\x{0039}]|\x{00A9}|\x{00AE}|\x{203C}|\x{2049}|\x{2122}|\x{2139}|[\x{2194}-\x{2199}]|[\x{21A9}-\x{21AA}]|[\x{231A}-\x{231B}]|\x{2328}|\x{23CF}|[\x{23E9}-\x{23F3}]|[\x{23F8}-\x{23FA}]|\x{24C2}|[\x{25AA}-\x{25AB}]|\x{25B6}|\x{25C0}|[\x{25FB}-\x{25FE}]|[\x{2600}-\x{2604}]|\x{260E}|\x{2611}|[\x{2614}-\x{2615}]|\x{2618}|\x{261D}|\x{2620}|[\x{2622}-\x{2623}]|\x{2626}|\x{262A}|[\x{262E}-\x{262F}]|[\x{2638}-\x{263A}]|\x{2640}|\x{2642}|[\x{2648}-\x{2653}]|\x{2660}|\x{2663}|[\x{2665}-\x{2666}]|\x{2668}|\x{267B}|\x{267F}|[\x{2692}-\x{2697}]|\x{2699}|[\x{269B}-\x{269C}]|[\x{26A0}-\x{26A1}]|[\x{26AA}-\x{26AB}]|[\x{26B0}-\x{26B1}]|[\x{26BD}-\x{26BE}]|[\x{26C4}-\x{26C5}]|\x{26C8}|\x{26CE}|\x{26CF}|\x{26D1}|[\x{26D3}-\x{26D4}]|[\x{26E9}-\x{26EA}]|[\x{26F0}-\x{26F5}]|[\x{26F7}-\x{26FA}]|\x{26FD}|\x{2702}|\x{2705}|[\x{2708}-\x{2709}]|[\x{270A}-\x{270B}]|[\x{270C}-\x{270D}]|\x{270F}|\x{2712}|\x{2714}|\x{2716}|\x{271D}|\x{2721}|\x{2728}|[\x{2733}-\x{2734}]|\x{2744}|\x{2747}|\x{274C}|\x{274E}|[\x{2753}-\x{2755}]|\x{2757}|[\x{2763}-\x{2764}]|[\x{2795}-\x{2797}]|\x{27A1}|\x{27B0}|\x{27BF}|[\x{2934}-\x{2935}]|[\x{2B05}-\x{2B07}]|[\x{2B1B}-\x{2B1C}]|\x{2B50}|\x{2B55}|\x{3030}|\x{303D}|\x{3297}|\x{3299}|\x{1F004}|\x{1F0CF}|[\x{1F170}-\x{1F171}]|\x{1F17E}|\x{1F17F}|\x{1F18E}|[\x{1F191}-\x{1F19A}]|[\x{1F1E6}-\x{1F
1FF}]|[\x{1F201}-\x{1F202}]|\x{1F21A}|\x{1F22F}|[\x{1F232}-\x{1F23A}]|[\x{1F250}-\x{1F251}]|[\x{1F300}-\x{1F320}]|\x{1F321}|[\x{1F324}-\x{1F32C}]|[\x{1F32D}-\x{1F32F}]|[\x{1F330}-\x{1F335}]|\x{1F336}|[\x{1F337}-\x{1F37C}]|\x{1F37D}|[\x{1F37E}-\x{1F37F}]|[\x{1F380}-\x{1F393}]|[\x{1F396}-\x{1F397}]|[\x{1F399}-\x{1F39B}]|[\x{1F39E}-\x{1F39F}]|[\x{1F3A0}-\x{1F3C4}]|\x{1F3C5}|[\x{1F3C6}-\x{1F3CA}]|[\x{1F3CB}-\x{1F3CE}]|[\x{1F3CF}-\x{1F3D3}]|[\x{1F3D4}-\x{1F3DF}]|[\x{1F3E0}-\x{1F3F0}]|[\x{1F3F3}-\x{1F3F5}]|\x{1F3F7}|[\x{1F3F8}-\x{1F3FF}]|[\x{1F400}-\x{1F43E}]|\x{1F43F}|\x{1F440}|\x{1F441}|[\x{1F442}-\x{1F4F7}]|\x{1F4F8}|[\x{1F4F9}-\x{1F4FC}]|\x{1F4FD}|\x{1F4FF}|[\x{1F500}-\x{1F53D}]|[\x{1F549}-\x{1F54A}]|[\x{1F54B}-\x{1F54E}]|[\x{1F550}-\x{1F567}]|[\x{1F56F}-\x{1F570}]|[\x{1F573}-\x{1F579}]|\x{1F57A}|\x{1F587}|[\x{1F58A}-\x{1F58D}]|\x{1F590}|[\x{1F595}-\x{1F596}]|\x{1F5A4}|\x{1F5A5}|\x{1F5A8}|[\x{1F5B1}-\x{1F5B2}]|\x{1F5BC}|[\x{1F5C2}-\x{1F5C4}]|[\x{1F5D1}-\x{1F5D3}]|[\x{1F5DC}-\x{1F5DE}]|\x{1F5E1}|\x{1F5E3}|\x{1F5E8}|\x{1F5EF}|\x{1F5F3}|\x{1F5FA}|[\x{1F5FB}-\x{1F5FF}]|\x{1F600}|[\x{1F601}-\x{1F610}]|\x{1F611}|[\x{1F612}-\x{1F614}]|\x{1F615}|\x{1F616}|\x{1F617}|\x{1F618}|\x{1F619}|\x{1F61A}|\x{1F61B}|[\x{1F61C}-\x{1F61E}]|\x{1F61F}|[\x{1F620}-\x{1F625}]|[\x{1F626}-\x{1F627}]|[\x{1F628}-\x{1F62B}]|\x{1F62C}|\x{1F62D}|[\x{1F62E}-\x{1F62F}]|[\x{1F630}-\x{1F633}]|\x{1F634}|[\x{1F635}-\x{1F640}]|[\x{1F641}-\x{1F642}]|[\x{1F643}-\x{1F644}]|[\x{1F645}-\x{1F64F}]|[\x{1F680}-\x{1F6C5}]|[\x{1F6CB}-\x{1F6CF}]|\x{1F6D0}|[\x{1F6D1}-\x{1F6D2}]|[\x{1F6E0}-\x{1F6E5}]|\x{1F6E9}|[\x{1F6EB}-\x{1F6EC}]|\x{1F6F0}|\x{1F6F3}|[\x{1F6F4}-\x{1F6F6}]|[\x{1F6F7}-\x{1F6F8}]|[\x{1F910}-\x{1F918}]|[\x{1F919}-\x{1F91E}]|\x{1F91F}|[\x{1F920}-\x{1F927}]|[\x{1F928}-\x{1F92F}]|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F93A}]|[\x{1F93C}-\x{1F93E}]|[\x{1F940}-\x{1F945}]|[\x{1F947}-\x{1F94B}]|\x{1F94C}|[\x{1F950}-\x{1F95E}]|[\x{1F95F}-\x{1F96B}]|[\x{1F980}-\x{1F984}]|[\x{1F985}-\x{1F991}]|[\x{1F992}-\x{1F
997}]|\x{1F9C0}|[\x{1F9D0}-\x{1F9E6}])\x{FE0F}?)';

    $hashtagRegexp = '/'.
  7. @ravisorg ravisorg revised this gist Mar 30, 2017. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion test-hashtag-regexp.php
    Original file line number Diff line number Diff line change
    @@ -9,7 +9,7 @@


    // Emoji are spread throughout the Unicode codespace and can contain modifiers. This regexp
    // was automatically generated from http://ftp.unicode.org/Public/emoji/1.0/emoji-data.txt
    // was automatically generated from http://ftp.unicode.org/Public/emoji/5.0/emoji-data.txt
    // and is the equivalent (because PHP doesn't support these character classes) of:
    // (?:\p{Emoji_Modifier_Base}\p{Emoji_Modifier}?|\p{Emoji_Presentation}|\p{Emoji}\x{FE0F}?)
    $emojiRegexp = '(?:(?:\x{261D}|\x{26F9}|[\x{270A}-\x{270B}]|[\x{270C}-\x{270D}]|\x{1F385}|[\x{1F3C2}-\x{1F3C4}]|\x{1F3C7}|\x{1F3CA}|[\x{1F3CB}-\x{1F3CC}]|[\x{1F442}-\x{1F443}]|[\x{1F446}-\x{1F450}]|[\x{1F466}-\x{1F469}]|\x{1F46E}|[\x{1F470}-\x{1F478}]|\x{1F47C}|[\x{1F481}-\x{1F483}]|[\x{1F485}-\x{1F487}]|\x{1F4AA}|[\x{1F574}-\x{1F575}]|\x{1F57A}|\x{1F590}|[\x{1F595}-\x{1F596}]|[\x{1F645}-\x{1F647}]|[\x{1F64B}-\x{1F64F}]|\x{1F6A3}|[\x{1F6B4}-\x{1F6B6}]|\x{1F6C0}|\x{1F6CC}|\x{1F918}|[\x{1F919}-\x{1F91C}]|\x{1F91E}|\x{1F91F}|\x{1F926}|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F939}]|[\x{1F93D}-\x{1F93E}]|[\x{1F9D1}-\x{1F9DD}])(?:[\x{1F3FB}-\x{1F3FF}])?|(?:[\x{231A}-\x{231B}]|[\x{23E9}-\x{23EC}]|\x{23F0}|\x{23F3}|[\x{25FD}-\x{25FE}]|[\x{2614}-\x{2615}]|[\x{2648}-\x{2653}]|\x{267F}|\x{2693}|\x{26A1}|[\x{26AA}-\x{26AB}]|[\x{26BD}-\x{26BE}]|[\x{26C4}-\x{26C5}]|\x{26CE}|\x{26D4}|\x{26EA}|[\x{26F2}-\x{26F3}]|\x{26F5}|\x{26FA}|\x{26FD}|\x{2705}|[\x{270A}-\x{270B}]|\x{2728}|\x{274C}|\x{274E}|[\x{2753}-\x{2755}]|\x{2757}|[\x{2795}-\x{2797}]|\x{27B0}|\x{27BF}|[\x{2B1B}-\x{2B1C}]|\x{2B50}|\x{2B55}|\x{1F004}|\x{1F0CF}|\x{1F18E}|[\x{1F191}-\x{1F19A}]|[\x{1F1E6}-\x{1F1FF}]|\x{1F201}|\x{1F21A}|\x{1F22F}|[\x{1F232}-\x{1F236}]|[\x{1F238}-\x{1F23A}]|[\x{1F250}-\x{1F251}]|[\x{1F300}-\x{1F320}]|[\x{1F32D}-\x{1F32F}]|[\x{1F330}-\x{1F335}]|[\x{1F337}-\x{1F37C}]|[\x{1F37E}-\x{1F37F}]|[\x{1F380}-\x{1F393}]|[\x{1F3A0}-\x{1F3C4}]|\x{1F3C5}|[\x{1F3C6}-\x{1F3CA}]|[\x{1F3CF}-\x{1F3D3}]|[\x{1F3E0}-\x{1F3F0}]|\x{1F3F4}|[\x{1F3F8}-\x{1F3FF}]|[\x{1F400}-\x{1F43E}]|\x{1F440}|[\x{1F442}-\x{1F4F7}]|\x{1F4F8}|[\x{1F4F9}-\x{1F4FC}]|\x{1F4FF}|[\x{1F500}-\x{1F53D}]|[\x{1F54B}-\x{1F54E}]|[\x{1F550}-\x{1F567}]|\x{1F57A}|[\x{1F595}-\x{1F596}]|\x{1F5A4}|[\x{1F5FB}-\x{1F5FF}]|\x{1F600}|[\x{1F601}-\x{1F610}]|\x{1F611}|[\x{1F612}-\x{1F614}]|\x{1F615}|\x{1F616}|\x{1F617}|\x{1F618}|\x{1F619}|\x{1F61A}|\x{1F61B}|[\x{1F61C}-\x{1F61E}]|\x{1F61F}|[\x{1F620}-\x{1F625}]|[\x{1F626}-\x{1F627}]|[\x{1F628}-\x{1F62B}]|
\x{1F62C}|\x{1F62D}|[\x{1F62E}-\x{1F62F}]|[\x{1F630}-\x{1F633}]|\x{1F634}|[\x{1F635}-\x{1F640}]|[\x{1F641}-\x{1F642}]|[\x{1F643}-\x{1F644}]|[\x{1F645}-\x{1F64F}]|[\x{1F680}-\x{1F6C5}]|\x{1F6CC}|\x{1F6D0}|[\x{1F6D1}-\x{1F6D2}]|[\x{1F6EB}-\x{1F6EC}]|[\x{1F6F4}-\x{1F6F6}]|[\x{1F6F7}-\x{1F6F8}]|[\x{1F910}-\x{1F918}]|[\x{1F919}-\x{1F91E}]|\x{1F91F}|[\x{1F920}-\x{1F927}]|[\x{1F928}-\x{1F92F}]|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F93A}]|[\x{1F93C}-\x{1F93E}]|[\x{1F940}-\x{1F945}]|[\x{1F947}-\x{1F94B}]|\x{1F94C}|[\x{1F950}-\x{1F95E}]|[\x{1F95F}-\x{1F96B}]|[\x{1F980}-\x{1F984}]|[\x{1F985}-\x{1F991}]|[\x{1F992}-\x{1F997}]|\x{1F9C0}|[\x{1F9D0}-\x{1F9E6}])|(?:\x{0023}|\x{002A}|[\x{0030}-\x{0039}]|\x{00A9}|\x{00AE}|\x{203C}|\x{2049}|\x{2122}|\x{2139}|[\x{2194}-\x{2199}]|[\x{21A9}-\x{21AA}]|[\x{231A}-\x{231B}]|\x{2328}|\x{23CF}|[\x{23E9}-\x{23F3}]|[\x{23F8}-\x{23FA}]|\x{24C2}|[\x{25AA}-\x{25AB}]|\x{25B6}|\x{25C0}|[\x{25FB}-\x{25FE}]|[\x{2600}-\x{2604}]|\x{260E}|\x{2611}|[\x{2614}-\x{2615}]|\x{2618}|\x{261D}|\x{2620}|[\x{2622}-\x{2623}]|\x{2626}|\x{262A}|[\x{262E}-\x{262F}]|[\x{2638}-\x{263A}]|\x{2640}|\x{2642}|[\x{2648}-\x{2653}]|\x{2660}|\x{2663}|[\x{2665}-\x{2666}]|\x{2668}|\x{267B}|\x{267F}|[\x{2692}-\x{2697}]|\x{2699}|[\x{269B}-\x{269C}]|[\x{26A0}-\x{26A1}]|[\x{26AA}-\x{26AB}]|[\x{26B0}-\x{26B1}]|[\x{26BD}-\x{26BE}]|[\x{26C4}-\x{26C5}]|\x{26C8}|\x{26CE}|\x{26CF}|\x{26D1}|[\x{26D3}-\x{26D4}]|[\x{26E9}-\x{26EA}]|[\x{26F0}-\x{26F5}]|[\x{26F7}-\x{26FA}]|\x{26FD}|\x{2702}|\x{2705}|[\x{2708}-\x{2709}]|[\x{270A}-\x{270B}]|[\x{270C}-\x{270D}]|\x{270F}|\x{2712}|\x{2714}|\x{2716}|\x{271D}|\x{2721}|\x{2728}|[\x{2733}-\x{2734}]|\x{2744}|\x{2747}|\x{274C}|\x{274E}|[\x{2753}-\x{2755}]|\x{2757}|[\x{2763}-\x{2764}]|[\x{2795}-\x{2797}]|\x{27A1}|\x{27B0}|\x{27BF}|[\x{2934}-\x{2935}]|[\x{2B05}-\x{2B07}]|[\x{2B1B}-\x{2B1C}]|\x{2B50}|\x{2B55}|\x{3030}|\x{303D}|\x{3297}|\x{3299}|\x{1F004}|\x{1F0CF}|[\x{1F170}-\x{1F171}]|\x{1F17E}|\x{1F17F}|\x{1F18E}|[\x{1F191}-\x{1F19A}]|[\x{1F1E6}-\x{1F
1FF}]|[\x{1F201}-\x{1F202}]|\x{1F21A}|\x{1F22F}|[\x{1F232}-\x{1F23A}]|[\x{1F250}-\x{1F251}]|[\x{1F300}-\x{1F320}]|\x{1F321}|[\x{1F324}-\x{1F32C}]|[\x{1F32D}-\x{1F32F}]|[\x{1F330}-\x{1F335}]|\x{1F336}|[\x{1F337}-\x{1F37C}]|\x{1F37D}|[\x{1F37E}-\x{1F37F}]|[\x{1F380}-\x{1F393}]|[\x{1F396}-\x{1F397}]|[\x{1F399}-\x{1F39B}]|[\x{1F39E}-\x{1F39F}]|[\x{1F3A0}-\x{1F3C4}]|\x{1F3C5}|[\x{1F3C6}-\x{1F3CA}]|[\x{1F3CB}-\x{1F3CE}]|[\x{1F3CF}-\x{1F3D3}]|[\x{1F3D4}-\x{1F3DF}]|[\x{1F3E0}-\x{1F3F0}]|[\x{1F3F3}-\x{1F3F5}]|\x{1F3F7}|[\x{1F3F8}-\x{1F3FF}]|[\x{1F400}-\x{1F43E}]|\x{1F43F}|\x{1F440}|\x{1F441}|[\x{1F442}-\x{1F4F7}]|\x{1F4F8}|[\x{1F4F9}-\x{1F4FC}]|\x{1F4FD}|\x{1F4FF}|[\x{1F500}-\x{1F53D}]|[\x{1F549}-\x{1F54A}]|[\x{1F54B}-\x{1F54E}]|[\x{1F550}-\x{1F567}]|[\x{1F56F}-\x{1F570}]|[\x{1F573}-\x{1F579}]|\x{1F57A}|\x{1F587}|[\x{1F58A}-\x{1F58D}]|\x{1F590}|[\x{1F595}-\x{1F596}]|\x{1F5A4}|\x{1F5A5}|\x{1F5A8}|[\x{1F5B1}-\x{1F5B2}]|\x{1F5BC}|[\x{1F5C2}-\x{1F5C4}]|[\x{1F5D1}-\x{1F5D3}]|[\x{1F5DC}-\x{1F5DE}]|\x{1F5E1}|\x{1F5E3}|\x{1F5E8}|\x{1F5EF}|\x{1F5F3}|\x{1F5FA}|[\x{1F5FB}-\x{1F5FF}]|\x{1F600}|[\x{1F601}-\x{1F610}]|\x{1F611}|[\x{1F612}-\x{1F614}]|\x{1F615}|\x{1F616}|\x{1F617}|\x{1F618}|\x{1F619}|\x{1F61A}|\x{1F61B}|[\x{1F61C}-\x{1F61E}]|\x{1F61F}|[\x{1F620}-\x{1F625}]|[\x{1F626}-\x{1F627}]|[\x{1F628}-\x{1F62B}]|\x{1F62C}|\x{1F62D}|[\x{1F62E}-\x{1F62F}]|[\x{1F630}-\x{1F633}]|\x{1F634}|[\x{1F635}-\x{1F640}]|[\x{1F641}-\x{1F642}]|[\x{1F643}-\x{1F644}]|[\x{1F645}-\x{1F64F}]|[\x{1F680}-\x{1F6C5}]|[\x{1F6CB}-\x{1F6CF}]|\x{1F6D0}|[\x{1F6D1}-\x{1F6D2}]|[\x{1F6E0}-\x{1F6E5}]|\x{1F6E9}|[\x{1F6EB}-\x{1F6EC}]|\x{1F6F0}|\x{1F6F3}|[\x{1F6F4}-\x{1F6F6}]|[\x{1F6F7}-\x{1F6F8}]|[\x{1F910}-\x{1F918}]|[\x{1F919}-\x{1F91E}]|\x{1F91F}|[\x{1F920}-\x{1F927}]|[\x{1F928}-\x{1F92F}]|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F93A}]|[\x{1F93C}-\x{1F93E}]|[\x{1F940}-\x{1F945}]|[\x{1F947}-\x{1F94B}]|\x{1F94C}|[\x{1F950}-\x{1F95E}]|[\x{1F95F}-\x{1F96B}]|[\x{1F980}-\x{1F984}]|[\x{1F985}-\x{1F991}]|[\x{1F992}-\x{1F
997}]|\x{1F9C0}|[\x{1F9D0}-\x{1F9E6}])\x{FE0F}?)';
  8. @ravisorg ravisorg revised this gist Mar 30, 2017. 1 changed file with 4 additions and 3 deletions.
    7 changes: 4 additions & 3 deletions test-hashtag-regexp.php
    Original file line number Diff line number Diff line change
    @@ -8,9 +8,10 @@
    // For curiosity's sake, post number 693,847 is an emoji hashtag: #(heart)


    // Emoji are spread throughout the Unicode codespace and can contain modifiers. This is one of
    // those "don't reinvent the wheel" type things, and was appropriated / liberated from
    // https://github.com/gmac/gemoji-parser/blob/master/output/rx_unicode.rb
    // Emoji are spread throughout the Unicode codespace and can contain modifiers. This regexp
    // was automatically generated from http://ftp.unicode.org/Public/emoji/1.0/emoji-data.txt
    // and is the equivalent (because PHP doesn't support these character classes) of:
    // (?:\p{Emoji_Modifier_Base}\p{Emoji_Modifier}?|\p{Emoji_Presentation}|\p{Emoji}\x{FE0F}?)
    $emojiRegexp = '(?:(?:\x{261D}|\x{26F9}|[\x{270A}-\x{270B}]|[\x{270C}-\x{270D}]|\x{1F385}|[\x{1F3C2}-\x{1F3C4}]|\x{1F3C7}|\x{1F3CA}|[\x{1F3CB}-\x{1F3CC}]|[\x{1F442}-\x{1F443}]|[\x{1F446}-\x{1F450}]|[\x{1F466}-\x{1F469}]|\x{1F46E}|[\x{1F470}-\x{1F478}]|\x{1F47C}|[\x{1F481}-\x{1F483}]|[\x{1F485}-\x{1F487}]|\x{1F4AA}|[\x{1F574}-\x{1F575}]|\x{1F57A}|\x{1F590}|[\x{1F595}-\x{1F596}]|[\x{1F645}-\x{1F647}]|[\x{1F64B}-\x{1F64F}]|\x{1F6A3}|[\x{1F6B4}-\x{1F6B6}]|\x{1F6C0}|\x{1F6CC}|\x{1F918}|[\x{1F919}-\x{1F91C}]|\x{1F91E}|\x{1F91F}|\x{1F926}|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F939}]|[\x{1F93D}-\x{1F93E}]|[\x{1F9D1}-\x{1F9DD}])(?:[\x{1F3FB}-\x{1F3FF}])?|(?:[\x{231A}-\x{231B}]|[\x{23E9}-\x{23EC}]|\x{23F0}|\x{23F3}|[\x{25FD}-\x{25FE}]|[\x{2614}-\x{2615}]|[\x{2648}-\x{2653}]|\x{267F}|\x{2693}|\x{26A1}|[\x{26AA}-\x{26AB}]|[\x{26BD}-\x{26BE}]|[\x{26C4}-\x{26C5}]|\x{26CE}|\x{26D4}|\x{26EA}|[\x{26F2}-\x{26F3}]|\x{26F5}|\x{26FA}|\x{26FD}|\x{2705}|[\x{270A}-\x{270B}]|\x{2728}|\x{274C}|\x{274E}|[\x{2753}-\x{2755}]|\x{2757}|[\x{2795}-\x{2797}]|\x{27B0}|\x{27BF}|[\x{2B1B}-\x{2B1C}]|\x{2B50}|\x{2B55}|\x{1F004}|\x{1F0CF}|\x{1F18E}|[\x{1F191}-\x{1F19A}]|[\x{1F1E6}-\x{1F1FF}]|\x{1F201}|\x{1F21A}|\x{1F22F}|[\x{1F232}-\x{1F236}]|[\x{1F238}-\x{1F23A}]|[\x{1F250}-\x{1F251}]|[\x{1F300}-\x{1F320}]|[\x{1F32D}-\x{1F32F}]|[\x{1F330}-\x{1F335}]|[\x{1F337}-\x{1F37C}]|[\x{1F37E}-\x{1F37F}]|[\x{1F380}-\x{1F393}]|[\x{1F3A0}-\x{1F3C4}]|\x{1F3C5}|[\x{1F3C6}-\x{1F3CA}]|[\x{1F3CF}-\x{1F3D3}]|[\x{1F3E0}-\x{1F3F0}]|\x{1F3F4}|[\x{1F3F8}-\x{1F3FF}]|[\x{1F400}-\x{1F43E}]|\x{1F440}|[\x{1F442}-\x{1F4F7}]|\x{1F4F8}|[\x{1F4F9}-\x{1F4FC}]|\x{1F4FF}|[\x{1F500}-\x{1F53D}]|[\x{1F54B}-\x{1F54E}]|[\x{1F550}-\x{1F567}]|\x{1F57A}|[\x{1F595}-\x{1F596}]|\x{1F5A4}|[\x{1F5FB}-\x{1F5FF}]|\x{1F600}|[\x{1F601}-\x{1F610}]|\x{1F611}|[\x{1F612}-\x{1F614}]|\x{1F615}|\x{1F616}|\x{1F617}|\x{1F618}|\x{1F619}|\x{1F61A}|\x{1F61B}|[\x{1F61C}-\x{1F61E}]|\x{1F61F}|[\x{1F620}-\x{1F625}]|[\x{1F626}-\x{1F627}]|[\x{1F628}-\x{1F62B}]|
\x{1F62C}|\x{1F62D}|[\x{1F62E}-\x{1F62F}]|[\x{1F630}-\x{1F633}]|\x{1F634}|[\x{1F635}-\x{1F640}]|[\x{1F641}-\x{1F642}]|[\x{1F643}-\x{1F644}]|[\x{1F645}-\x{1F64F}]|[\x{1F680}-\x{1F6C5}]|\x{1F6CC}|\x{1F6D0}|[\x{1F6D1}-\x{1F6D2}]|[\x{1F6EB}-\x{1F6EC}]|[\x{1F6F4}-\x{1F6F6}]|[\x{1F6F7}-\x{1F6F8}]|[\x{1F910}-\x{1F918}]|[\x{1F919}-\x{1F91E}]|\x{1F91F}|[\x{1F920}-\x{1F927}]|[\x{1F928}-\x{1F92F}]|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F93A}]|[\x{1F93C}-\x{1F93E}]|[\x{1F940}-\x{1F945}]|[\x{1F947}-\x{1F94B}]|\x{1F94C}|[\x{1F950}-\x{1F95E}]|[\x{1F95F}-\x{1F96B}]|[\x{1F980}-\x{1F984}]|[\x{1F985}-\x{1F991}]|[\x{1F992}-\x{1F997}]|\x{1F9C0}|[\x{1F9D0}-\x{1F9E6}])|(?:\x{0023}|\x{002A}|[\x{0030}-\x{0039}]|\x{00A9}|\x{00AE}|\x{203C}|\x{2049}|\x{2122}|\x{2139}|[\x{2194}-\x{2199}]|[\x{21A9}-\x{21AA}]|[\x{231A}-\x{231B}]|\x{2328}|\x{23CF}|[\x{23E9}-\x{23F3}]|[\x{23F8}-\x{23FA}]|\x{24C2}|[\x{25AA}-\x{25AB}]|\x{25B6}|\x{25C0}|[\x{25FB}-\x{25FE}]|[\x{2600}-\x{2604}]|\x{260E}|\x{2611}|[\x{2614}-\x{2615}]|\x{2618}|\x{261D}|\x{2620}|[\x{2622}-\x{2623}]|\x{2626}|\x{262A}|[\x{262E}-\x{262F}]|[\x{2638}-\x{263A}]|\x{2640}|\x{2642}|[\x{2648}-\x{2653}]|\x{2660}|\x{2663}|[\x{2665}-\x{2666}]|\x{2668}|\x{267B}|\x{267F}|[\x{2692}-\x{2697}]|\x{2699}|[\x{269B}-\x{269C}]|[\x{26A0}-\x{26A1}]|[\x{26AA}-\x{26AB}]|[\x{26B0}-\x{26B1}]|[\x{26BD}-\x{26BE}]|[\x{26C4}-\x{26C5}]|\x{26C8}|\x{26CE}|\x{26CF}|\x{26D1}|[\x{26D3}-\x{26D4}]|[\x{26E9}-\x{26EA}]|[\x{26F0}-\x{26F5}]|[\x{26F7}-\x{26FA}]|\x{26FD}|\x{2702}|\x{2705}|[\x{2708}-\x{2709}]|[\x{270A}-\x{270B}]|[\x{270C}-\x{270D}]|\x{270F}|\x{2712}|\x{2714}|\x{2716}|\x{271D}|\x{2721}|\x{2728}|[\x{2733}-\x{2734}]|\x{2744}|\x{2747}|\x{274C}|\x{274E}|[\x{2753}-\x{2755}]|\x{2757}|[\x{2763}-\x{2764}]|[\x{2795}-\x{2797}]|\x{27A1}|\x{27B0}|\x{27BF}|[\x{2934}-\x{2935}]|[\x{2B05}-\x{2B07}]|[\x{2B1B}-\x{2B1C}]|\x{2B50}|\x{2B55}|\x{3030}|\x{303D}|\x{3297}|\x{3299}|\x{1F004}|\x{1F0CF}|[\x{1F170}-\x{1F171}]|\x{1F17E}|\x{1F17F}|\x{1F18E}|[\x{1F191}-\x{1F19A}]|[\x{1F1E6}-\x{1F
1FF}]|[\x{1F201}-\x{1F202}]|\x{1F21A}|\x{1F22F}|[\x{1F232}-\x{1F23A}]|[\x{1F250}-\x{1F251}]|[\x{1F300}-\x{1F320}]|\x{1F321}|[\x{1F324}-\x{1F32C}]|[\x{1F32D}-\x{1F32F}]|[\x{1F330}-\x{1F335}]|\x{1F336}|[\x{1F337}-\x{1F37C}]|\x{1F37D}|[\x{1F37E}-\x{1F37F}]|[\x{1F380}-\x{1F393}]|[\x{1F396}-\x{1F397}]|[\x{1F399}-\x{1F39B}]|[\x{1F39E}-\x{1F39F}]|[\x{1F3A0}-\x{1F3C4}]|\x{1F3C5}|[\x{1F3C6}-\x{1F3CA}]|[\x{1F3CB}-\x{1F3CE}]|[\x{1F3CF}-\x{1F3D3}]|[\x{1F3D4}-\x{1F3DF}]|[\x{1F3E0}-\x{1F3F0}]|[\x{1F3F3}-\x{1F3F5}]|\x{1F3F7}|[\x{1F3F8}-\x{1F3FF}]|[\x{1F400}-\x{1F43E}]|\x{1F43F}|\x{1F440}|\x{1F441}|[\x{1F442}-\x{1F4F7}]|\x{1F4F8}|[\x{1F4F9}-\x{1F4FC}]|\x{1F4FD}|\x{1F4FF}|[\x{1F500}-\x{1F53D}]|[\x{1F549}-\x{1F54A}]|[\x{1F54B}-\x{1F54E}]|[\x{1F550}-\x{1F567}]|[\x{1F56F}-\x{1F570}]|[\x{1F573}-\x{1F579}]|\x{1F57A}|\x{1F587}|[\x{1F58A}-\x{1F58D}]|\x{1F590}|[\x{1F595}-\x{1F596}]|\x{1F5A4}|\x{1F5A5}|\x{1F5A8}|[\x{1F5B1}-\x{1F5B2}]|\x{1F5BC}|[\x{1F5C2}-\x{1F5C4}]|[\x{1F5D1}-\x{1F5D3}]|[\x{1F5DC}-\x{1F5DE}]|\x{1F5E1}|\x{1F5E3}|\x{1F5E8}|\x{1F5EF}|\x{1F5F3}|\x{1F5FA}|[\x{1F5FB}-\x{1F5FF}]|\x{1F600}|[\x{1F601}-\x{1F610}]|\x{1F611}|[\x{1F612}-\x{1F614}]|\x{1F615}|\x{1F616}|\x{1F617}|\x{1F618}|\x{1F619}|\x{1F61A}|\x{1F61B}|[\x{1F61C}-\x{1F61E}]|\x{1F61F}|[\x{1F620}-\x{1F625}]|[\x{1F626}-\x{1F627}]|[\x{1F628}-\x{1F62B}]|\x{1F62C}|\x{1F62D}|[\x{1F62E}-\x{1F62F}]|[\x{1F630}-\x{1F633}]|\x{1F634}|[\x{1F635}-\x{1F640}]|[\x{1F641}-\x{1F642}]|[\x{1F643}-\x{1F644}]|[\x{1F645}-\x{1F64F}]|[\x{1F680}-\x{1F6C5}]|[\x{1F6CB}-\x{1F6CF}]|\x{1F6D0}|[\x{1F6D1}-\x{1F6D2}]|[\x{1F6E0}-\x{1F6E5}]|\x{1F6E9}|[\x{1F6EB}-\x{1F6EC}]|\x{1F6F0}|\x{1F6F3}|[\x{1F6F4}-\x{1F6F6}]|[\x{1F6F7}-\x{1F6F8}]|[\x{1F910}-\x{1F918}]|[\x{1F919}-\x{1F91E}]|\x{1F91F}|[\x{1F920}-\x{1F927}]|[\x{1F928}-\x{1F92F}]|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F93A}]|[\x{1F93C}-\x{1F93E}]|[\x{1F940}-\x{1F945}]|[\x{1F947}-\x{1F94B}]|\x{1F94C}|[\x{1F950}-\x{1F95E}]|[\x{1F95F}-\x{1F96B}]|[\x{1F980}-\x{1F984}]|[\x{1F985}-\x{1F991}]|[\x{1F992}-\x{1F
997}]|\x{1F9C0}|[\x{1F9D0}-\x{1F9E6}])\x{FE0F}?)';

    $hashtagRegexp = '/'.
  9. @ravisorg ravisorg revised this gist Mar 30, 2017. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion test-hashtag-regexp.php
    Original file line number Diff line number Diff line change
    @@ -11,7 +11,7 @@
    // Emoji are spread throughout the Unicode codespace and can contain modifiers. This is one of
    // those "don't reinvent the wheel" type things, and was appropriated / liberated from
    // https://github.com/gmac/gemoji-parser/blob/master/output/rx_unicode.rb
    $emojiRegexp = '(?:\x{1f602}|\x{2665}|\x{2764}|\x{1f60d}|\x{1f612}|\x{1f60a}|\x{1f62d}|\x{1f44c}|\x{263a}|\x{1f618}|\x{1f495}|\x{1f629}|\x{1f614}|\x{1f60f}|\x{1f601}|\x{1f633}|\x{1f64f}|\x{1f64c}|\x{1f44d}|\x{270c}|\x{1f609}|\x{1f60c}|\x{1f648}|\x{1f481}|\x{1f60e}|\x{1f3b6}|\x{1f440}|\x{1f604}|\x{1f634}|\x{1f611}|\x{1f61c}|\x{1f622}|\x{1f61e}|\x{1f4af}|\x{1f496}|\x{1f60b}|\x{1f44f}|\x{1f49c}|\x{2728}|\x{1f615}|\x{1f64a}|\x{270b}|\x{1f499}|\x{1f610}|\x{1f605}|\x{1f494}|\x{1f62a}|\x{1f497}|\x{1f48b}|\x{1f49e}|\x{1f631}|\x{1f498}|\x{1f621}|\x{1f62b}|\x{2600}|\x{1f44a}|\x{1f603}|\x{1f338}|\x{1f608}|\x{1f61d}|\x{1f624}|\x{1f44b}|\x{1f339}|\x{2714}|\x{1f4aa}|\x{1f389}|\x{1f637}|\x{1f49b}|\x{1f623}|\x{1f49a}|\x{1f63b}|\x{1f606}|\x{1f449}|\x{1f613}|\x{1f616}|\x{1f480}|\x{1f61a}|\x{1f31a}|\x{1f52b}|\x{1f525}|\x{1f64b}|\x{1f620}|\x{1f645}|\x{1f625}|\x{1f62c}|\x{270a}|\x{1f3a7}|\x{1f44e}|\x{1f483}|\x{1f61b}|\x{1f451}|\x{1f493}|\x{1f448}|\x{1f600}|\x{1f636}|\x{1f31f}|\x{25b6}|\x{1f427}|\x{1f4a9}|\x{2611}|\x{1f6b6}|\x{1f630}|\x{1f4ab}|\x{1f3b5}|\x{1f355}|\x{1f646}|\x{1f48e}|\x{1f4a5}|\x{1f450}|\x{2708}|\x{1f4a4}|\x{1f607}|\x{1f647}|\x{1f31e}|\x{1f47d}|\x{1f4ad}|\x{274c}|\x{1f485}|\x{1f380}|\x{1f47c}|\x{1f639}|\x{27a1}|\x{1f340}|\x{1f46f}|\x{1f445}|\x{2744}|\x{1f61f}|\x{1f30d}|\x{1f447}|\x{1f628}|\x{2601}|\x{1f33a}|\x{25c0}|\x{261d}|\x{2663}|\x{2716}|\x{1f4a6}|\x{1f388}|\x{2757}|\x{1f343}|\x{2b50}|\x{1f381}|\x{1f3c3}|\x{1f30e}|\x{2705}|\x{1f3a4}|\x{1f384}|\x{1f64d}|\x{1f319}|\x{1f49d}|\x{1f46b}|\x{2668}|\x{1f649}|\x{1f534}|\x{1f436}|\x{1f446}|\x{1f48f}|\x{1f635}|\x{1f33f}|\x{1f334}|\x{1f47b}|\x{26bd}|\x{1f37b}|\x{1f33b}|\x{1f4b0}|\x{1f3c0}|\x{203c}|\x{1f619}|\x{1f46d}|\x{1f486}|\x{1f300}|\x{1f478}|\x{1f62f}|\x{1f4a8}|\x{1f490}|\x{1f62e}|\x{1f632}|\x{1f48d}|\x{1f335}|\x{1f385}|\x{1f31d}|\x{1f431}|\x{1f63f}|\x{1f627}|\x{1f382}|\x{2615}|\x{1f49f}|\x{1f337}|\x{26c4}|\x{1f52a}|\x{1f3a5}|\x{1f4d5}|\x{1f308}|\x{1f38a}|\x{260e}|\x{1f4b8}|\x{1f3b8}|\x{1f30f}|\x{2b55}|\x{1f1fa}\x{1f1f8
}|\x{1f3c8}|\x{2614}|\x{1f4a3}|\x{26a1}|\x{1f346}|\x{1f47f}|\x{2b05}|\x{1f626}|\x{2666}|\x{1f63c}|\x{1f491}|\x{1f47e}|\x{1f63d}|\x{1f476}|\x{1f638}|\x{2660}|\x{0031}\x{fe0f}?\x{20e3}|\x{1f680}|\x{1f34c}|\x{1f4f7}|\x{1f341}|\x{1f437}|\x{1f48c}|\x{25aa}|\x{1f640}|\x{1f43c}|\x{1f33c}|\x{26c5}|\x{1f4bf}|\x{1f30a}|\x{1f444}|\x{1f374}|\x{1f3ae}|\x{0032}\x{fe0f}?\x{20e3}|\x{1f42f}|\x{1f463}|\x{1f369}|\x{1f6ac}|\x{1f422}|\x{1f35f}|\x{1f52e}|\x{1f4a2}|\x{1f482}|\x{1f37a}|\x{1f3bc}|\x{1f4f1}|\x{1f197}|\x{1f379}|\x{1f3c6}|\x{1f51e}|\x{1f342}|\x{1f697}|\x{1f354}|\x{1f36d}|\x{1f617}|\x{1f349}|\x{1f377}|\x{1f468}|\x{1f4f2}|\x{1f370}|\x{1f36a}|\x{1f6a8}|\x{1f366}|\x{1f383}|\x{1f51d}|\x{1f43b}|\x{1f36b}|\x{1f489}|\x{1f438}|\x{1f42c}|\x{267b}|\x{1f63a}|\x{0033}\x{fe0f}?\x{20e3}|\x{1f479}|\x{1f41d}|\x{1f433}|\x{1f64e}|\x{26a0}|\x{1f6ab}|\x{1f30c}|\x{1f412}|\x{1f487}|\x{1f423}|\x{1f430}|\x{1f378}|\x{1f63e}|\x{1f4b5}|\x{1f40d}|\x{1f3ca}|\x{1f43e}|\x{1f192}|\x{1f353}|\x{1f418}|\x{1f4fa}|\x{1f351}|\x{1f469}|\x{1f4da}|\x{1f33e}|\x{1f390}|\x{1f311}|\x{1f315}|\x{1f3ac}|\x{2702}|\x{1f50a}|\x{1f46a}|\x{1f352}|\x{1f4bb}|\x{1f198}|\x{1f425}|\x{1f475}|\x{1f1eb}\x{1f1f7}|\x{0034}\x{fe0f}?\x{20e3}|\x{26be}|\x{2709}|\x{2753}|\x{1f34d}|\x{1f467}|\x{2b07}|\x{1f473}|\x{1f419}|\x{1f4e2}|\x{1f4ac}|\x{1f4dd}|\x{1f460}|\x{1f698}|\x{1f484}|\x{1f1ee}\x{1f1f9}|\x{2693}|\x{1f357}|\x{1f386}|\x{1f3e1}|\x{1f466}|\x{1f393}|\x{270f}|\x{1f459}|\x{1f519}|\x{1f4d6}|\x{1f443}|\x{1f4de}|\x{1f510}|\x{1f45f}|\x{1f46c}|\x{1f371}|\x{1f239}|\x{1f453}|\x{1f31b}|\x{1f48a}|\x{1f50b}|\x{1f34e}|\x{1f6bf}|\x{1f442}|\x{0035}\x{fe0f}?\x{20e3}|\x{26aa}|\x{1f320}|\x{1f535}|\x{2197}|\x{1f35d}|\x{1f305}|\x{1f470}|\x{1f313}|\x{1f6c5}|\x{1f428}|\x{1f312}|\x{1f474}|\x{1f435}|\x{1f347}|\x{1f420}|\x{1f314}|\x{1f4a7}|\x{26ab}|\x{1f36c}|\x{1f4a1}|\x{1f1ec}\x{1f1e7}|\x{1f317}|\x{1f41f}|\x{1f365}|\x{1f318}|\x{1f316}|\x{1f35c}|\x{1f31c}|\x{1f43d}|\x{2199}|\x{1f434}|\x{1f303}|\x{1f3a8}|\x{1f3e0}|\x{2198}|\x{1f3ea}|\x{1f40b}|\x{1f539}|\x{2733}|\x{
1f344}|\x{1f457}|\x{1f5ff}|\x{1f414}|\x{1f34a}|\x{1f331}|\x{1f472}|\x{1f3c4}|\x{1f6ae}|\x{1f367}|\x{1f699}|\x{1f332}|\x{1f333}|\x{1f3a3}|\x{231a}|\x{1f42d}|\x{1f42e}|\x{1f373}|\x{1f195}|\x{2196}|\x{1f3a9}|\x{26fd}|\x{1f426}|\x{1f46e}|\x{1f3ad}|\x{1f43a}|\x{1f47a}|\x{1f6b2}|\x{1f411}|\x{1f35e}|\x{1f364}|\x{1f511}|\x{1f6c0}|\x{2755}|\x{1f37c}|\x{1f471}|\x{1f387}|\x{1f34b}|\x{23f0}|\x{2194}|\x{1f506}|\x{1f518}|\x{23e9}|\x{1f375}|\x{1f193}|\x{2b06}|\x{0036}\x{fe0f}?\x{20e3}|\x{1f199}|\x{1f3be}|\x{1f40e}|\x{24c2}|\x{1f1e9}\x{1f1ea}|\x{21aa}|\x{1f3c1}|\x{1f424}|\x{2195}|\x{1f528}|\x{0037}\x{fe0f}?\x{20e3}|\x{26f3}|\x{1f512}|\x{1f50c}|\x{2049}|\x{1f1ea}\x{1f1f8}|\x{1f363}|\x{1f41b}|\x{1f503}|\x{0030}\x{fe0f}?\x{20e3}|\x{1f35a}|\x{1f372}|\x{1f456}|\x{1f3c2}|\x{1f439}|\x{1f34f}|\x{1f51c}|\x{27b0}|\x{1f531}|\x{1f492}|\x{1f41e}|\x{1f304}|\x{1f3af}|\x{2734}|\x{264b}|\x{1f41a}|\x{1f356}|\x{1f4a0}|\x{21a9}|\x{1f33d}|\x{1f40a}|\x{2935}|\x{1f464}|\x{1f454}|\x{1f40c}|\x{2747}|\x{1f368}|\x{1f413}|\x{1f3e5}|\x{1f5fd}|\x{1f429}|\x{2712}|\x{1f38e}|\x{1f4e3}|\x{1f6a9}|\x{1f41c}|\x{1f538}|\x{1f4b2}|\x{1f465}|\x{1f307}|\x{1f4ae}|\x{1f36f}|\x{1f6a3}|\x{1f35b}|\x{1f694}|\x{1f513}|\x{1f53a}|\x{1f6aa}|\x{1f477}|\x{1f36e}|\x{2648}|\x{1f3b1}|\x{1f504}|\x{1f3eb}|\x{2795}|\x{264f}|\x{1f45c}|\x{1f345}|\x{1f42a}|\x{2652}|\x{1f6bc}|\x{1f6bd}|\x{3299}|\x{26d4}|\x{1f537}|\x{3297}|\x{1f3b9}|\x{1f455}|\x{1f408}|\x{1f3e2}|\x{1f682}|\x{1f409}|\x{1f532}|\x{1f6ba}|\x{1f68c}|\x{1f517}|\x{1f536}|\x{1f42b}|\x{1f410}|\x{1f4c0}|\x{1f306}|\x{26f5}|\x{1f3b3}|\x{1f30b}|\x{264c}|\x{1f501}|\x{1f3e9}|\x{1f330}|\x{1f45e}|\x{1f194}|\x{1f4b3}|\x{1f3b2}|\x{1f53b}|\x{1f4cd}|\x{1f3a2}|\x{2650}|\x{1f415}|\x{1f452}|\x{2651}|\x{1f488}|\x{1f432}|\x{1f407}|\x{0038}\x{fe0f}?\x{20e3}|\x{264a}|\x{1f4f9}|\x{1f404}|\x{303d}|\x{0039}\x{fe0f}?\x{20e3}|\x{1f5fb}|\x{1f302}|\x{264d}|\x{1f1ef}\x{1f1f5}|\x{2649}|\x{1f693}|\x{1f6b4}|\x{2796}|\x{25fe}|\x{1f350}|\x{1f4ba}|\x{1f40f}|\x{264e}|\x{1f405}|\x{2653}|\x{1f696}|\x{1f514}|\x{1f361}|\x{1f
45b}|\x{1f309}|\x{1f417}|\x{1f4b7}|\x{1f310}|\x{1f4cc}|\x{1f3e4}|\x{2139}|\x{1f4e6}|\x{2754}|\x{1f6a2}|\x{26ea}|\x{1f566}|\x{1f0cf}|\x{23f3}|\x{1f3c9}|\x{274e}|\x{1f38d}|\x{1f4f4}|\x{1f301}|\x{1f400}|\x{1f6a6}|\x{1f392}|\x{1f416}|\x{1f462}|\x{1f461}|\x{0023}\x{fe0f}?\x{20e3}|\x{1f3c7}|\x{1f1f7}\x{1f1fa}|\x{1f4d3}|\x{1f5fc}|\x{1f38b}|\x{1f3bf}|\x{1f4fb}|\x{1f3bb}|\x{1f3a1}|\x{1f3b7}|\x{1f51f}|\x{1f4e9}|\x{1f4d2}|\x{1f4bd}|\x{1f68d}|\x{23ea}|\x{1f691}|\x{1f3ec}|\x{1f362}|\x{231b}|\x{1f402}|\x{1f38f}|\x{1f421}|\x{1f695}|\x{25fc}|\x{1f51b}|\x{26fa}|\x{1f530}|\x{1f3ba}|\x{1f348}|\x{1f6b9}|\x{1f4b4}|\x{1f4bc}|\x{1f681}|\x{25ab}|\x{1f406}|\x{1f52f}|\x{1f505}|\x{1f45a}|\x{1f4f6}|\x{1f3a0}|\x{1f4b6}|\x{1f3f0}|\x{1f533}|\x{1f38c}|\x{1f6a4}|\x{1f19a}|\x{1f4c5}|\x{1f359}|\x{1f51a}|\x{1f55b}|\x{1f526}|\x{2b1c}|\x{1f692}|\x{1f3bd}|\x{1f360}|\x{1f3aa}|\x{1f4d8}|\x{1f6be}|\x{25fb}|\x{1f250}|\x{1f4db}|\x{2934}|\x{1f550}|\x{1f4f0}|\x{1f22f}|\x{1f403}|\x{1f551}|\x{1f52d}|\x{1f196}|\x{1f529}|\x{1f3e7}|\x{1f6a7}|\x{1f552}|\x{1f50d}|\x{1f50e}|\x{1f69a}|\x{1f4c6}|\x{1f4d7}|\x{1f6ad}|\x{1f458}|\x{1f401}|\x{1f391}|\x{1f683}|\x{1f4e1}|\x{1f233}|\x{1f507}|\x{1f376}|\x{1f1f0}\x{1f1f7}|\x{1f3e6}|\x{1f4fc}|\x{1f3e8}|\x{1f3b0}|\x{1f3a6}|\x{1f69c}|\x{1f687}|\x{1f3ee}|\x{1f4ee}|\x{1f689}|\x{267f}|\x{1f4dc}|\x{1f4ec}|\x{26f2}|\x{1f4c8}|\x{1f527}|\x{1f684}|\x{1f4f3}|\x{1f68a}|\x{23ec}|\x{1f553}|\x{2b1b}|\x{1f23a}|\x{1f4ce}|\x{1f358}|\x{1f53d}|\x{1f236}|\x{1f238}|\x{1f18e}|\x{1f6c1}|\x{1f21a}|\x{1f6b7}|\x{1f53c}|\x{1f4e7}|\x{1f4f5}|\x{1f4cb}|\x{1f69b}|\x{1f3b4}|\x{1f4c9}|\x{1f554}|\x{1f6a5}|\x{1f4c4}|\x{1f45d}|\x{1f558}|\x{1f1e8}\x{1f1f3}|\x{25fd}|\x{1f502}|\x{1f191}|\x{1f232}|\x{1f235}|\x{1f690}|\x{1f559}|\x{1f3ef}|\x{1f522}|\x{1f557}|\x{1f685}|\x{1f004}|\x{1f3ab}|\x{1f17f}|\x{1f515}|\x{1f52c}|\x{1f4e5}|\x{1f4c3}|\x{1f4d1}|\x{1f3ed}|\x{1f234}|\x{1f4d9}|\x{1f55a}|\x{1f4b1}|\x{1f516}|\x{1f4d4}|\x{1f6b5}|\x{1f5fe}|\x{1f4ca}|\x{1f6bb}|\x{1f3e3}|\x{1f6b8}|\x{27bf}|\x{1f556}|\x{1f4be}|\x{1f4e0}|\x{1f4b9}|\
x{1f4d0}|\x{1f4df}|\x{1f555}|\x{1f251}|\x{1f68e}|\x{1f686}|\x{1f4cf}|\x{1f6b0}|\x{1f4e8}|\x{1f55c}|\x{1f4ef}|\x{1f509}|\x{1f50f}|\x{1f4eb}|\x{1f68f}|\x{1f4c7}|\x{1f524}|\x{1f564}|\x{1f55d}|\x{1f68b}|\x{1f508}|\x{1f688}|\x{1f565}|\x{1f567}|\x{1f4c2}|\x{1f69d}|\x{1f4c1}|\x{1f500}|\x{1f520}|\x{1f523}|\x{1f4e4}|\x{1f55f}|\x{1f4ea}|\x{26ce}|\x{1f55e}|\x{1f562}|\x{23eb}|\x{1f521}|\x{1f69e}|\x{1f560}|\x{1f4ed}|\x{2797}|\x{1f201}|\x{1f6a1}|\x{1f563}|\x{1f561}|\x{1f6af}|\x{1f6a0}|\x{1f6b1}|\x{1f6b3}|\x{1f69f}|\x{1f6c2}|\x{1f6c3}|\x{1f6c4}|\x{1f202}|\x{1f170}|\x{00a9}|\x{1f171}|\x{1f237}|\x{3030}|\x{00ae}|\x{2122}|\x{1f17e})\x{fe0f}?';
    $emojiRegexp = '(?:(?:\x{261D}|\x{26F9}|[\x{270A}-\x{270B}]|[\x{270C}-\x{270D}]|\x{1F385}|[\x{1F3C2}-\x{1F3C4}]|\x{1F3C7}|\x{1F3CA}|[\x{1F3CB}-\x{1F3CC}]|[\x{1F442}-\x{1F443}]|[\x{1F446}-\x{1F450}]|[\x{1F466}-\x{1F469}]|\x{1F46E}|[\x{1F470}-\x{1F478}]|\x{1F47C}|[\x{1F481}-\x{1F483}]|[\x{1F485}-\x{1F487}]|\x{1F4AA}|[\x{1F574}-\x{1F575}]|\x{1F57A}|\x{1F590}|[\x{1F595}-\x{1F596}]|[\x{1F645}-\x{1F647}]|[\x{1F64B}-\x{1F64F}]|\x{1F6A3}|[\x{1F6B4}-\x{1F6B6}]|\x{1F6C0}|\x{1F6CC}|\x{1F918}|[\x{1F919}-\x{1F91C}]|\x{1F91E}|\x{1F91F}|\x{1F926}|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F939}]|[\x{1F93D}-\x{1F93E}]|[\x{1F9D1}-\x{1F9DD}])(?:[\x{1F3FB}-\x{1F3FF}])?|(?:[\x{231A}-\x{231B}]|[\x{23E9}-\x{23EC}]|\x{23F0}|\x{23F3}|[\x{25FD}-\x{25FE}]|[\x{2614}-\x{2615}]|[\x{2648}-\x{2653}]|\x{267F}|\x{2693}|\x{26A1}|[\x{26AA}-\x{26AB}]|[\x{26BD}-\x{26BE}]|[\x{26C4}-\x{26C5}]|\x{26CE}|\x{26D4}|\x{26EA}|[\x{26F2}-\x{26F3}]|\x{26F5}|\x{26FA}|\x{26FD}|\x{2705}|[\x{270A}-\x{270B}]|\x{2728}|\x{274C}|\x{274E}|[\x{2753}-\x{2755}]|\x{2757}|[\x{2795}-\x{2797}]|\x{27B0}|\x{27BF}|[\x{2B1B}-\x{2B1C}]|\x{2B50}|\x{2B55}|\x{1F004}|\x{1F0CF}|\x{1F18E}|[\x{1F191}-\x{1F19A}]|[\x{1F1E6}-\x{1F1FF}]|\x{1F201}|\x{1F21A}|\x{1F22F}|[\x{1F232}-\x{1F236}]|[\x{1F238}-\x{1F23A}]|[\x{1F250}-\x{1F251}]|[\x{1F300}-\x{1F320}]|[\x{1F32D}-\x{1F32F}]|[\x{1F330}-\x{1F335}]|[\x{1F337}-\x{1F37C}]|[\x{1F37E}-\x{1F37F}]|[\x{1F380}-\x{1F393}]|[\x{1F3A0}-\x{1F3C4}]|\x{1F3C5}|[\x{1F3C6}-\x{1F3CA}]|[\x{1F3CF}-\x{1F3D3}]|[\x{1F3E0}-\x{1F3F0}]|\x{1F3F4}|[\x{1F3F8}-\x{1F3FF}]|[\x{1F400}-\x{1F43E}]|\x{1F440}|[\x{1F442}-\x{1F4F7}]|\x{1F4F8}|[\x{1F4F9}-\x{1F4FC}]|\x{1F4FF}|[\x{1F500}-\x{1F53D}]|[\x{1F54B}-\x{1F54E}]|[\x{1F550}-\x{1F567}]|\x{1F57A}|[\x{1F595}-\x{1F596}]|\x{1F5A4}|[\x{1F5FB}-\x{1F5FF}]|\x{1F600}|[\x{1F601}-\x{1F610}]|\x{1F611}|[\x{1F612}-\x{1F614}]|\x{1F615}|\x{1F616}|\x{1F617}|\x{1F618}|\x{1F619}|\x{1F61A}|\x{1F61B}|[\x{1F61C}-\x{1F61E}]|\x{1F61F}|[\x{1F620}-\x{1F625}]|[\x{1F626}-\x{1F627}]|[\x{1F628}-\x{1F62B}]|
\x{1F62C}|\x{1F62D}|[\x{1F62E}-\x{1F62F}]|[\x{1F630}-\x{1F633}]|\x{1F634}|[\x{1F635}-\x{1F640}]|[\x{1F641}-\x{1F642}]|[\x{1F643}-\x{1F644}]|[\x{1F645}-\x{1F64F}]|[\x{1F680}-\x{1F6C5}]|\x{1F6CC}|\x{1F6D0}|[\x{1F6D1}-\x{1F6D2}]|[\x{1F6EB}-\x{1F6EC}]|[\x{1F6F4}-\x{1F6F6}]|[\x{1F6F7}-\x{1F6F8}]|[\x{1F910}-\x{1F918}]|[\x{1F919}-\x{1F91E}]|\x{1F91F}|[\x{1F920}-\x{1F927}]|[\x{1F928}-\x{1F92F}]|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F93A}]|[\x{1F93C}-\x{1F93E}]|[\x{1F940}-\x{1F945}]|[\x{1F947}-\x{1F94B}]|\x{1F94C}|[\x{1F950}-\x{1F95E}]|[\x{1F95F}-\x{1F96B}]|[\x{1F980}-\x{1F984}]|[\x{1F985}-\x{1F991}]|[\x{1F992}-\x{1F997}]|\x{1F9C0}|[\x{1F9D0}-\x{1F9E6}])|(?:\x{0023}|\x{002A}|[\x{0030}-\x{0039}]|\x{00A9}|\x{00AE}|\x{203C}|\x{2049}|\x{2122}|\x{2139}|[\x{2194}-\x{2199}]|[\x{21A9}-\x{21AA}]|[\x{231A}-\x{231B}]|\x{2328}|\x{23CF}|[\x{23E9}-\x{23F3}]|[\x{23F8}-\x{23FA}]|\x{24C2}|[\x{25AA}-\x{25AB}]|\x{25B6}|\x{25C0}|[\x{25FB}-\x{25FE}]|[\x{2600}-\x{2604}]|\x{260E}|\x{2611}|[\x{2614}-\x{2615}]|\x{2618}|\x{261D}|\x{2620}|[\x{2622}-\x{2623}]|\x{2626}|\x{262A}|[\x{262E}-\x{262F}]|[\x{2638}-\x{263A}]|\x{2640}|\x{2642}|[\x{2648}-\x{2653}]|\x{2660}|\x{2663}|[\x{2665}-\x{2666}]|\x{2668}|\x{267B}|\x{267F}|[\x{2692}-\x{2697}]|\x{2699}|[\x{269B}-\x{269C}]|[\x{26A0}-\x{26A1}]|[\x{26AA}-\x{26AB}]|[\x{26B0}-\x{26B1}]|[\x{26BD}-\x{26BE}]|[\x{26C4}-\x{26C5}]|\x{26C8}|\x{26CE}|\x{26CF}|\x{26D1}|[\x{26D3}-\x{26D4}]|[\x{26E9}-\x{26EA}]|[\x{26F0}-\x{26F5}]|[\x{26F7}-\x{26FA}]|\x{26FD}|\x{2702}|\x{2705}|[\x{2708}-\x{2709}]|[\x{270A}-\x{270B}]|[\x{270C}-\x{270D}]|\x{270F}|\x{2712}|\x{2714}|\x{2716}|\x{271D}|\x{2721}|\x{2728}|[\x{2733}-\x{2734}]|\x{2744}|\x{2747}|\x{274C}|\x{274E}|[\x{2753}-\x{2755}]|\x{2757}|[\x{2763}-\x{2764}]|[\x{2795}-\x{2797}]|\x{27A1}|\x{27B0}|\x{27BF}|[\x{2934}-\x{2935}]|[\x{2B05}-\x{2B07}]|[\x{2B1B}-\x{2B1C}]|\x{2B50}|\x{2B55}|\x{3030}|\x{303D}|\x{3297}|\x{3299}|\x{1F004}|\x{1F0CF}|[\x{1F170}-\x{1F171}]|\x{1F17E}|\x{1F17F}|\x{1F18E}|[\x{1F191}-\x{1F19A}]|[\x{1F1E6}-\x{1F
1FF}]|[\x{1F201}-\x{1F202}]|\x{1F21A}|\x{1F22F}|[\x{1F232}-\x{1F23A}]|[\x{1F250}-\x{1F251}]|[\x{1F300}-\x{1F320}]|\x{1F321}|[\x{1F324}-\x{1F32C}]|[\x{1F32D}-\x{1F32F}]|[\x{1F330}-\x{1F335}]|\x{1F336}|[\x{1F337}-\x{1F37C}]|\x{1F37D}|[\x{1F37E}-\x{1F37F}]|[\x{1F380}-\x{1F393}]|[\x{1F396}-\x{1F397}]|[\x{1F399}-\x{1F39B}]|[\x{1F39E}-\x{1F39F}]|[\x{1F3A0}-\x{1F3C4}]|\x{1F3C5}|[\x{1F3C6}-\x{1F3CA}]|[\x{1F3CB}-\x{1F3CE}]|[\x{1F3CF}-\x{1F3D3}]|[\x{1F3D4}-\x{1F3DF}]|[\x{1F3E0}-\x{1F3F0}]|[\x{1F3F3}-\x{1F3F5}]|\x{1F3F7}|[\x{1F3F8}-\x{1F3FF}]|[\x{1F400}-\x{1F43E}]|\x{1F43F}|\x{1F440}|\x{1F441}|[\x{1F442}-\x{1F4F7}]|\x{1F4F8}|[\x{1F4F9}-\x{1F4FC}]|\x{1F4FD}|\x{1F4FF}|[\x{1F500}-\x{1F53D}]|[\x{1F549}-\x{1F54A}]|[\x{1F54B}-\x{1F54E}]|[\x{1F550}-\x{1F567}]|[\x{1F56F}-\x{1F570}]|[\x{1F573}-\x{1F579}]|\x{1F57A}|\x{1F587}|[\x{1F58A}-\x{1F58D}]|\x{1F590}|[\x{1F595}-\x{1F596}]|\x{1F5A4}|\x{1F5A5}|\x{1F5A8}|[\x{1F5B1}-\x{1F5B2}]|\x{1F5BC}|[\x{1F5C2}-\x{1F5C4}]|[\x{1F5D1}-\x{1F5D3}]|[\x{1F5DC}-\x{1F5DE}]|\x{1F5E1}|\x{1F5E3}|\x{1F5E8}|\x{1F5EF}|\x{1F5F3}|\x{1F5FA}|[\x{1F5FB}-\x{1F5FF}]|\x{1F600}|[\x{1F601}-\x{1F610}]|\x{1F611}|[\x{1F612}-\x{1F614}]|\x{1F615}|\x{1F616}|\x{1F617}|\x{1F618}|\x{1F619}|\x{1F61A}|\x{1F61B}|[\x{1F61C}-\x{1F61E}]|\x{1F61F}|[\x{1F620}-\x{1F625}]|[\x{1F626}-\x{1F627}]|[\x{1F628}-\x{1F62B}]|\x{1F62C}|\x{1F62D}|[\x{1F62E}-\x{1F62F}]|[\x{1F630}-\x{1F633}]|\x{1F634}|[\x{1F635}-\x{1F640}]|[\x{1F641}-\x{1F642}]|[\x{1F643}-\x{1F644}]|[\x{1F645}-\x{1F64F}]|[\x{1F680}-\x{1F6C5}]|[\x{1F6CB}-\x{1F6CF}]|\x{1F6D0}|[\x{1F6D1}-\x{1F6D2}]|[\x{1F6E0}-\x{1F6E5}]|\x{1F6E9}|[\x{1F6EB}-\x{1F6EC}]|\x{1F6F0}|\x{1F6F3}|[\x{1F6F4}-\x{1F6F6}]|[\x{1F6F7}-\x{1F6F8}]|[\x{1F910}-\x{1F918}]|[\x{1F919}-\x{1F91E}]|\x{1F91F}|[\x{1F920}-\x{1F927}]|[\x{1F928}-\x{1F92F}]|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F93A}]|[\x{1F93C}-\x{1F93E}]|[\x{1F940}-\x{1F945}]|[\x{1F947}-\x{1F94B}]|\x{1F94C}|[\x{1F950}-\x{1F95E}]|[\x{1F95F}-\x{1F96B}]|[\x{1F980}-\x{1F984}]|[\x{1F985}-\x{1F991}]|[\x{1F992}-\x{1F
997}]|\x{1F9C0}|[\x{1F9D0}-\x{1F9E6}])\x{FE0F}?)';

    $hashtagRegexp = '/'.

  10. @ravisorg ravisorg revised this gist Mar 26, 2017. 1 changed file with 2 additions and 0 deletions.
    2 changes: 2 additions & 0 deletions test-hashtag-regexp.php
    Original file line number Diff line number Diff line change
    @@ -1,5 +1,7 @@
    <?php

    // You can find the test data file at https://www.ravis.org/hashtag-test.zip

    // You're gonna want to have your console output supporting UTF8 before running this, or you're
    // gonna see a bunch of ? in the output...

  11. @ravisorg ravisorg created this gist Mar 26, 2017.
    113 changes: 113 additions & 0 deletions test-hashtag-regexp.php
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,113 @@
    <?php

    // You're gonna want to have your console output supporting UTF8 before running this, or you're
    // gonna see a bunch of ? in the output...

    // For curiosity's sake, post number 693,847 is an emoji hashtag: #(heart)


    // Emoji are spread throughout the Unicode codespace and can contain modifiers. This is one of
    // those "don't reinvent the wheel" type things, and was appropriated / liberated from
    // https://github.com/gmac/gemoji-parser/blob/master/output/rx_unicode.rb
    $emojiRegexp = '(?:\x{1f602}|\x{2665}|\x{2764}|\x{1f60d}|\x{1f612}|\x{1f60a}|\x{1f62d}|\x{1f44c}|\x{263a}|\x{1f618}|\x{1f495}|\x{1f629}|\x{1f614}|\x{1f60f}|\x{1f601}|\x{1f633}|\x{1f64f}|\x{1f64c}|\x{1f44d}|\x{270c}|\x{1f609}|\x{1f60c}|\x{1f648}|\x{1f481}|\x{1f60e}|\x{1f3b6}|\x{1f440}|\x{1f604}|\x{1f634}|\x{1f611}|\x{1f61c}|\x{1f622}|\x{1f61e}|\x{1f4af}|\x{1f496}|\x{1f60b}|\x{1f44f}|\x{1f49c}|\x{2728}|\x{1f615}|\x{1f64a}|\x{270b}|\x{1f499}|\x{1f610}|\x{1f605}|\x{1f494}|\x{1f62a}|\x{1f497}|\x{1f48b}|\x{1f49e}|\x{1f631}|\x{1f498}|\x{1f621}|\x{1f62b}|\x{2600}|\x{1f44a}|\x{1f603}|\x{1f338}|\x{1f608}|\x{1f61d}|\x{1f624}|\x{1f44b}|\x{1f339}|\x{2714}|\x{1f4aa}|\x{1f389}|\x{1f637}|\x{1f49b}|\x{1f623}|\x{1f49a}|\x{1f63b}|\x{1f606}|\x{1f449}|\x{1f613}|\x{1f616}|\x{1f480}|\x{1f61a}|\x{1f31a}|\x{1f52b}|\x{1f525}|\x{1f64b}|\x{1f620}|\x{1f645}|\x{1f625}|\x{1f62c}|\x{270a}|\x{1f3a7}|\x{1f44e}|\x{1f483}|\x{1f61b}|\x{1f451}|\x{1f493}|\x{1f448}|\x{1f600}|\x{1f636}|\x{1f31f}|\x{25b6}|\x{1f427}|\x{1f4a9}|\x{2611}|\x{1f6b6}|\x{1f630}|\x{1f4ab}|\x{1f3b5}|\x{1f355}|\x{1f646}|\x{1f48e}|\x{1f4a5}|\x{1f450}|\x{2708}|\x{1f4a4}|\x{1f607}|\x{1f647}|\x{1f31e}|\x{1f47d}|\x{1f4ad}|\x{274c}|\x{1f485}|\x{1f380}|\x{1f47c}|\x{1f639}|\x{27a1}|\x{1f340}|\x{1f46f}|\x{1f445}|\x{2744}|\x{1f61f}|\x{1f30d}|\x{1f447}|\x{1f628}|\x{2601}|\x{1f33a}|\x{25c0}|\x{261d}|\x{2663}|\x{2716}|\x{1f4a6}|\x{1f388}|\x{2757}|\x{1f343}|\x{2b50}|\x{1f381}|\x{1f3c3}|\x{1f30e}|\x{2705}|\x{1f3a4}|\x{1f384}|\x{1f64d}|\x{1f319}|\x{1f49d}|\x{1f46b}|\x{2668}|\x{1f649}|\x{1f534}|\x{1f436}|\x{1f446}|\x{1f48f}|\x{1f635}|\x{1f33f}|\x{1f334}|\x{1f47b}|\x{26bd}|\x{1f37b}|\x{1f33b}|\x{1f4b0}|\x{1f3c0}|\x{203c}|\x{1f619}|\x{1f46d}|\x{1f486}|\x{1f300}|\x{1f478}|\x{1f62f}|\x{1f4a8}|\x{1f490}|\x{1f62e}|\x{1f632}|\x{1f48d}|\x{1f335}|\x{1f385}|\x{1f31d}|\x{1f431}|\x{1f63f}|\x{1f627}|\x{1f382}|\x{2615}|\x{1f49f}|\x{1f337}|\x{26c4}|\x{1f52a}|\x{1f3a5}|\x{1f4d5}|\x{1f308}|\x{1f38a}|\x{260e}|\x{1f4b8}|\x{1f3b8}|\x{1f30f}|\x{2b55}|\x{1f1fa}\x{1f1f8
}|\x{1f3c8}|\x{2614}|\x{1f4a3}|\x{26a1}|\x{1f346}|\x{1f47f}|\x{2b05}|\x{1f626}|\x{2666}|\x{1f63c}|\x{1f491}|\x{1f47e}|\x{1f63d}|\x{1f476}|\x{1f638}|\x{2660}|\x{0031}\x{fe0f}?\x{20e3}|\x{1f680}|\x{1f34c}|\x{1f4f7}|\x{1f341}|\x{1f437}|\x{1f48c}|\x{25aa}|\x{1f640}|\x{1f43c}|\x{1f33c}|\x{26c5}|\x{1f4bf}|\x{1f30a}|\x{1f444}|\x{1f374}|\x{1f3ae}|\x{0032}\x{fe0f}?\x{20e3}|\x{1f42f}|\x{1f463}|\x{1f369}|\x{1f6ac}|\x{1f422}|\x{1f35f}|\x{1f52e}|\x{1f4a2}|\x{1f482}|\x{1f37a}|\x{1f3bc}|\x{1f4f1}|\x{1f197}|\x{1f379}|\x{1f3c6}|\x{1f51e}|\x{1f342}|\x{1f697}|\x{1f354}|\x{1f36d}|\x{1f617}|\x{1f349}|\x{1f377}|\x{1f468}|\x{1f4f2}|\x{1f370}|\x{1f36a}|\x{1f6a8}|\x{1f366}|\x{1f383}|\x{1f51d}|\x{1f43b}|\x{1f36b}|\x{1f489}|\x{1f438}|\x{1f42c}|\x{267b}|\x{1f63a}|\x{0033}\x{fe0f}?\x{20e3}|\x{1f479}|\x{1f41d}|\x{1f433}|\x{1f64e}|\x{26a0}|\x{1f6ab}|\x{1f30c}|\x{1f412}|\x{1f487}|\x{1f423}|\x{1f430}|\x{1f378}|\x{1f63e}|\x{1f4b5}|\x{1f40d}|\x{1f3ca}|\x{1f43e}|\x{1f192}|\x{1f353}|\x{1f418}|\x{1f4fa}|\x{1f351}|\x{1f469}|\x{1f4da}|\x{1f33e}|\x{1f390}|\x{1f311}|\x{1f315}|\x{1f3ac}|\x{2702}|\x{1f50a}|\x{1f46a}|\x{1f352}|\x{1f4bb}|\x{1f198}|\x{1f425}|\x{1f475}|\x{1f1eb}\x{1f1f7}|\x{0034}\x{fe0f}?\x{20e3}|\x{26be}|\x{2709}|\x{2753}|\x{1f34d}|\x{1f467}|\x{2b07}|\x{1f473}|\x{1f419}|\x{1f4e2}|\x{1f4ac}|\x{1f4dd}|\x{1f460}|\x{1f698}|\x{1f484}|\x{1f1ee}\x{1f1f9}|\x{2693}|\x{1f357}|\x{1f386}|\x{1f3e1}|\x{1f466}|\x{1f393}|\x{270f}|\x{1f459}|\x{1f519}|\x{1f4d6}|\x{1f443}|\x{1f4de}|\x{1f510}|\x{1f45f}|\x{1f46c}|\x{1f371}|\x{1f239}|\x{1f453}|\x{1f31b}|\x{1f48a}|\x{1f50b}|\x{1f34e}|\x{1f6bf}|\x{1f442}|\x{0035}\x{fe0f}?\x{20e3}|\x{26aa}|\x{1f320}|\x{1f535}|\x{2197}|\x{1f35d}|\x{1f305}|\x{1f470}|\x{1f313}|\x{1f6c5}|\x{1f428}|\x{1f312}|\x{1f474}|\x{1f435}|\x{1f347}|\x{1f420}|\x{1f314}|\x{1f4a7}|\x{26ab}|\x{1f36c}|\x{1f4a1}|\x{1f1ec}\x{1f1e7}|\x{1f317}|\x{1f41f}|\x{1f365}|\x{1f318}|\x{1f316}|\x{1f35c}|\x{1f31c}|\x{1f43d}|\x{2199}|\x{1f434}|\x{1f303}|\x{1f3a8}|\x{1f3e0}|\x{2198}|\x{1f3ea}|\x{1f40b}|\x{1f539}|\x{2733}|\x{
1f344}|\x{1f457}|\x{1f5ff}|\x{1f414}|\x{1f34a}|\x{1f331}|\x{1f472}|\x{1f3c4}|\x{1f6ae}|\x{1f367}|\x{1f699}|\x{1f332}|\x{1f333}|\x{1f3a3}|\x{231a}|\x{1f42d}|\x{1f42e}|\x{1f373}|\x{1f195}|\x{2196}|\x{1f3a9}|\x{26fd}|\x{1f426}|\x{1f46e}|\x{1f3ad}|\x{1f43a}|\x{1f47a}|\x{1f6b2}|\x{1f411}|\x{1f35e}|\x{1f364}|\x{1f511}|\x{1f6c0}|\x{2755}|\x{1f37c}|\x{1f471}|\x{1f387}|\x{1f34b}|\x{23f0}|\x{2194}|\x{1f506}|\x{1f518}|\x{23e9}|\x{1f375}|\x{1f193}|\x{2b06}|\x{0036}\x{fe0f}?\x{20e3}|\x{1f199}|\x{1f3be}|\x{1f40e}|\x{24c2}|\x{1f1e9}\x{1f1ea}|\x{21aa}|\x{1f3c1}|\x{1f424}|\x{2195}|\x{1f528}|\x{0037}\x{fe0f}?\x{20e3}|\x{26f3}|\x{1f512}|\x{1f50c}|\x{2049}|\x{1f1ea}\x{1f1f8}|\x{1f363}|\x{1f41b}|\x{1f503}|\x{0030}\x{fe0f}?\x{20e3}|\x{1f35a}|\x{1f372}|\x{1f456}|\x{1f3c2}|\x{1f439}|\x{1f34f}|\x{1f51c}|\x{27b0}|\x{1f531}|\x{1f492}|\x{1f41e}|\x{1f304}|\x{1f3af}|\x{2734}|\x{264b}|\x{1f41a}|\x{1f356}|\x{1f4a0}|\x{21a9}|\x{1f33d}|\x{1f40a}|\x{2935}|\x{1f464}|\x{1f454}|\x{1f40c}|\x{2747}|\x{1f368}|\x{1f413}|\x{1f3e5}|\x{1f5fd}|\x{1f429}|\x{2712}|\x{1f38e}|\x{1f4e3}|\x{1f6a9}|\x{1f41c}|\x{1f538}|\x{1f4b2}|\x{1f465}|\x{1f307}|\x{1f4ae}|\x{1f36f}|\x{1f6a3}|\x{1f35b}|\x{1f694}|\x{1f513}|\x{1f53a}|\x{1f6aa}|\x{1f477}|\x{1f36e}|\x{2648}|\x{1f3b1}|\x{1f504}|\x{1f3eb}|\x{2795}|\x{264f}|\x{1f45c}|\x{1f345}|\x{1f42a}|\x{2652}|\x{1f6bc}|\x{1f6bd}|\x{3299}|\x{26d4}|\x{1f537}|\x{3297}|\x{1f3b9}|\x{1f455}|\x{1f408}|\x{1f3e2}|\x{1f682}|\x{1f409}|\x{1f532}|\x{1f6ba}|\x{1f68c}|\x{1f517}|\x{1f536}|\x{1f42b}|\x{1f410}|\x{1f4c0}|\x{1f306}|\x{26f5}|\x{1f3b3}|\x{1f30b}|\x{264c}|\x{1f501}|\x{1f3e9}|\x{1f330}|\x{1f45e}|\x{1f194}|\x{1f4b3}|\x{1f3b2}|\x{1f53b}|\x{1f4cd}|\x{1f3a2}|\x{2650}|\x{1f415}|\x{1f452}|\x{2651}|\x{1f488}|\x{1f432}|\x{1f407}|\x{0038}\x{fe0f}?\x{20e3}|\x{264a}|\x{1f4f9}|\x{1f404}|\x{303d}|\x{0039}\x{fe0f}?\x{20e3}|\x{1f5fb}|\x{1f302}|\x{264d}|\x{1f1ef}\x{1f1f5}|\x{2649}|\x{1f693}|\x{1f6b4}|\x{2796}|\x{25fe}|\x{1f350}|\x{1f4ba}|\x{1f40f}|\x{264e}|\x{1f405}|\x{2653}|\x{1f696}|\x{1f514}|\x{1f361}|\x{1f
45b}|\x{1f309}|\x{1f417}|\x{1f4b7}|\x{1f310}|\x{1f4cc}|\x{1f3e4}|\x{2139}|\x{1f4e6}|\x{2754}|\x{1f6a2}|\x{26ea}|\x{1f566}|\x{1f0cf}|\x{23f3}|\x{1f3c9}|\x{274e}|\x{1f38d}|\x{1f4f4}|\x{1f301}|\x{1f400}|\x{1f6a6}|\x{1f392}|\x{1f416}|\x{1f462}|\x{1f461}|\x{0023}\x{fe0f}?\x{20e3}|\x{1f3c7}|\x{1f1f7}\x{1f1fa}|\x{1f4d3}|\x{1f5fc}|\x{1f38b}|\x{1f3bf}|\x{1f4fb}|\x{1f3bb}|\x{1f3a1}|\x{1f3b7}|\x{1f51f}|\x{1f4e9}|\x{1f4d2}|\x{1f4bd}|\x{1f68d}|\x{23ea}|\x{1f691}|\x{1f3ec}|\x{1f362}|\x{231b}|\x{1f402}|\x{1f38f}|\x{1f421}|\x{1f695}|\x{25fc}|\x{1f51b}|\x{26fa}|\x{1f530}|\x{1f3ba}|\x{1f348}|\x{1f6b9}|\x{1f4b4}|\x{1f4bc}|\x{1f681}|\x{25ab}|\x{1f406}|\x{1f52f}|\x{1f505}|\x{1f45a}|\x{1f4f6}|\x{1f3a0}|\x{1f4b6}|\x{1f3f0}|\x{1f533}|\x{1f38c}|\x{1f6a4}|\x{1f19a}|\x{1f4c5}|\x{1f359}|\x{1f51a}|\x{1f55b}|\x{1f526}|\x{2b1c}|\x{1f692}|\x{1f3bd}|\x{1f360}|\x{1f3aa}|\x{1f4d8}|\x{1f6be}|\x{25fb}|\x{1f250}|\x{1f4db}|\x{2934}|\x{1f550}|\x{1f4f0}|\x{1f22f}|\x{1f403}|\x{1f551}|\x{1f52d}|\x{1f196}|\x{1f529}|\x{1f3e7}|\x{1f6a7}|\x{1f552}|\x{1f50d}|\x{1f50e}|\x{1f69a}|\x{1f4c6}|\x{1f4d7}|\x{1f6ad}|\x{1f458}|\x{1f401}|\x{1f391}|\x{1f683}|\x{1f4e1}|\x{1f233}|\x{1f507}|\x{1f376}|\x{1f1f0}\x{1f1f7}|\x{1f3e6}|\x{1f4fc}|\x{1f3e8}|\x{1f3b0}|\x{1f3a6}|\x{1f69c}|\x{1f687}|\x{1f3ee}|\x{1f4ee}|\x{1f689}|\x{267f}|\x{1f4dc}|\x{1f4ec}|\x{26f2}|\x{1f4c8}|\x{1f527}|\x{1f684}|\x{1f4f3}|\x{1f68a}|\x{23ec}|\x{1f553}|\x{2b1b}|\x{1f23a}|\x{1f4ce}|\x{1f358}|\x{1f53d}|\x{1f236}|\x{1f238}|\x{1f18e}|\x{1f6c1}|\x{1f21a}|\x{1f6b7}|\x{1f53c}|\x{1f4e7}|\x{1f4f5}|\x{1f4cb}|\x{1f69b}|\x{1f3b4}|\x{1f4c9}|\x{1f554}|\x{1f6a5}|\x{1f4c4}|\x{1f45d}|\x{1f558}|\x{1f1e8}\x{1f1f3}|\x{25fd}|\x{1f502}|\x{1f191}|\x{1f232}|\x{1f235}|\x{1f690}|\x{1f559}|\x{1f3ef}|\x{1f522}|\x{1f557}|\x{1f685}|\x{1f004}|\x{1f3ab}|\x{1f17f}|\x{1f515}|\x{1f52c}|\x{1f4e5}|\x{1f4c3}|\x{1f4d1}|\x{1f3ed}|\x{1f234}|\x{1f4d9}|\x{1f55a}|\x{1f4b1}|\x{1f516}|\x{1f4d4}|\x{1f6b5}|\x{1f5fe}|\x{1f4ca}|\x{1f6bb}|\x{1f3e3}|\x{1f6b8}|\x{27bf}|\x{1f556}|\x{1f4be}|\x{1f4e0}|\x{1f4b9}|\
x{1f4d0}|\x{1f4df}|\x{1f555}|\x{1f251}|\x{1f68e}|\x{1f686}|\x{1f4cf}|\x{1f6b0}|\x{1f4e8}|\x{1f55c}|\x{1f4ef}|\x{1f509}|\x{1f50f}|\x{1f4eb}|\x{1f68f}|\x{1f4c7}|\x{1f524}|\x{1f564}|\x{1f55d}|\x{1f68b}|\x{1f508}|\x{1f688}|\x{1f565}|\x{1f567}|\x{1f4c2}|\x{1f69d}|\x{1f4c1}|\x{1f500}|\x{1f520}|\x{1f523}|\x{1f4e4}|\x{1f55f}|\x{1f4ea}|\x{26ce}|\x{1f55e}|\x{1f562}|\x{23eb}|\x{1f521}|\x{1f69e}|\x{1f560}|\x{1f4ed}|\x{2797}|\x{1f201}|\x{1f6a1}|\x{1f563}|\x{1f561}|\x{1f6af}|\x{1f6a0}|\x{1f6b1}|\x{1f6b3}|\x{1f69f}|\x{1f6c2}|\x{1f6c3}|\x{1f6c4}|\x{1f202}|\x{1f170}|\x{00a9}|\x{1f171}|\x{1f237}|\x{3030}|\x{00ae}|\x{2122}|\x{1f17e})\x{fe0f}?';

    // Build the hashtag-matching pattern. Capture group 1 holds the hashtag text WITHOUT the
    // leading pound sign. The pattern is assembled from pieces so each part can be commented.
    $hashtagRegexp = '/'.

        // Start with a pound sign: either the ASCII number sign (U+0023) or the fullwidth number
        // sign (U+FF03). The fullwidth form is written as a PCRE escape rather than a literal
        // character so it cannot be silently lost if this file is re-encoded or normalised.
        '[#\x{FF03}]'.

        // Capture the entire hashtag.
        '('.

        // Note that twitter requires the first character to be a non-numeric one (so #2 would not
        // be a hashtag, for example). ADN DID allow #2 as a hashtag. So the easier / more efficient
        // version is supported here (#2 is a valid hashtag). That can be easily changed if desired.

        // Followed by one or more valid hashtag characters...
        '(?:'.
            '\p{L}|'.           // Any letter, in any language
            '\p{M}|'.           // Any combining mark (accents etc. that modify a letter)
            $emojiRegexp.'|'.   // Any valid emoji (pattern fragment defined earlier in this file)
            '\d|'.              // Any number
            '_'.                // Underscore
        ')+'.

        // End capturing the hashtag.
        ')'.

        // "u" modifier: treat both pattern and subject as UTF-8. Required for the \x{...} and
        // \p{...} escapes used above.
        '/u';


    // Load test data scraped from ADN. Each CSV row is: post ID, post text, then zero or more
    // hashtags that ADN extracted from that text. For every row we run our own regexp over the
    // text and check that it finds exactly the same set of hashtags.
    $csvFile = fopen('hashtag-test.csv','rb');

    // Without this check a missing / unreadable file would make the read loop below spin forever
    // (or throw), because feof()/fgetcsv() would be handed `false` instead of a stream.
    if ($csvFile === false) {
        print "Unable to open hashtag-test.csv\n";
        exit(1);
    }

    $passed = 0;
    $failed = 0;
    $testNumber = 0;

    // fgetcsv() returns false at end of file (or on a read error), which ends the loop.
    while (($row = fgetcsv($csvFile)) !== false) {

        // Ignore blank lines. fgetcsv() reports a blank line as an array holding a single null,
        // which is NOT falsy, so it has to be tested for explicitly.
        if ($row === array(null)) {
            continue;
        }

        // Post ID is first column, separate it from the hashtags.
        $postId = (string) array_shift($row);

        // Text of the post is the second column, separate it from the hashtags.
        $text = (string) array_shift($row);

        // Whatever is left are the hashtags ADN detected. Lower-case them so the comparison below
        // is case-insensitive.
        $hashtags = array_map('mb_strtolower', $row);

        // This is a cheat for now - remove URLs that contain # from the text so we don't get hung
        // up on something that is apparently already handled in the pnut code.
        $textBefore = $text;
        $stripped = preg_replace('%\b(https?://|ftp://|mailto:)?[a-z0-9\-\.]+\.[a-z]{2,}/[^\s]*#[^\s]+%i','',$text);

        // preg_replace() returns null on a PCRE error; keep the unmodified text in that case
        // rather than silently testing against an empty string.
        if ($stripped !== null) {
            $text = $stripped;
        }

        // Run our own hashtag detection on the text to see if it matches what ADN said it should
        // be. $temp[1] is capture group 1: the tag text without the pound sign.
        $pnutHashtags = array();
        if (preg_match_all($hashtagRegexp,$text,$temp)) {
            foreach ($temp[1] as $htag) {
                $pnutHashtags[] = mb_strtolower($htag);
            }
        }

        // Sort them so we can reliably compare them both. SORT_STRING keeps numeric-looking tags
        // (e.g. "10" and "9") ordered as plain text instead of as numbers.
        sort($pnutHashtags, SORT_STRING);
        sort($hashtags, SORT_STRING);

        // Strict comparison: with a loose != PHP compares numeric strings by value, so tags such
        // as "007" and "7" would be treated as equal and hide a genuine mismatch.
        // If we failed, dump to output (should perhaps be stderr).
        if ($hashtags !== $pnutHashtags) {
            print "Failed test number $testNumber\n";
            print " ADN Post: ".number_format((float) $postId)."\n";
            print " Text: ".$text."\n";
            if ($text !== $textBefore) {
                print " Orig Text: ".$textBefore."\n";
            }
            print " ADN Hashtags: ".implode(', ',$hashtags)."\n";
            print "PNUT Hashtags: ".implode(', ',$pnutHashtags)."\n\n";
            $failed++;
        }
        else {
            $passed++;
        }

        // Increment, rinse, wipe, repeat.
        $testNumber++;

    }

    // Clean up
    fclose($csvFile);

    // Report. Guard the percentages so an empty test file does not cause a division by zero.
    $passedPercent = ($testNumber > 0) ? ($passed/$testNumber)*100 : 0;
    $failedPercent = ($testNumber > 0) ? ($failed/$testNumber)*100 : 0;

    print "Ran ".number_format($testNumber)." tests.\n";
    print "Passed ".number_format($passed)." (".number_format($passedPercent,2)."%)\n";
    print "Failed ".number_format($failed)." (".number_format($failedPercent,2)."%)\n";