-
-
Save abhishekbhardwaj/97fc32e8040e9d5ded354c939dbe3000 to your computer and use it in GitHub Desktop.
Revisions
-
ravisorg revised this gist
Apr 1, 2017 . 1 changed file with 1 addition and 1 deletion. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -13,7 +13,7 @@ // and is the equivilant (because PHP doesn't support these character classes) of: // (?:\p{Emoji_Modifier_Base}\p{Emoji_Modifier}?|\p{Emoji_Presentation}|\p{Emoji}\x{FE0F}?) // Generated using https://gist.github.com/ravisorg/23edafbfcbd45de9875adec5310fca76 $emojiRegexp = '(?:(?:\x{261D}|\x{26F9}|[\x{270A}-\x{270B}]|[\x{270C}-\x{270D}]|\x{1F385}|[\x{1F3C2}-\x{1F3C4}]|\x{1F3C7}|\x{1F3CA}|[\x{1F3CB}-\x{1F3CC}]|[\x{1F442}-\x{1F443}]|[\x{1F446}-\x{1F450}]|[\x{1F466}-\x{1F469}]|\x{1F46E}|[\x{1F470}-\x{1F478}]|\x{1F47C}|[\x{1F481}-\x{1F483}]|[\x{1F485}-\x{1F487}]|\x{1F4AA}|[\x{1F574}-\x{1F575}]|\x{1F57A}|\x{1F590}|[\x{1F595}-\x{1F596}]|[\x{1F645}-\x{1F647}]|[\x{1F64B}-\x{1F64F}]|\x{1F6A3}|[\x{1F6B4}-\x{1F6B6}]|\x{1F6C0}|\x{1F6CC}|\x{1F918}|[\x{1F919}-\x{1F91C}]|\x{1F91E}|\x{1F91F}|\x{1F926}|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F939}]|[\x{1F93D}-\x{1F93E}]|[\x{1F9D1}-\x{1F9DD}])(?:[\x{1F3FB}-\x{1F3FF}])?|(?:[\x{231A}-\x{231B}]|[\x{23E9}-\x{23EC}]|\x{23F0}|\x{23F3}|[\x{25FD}-\x{25FE}]|[\x{2614}-\x{2615}]|[\x{2648}-\x{2653}]|\x{267F}|\x{2693}|\x{26A1}|[\x{26AA}-\x{26AB}]|[\x{26BD}-\x{26BE}]|[\x{26C4}-\x{26C5}]|\x{26CE}|\x{26D4}|\x{26EA}|[\x{26F2}-\x{26F3}]|\x{26F5}|\x{26FA}|\x{26FD}|\x{2705}|[\x{270A}-\x{270B}]|\x{2728}|\x{274C}|\x{274E}|[\x{2753}-\x{2755}]|\x{2757}|[\x{2795}-\x{2797}]|\x{27B0}|\x{27BF}|[\x{2B1B}-\x{2B1C}]|\x{2B50}|\x{2B55}|\x{1F004}|\x{1F0CF}|\x{1F18E}|[\x{1F191}-\x{1F19A}]|[\x{1F1E6}-\x{1F1FF}]|\x{1F201}|\x{1F21A}|\x{1F22F}|[\x{1F232}-\x{1F236}]|[\x{1F238}-\x{1F23A}]|[\x{1F250}-\x{1F251}]|[\x{1F300}-\x{1F320}]|[\x{1F32D}-\x{1F32F}]|[\x{1F330}-\x{1F335}]|[\x{1F337}-\x{1F37C}]|[\x{1F37E}-\x{1F37F}]|[\x{1F380}-\x{1F393}
]|[\x{1F3A0}-\x{1F3C4}]|\x{1F3C5}|[\x{1F3C6}-\x{1F3CA}]|[\x{1F3CF}-\x{1F3D3}]|[\x{1F3E0}-\x{1F3F0}]|\x{1F3F4}|[\x{1F3F8}-\x{1F3FF}]|[\x{1F400}-\x{1F43E}]|\x{1F440}|[\x{1F442}-\x{1F4F7}]|\x{1F4F8}|[\x{1F4F9}-\x{1F4FC}]|\x{1F4FF}|[\x{1F500}-\x{1F53D}]|[\x{1F54B}-\x{1F54E}]|[\x{1F550}-\x{1F567}]|\x{1F57A}|[\x{1F595}-\x{1F596}]|\x{1F5A4}|[\x{1F5FB}-\x{1F5FF}]|\x{1F600}|[\x{1F601}-\x{1F610}]|\x{1F611}|[\x{1F612}-\x{1F614}]|\x{1F615}|\x{1F616}|\x{1F617}|\x{1F618}|\x{1F619}|\x{1F61A}|\x{1F61B}|[\x{1F61C}-\x{1F61E}]|\x{1F61F}|[\x{1F620}-\x{1F625}]|[\x{1F626}-\x{1F627}]|[\x{1F628}-\x{1F62B}]|\x{1F62C}|\x{1F62D}|[\x{1F62E}-\x{1F62F}]|[\x{1F630}-\x{1F633}]|\x{1F634}|[\x{1F635}-\x{1F640}]|[\x{1F641}-\x{1F642}]|[\x{1F643}-\x{1F644}]|[\x{1F645}-\x{1F64F}]|[\x{1F680}-\x{1F6C5}]|\x{1F6CC}|\x{1F6D0}|[\x{1F6D1}-\x{1F6D2}]|[\x{1F6EB}-\x{1F6EC}]|[\x{1F6F4}-\x{1F6F6}]|[\x{1F6F7}-\x{1F6F8}]|[\x{1F910}-\x{1F918}]|[\x{1F919}-\x{1F91E}]|\x{1F91F}|[\x{1F920}-\x{1F927}]|[\x{1F928}-\x{1F92F}]|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F93A}]|[\x{1F93C}-\x{1F93E}]|[\x{1F940}-\x{1F945}]|[\x{1F947}-\x{1F94B}]|\x{1F94C}|[\x{1F950}-\x{1F95E}]|[\x{1F95F}-\x{1F96B}]|[\x{1F980}-\x{1F984}]|[\x{1F985}-\x{1F991}]|[\x{1F992}-\x{1F997}]|\x{1F9C0}|[\x{1F9D0}-\x{1F9E6}])|(?:\x{0023}|\x{002A}|[\x{0030}-\x{0039}]|\x{00A9}|\x{00AE}|\x{203C}|\x{2049}|\x{2122}|\x{2139}|[\x{2194}-\x{2199}]|[\x{21A9}-\x{21AA}]|[\x{231A}-\x{231B}]|\x{2328}|\x{23CF}|[\x{23E9}-\x{23F3}]|[\x{23F8}-\x{23FA}]|\x{24C2}|[\x{25AA}-\x{25AB}]|\x{25B6}|\x{25C0}|[\x{25FB}-\x{25FE}]|[\x{2600}-\x{2604}]|\x{260E}|\x{2611}|[\x{2614}-\x{2615}]|\x{2618}|\x{261D}|\x{2620}|[\x{2622}-\x{2623}]|\x{2626}|\x{262A}|[\x{262E}-\x{262F}]|[\x{2638}-\x{263A}]|\x{2640}|\x{2642}|[\x{2648}-\x{2653}]|\x{2660}|\x{2663}|[\x{2665}-\x{2666}]|\x{2668}|\x{267B}|\x{267F}|[\x{2692}-\x{2697}]|\x{2699}|[\x{269B}-\x{269C}]|[\x{26A0}-\x{26A1}]|[\x{26AA}-\x{26AB}]|[\x{26B0}-\x{26B1}]|[\x{26BD}-\x{26BE}]|[\x{26C4}-\x{26C5}]|\x{26C8}|\x{26CE}|\x{26CF}|\x{26D1}|[\x{26D3}-\x{26D
4}]|[\x{26E9}-\x{26EA}]|[\x{26F0}-\x{26F5}]|[\x{26F7}-\x{26FA}]|\x{26FD}|\x{2702}|\x{2705}|[\x{2708}-\x{2709}]|[\x{270A}-\x{270B}]|[\x{270C}-\x{270D}]|\x{270F}|\x{2712}|\x{2714}|\x{2716}|\x{271D}|\x{2721}|\x{2728}|[\x{2733}-\x{2734}]|\x{2744}|\x{2747}|\x{274C}|\x{274E}|[\x{2753}-\x{2755}]|\x{2757}|[\x{2763}-\x{2764}]|[\x{2795}-\x{2797}]|\x{27A1}|\x{27B0}|\x{27BF}|[\x{2934}-\x{2935}]|[\x{2B05}-\x{2B07}]|[\x{2B1B}-\x{2B1C}]|\x{2B50}|\x{2B55}|\x{3030}|\x{303D}|\x{3297}|\x{3299}|\x{1F004}|\x{1F0CF}|[\x{1F170}-\x{1F171}]|\x{1F17E}|\x{1F17F}|\x{1F18E}|[\x{1F191}-\x{1F19A}]|[\x{1F1E6}-\x{1F1FF}]|[\x{1F201}-\x{1F202}]|\x{1F21A}|\x{1F22F}|[\x{1F232}-\x{1F23A}]|[\x{1F250}-\x{1F251}]|[\x{1F300}-\x{1F320}]|\x{1F321}|[\x{1F324}-\x{1F32C}]|[\x{1F32D}-\x{1F32F}]|[\x{1F330}-\x{1F335}]|\x{1F336}|[\x{1F337}-\x{1F37C}]|\x{1F37D}|[\x{1F37E}-\x{1F37F}]|[\x{1F380}-\x{1F393}]|[\x{1F396}-\x{1F397}]|[\x{1F399}-\x{1F39B}]|[\x{1F39E}-\x{1F39F}]|[\x{1F3A0}-\x{1F3C4}]|\x{1F3C5}|[\x{1F3C6}-\x{1F3CA}]|[\x{1F3CB}-\x{1F3CE}]|[\x{1F3CF}-\x{1F3D3}]|[\x{1F3D4}-\x{1F3DF}]|[\x{1F3E0}-\x{1F3F0}]|[\x{1F3F3}-\x{1F3F5}]|\x{1F3F7}|[\x{1F3F8}-\x{1F3FF}]|[\x{1F400}-\x{1F43E}]|\x{1F43F}|\x{1F440}|\x{1F441}|[\x{1F442}-\x{1F4F7}]|\x{1F4F8}|[\x{1F4F9}-\x{1F4FC}]|\x{1F4FD}|\x{1F4FF}|[\x{1F500}-\x{1F53D}]|[\x{1F549}-\x{1F54A}]|[\x{1F54B}-\x{1F54E}]|[\x{1F550}-\x{1F567}]|[\x{1F56F}-\x{1F570}]|[\x{1F573}-\x{1F579}]|\x{1F57A}|\x{1F587}|[\x{1F58A}-\x{1F58D}]|\x{1F590}|[\x{1F595}-\x{1F596}]|\x{1F5A4}|\x{1F5A5}|\x{1F5A8}|[\x{1F5B1}-\x{1F5B2}]|\x{1F5BC}|[\x{1F5C2}-\x{1F5C4}]|[\x{1F5D1}-\x{1F5D3}]|[\x{1F5DC}-\x{1F5DE}]|\x{1F5E1}|\x{1F5E3}|\x{1F5E8}|\x{1F5EF}|\x{1F5F3}|\x{1F5FA}|[\x{1F5FB}-\x{1F5FF}]|\x{1F600}|[\x{1F601}-\x{1F610}]|\x{1F611}|[\x{1F612}-\x{1F614}]|\x{1F615}|\x{1F616}|\x{1F617}|\x{1F618}|\x{1F619}|\x{1F61A}|\x{1F61B}|[\x{1F61C}-\x{1F61E}]|\x{1F61F}|[\x{1F620}-\x{1F625}]|[\x{1F626}-\x{1F627}]|[\x{1F628}-\x{1F62B}]|\x{1F62C}|\x{1F62D}|[\x{1F62E}-\x{1F62F}]|[\x{1F630}-\x{1F633}]|\x{1F634}|[\x{1F635}-\x{1F640}]|[\
x{1F641}-\x{1F642}]|[\x{1F643}-\x{1F644}]|[\x{1F645}-\x{1F64F}]|[\x{1F680}-\x{1F6C5}]|[\x{1F6CB}-\x{1F6CF}]|\x{1F6D0}|[\x{1F6D1}-\x{1F6D2}]|[\x{1F6E0}-\x{1F6E5}]|\x{1F6E9}|[\x{1F6EB}-\x{1F6EC}]|\x{1F6F0}|\x{1F6F3}|[\x{1F6F4}-\x{1F6F6}]|[\x{1F6F7}-\x{1F6F8}]|[\x{1F910}-\x{1F918}]|[\x{1F919}-\x{1F91E}]|\x{1F91F}|[\x{1F920}-\x{1F927}]|[\x{1F928}-\x{1F92F}]|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F93A}]|[\x{1F93C}-\x{1F93E}]|[\x{1F940}-\x{1F945}]|[\x{1F947}-\x{1F94B}]|\x{1F94C}|[\x{1F950}-\x{1F95E}]|[\x{1F95F}-\x{1F96B}]|[\x{1F980}-\x{1F984}]|[\x{1F985}-\x{1F991}]|[\x{1F992}-\x{1F997}]|\x{1F9C0}|[\x{1F9D0}-\x{1F9E6}])\x{FE0F})'; $hashtagRegexp = '/'. -
ravisorg revised this gist
Apr 1, 2017 . No changes. There are no files selected for viewing
-
ravisorg revised this gist
Apr 1, 2017 . 1 changed file with 3 additions and 3 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -37,12 +37,12 @@ '_'. // Underscore ')+'. // Repeat this group at least once (we need at least one character for a hashtag) ')+'. // Which can (optionally) be followed by more numbers '\d*'. // end the actual hashtag capture group ')'. -
ravisorg revised this gist
Apr 1, 2017 . 1 changed file with 20 additions and 16 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters Original file line number Diff line number Diff line change @@ -20,26 +20,30 @@ // Start with a pound sign (or a unicode variant of that) '[#＃]'. // Capture the entire hashtag '('. // Defines a group of (possible) digits and (required) valid non digit characters '(?:'. // We can optionally start with one or more numbers, so long as the number is... '\d*'. // Followed by one or more valid hashtag characters that aren't numbers... '(?:'. '\p{L}|'. // Any language letter '\p{M}|'. // Any language letter modifier $emojiRegexp.'|'. // Any valid emoji '_'. // Underscore ')+'. // Which can (optionally) be followed by more numbers '\d*'. // Repeat this group at least once (we need at least one character for a hashtag) ')+'. // end the actual hashtag capture group ')'. // use unicode modifiers / unicode strings -
ravisorg revised this gist
Apr 1, 2017 . 1 changed file with 3 additions and 0 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode characters Original file line number Diff line number Diff line change @@ -20,6 +20,9 @@ // Start with a pound sign (or a unicode variant of that) '[#＃]'. // Make sure the entire hashtag isn't just numbers (eg: #1) '(?!\d+\b)'. // capture the entire hashtag '('. -
ravisorg revised this gist
Mar 30, 2017 . 1 changed file with 2 additions and 1 deletion. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -9,9 +9,10 @@ // Emoji are spread throughout the Unicode codespace and can contain modifiers. This regexp // was automatically generated from http://ftp.unicode.org/Public/emoji/1.0/emoji-data.txt // and is the equivilant (because PHP doesn't support these character classes) of: // (?:\p{Emoji_Modifier_Base}\p{Emoji_Modifier}?|\p{Emoji_Presentation}|\p{Emoji}\x{FE0F}?) // Generated using https://gist.github.com/ravisorg/23edafbfcbd45de9875adec5310fca76 $emojiRegexp = '(?:(?:\x{261D}|\x{26F9}|[\x{270A}-\x{270B}]|[\x{270C}-\x{270D}]|\x{1F385}|[\x{1F3C2}-\x{1F3C4}]|\x{1F3C7}|\x{1F3CA}|[\x{1F3CB}-\x{1F3CC}]|[\x{1F442}-\x{1F443}]|[\x{1F446}-\x{1F450}]|[\x{1F466}-\x{1F469}]|\x{1F46E}|[\x{1F470}-\x{1F478}]|\x{1F47C}|[\x{1F481}-\x{1F483}]|[\x{1F485}-\x{1F487}]|\x{1F4AA}|[\x{1F574}-\x{1F575}]|\x{1F57A}|\x{1F590}|[\x{1F595}-\x{1F596}]|[\x{1F645}-\x{1F647}]|[\x{1F64B}-\x{1F64F}]|\x{1F6A3}|[\x{1F6B4}-\x{1F6B6}]|\x{1F6C0}|\x{1F6CC}|\x{1F918}|[\x{1F919}-\x{1F91C}]|\x{1F91E}|\x{1F91F}|\x{1F926}|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F939}]|[\x{1F93D}-\x{1F93E}]|[\x{1F9D1}-\x{1F9DD}])(?:[\x{1F3FB}-\x{1F3FF}])?|(?:[\x{231A}-\x{231B}]|[\x{23E9}-\x{23EC}]|\x{23F0}|\x{23F3}|[\x{25FD}-\x{25FE}]|[\x{2614}-\x{2615}]|[\x{2648}-\x{2653}]|\x{267F}|\x{2693}|\x{26A1}|[\x{26AA}-\x{26AB}]|[\x{26BD}-\x{26BE}]|[\x{26C4}-\x{26C5}]|\x{26CE}|\x{26D4}|\x{26EA}|[\x{26F2}-\x{26F3}]|\x{26F5}|\x{26FA}|\x{26FD}|\x{2705}|[\x{270A}-\x{270B}]|\x{2728}|\x{274C}|\x{274E}|[\x{2753}-\x{2755}]|\x{2757}|[\x{2795}-\x{2797}]|\x{27B0}|\x{27BF}|[\x{2B1B}-\x{2B1C}]|\x{2B50}|\x{2B55}|\x{1F004}|\x{1F0CF}|\x{1F18E}|[\x{1F191}-\x{1F19A}]|[\x{1F1E6}-\x{1F1FF}]|\x{1F201}|\x{1F21A}|\x{1F22F}|[\x{1F232}-\x{
1F236}]|[\x{1F238}-\x{1F23A}]|[\x{1F250}-\x{1F251}]|[\x{1F300}-\x{1F320}]|[\x{1F32D}-\x{1F32F}]|[\x{1F330}-\x{1F335}]|[\x{1F337}-\x{1F37C}]|[\x{1F37E}-\x{1F37F}]|[\x{1F380}-\x{1F393}]|[\x{1F3A0}-\x{1F3C4}]|\x{1F3C5}|[\x{1F3C6}-\x{1F3CA}]|[\x{1F3CF}-\x{1F3D3}]|[\x{1F3E0}-\x{1F3F0}]|\x{1F3F4}|[\x{1F3F8}-\x{1F3FF}]|[\x{1F400}-\x{1F43E}]|\x{1F440}|[\x{1F442}-\x{1F4F7}]|\x{1F4F8}|[\x{1F4F9}-\x{1F4FC}]|\x{1F4FF}|[\x{1F500}-\x{1F53D}]|[\x{1F54B}-\x{1F54E}]|[\x{1F550}-\x{1F567}]|\x{1F57A}|[\x{1F595}-\x{1F596}]|\x{1F5A4}|[\x{1F5FB}-\x{1F5FF}]|\x{1F600}|[\x{1F601}-\x{1F610}]|\x{1F611}|[\x{1F612}-\x{1F614}]|\x{1F615}|\x{1F616}|\x{1F617}|\x{1F618}|\x{1F619}|\x{1F61A}|\x{1F61B}|[\x{1F61C}-\x{1F61E}]|\x{1F61F}|[\x{1F620}-\x{1F625}]|[\x{1F626}-\x{1F627}]|[\x{1F628}-\x{1F62B}]|\x{1F62C}|\x{1F62D}|[\x{1F62E}-\x{1F62F}]|[\x{1F630}-\x{1F633}]|\x{1F634}|[\x{1F635}-\x{1F640}]|[\x{1F641}-\x{1F642}]|[\x{1F643}-\x{1F644}]|[\x{1F645}-\x{1F64F}]|[\x{1F680}-\x{1F6C5}]|\x{1F6CC}|\x{1F6D0}|[\x{1F6D1}-\x{1F6D2}]|[\x{1F6EB}-\x{1F6EC}]|[\x{1F6F4}-\x{1F6F6}]|[\x{1F6F7}-\x{1F6F8}]|[\x{1F910}-\x{1F918}]|[\x{1F919}-\x{1F91E}]|\x{1F91F}|[\x{1F920}-\x{1F927}]|[\x{1F928}-\x{1F92F}]|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F93A}]|[\x{1F93C}-\x{1F93E}]|[\x{1F940}-\x{1F945}]|[\x{1F947}-\x{1F94B}]|\x{1F94C}|[\x{1F950}-\x{1F95E}]|[\x{1F95F}-\x{1F96B}]|[\x{1F980}-\x{1F984}]|[\x{1F985}-\x{1F991}]|[\x{1F992}-\x{1F997}]|\x{1F9C0}|[\x{1F9D0}-\x{1F9E6}])|(?:\x{0023}|\x{002A}|[\x{0030}-\x{0039}]|\x{00A9}|\x{00AE}|\x{203C}|\x{2049}|\x{2122}|\x{2139}|[\x{2194}-\x{2199}]|[\x{21A9}-\x{21AA}]|[\x{231A}-\x{231B}]|\x{2328}|\x{23CF}|[\x{23E9}-\x{23F3}]|[\x{23F8}-\x{23FA}]|\x{24C2}|[\x{25AA}-\x{25AB}]|\x{25B6}|\x{25C0}|[\x{25FB}-\x{25FE}]|[\x{2600}-\x{2604}]|\x{260E}|\x{2611}|[\x{2614}-\x{2615}]|\x{2618}|\x{261D}|\x{2620}|[\x{2622}-\x{2623}]|\x{2626}|\x{262A}|[\x{262E}-\x{262F}]|[\x{2638}-\x{263A}]|\x{2640}|\x{2642}|[\x{2648}-\x{2653}]|\x{2660}|\x{2663}|[\x{2665}-\x{2666}]|\x{2668}|\x{267B}|\x{267F}|[\x{2692}-\x{2697}]
|\x{2699}|[\x{269B}-\x{269C}]|[\x{26A0}-\x{26A1}]|[\x{26AA}-\x{26AB}]|[\x{26B0}-\x{26B1}]|[\x{26BD}-\x{26BE}]|[\x{26C4}-\x{26C5}]|\x{26C8}|\x{26CE}|\x{26CF}|\x{26D1}|[\x{26D3}-\x{26D4}]|[\x{26E9}-\x{26EA}]|[\x{26F0}-\x{26F5}]|[\x{26F7}-\x{26FA}]|\x{26FD}|\x{2702}|\x{2705}|[\x{2708}-\x{2709}]|[\x{270A}-\x{270B}]|[\x{270C}-\x{270D}]|\x{270F}|\x{2712}|\x{2714}|\x{2716}|\x{271D}|\x{2721}|\x{2728}|[\x{2733}-\x{2734}]|\x{2744}|\x{2747}|\x{274C}|\x{274E}|[\x{2753}-\x{2755}]|\x{2757}|[\x{2763}-\x{2764}]|[\x{2795}-\x{2797}]|\x{27A1}|\x{27B0}|\x{27BF}|[\x{2934}-\x{2935}]|[\x{2B05}-\x{2B07}]|[\x{2B1B}-\x{2B1C}]|\x{2B50}|\x{2B55}|\x{3030}|\x{303D}|\x{3297}|\x{3299}|\x{1F004}|\x{1F0CF}|[\x{1F170}-\x{1F171}]|\x{1F17E}|\x{1F17F}|\x{1F18E}|[\x{1F191}-\x{1F19A}]|[\x{1F1E6}-\x{1F1FF}]|[\x{1F201}-\x{1F202}]|\x{1F21A}|\x{1F22F}|[\x{1F232}-\x{1F23A}]|[\x{1F250}-\x{1F251}]|[\x{1F300}-\x{1F320}]|\x{1F321}|[\x{1F324}-\x{1F32C}]|[\x{1F32D}-\x{1F32F}]|[\x{1F330}-\x{1F335}]|\x{1F336}|[\x{1F337}-\x{1F37C}]|\x{1F37D}|[\x{1F37E}-\x{1F37F}]|[\x{1F380}-\x{1F393}]|[\x{1F396}-\x{1F397}]|[\x{1F399}-\x{1F39B}]|[\x{1F39E}-\x{1F39F}]|[\x{1F3A0}-\x{1F3C4}]|\x{1F3C5}|[\x{1F3C6}-\x{1F3CA}]|[\x{1F3CB}-\x{1F3CE}]|[\x{1F3CF}-\x{1F3D3}]|[\x{1F3D4}-\x{1F3DF}]|[\x{1F3E0}-\x{1F3F0}]|[\x{1F3F3}-\x{1F3F5}]|\x{1F3F7}|[\x{1F3F8}-\x{1F3FF}]|[\x{1F400}-\x{1F43E}]|\x{1F43F}|\x{1F440}|\x{1F441}|[\x{1F442}-\x{1F4F7}]|\x{1F4F8}|[\x{1F4F9}-\x{1F4FC}]|\x{1F4FD}|\x{1F4FF}|[\x{1F500}-\x{1F53D}]|[\x{1F549}-\x{1F54A}]|[\x{1F54B}-\x{1F54E}]|[\x{1F550}-\x{1F567}]|[\x{1F56F}-\x{1F570}]|[\x{1F573}-\x{1F579}]|\x{1F57A}|\x{1F587}|[\x{1F58A}-\x{1F58D}]|\x{1F590}|[\x{1F595}-\x{1F596}]|\x{1F5A4}|\x{1F5A5}|\x{1F5A8}|[\x{1F5B1}-\x{1F5B2}]|\x{1F5BC}|[\x{1F5C2}-\x{1F5C4}]|[\x{1F5D1}-\x{1F5D3}]|[\x{1F5DC}-\x{1F5DE}]|\x{1F5E1}|\x{1F5E3}|\x{1F5E8}|\x{1F5EF}|\x{1F5F3}|\x{1F5FA}|[\x{1F5FB}-\x{1F5FF}]|\x{1F600}|[\x{1F601}-\x{1F610}]|\x{1F611}|[\x{1F612}-\x{1F614}]|\x{1F615}|\x{1F616}|\x{1F617}|\x{1F618}|\x{1F619}|\x{1F61A}|\x{1F61B}|[\x{1F61C}-\x{
1F61E}]|\x{1F61F}|[\x{1F620}-\x{1F625}]|[\x{1F626}-\x{1F627}]|[\x{1F628}-\x{1F62B}]|\x{1F62C}|\x{1F62D}|[\x{1F62E}-\x{1F62F}]|[\x{1F630}-\x{1F633}]|\x{1F634}|[\x{1F635}-\x{1F640}]|[\x{1F641}-\x{1F642}]|[\x{1F643}-\x{1F644}]|[\x{1F645}-\x{1F64F}]|[\x{1F680}-\x{1F6C5}]|[\x{1F6CB}-\x{1F6CF}]|\x{1F6D0}|[\x{1F6D1}-\x{1F6D2}]|[\x{1F6E0}-\x{1F6E5}]|\x{1F6E9}|[\x{1F6EB}-\x{1F6EC}]|\x{1F6F0}|\x{1F6F3}|[\x{1F6F4}-\x{1F6F6}]|[\x{1F6F7}-\x{1F6F8}]|[\x{1F910}-\x{1F918}]|[\x{1F919}-\x{1F91E}]|\x{1F91F}|[\x{1F920}-\x{1F927}]|[\x{1F928}-\x{1F92F}]|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F93A}]|[\x{1F93C}-\x{1F93E}]|[\x{1F940}-\x{1F945}]|[\x{1F947}-\x{1F94B}]|\x{1F94C}|[\x{1F950}-\x{1F95E}]|[\x{1F95F}-\x{1F96B}]|[\x{1F980}-\x{1F984}]|[\x{1F985}-\x{1F991}]|[\x{1F992}-\x{1F997}]|\x{1F9C0}|[\x{1F9D0}-\x{1F9E6}])\x{FE0F}?)'; $hashtagRegexp = '/'. -
ravisorg revised this gist
Mar 30, 2017 . 1 changed file with 1 addition and 1 deletion. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -9,7 +9,7 @@ // Emoji are spread throughout the Unicode codespace and can contain modifiers. This regexp // was automatically generated from http://ftp.unicode.org/Public/emoji/5.0/emoji-data.txt // and is the equivilant (because PHP doesn't support these character classes) of: // (?:\p{Emoji_Modifier_Base}\p{Emoji_Modifier}?|\p{Emoji_Presentation}|\p{Emoji}\x{FE0F}?) $emojiRegexp = '(?:(?:\x{261D}|\x{26F9}|[\x{270A}-\x{270B}]|[\x{270C}-\x{270D}]|\x{1F385}|[\x{1F3C2}-\x{1F3C4}]|\x{1F3C7}|\x{1F3CA}|[\x{1F3CB}-\x{1F3CC}]|[\x{1F442}-\x{1F443}]|[\x{1F446}-\x{1F450}]|[\x{1F466}-\x{1F469}]|\x{1F46E}|[\x{1F470}-\x{1F478}]|\x{1F47C}|[\x{1F481}-\x{1F483}]|[\x{1F485}-\x{1F487}]|\x{1F4AA}|[\x{1F574}-\x{1F575}]|\x{1F57A}|\x{1F590}|[\x{1F595}-\x{1F596}]|[\x{1F645}-\x{1F647}]|[\x{1F64B}-\x{1F64F}]|\x{1F6A3}|[\x{1F6B4}-\x{1F6B6}]|\x{1F6C0}|\x{1F6CC}|\x{1F918}|[\x{1F919}-\x{1F91C}]|\x{1F91E}|\x{1F91F}|\x{1F926}|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F939}]|[\x{1F93D}-\x{1F93E}]|[\x{1F9D1}-\x{1F9DD}])(?:[\x{1F3FB}-\x{1F3FF}])?|(?:[\x{231A}-\x{231B}]|[\x{23E9}-\x{23EC}]|\x{23F0}|\x{23F3}|[\x{25FD}-\x{25FE}]|[\x{2614}-\x{2615}]|[\x{2648}-\x{2653}]|\x{267F}|\x{2693}|\x{26A1}|[\x{26AA}-\x{26AB}]|[\x{26BD}-\x{26BE}]|[\x{26C4}-\x{26C5}]|\x{26CE}|\x{26D4}|\x{26EA}|[\x{26F2}-\x{26F3}]|\x{26F5}|\x{26FA}|\x{26FD}|\x{2705}|[\x{270A}-\x{270B}]|\x{2728}|\x{274C}|\x{274E}|[\x{2753}-\x{2755}]|\x{2757}|[\x{2795}-\x{2797}]|\x{27B0}|\x{27BF}|[\x{2B1B}-\x{2B1C}]|\x{2B50}|\x{2B55}|\x{1F004}|\x{1F0CF}|\x{1F18E}|[\x{1F191}-\x{1F19A}]|[\x{1F1E6}-\x{1F1FF}]|\x{1F201}|\x{1F21A}|\x{1F22F}|[\x{1F232}-\x{1F236}]|[\x{1F238}-\x{1F23A}]|[\x{1F250}-\x{1F251}]|[\x{1F300}-\x{1F320}]|[\x{1F32D}-\
x{1F32F}]|[\x{1F330}-\x{1F335}]|[\x{1F337}-\x{1F37C}]|[\x{1F37E}-\x{1F37F}]|[\x{1F380}-\x{1F393}]|[\x{1F3A0}-\x{1F3C4}]|\x{1F3C5}|[\x{1F3C6}-\x{1F3CA}]|[\x{1F3CF}-\x{1F3D3}]|[\x{1F3E0}-\x{1F3F0}]|\x{1F3F4}|[\x{1F3F8}-\x{1F3FF}]|[\x{1F400}-\x{1F43E}]|\x{1F440}|[\x{1F442}-\x{1F4F7}]|\x{1F4F8}|[\x{1F4F9}-\x{1F4FC}]|\x{1F4FF}|[\x{1F500}-\x{1F53D}]|[\x{1F54B}-\x{1F54E}]|[\x{1F550}-\x{1F567}]|\x{1F57A}|[\x{1F595}-\x{1F596}]|\x{1F5A4}|[\x{1F5FB}-\x{1F5FF}]|\x{1F600}|[\x{1F601}-\x{1F610}]|\x{1F611}|[\x{1F612}-\x{1F614}]|\x{1F615}|\x{1F616}|\x{1F617}|\x{1F618}|\x{1F619}|\x{1F61A}|\x{1F61B}|[\x{1F61C}-\x{1F61E}]|\x{1F61F}|[\x{1F620}-\x{1F625}]|[\x{1F626}-\x{1F627}]|[\x{1F628}-\x{1F62B}]|\x{1F62C}|\x{1F62D}|[\x{1F62E}-\x{1F62F}]|[\x{1F630}-\x{1F633}]|\x{1F634}|[\x{1F635}-\x{1F640}]|[\x{1F641}-\x{1F642}]|[\x{1F643}-\x{1F644}]|[\x{1F645}-\x{1F64F}]|[\x{1F680}-\x{1F6C5}]|\x{1F6CC}|\x{1F6D0}|[\x{1F6D1}-\x{1F6D2}]|[\x{1F6EB}-\x{1F6EC}]|[\x{1F6F4}-\x{1F6F6}]|[\x{1F6F7}-\x{1F6F8}]|[\x{1F910}-\x{1F918}]|[\x{1F919}-\x{1F91E}]|\x{1F91F}|[\x{1F920}-\x{1F927}]|[\x{1F928}-\x{1F92F}]|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F93A}]|[\x{1F93C}-\x{1F93E}]|[\x{1F940}-\x{1F945}]|[\x{1F947}-\x{1F94B}]|\x{1F94C}|[\x{1F950}-\x{1F95E}]|[\x{1F95F}-\x{1F96B}]|[\x{1F980}-\x{1F984}]|[\x{1F985}-\x{1F991}]|[\x{1F992}-\x{1F997}]|\x{1F9C0}|[\x{1F9D0}-\x{1F9E6}])|(?:\x{0023}|\x{002A}|[\x{0030}-\x{0039}]|\x{00A9}|\x{00AE}|\x{203C}|\x{2049}|\x{2122}|\x{2139}|[\x{2194}-\x{2199}]|[\x{21A9}-\x{21AA}]|[\x{231A}-\x{231B}]|\x{2328}|\x{23CF}|[\x{23E9}-\x{23F3}]|[\x{23F8}-\x{23FA}]|\x{24C2}|[\x{25AA}-\x{25AB}]|\x{25B6}|\x{25C0}|[\x{25FB}-\x{25FE}]|[\x{2600}-\x{2604}]|\x{260E}|\x{2611}|[\x{2614}-\x{2615}]|\x{2618}|\x{261D}|\x{2620}|[\x{2622}-\x{2623}]|\x{2626}|\x{262A}|[\x{262E}-\x{262F}]|[\x{2638}-\x{263A}]|\x{2640}|\x{2642}|[\x{2648}-\x{2653}]|\x{2660}|\x{2663}|[\x{2665}-\x{2666}]|\x{2668}|\x{267B}|\x{267F}|[\x{2692}-\x{2697}]|\x{2699}|[\x{269B}-\x{269C}]|[\x{26A0}-\x{26A1}]|[\x{26AA}-\x{26AB}]|[\x{26B0}-\x{26B
1}]|[\x{26BD}-\x{26BE}]|[\x{26C4}-\x{26C5}]|\x{26C8}|\x{26CE}|\x{26CF}|\x{26D1}|[\x{26D3}-\x{26D4}]|[\x{26E9}-\x{26EA}]|[\x{26F0}-\x{26F5}]|[\x{26F7}-\x{26FA}]|\x{26FD}|\x{2702}|\x{2705}|[\x{2708}-\x{2709}]|[\x{270A}-\x{270B}]|[\x{270C}-\x{270D}]|\x{270F}|\x{2712}|\x{2714}|\x{2716}|\x{271D}|\x{2721}|\x{2728}|[\x{2733}-\x{2734}]|\x{2744}|\x{2747}|\x{274C}|\x{274E}|[\x{2753}-\x{2755}]|\x{2757}|[\x{2763}-\x{2764}]|[\x{2795}-\x{2797}]|\x{27A1}|\x{27B0}|\x{27BF}|[\x{2934}-\x{2935}]|[\x{2B05}-\x{2B07}]|[\x{2B1B}-\x{2B1C}]|\x{2B50}|\x{2B55}|\x{3030}|\x{303D}|\x{3297}|\x{3299}|\x{1F004}|\x{1F0CF}|[\x{1F170}-\x{1F171}]|\x{1F17E}|\x{1F17F}|\x{1F18E}|[\x{1F191}-\x{1F19A}]|[\x{1F1E6}-\x{1F1FF}]|[\x{1F201}-\x{1F202}]|\x{1F21A}|\x{1F22F}|[\x{1F232}-\x{1F23A}]|[\x{1F250}-\x{1F251}]|[\x{1F300}-\x{1F320}]|\x{1F321}|[\x{1F324}-\x{1F32C}]|[\x{1F32D}-\x{1F32F}]|[\x{1F330}-\x{1F335}]|\x{1F336}|[\x{1F337}-\x{1F37C}]|\x{1F37D}|[\x{1F37E}-\x{1F37F}]|[\x{1F380}-\x{1F393}]|[\x{1F396}-\x{1F397}]|[\x{1F399}-\x{1F39B}]|[\x{1F39E}-\x{1F39F}]|[\x{1F3A0}-\x{1F3C4}]|\x{1F3C5}|[\x{1F3C6}-\x{1F3CA}]|[\x{1F3CB}-\x{1F3CE}]|[\x{1F3CF}-\x{1F3D3}]|[\x{1F3D4}-\x{1F3DF}]|[\x{1F3E0}-\x{1F3F0}]|[\x{1F3F3}-\x{1F3F5}]|\x{1F3F7}|[\x{1F3F8}-\x{1F3FF}]|[\x{1F400}-\x{1F43E}]|\x{1F43F}|\x{1F440}|\x{1F441}|[\x{1F442}-\x{1F4F7}]|\x{1F4F8}|[\x{1F4F9}-\x{1F4FC}]|\x{1F4FD}|\x{1F4FF}|[\x{1F500}-\x{1F53D}]|[\x{1F549}-\x{1F54A}]|[\x{1F54B}-\x{1F54E}]|[\x{1F550}-\x{1F567}]|[\x{1F56F}-\x{1F570}]|[\x{1F573}-\x{1F579}]|\x{1F57A}|\x{1F587}|[\x{1F58A}-\x{1F58D}]|\x{1F590}|[\x{1F595}-\x{1F596}]|\x{1F5A4}|\x{1F5A5}|\x{1F5A8}|[\x{1F5B1}-\x{1F5B2}]|\x{1F5BC}|[\x{1F5C2}-\x{1F5C4}]|[\x{1F5D1}-\x{1F5D3}]|[\x{1F5DC}-\x{1F5DE}]|\x{1F5E1}|\x{1F5E3}|\x{1F5E8}|\x{1F5EF}|\x{1F5F3}|\x{1F5FA}|[\x{1F5FB}-\x{1F5FF}]|\x{1F600}|[\x{1F601}-\x{1F610}]|\x{1F611}|[\x{1F612}-\x{1F614}]|\x{1F615}|\x{1F616}|\x{1F617}|\x{1F618}|\x{1F619}|\x{1F61A}|\x{1F61B}|[\x{1F61C}-\x{1F61E}]|\x{1F61F}|[\x{1F620}-\x{1F625}]|[\x{1F626}-\x{1F627}]|[\x{1F628}-\x{1F62B}]|\x
{1F62C}|\x{1F62D}|[\x{1F62E}-\x{1F62F}]|[\x{1F630}-\x{1F633}]|\x{1F634}|[\x{1F635}-\x{1F640}]|[\x{1F641}-\x{1F642}]|[\x{1F643}-\x{1F644}]|[\x{1F645}-\x{1F64F}]|[\x{1F680}-\x{1F6C5}]|[\x{1F6CB}-\x{1F6CF}]|\x{1F6D0}|[\x{1F6D1}-\x{1F6D2}]|[\x{1F6E0}-\x{1F6E5}]|\x{1F6E9}|[\x{1F6EB}-\x{1F6EC}]|\x{1F6F0}|\x{1F6F3}|[\x{1F6F4}-\x{1F6F6}]|[\x{1F6F7}-\x{1F6F8}]|[\x{1F910}-\x{1F918}]|[\x{1F919}-\x{1F91E}]|\x{1F91F}|[\x{1F920}-\x{1F927}]|[\x{1F928}-\x{1F92F}]|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F93A}]|[\x{1F93C}-\x{1F93E}]|[\x{1F940}-\x{1F945}]|[\x{1F947}-\x{1F94B}]|\x{1F94C}|[\x{1F950}-\x{1F95E}]|[\x{1F95F}-\x{1F96B}]|[\x{1F980}-\x{1F984}]|[\x{1F985}-\x{1F991}]|[\x{1F992}-\x{1F997}]|\x{1F9C0}|[\x{1F9D0}-\x{1F9E6}])\x{FE0F}?)'; -
ravisorg revised this gist
Mar 30, 2017 . 1 changed file with 4 additions and 3 deletions. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -8,9 +8,10 @@ // For curiosity's sake, post number 693,847 is an emoji hashtag: #(heart) // Emoji are spread throughout the Unicode codespace and can contain modifiers. This regexp // was automatically generated from http://ftp.unicode.org/Public/emoji/1.0/emoji-data.txt // and is the equivilant (because PHP doesn't support these character classes) of: // (?:\p{Emoji_Modifier_Base}\p{Emoji_Modifier}?|\p{Emoji_Presentation}|\p{Emoji}\x{FE0F}?) $emojiRegexp = '(?:(?:\x{261D}|\x{26F9}|[\x{270A}-\x{270B}]|[\x{270C}-\x{270D}]|\x{1F385}|[\x{1F3C2}-\x{1F3C4}]|\x{1F3C7}|\x{1F3CA}|[\x{1F3CB}-\x{1F3CC}]|[\x{1F442}-\x{1F443}]|[\x{1F446}-\x{1F450}]|[\x{1F466}-\x{1F469}]|\x{1F46E}|[\x{1F470}-\x{1F478}]|\x{1F47C}|[\x{1F481}-\x{1F483}]|[\x{1F485}-\x{1F487}]|\x{1F4AA}|[\x{1F574}-\x{1F575}]|\x{1F57A}|\x{1F590}|[\x{1F595}-\x{1F596}]|[\x{1F645}-\x{1F647}]|[\x{1F64B}-\x{1F64F}]|\x{1F6A3}|[\x{1F6B4}-\x{1F6B6}]|\x{1F6C0}|\x{1F6CC}|\x{1F918}|[\x{1F919}-\x{1F91C}]|\x{1F91E}|\x{1F91F}|\x{1F926}|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F939}]|[\x{1F93D}-\x{1F93E}]|[\x{1F9D1}-\x{1F9DD}])(?:[\x{1F3FB}-\x{1F3FF}])?|(?:[\x{231A}-\x{231B}]|[\x{23E9}-\x{23EC}]|\x{23F0}|\x{23F3}|[\x{25FD}-\x{25FE}]|[\x{2614}-\x{2615}]|[\x{2648}-\x{2653}]|\x{267F}|\x{2693}|\x{26A1}|[\x{26AA}-\x{26AB}]|[\x{26BD}-\x{26BE}]|[\x{26C4}-\x{26C5}]|\x{26CE}|\x{26D4}|\x{26EA}|[\x{26F2}-\x{26F3}]|\x{26F5}|\x{26FA}|\x{26FD}|\x{2705}|[\x{270A}-\x{270B}]|\x{2728}|\x{274C}|\x{274E}|[\x{2753}-\x{2755}]|\x{2757}|[\x{2795}-\x{2797}]|\x{27B0}|\x{27BF}|[\x{2B1B}-\x{2B1C}]|\x{2B50}|\x{2B55}|\x{1F004}|\x{1F0CF}|\x{1F18E}|[\x{1F191}-\x{1F19A}]|[\x{1F1E6}-\x{1F1FF}]|\x{1F201}|\x{1F21A}|\x{1F22F}|[\x{1F232}-\x{1F236}]|[\
x{1F238}-\x{1F23A}]|[\x{1F250}-\x{1F251}]|[\x{1F300}-\x{1F320}]|[\x{1F32D}-\x{1F32F}]|[\x{1F330}-\x{1F335}]|[\x{1F337}-\x{1F37C}]|[\x{1F37E}-\x{1F37F}]|[\x{1F380}-\x{1F393}]|[\x{1F3A0}-\x{1F3C4}]|\x{1F3C5}|[\x{1F3C6}-\x{1F3CA}]|[\x{1F3CF}-\x{1F3D3}]|[\x{1F3E0}-\x{1F3F0}]|\x{1F3F4}|[\x{1F3F8}-\x{1F3FF}]|[\x{1F400}-\x{1F43E}]|\x{1F440}|[\x{1F442}-\x{1F4F7}]|\x{1F4F8}|[\x{1F4F9}-\x{1F4FC}]|\x{1F4FF}|[\x{1F500}-\x{1F53D}]|[\x{1F54B}-\x{1F54E}]|[\x{1F550}-\x{1F567}]|\x{1F57A}|[\x{1F595}-\x{1F596}]|\x{1F5A4}|[\x{1F5FB}-\x{1F5FF}]|\x{1F600}|[\x{1F601}-\x{1F610}]|\x{1F611}|[\x{1F612}-\x{1F614}]|\x{1F615}|\x{1F616}|\x{1F617}|\x{1F618}|\x{1F619}|\x{1F61A}|\x{1F61B}|[\x{1F61C}-\x{1F61E}]|\x{1F61F}|[\x{1F620}-\x{1F625}]|[\x{1F626}-\x{1F627}]|[\x{1F628}-\x{1F62B}]|\x{1F62C}|\x{1F62D}|[\x{1F62E}-\x{1F62F}]|[\x{1F630}-\x{1F633}]|\x{1F634}|[\x{1F635}-\x{1F640}]|[\x{1F641}-\x{1F642}]|[\x{1F643}-\x{1F644}]|[\x{1F645}-\x{1F64F}]|[\x{1F680}-\x{1F6C5}]|\x{1F6CC}|\x{1F6D0}|[\x{1F6D1}-\x{1F6D2}]|[\x{1F6EB}-\x{1F6EC}]|[\x{1F6F4}-\x{1F6F6}]|[\x{1F6F7}-\x{1F6F8}]|[\x{1F910}-\x{1F918}]|[\x{1F919}-\x{1F91E}]|\x{1F91F}|[\x{1F920}-\x{1F927}]|[\x{1F928}-\x{1F92F}]|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F93A}]|[\x{1F93C}-\x{1F93E}]|[\x{1F940}-\x{1F945}]|[\x{1F947}-\x{1F94B}]|\x{1F94C}|[\x{1F950}-\x{1F95E}]|[\x{1F95F}-\x{1F96B}]|[\x{1F980}-\x{1F984}]|[\x{1F985}-\x{1F991}]|[\x{1F992}-\x{1F997}]|\x{1F9C0}|[\x{1F9D0}-\x{1F9E6}])|(?:\x{0023}|\x{002A}|[\x{0030}-\x{0039}]|\x{00A9}|\x{00AE}|\x{203C}|\x{2049}|\x{2122}|\x{2139}|[\x{2194}-\x{2199}]|[\x{21A9}-\x{21AA}]|[\x{231A}-\x{231B}]|\x{2328}|\x{23CF}|[\x{23E9}-\x{23F3}]|[\x{23F8}-\x{23FA}]|\x{24C2}|[\x{25AA}-\x{25AB}]|\x{25B6}|\x{25C0}|[\x{25FB}-\x{25FE}]|[\x{2600}-\x{2604}]|\x{260E}|\x{2611}|[\x{2614}-\x{2615}]|\x{2618}|\x{261D}|\x{2620}|[\x{2622}-\x{2623}]|\x{2626}|\x{262A}|[\x{262E}-\x{262F}]|[\x{2638}-\x{263A}]|\x{2640}|\x{2642}|[\x{2648}-\x{2653}]|\x{2660}|\x{2663}|[\x{2665}-\x{2666}]|\x{2668}|\x{267B}|\x{267F}|[\x{2692}-\x{2697}]|\x{2699}|
[\x{269B}-\x{269C}]|[\x{26A0}-\x{26A1}]|[\x{26AA}-\x{26AB}]|[\x{26B0}-\x{26B1}]|[\x{26BD}-\x{26BE}]|[\x{26C4}-\x{26C5}]|\x{26C8}|\x{26CE}|\x{26CF}|\x{26D1}|[\x{26D3}-\x{26D4}]|[\x{26E9}-\x{26EA}]|[\x{26F0}-\x{26F5}]|[\x{26F7}-\x{26FA}]|\x{26FD}|\x{2702}|\x{2705}|[\x{2708}-\x{2709}]|[\x{270A}-\x{270B}]|[\x{270C}-\x{270D}]|\x{270F}|\x{2712}|\x{2714}|\x{2716}|\x{271D}|\x{2721}|\x{2728}|[\x{2733}-\x{2734}]|\x{2744}|\x{2747}|\x{274C}|\x{274E}|[\x{2753}-\x{2755}]|\x{2757}|[\x{2763}-\x{2764}]|[\x{2795}-\x{2797}]|\x{27A1}|\x{27B0}|\x{27BF}|[\x{2934}-\x{2935}]|[\x{2B05}-\x{2B07}]|[\x{2B1B}-\x{2B1C}]|\x{2B50}|\x{2B55}|\x{3030}|\x{303D}|\x{3297}|\x{3299}|\x{1F004}|\x{1F0CF}|[\x{1F170}-\x{1F171}]|\x{1F17E}|\x{1F17F}|\x{1F18E}|[\x{1F191}-\x{1F19A}]|[\x{1F1E6}-\x{1F1FF}]|[\x{1F201}-\x{1F202}]|\x{1F21A}|\x{1F22F}|[\x{1F232}-\x{1F23A}]|[\x{1F250}-\x{1F251}]|[\x{1F300}-\x{1F320}]|\x{1F321}|[\x{1F324}-\x{1F32C}]|[\x{1F32D}-\x{1F32F}]|[\x{1F330}-\x{1F335}]|\x{1F336}|[\x{1F337}-\x{1F37C}]|\x{1F37D}|[\x{1F37E}-\x{1F37F}]|[\x{1F380}-\x{1F393}]|[\x{1F396}-\x{1F397}]|[\x{1F399}-\x{1F39B}]|[\x{1F39E}-\x{1F39F}]|[\x{1F3A0}-\x{1F3C4}]|\x{1F3C5}|[\x{1F3C6}-\x{1F3CA}]|[\x{1F3CB}-\x{1F3CE}]|[\x{1F3CF}-\x{1F3D3}]|[\x{1F3D4}-\x{1F3DF}]|[\x{1F3E0}-\x{1F3F0}]|[\x{1F3F3}-\x{1F3F5}]|\x{1F3F7}|[\x{1F3F8}-\x{1F3FF}]|[\x{1F400}-\x{1F43E}]|\x{1F43F}|\x{1F440}|\x{1F441}|[\x{1F442}-\x{1F4F7}]|\x{1F4F8}|[\x{1F4F9}-\x{1F4FC}]|\x{1F4FD}|\x{1F4FF}|[\x{1F500}-\x{1F53D}]|[\x{1F549}-\x{1F54A}]|[\x{1F54B}-\x{1F54E}]|[\x{1F550}-\x{1F567}]|[\x{1F56F}-\x{1F570}]|[\x{1F573}-\x{1F579}]|\x{1F57A}|\x{1F587}|[\x{1F58A}-\x{1F58D}]|\x{1F590}|[\x{1F595}-\x{1F596}]|\x{1F5A4}|\x{1F5A5}|\x{1F5A8}|[\x{1F5B1}-\x{1F5B2}]|\x{1F5BC}|[\x{1F5C2}-\x{1F5C4}]|[\x{1F5D1}-\x{1F5D3}]|[\x{1F5DC}-\x{1F5DE}]|\x{1F5E1}|\x{1F5E3}|\x{1F5E8}|\x{1F5EF}|\x{1F5F3}|\x{1F5FA}|[\x{1F5FB}-\x{1F5FF}]|\x{1F600}|[\x{1F601}-\x{1F610}]|\x{1F611}|[\x{1F612}-\x{1F614}]|\x{1F615}|\x{1F616}|\x{1F617}|\x{1F618}|\x{1F619}|\x{1F61A}|\x{1F61B}|[\x{1F61C}-\x{1F61E}]|\x
{1F61F}|[\x{1F620}-\x{1F625}]|[\x{1F626}-\x{1F627}]|[\x{1F628}-\x{1F62B}]|\x{1F62C}|\x{1F62D}|[\x{1F62E}-\x{1F62F}]|[\x{1F630}-\x{1F633}]|\x{1F634}|[\x{1F635}-\x{1F640}]|[\x{1F641}-\x{1F642}]|[\x{1F643}-\x{1F644}]|[\x{1F645}-\x{1F64F}]|[\x{1F680}-\x{1F6C5}]|[\x{1F6CB}-\x{1F6CF}]|\x{1F6D0}|[\x{1F6D1}-\x{1F6D2}]|[\x{1F6E0}-\x{1F6E5}]|\x{1F6E9}|[\x{1F6EB}-\x{1F6EC}]|\x{1F6F0}|\x{1F6F3}|[\x{1F6F4}-\x{1F6F6}]|[\x{1F6F7}-\x{1F6F8}]|[\x{1F910}-\x{1F918}]|[\x{1F919}-\x{1F91E}]|\x{1F91F}|[\x{1F920}-\x{1F927}]|[\x{1F928}-\x{1F92F}]|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F93A}]|[\x{1F93C}-\x{1F93E}]|[\x{1F940}-\x{1F945}]|[\x{1F947}-\x{1F94B}]|\x{1F94C}|[\x{1F950}-\x{1F95E}]|[\x{1F95F}-\x{1F96B}]|[\x{1F980}-\x{1F984}]|[\x{1F985}-\x{1F991}]|[\x{1F992}-\x{1F997}]|\x{1F9C0}|[\x{1F9D0}-\x{1F9E6}])\x{FE0F}?)'; $hashtagRegexp = '/'. -
ravisorg revised this gist
Mar 30, 2017 . 1 changed file with 1 addition and 1 deletion. There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -11,7 +11,7 @@ // Emoji are spread throughout the Unicode codespace and can contain modifiers. This is one of // those "don't reinvent the wheel" type things, and was appropriated / liberated from // https://github.com/gmac/gemoji-parser/blob/master/output/rx_unicode.rb $emojiRegexp = '(?:(?:\x{261D}|\x{26F9}|[\x{270A}-\x{270B}]|[\x{270C}-\x{270D}]|\x{1F385}|[\x{1F3C2}-\x{1F3C4}]|\x{1F3C7}|\x{1F3CA}|[\x{1F3CB}-\x{1F3CC}]|[\x{1F442}-\x{1F443}]|[\x{1F446}-\x{1F450}]|[\x{1F466}-\x{1F469}]|\x{1F46E}|[\x{1F470}-\x{1F478}]|\x{1F47C}|[\x{1F481}-\x{1F483}]|[\x{1F485}-\x{1F487}]|\x{1F4AA}|[\x{1F574}-\x{1F575}]|\x{1F57A}|\x{1F590}|[\x{1F595}-\x{1F596}]|[\x{1F645}-\x{1F647}]|[\x{1F64B}-\x{1F64F}]|\x{1F6A3}|[\x{1F6B4}-\x{1F6B6}]|\x{1F6C0}|\x{1F6CC}|\x{1F918}|[\x{1F919}-\x{1F91C}]|\x{1F91E}|\x{1F91F}|\x{1F926}|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F939}]|[\x{1F93D}-\x{1F93E}]|[\x{1F9D1}-\x{1F9DD}])(?:[\x{1F3FB}-\x{1F3FF}])?|(?:[\x{231A}-\x{231B}]|[\x{23E9}-\x{23EC}]|\x{23F0}|\x{23F3}|[\x{25FD}-\x{25FE}]|[\x{2614}-\x{2615}]|[\x{2648}-\x{2653}]|\x{267F}|\x{2693}|\x{26A1}|[\x{26AA}-\x{26AB}]|[\x{26BD}-\x{26BE}]|[\x{26C4}-\x{26C5}]|\x{26CE}|\x{26D4}|\x{26EA}|[\x{26F2}-\x{26F3}]|\x{26F5}|\x{26FA}|\x{26FD}|\x{2705}|[\x{270A}-\x{270B}]|\x{2728}|\x{274C}|\x{274E}|[\x{2753}-\x{2755}]|\x{2757}|[\x{2795}-\x{2797}]|\x{27B0}|\x{27BF}|[\x{2B1B}-\x{2B1C}]|\x{2B50}|\x{2B55}|\x{1F004}|\x{1F0CF}|\x{1F18E}|[\x{1F191}-\x{1F19A}]|[\x{1F1E6}-\x{1F1FF}]|\x{1F201}|\x{1F21A}|\x{1F22F}|[\x{1F232}-\x{1F236}]|[\x{1F238}-\x{1F23A}]|[\x{1F250}-\x{1F251}]|[\x{1F300}-\x{1F320}]|[\x{1F32D}-\x{1F32F}]|[\x{1F330}-\x{1F335}]|[\x{1F337}-\x{1F37C}]|[\x{1F37E}-\x{1F37F}]|[\x{1F380}-\x{1F393}]|[\
x{1F3A0}-\x{1F3C4}]|\x{1F3C5}|[\x{1F3C6}-\x{1F3CA}]|[\x{1F3CF}-\x{1F3D3}]|[\x{1F3E0}-\x{1F3F0}]|\x{1F3F4}|[\x{1F3F8}-\x{1F3FF}]|[\x{1F400}-\x{1F43E}]|\x{1F440}|[\x{1F442}-\x{1F4F7}]|\x{1F4F8}|[\x{1F4F9}-\x{1F4FC}]|\x{1F4FF}|[\x{1F500}-\x{1F53D}]|[\x{1F54B}-\x{1F54E}]|[\x{1F550}-\x{1F567}]|\x{1F57A}|[\x{1F595}-\x{1F596}]|\x{1F5A4}|[\x{1F5FB}-\x{1F5FF}]|\x{1F600}|[\x{1F601}-\x{1F610}]|\x{1F611}|[\x{1F612}-\x{1F614}]|\x{1F615}|\x{1F616}|\x{1F617}|\x{1F618}|\x{1F619}|\x{1F61A}|\x{1F61B}|[\x{1F61C}-\x{1F61E}]|\x{1F61F}|[\x{1F620}-\x{1F625}]|[\x{1F626}-\x{1F627}]|[\x{1F628}-\x{1F62B}]|\x{1F62C}|\x{1F62D}|[\x{1F62E}-\x{1F62F}]|[\x{1F630}-\x{1F633}]|\x{1F634}|[\x{1F635}-\x{1F640}]|[\x{1F641}-\x{1F642}]|[\x{1F643}-\x{1F644}]|[\x{1F645}-\x{1F64F}]|[\x{1F680}-\x{1F6C5}]|\x{1F6CC}|\x{1F6D0}|[\x{1F6D1}-\x{1F6D2}]|[\x{1F6EB}-\x{1F6EC}]|[\x{1F6F4}-\x{1F6F6}]|[\x{1F6F7}-\x{1F6F8}]|[\x{1F910}-\x{1F918}]|[\x{1F919}-\x{1F91E}]|\x{1F91F}|[\x{1F920}-\x{1F927}]|[\x{1F928}-\x{1F92F}]|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F93A}]|[\x{1F93C}-\x{1F93E}]|[\x{1F940}-\x{1F945}]|[\x{1F947}-\x{1F94B}]|\x{1F94C}|[\x{1F950}-\x{1F95E}]|[\x{1F95F}-\x{1F96B}]|[\x{1F980}-\x{1F984}]|[\x{1F985}-\x{1F991}]|[\x{1F992}-\x{1F997}]|\x{1F9C0}|[\x{1F9D0}-\x{1F9E6}])|(?:\x{0023}|\x{002A}|[\x{0030}-\x{0039}]|\x{00A9}|\x{00AE}|\x{203C}|\x{2049}|\x{2122}|\x{2139}|[\x{2194}-\x{2199}]|[\x{21A9}-\x{21AA}]|[\x{231A}-\x{231B}]|\x{2328}|\x{23CF}|[\x{23E9}-\x{23F3}]|[\x{23F8}-\x{23FA}]|\x{24C2}|[\x{25AA}-\x{25AB}]|\x{25B6}|\x{25C0}|[\x{25FB}-\x{25FE}]|[\x{2600}-\x{2604}]|\x{260E}|\x{2611}|[\x{2614}-\x{2615}]|\x{2618}|\x{261D}|\x{2620}|[\x{2622}-\x{2623}]|\x{2626}|\x{262A}|[\x{262E}-\x{262F}]|[\x{2638}-\x{263A}]|\x{2640}|\x{2642}|[\x{2648}-\x{2653}]|\x{2660}|\x{2663}|[\x{2665}-\x{2666}]|\x{2668}|\x{267B}|\x{267F}|[\x{2692}-\x{2697}]|\x{2699}|[\x{269B}-\x{269C}]|[\x{26A0}-\x{26A1}]|[\x{26AA}-\x{26AB}]|[\x{26B0}-\x{26B1}]|[\x{26BD}-\x{26BE}]|[\x{26C4}-\x{26C5}]|\x{26C8}|\x{26CE}|\x{26CF}|\x{26D1}|[\x{26D3}-\x{26D4}]|
[\x{26E9}-\x{26EA}]|[\x{26F0}-\x{26F5}]|[\x{26F7}-\x{26FA}]|\x{26FD}|\x{2702}|\x{2705}|[\x{2708}-\x{2709}]|[\x{270A}-\x{270B}]|[\x{270C}-\x{270D}]|\x{270F}|\x{2712}|\x{2714}|\x{2716}|\x{271D}|\x{2721}|\x{2728}|[\x{2733}-\x{2734}]|\x{2744}|\x{2747}|\x{274C}|\x{274E}|[\x{2753}-\x{2755}]|\x{2757}|[\x{2763}-\x{2764}]|[\x{2795}-\x{2797}]|\x{27A1}|\x{27B0}|\x{27BF}|[\x{2934}-\x{2935}]|[\x{2B05}-\x{2B07}]|[\x{2B1B}-\x{2B1C}]|\x{2B50}|\x{2B55}|\x{3030}|\x{303D}|\x{3297}|\x{3299}|\x{1F004}|\x{1F0CF}|[\x{1F170}-\x{1F171}]|\x{1F17E}|\x{1F17F}|\x{1F18E}|[\x{1F191}-\x{1F19A}]|[\x{1F1E6}-\x{1F1FF}]|[\x{1F201}-\x{1F202}]|\x{1F21A}|\x{1F22F}|[\x{1F232}-\x{1F23A}]|[\x{1F250}-\x{1F251}]|[\x{1F300}-\x{1F320}]|\x{1F321}|[\x{1F324}-\x{1F32C}]|[\x{1F32D}-\x{1F32F}]|[\x{1F330}-\x{1F335}]|\x{1F336}|[\x{1F337}-\x{1F37C}]|\x{1F37D}|[\x{1F37E}-\x{1F37F}]|[\x{1F380}-\x{1F393}]|[\x{1F396}-\x{1F397}]|[\x{1F399}-\x{1F39B}]|[\x{1F39E}-\x{1F39F}]|[\x{1F3A0}-\x{1F3C4}]|\x{1F3C5}|[\x{1F3C6}-\x{1F3CA}]|[\x{1F3CB}-\x{1F3CE}]|[\x{1F3CF}-\x{1F3D3}]|[\x{1F3D4}-\x{1F3DF}]|[\x{1F3E0}-\x{1F3F0}]|[\x{1F3F3}-\x{1F3F5}]|\x{1F3F7}|[\x{1F3F8}-\x{1F3FF}]|[\x{1F400}-\x{1F43E}]|\x{1F43F}|\x{1F440}|\x{1F441}|[\x{1F442}-\x{1F4F7}]|\x{1F4F8}|[\x{1F4F9}-\x{1F4FC}]|\x{1F4FD}|\x{1F4FF}|[\x{1F500}-\x{1F53D}]|[\x{1F549}-\x{1F54A}]|[\x{1F54B}-\x{1F54E}]|[\x{1F550}-\x{1F567}]|[\x{1F56F}-\x{1F570}]|[\x{1F573}-\x{1F579}]|\x{1F57A}|\x{1F587}|[\x{1F58A}-\x{1F58D}]|\x{1F590}|[\x{1F595}-\x{1F596}]|\x{1F5A4}|\x{1F5A5}|\x{1F5A8}|[\x{1F5B1}-\x{1F5B2}]|\x{1F5BC}|[\x{1F5C2}-\x{1F5C4}]|[\x{1F5D1}-\x{1F5D3}]|[\x{1F5DC}-\x{1F5DE}]|\x{1F5E1}|\x{1F5E3}|\x{1F5E8}|\x{1F5EF}|\x{1F5F3}|\x{1F5FA}|[\x{1F5FB}-\x{1F5FF}]|\x{1F600}|[\x{1F601}-\x{1F610}]|\x{1F611}|[\x{1F612}-\x{1F614}]|\x{1F615}|\x{1F616}|\x{1F617}|\x{1F618}|\x{1F619}|\x{1F61A}|\x{1F61B}|[\x{1F61C}-\x{1F61E}]|\x{1F61F}|[\x{1F620}-\x{1F625}]|[\x{1F626}-\x{1F627}]|[\x{1F628}-\x{1F62B}]|\x{1F62C}|\x{1F62D}|[\x{1F62E}-\x{1F62F}]|[\x{1F630}-\x{1F633}]|\x{1F634}|[\x{1F635}-\x{1F640}]|[\x{1F
641}-\x{1F642}]|[\x{1F643}-\x{1F644}]|[\x{1F645}-\x{1F64F}]|[\x{1F680}-\x{1F6C5}]|[\x{1F6CB}-\x{1F6CF}]|\x{1F6D0}|[\x{1F6D1}-\x{1F6D2}]|[\x{1F6E0}-\x{1F6E5}]|\x{1F6E9}|[\x{1F6EB}-\x{1F6EC}]|\x{1F6F0}|\x{1F6F3}|[\x{1F6F4}-\x{1F6F6}]|[\x{1F6F7}-\x{1F6F8}]|[\x{1F910}-\x{1F918}]|[\x{1F919}-\x{1F91E}]|\x{1F91F}|[\x{1F920}-\x{1F927}]|[\x{1F928}-\x{1F92F}]|\x{1F930}|[\x{1F931}-\x{1F932}]|[\x{1F933}-\x{1F93A}]|[\x{1F93C}-\x{1F93E}]|[\x{1F940}-\x{1F945}]|[\x{1F947}-\x{1F94B}]|\x{1F94C}|[\x{1F950}-\x{1F95E}]|[\x{1F95F}-\x{1F96B}]|[\x{1F980}-\x{1F984}]|[\x{1F985}-\x{1F991}]|[\x{1F992}-\x{1F997}]|\x{1F9C0}|[\x{1F9D0}-\x{1F9E6}])\x{FE0F}?)'; $hashtagRegexp = '/'. -
ravisorg revised this gist
Mar 26, 2017 . 1 changed file with 2 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +1,7 @@ <?php // You can find the test data file at https://www.ravis.org/hashtag-test.zip // You're gonna want to have your console output supporting UTF8 before running this, or you're // gonna see a bunch of ? in the output... -
ravisorg created this gist
Mar 26, 2017 .There are no files selected for viewing
<?php
// Hashtag detection test harness.
//
// Builds a hashtag regular expression, runs it over every post in hashtag-test.csv (column 1 is
// the post ID, column 2 is the post text, and every remaining column is a hashtag ADN reported
// for that post), and prints each post where our detection disagrees, followed by a summary.

// You're gonna want to have your console output supporting UTF8 before running this, or you're
// gonna see a bunch of ? in the output...

// For curiosity's sake, post number 693,847 is an emoji hashtag: #(heart)

// Emoji are spread throughout the Unicode codespace and can contain modifiers. This is one of
// those "don't reinvent the wheel" type things, and was appropriated / liberated from
// https://github.com/gmac/gemoji-parser/blob/master/output/rx_unicode.rb
//
// The pattern is one big alternation of code points (plus two-code-point flag pairs and keycap
// sequences), optionally followed by U+FE0F. It is only split across lines for readability;
// the concatenated string is a single alternation.
$emojiRegexp = '(?:'.
    '\x{1f602}|\x{2665}|\x{2764}|\x{1f60d}|\x{1f612}|\x{1f60a}|\x{1f62d}|\x{1f44c}|'.
    '\x{263a}|\x{1f618}|\x{1f495}|\x{1f629}|\x{1f614}|\x{1f60f}|\x{1f601}|\x{1f633}|'.
    '\x{1f64f}|\x{1f64c}|\x{1f44d}|\x{270c}|\x{1f609}|\x{1f60c}|\x{1f648}|\x{1f481}|'.
    '\x{1f60e}|\x{1f3b6}|\x{1f440}|\x{1f604}|\x{1f634}|\x{1f611}|\x{1f61c}|\x{1f622}|'.
    '\x{1f61e}|\x{1f4af}|\x{1f496}|\x{1f60b}|\x{1f44f}|\x{1f49c}|\x{2728}|\x{1f615}|'.
    '\x{1f64a}|\x{270b}|\x{1f499}|\x{1f610}|\x{1f605}|\x{1f494}|\x{1f62a}|\x{1f497}|'.
    '\x{1f48b}|\x{1f49e}|\x{1f631}|\x{1f498}|\x{1f621}|\x{1f62b}|\x{2600}|\x{1f44a}|'.
    '\x{1f603}|\x{1f338}|\x{1f608}|\x{1f61d}|\x{1f624}|\x{1f44b}|\x{1f339}|\x{2714}|'.
    '\x{1f4aa}|\x{1f389}|\x{1f637}|\x{1f49b}|\x{1f623}|\x{1f49a}|\x{1f63b}|\x{1f606}|'.
    '\x{1f449}|\x{1f613}|\x{1f616}|\x{1f480}|\x{1f61a}|\x{1f31a}|\x{1f52b}|\x{1f525}|'.
    '\x{1f64b}|\x{1f620}|\x{1f645}|\x{1f625}|\x{1f62c}|\x{270a}|\x{1f3a7}|\x{1f44e}|'.
    '\x{1f483}|\x{1f61b}|\x{1f451}|\x{1f493}|\x{1f448}|\x{1f600}|\x{1f636}|\x{1f31f}|'.
    '\x{25b6}|\x{1f427}|\x{1f4a9}|\x{2611}|\x{1f6b6}|\x{1f630}|\x{1f4ab}|\x{1f3b5}|'.
    '\x{1f355}|\x{1f646}|\x{1f48e}|\x{1f4a5}|\x{1f450}|\x{2708}|\x{1f4a4}|\x{1f607}|'.
    '\x{1f647}|\x{1f31e}|\x{1f47d}|\x{1f4ad}|\x{274c}|\x{1f485}|\x{1f380}|\x{1f47c}|'.
    '\x{1f639}|\x{27a1}|\x{1f340}|\x{1f46f}|\x{1f445}|\x{2744}|\x{1f61f}|\x{1f30d}|'.
    '\x{1f447}|\x{1f628}|\x{2601}|\x{1f33a}|\x{25c0}|\x{261d}|\x{2663}|\x{2716}|'.
    '\x{1f4a6}|\x{1f388}|\x{2757}|\x{1f343}|\x{2b50}|\x{1f381}|\x{1f3c3}|\x{1f30e}|'.
    '\x{2705}|\x{1f3a4}|\x{1f384}|\x{1f64d}|\x{1f319}|\x{1f49d}|\x{1f46b}|\x{2668}|'.
    '\x{1f649}|\x{1f534}|\x{1f436}|\x{1f446}|\x{1f48f}|\x{1f635}|\x{1f33f}|\x{1f334}|'.
    '\x{1f47b}|\x{26bd}|\x{1f37b}|\x{1f33b}|\x{1f4b0}|\x{1f3c0}|\x{203c}|\x{1f619}|'.
    '\x{1f46d}|\x{1f486}|\x{1f300}|\x{1f478}|\x{1f62f}|\x{1f4a8}|\x{1f490}|\x{1f62e}|'.
    '\x{1f632}|\x{1f48d}|\x{1f335}|\x{1f385}|\x{1f31d}|\x{1f431}|\x{1f63f}|\x{1f627}|'.
    '\x{1f382}|\x{2615}|\x{1f49f}|\x{1f337}|\x{26c4}|\x{1f52a}|\x{1f3a5}|\x{1f4d5}|'.
    '\x{1f308}|\x{1f38a}|\x{260e}|\x{1f4b8}|\x{1f3b8}|\x{1f30f}|\x{2b55}|\x{1f1fa}\x{1f1f8}|'.
    '\x{1f3c8}|\x{2614}|\x{1f4a3}|\x{26a1}|\x{1f346}|\x{1f47f}|\x{2b05}|\x{1f626}|'.
    '\x{2666}|\x{1f63c}|\x{1f491}|\x{1f47e}|\x{1f63d}|\x{1f476}|\x{1f638}|\x{2660}|'.
    '\x{0031}\x{fe0f}?\x{20e3}|\x{1f680}|\x{1f34c}|\x{1f4f7}|\x{1f341}|\x{1f437}|\x{1f48c}|\x{25aa}|'.
    '\x{1f640}|\x{1f43c}|\x{1f33c}|\x{26c5}|\x{1f4bf}|\x{1f30a}|\x{1f444}|\x{1f374}|'.
    '\x{1f3ae}|\x{0032}\x{fe0f}?\x{20e3}|\x{1f42f}|\x{1f463}|\x{1f369}|\x{1f6ac}|\x{1f422}|\x{1f35f}|'.
    '\x{1f52e}|\x{1f4a2}|\x{1f482}|\x{1f37a}|\x{1f3bc}|\x{1f4f1}|\x{1f197}|\x{1f379}|'.
    '\x{1f3c6}|\x{1f51e}|\x{1f342}|\x{1f697}|\x{1f354}|\x{1f36d}|\x{1f617}|\x{1f349}|'.
    '\x{1f377}|\x{1f468}|\x{1f4f2}|\x{1f370}|\x{1f36a}|\x{1f6a8}|\x{1f366}|\x{1f383}|'.
    '\x{1f51d}|\x{1f43b}|\x{1f36b}|\x{1f489}|\x{1f438}|\x{1f42c}|\x{267b}|\x{1f63a}|'.
    '\x{0033}\x{fe0f}?\x{20e3}|\x{1f479}|\x{1f41d}|\x{1f433}|\x{1f64e}|\x{26a0}|\x{1f6ab}|\x{1f30c}|'.
    '\x{1f412}|\x{1f487}|\x{1f423}|\x{1f430}|\x{1f378}|\x{1f63e}|\x{1f4b5}|\x{1f40d}|'.
    '\x{1f3ca}|\x{1f43e}|\x{1f192}|\x{1f353}|\x{1f418}|\x{1f4fa}|\x{1f351}|\x{1f469}|'.
    '\x{1f4da}|\x{1f33e}|\x{1f390}|\x{1f311}|\x{1f315}|\x{1f3ac}|\x{2702}|\x{1f50a}|'.
    '\x{1f46a}|\x{1f352}|\x{1f4bb}|\x{1f198}|\x{1f425}|\x{1f475}|\x{1f1eb}\x{1f1f7}|\x{0034}\x{fe0f}?\x{20e3}|'.
    '\x{26be}|\x{2709}|\x{2753}|\x{1f34d}|\x{1f467}|\x{2b07}|\x{1f473}|\x{1f419}|'.
    '\x{1f4e2}|\x{1f4ac}|\x{1f4dd}|\x{1f460}|\x{1f698}|\x{1f484}|\x{1f1ee}\x{1f1f9}|\x{2693}|'.
    '\x{1f357}|\x{1f386}|\x{1f3e1}|\x{1f466}|\x{1f393}|\x{270f}|\x{1f459}|\x{1f519}|'.
    '\x{1f4d6}|\x{1f443}|\x{1f4de}|\x{1f510}|\x{1f45f}|\x{1f46c}|\x{1f371}|\x{1f239}|'.
    '\x{1f453}|\x{1f31b}|\x{1f48a}|\x{1f50b}|\x{1f34e}|\x{1f6bf}|\x{1f442}|\x{0035}\x{fe0f}?\x{20e3}|'.
    '\x{26aa}|\x{1f320}|\x{1f535}|\x{2197}|\x{1f35d}|\x{1f305}|\x{1f470}|\x{1f313}|'.
    '\x{1f6c5}|\x{1f428}|\x{1f312}|\x{1f474}|\x{1f435}|\x{1f347}|\x{1f420}|\x{1f314}|'.
    '\x{1f4a7}|\x{26ab}|\x{1f36c}|\x{1f4a1}|\x{1f1ec}\x{1f1e7}|\x{1f317}|\x{1f41f}|\x{1f365}|'.
    '\x{1f318}|\x{1f316}|\x{1f35c}|\x{1f31c}|\x{1f43d}|\x{2199}|\x{1f434}|\x{1f303}|'.
    '\x{1f3a8}|\x{1f3e0}|\x{2198}|\x{1f3ea}|\x{1f40b}|\x{1f539}|\x{2733}|\x{1f344}|'.
    '\x{1f457}|\x{1f5ff}|\x{1f414}|\x{1f34a}|\x{1f331}|\x{1f472}|\x{1f3c4}|\x{1f6ae}|'.
    '\x{1f367}|\x{1f699}|\x{1f332}|\x{1f333}|\x{1f3a3}|\x{231a}|\x{1f42d}|\x{1f42e}|'.
    '\x{1f373}|\x{1f195}|\x{2196}|\x{1f3a9}|\x{26fd}|\x{1f426}|\x{1f46e}|\x{1f3ad}|'.
    '\x{1f43a}|\x{1f47a}|\x{1f6b2}|\x{1f411}|\x{1f35e}|\x{1f364}|\x{1f511}|\x{1f6c0}|'.
    '\x{2755}|\x{1f37c}|\x{1f471}|\x{1f387}|\x{1f34b}|\x{23f0}|\x{2194}|\x{1f506}|'.
    '\x{1f518}|\x{23e9}|\x{1f375}|\x{1f193}|\x{2b06}|\x{0036}\x{fe0f}?\x{20e3}|\x{1f199}|\x{1f3be}|'.
    '\x{1f40e}|\x{24c2}|\x{1f1e9}\x{1f1ea}|\x{21aa}|\x{1f3c1}|\x{1f424}|\x{2195}|\x{1f528}|'.
    '\x{0037}\x{fe0f}?\x{20e3}|\x{26f3}|\x{1f512}|\x{1f50c}|\x{2049}|\x{1f1ea}\x{1f1f8}|\x{1f363}|\x{1f41b}|'.
    '\x{1f503}|\x{0030}\x{fe0f}?\x{20e3}|\x{1f35a}|\x{1f372}|\x{1f456}|\x{1f3c2}|\x{1f439}|\x{1f34f}|'.
    '\x{1f51c}|\x{27b0}|\x{1f531}|\x{1f492}|\x{1f41e}|\x{1f304}|\x{1f3af}|\x{2734}|'.
    '\x{264b}|\x{1f41a}|\x{1f356}|\x{1f4a0}|\x{21a9}|\x{1f33d}|\x{1f40a}|\x{2935}|'.
    '\x{1f464}|\x{1f454}|\x{1f40c}|\x{2747}|\x{1f368}|\x{1f413}|\x{1f3e5}|\x{1f5fd}|'.
    '\x{1f429}|\x{2712}|\x{1f38e}|\x{1f4e3}|\x{1f6a9}|\x{1f41c}|\x{1f538}|\x{1f4b2}|'.
    '\x{1f465}|\x{1f307}|\x{1f4ae}|\x{1f36f}|\x{1f6a3}|\x{1f35b}|\x{1f694}|\x{1f513}|'.
    '\x{1f53a}|\x{1f6aa}|\x{1f477}|\x{1f36e}|\x{2648}|\x{1f3b1}|\x{1f504}|\x{1f3eb}|'.
    '\x{2795}|\x{264f}|\x{1f45c}|\x{1f345}|\x{1f42a}|\x{2652}|\x{1f6bc}|\x{1f6bd}|'.
    '\x{3299}|\x{26d4}|\x{1f537}|\x{3297}|\x{1f3b9}|\x{1f455}|\x{1f408}|\x{1f3e2}|'.
    '\x{1f682}|\x{1f409}|\x{1f532}|\x{1f6ba}|\x{1f68c}|\x{1f517}|\x{1f536}|\x{1f42b}|'.
    '\x{1f410}|\x{1f4c0}|\x{1f306}|\x{26f5}|\x{1f3b3}|\x{1f30b}|\x{264c}|\x{1f501}|'.
    '\x{1f3e9}|\x{1f330}|\x{1f45e}|\x{1f194}|\x{1f4b3}|\x{1f3b2}|\x{1f53b}|\x{1f4cd}|'.
    '\x{1f3a2}|\x{2650}|\x{1f415}|\x{1f452}|\x{2651}|\x{1f488}|\x{1f432}|\x{1f407}|'.
    '\x{0038}\x{fe0f}?\x{20e3}|\x{264a}|\x{1f4f9}|\x{1f404}|\x{303d}|\x{0039}\x{fe0f}?\x{20e3}|\x{1f5fb}|\x{1f302}|'.
    '\x{264d}|\x{1f1ef}\x{1f1f5}|\x{2649}|\x{1f693}|\x{1f6b4}|\x{2796}|\x{25fe}|\x{1f350}|'.
    '\x{1f4ba}|\x{1f40f}|\x{264e}|\x{1f405}|\x{2653}|\x{1f696}|\x{1f514}|\x{1f361}|'.
    '\x{1f45b}|\x{1f309}|\x{1f417}|\x{1f4b7}|\x{1f310}|\x{1f4cc}|\x{1f3e4}|\x{2139}|'.
    '\x{1f4e6}|\x{2754}|\x{1f6a2}|\x{26ea}|\x{1f566}|\x{1f0cf}|\x{23f3}|\x{1f3c9}|'.
    '\x{274e}|\x{1f38d}|\x{1f4f4}|\x{1f301}|\x{1f400}|\x{1f6a6}|\x{1f392}|\x{1f416}|'.
    '\x{1f462}|\x{1f461}|\x{0023}\x{fe0f}?\x{20e3}|\x{1f3c7}|\x{1f1f7}\x{1f1fa}|\x{1f4d3}|\x{1f5fc}|\x{1f38b}|'.
    '\x{1f3bf}|\x{1f4fb}|\x{1f3bb}|\x{1f3a1}|\x{1f3b7}|\x{1f51f}|\x{1f4e9}|\x{1f4d2}|'.
    '\x{1f4bd}|\x{1f68d}|\x{23ea}|\x{1f691}|\x{1f3ec}|\x{1f362}|\x{231b}|\x{1f402}|'.
    '\x{1f38f}|\x{1f421}|\x{1f695}|\x{25fc}|\x{1f51b}|\x{26fa}|\x{1f530}|\x{1f3ba}|'.
    '\x{1f348}|\x{1f6b9}|\x{1f4b4}|\x{1f4bc}|\x{1f681}|\x{25ab}|\x{1f406}|\x{1f52f}|'.
    '\x{1f505}|\x{1f45a}|\x{1f4f6}|\x{1f3a0}|\x{1f4b6}|\x{1f3f0}|\x{1f533}|\x{1f38c}|'.
    '\x{1f6a4}|\x{1f19a}|\x{1f4c5}|\x{1f359}|\x{1f51a}|\x{1f55b}|\x{1f526}|\x{2b1c}|'.
    '\x{1f692}|\x{1f3bd}|\x{1f360}|\x{1f3aa}|\x{1f4d8}|\x{1f6be}|\x{25fb}|\x{1f250}|'.
    '\x{1f4db}|\x{2934}|\x{1f550}|\x{1f4f0}|\x{1f22f}|\x{1f403}|\x{1f551}|\x{1f52d}|'.
    '\x{1f196}|\x{1f529}|\x{1f3e7}|\x{1f6a7}|\x{1f552}|\x{1f50d}|\x{1f50e}|\x{1f69a}|'.
    '\x{1f4c6}|\x{1f4d7}|\x{1f6ad}|\x{1f458}|\x{1f401}|\x{1f391}|\x{1f683}|\x{1f4e1}|'.
    '\x{1f233}|\x{1f507}|\x{1f376}|\x{1f1f0}\x{1f1f7}|\x{1f3e6}|\x{1f4fc}|\x{1f3e8}|\x{1f3b0}|'.
    '\x{1f3a6}|\x{1f69c}|\x{1f687}|\x{1f3ee}|\x{1f4ee}|\x{1f689}|\x{267f}|\x{1f4dc}|'.
    '\x{1f4ec}|\x{26f2}|\x{1f4c8}|\x{1f527}|\x{1f684}|\x{1f4f3}|\x{1f68a}|\x{23ec}|'.
    '\x{1f553}|\x{2b1b}|\x{1f23a}|\x{1f4ce}|\x{1f358}|\x{1f53d}|\x{1f236}|\x{1f238}|'.
    '\x{1f18e}|\x{1f6c1}|\x{1f21a}|\x{1f6b7}|\x{1f53c}|\x{1f4e7}|\x{1f4f5}|\x{1f4cb}|'.
    '\x{1f69b}|\x{1f3b4}|\x{1f4c9}|\x{1f554}|\x{1f6a5}|\x{1f4c4}|\x{1f45d}|\x{1f558}|'.
    '\x{1f1e8}\x{1f1f3}|\x{25fd}|\x{1f502}|\x{1f191}|\x{1f232}|\x{1f235}|\x{1f690}|\x{1f559}|'.
    '\x{1f3ef}|\x{1f522}|\x{1f557}|\x{1f685}|\x{1f004}|\x{1f3ab}|\x{1f17f}|\x{1f515}|'.
    '\x{1f52c}|\x{1f4e5}|\x{1f4c3}|\x{1f4d1}|\x{1f3ed}|\x{1f234}|\x{1f4d9}|\x{1f55a}|'.
    '\x{1f4b1}|\x{1f516}|\x{1f4d4}|\x{1f6b5}|\x{1f5fe}|\x{1f4ca}|\x{1f6bb}|\x{1f3e3}|'.
    '\x{1f6b8}|\x{27bf}|\x{1f556}|\x{1f4be}|\x{1f4e0}|\x{1f4b9}|\x{1f4d0}|\x{1f4df}|'.
    '\x{1f555}|\x{1f251}|\x{1f68e}|\x{1f686}|\x{1f4cf}|\x{1f6b0}|\x{1f4e8}|\x{1f55c}|'.
    '\x{1f4ef}|\x{1f509}|\x{1f50f}|\x{1f4eb}|\x{1f68f}|\x{1f4c7}|\x{1f524}|\x{1f564}|'.
    '\x{1f55d}|\x{1f68b}|\x{1f508}|\x{1f688}|\x{1f565}|\x{1f567}|\x{1f4c2}|\x{1f69d}|'.
    '\x{1f4c1}|\x{1f500}|\x{1f520}|\x{1f523}|\x{1f4e4}|\x{1f55f}|\x{1f4ea}|\x{26ce}|'.
    '\x{1f55e}|\x{1f562}|\x{23eb}|\x{1f521}|\x{1f69e}|\x{1f560}|\x{1f4ed}|\x{2797}|'.
    '\x{1f201}|\x{1f6a1}|\x{1f563}|\x{1f561}|\x{1f6af}|\x{1f6a0}|\x{1f6b1}|\x{1f6b3}|'.
    '\x{1f69f}|\x{1f6c2}|\x{1f6c3}|\x{1f6c4}|\x{1f202}|\x{1f170}|\x{00a9}|\x{1f171}|'.
    '\x{1f237}|\x{3030}|\x{00ae}|\x{2122}|\x{1f17e}'.
    ')\x{fe0f}?';

$hashtagRegexp = '/'.
    // Start with a pound sign (or a unicode variant of that).
    // NOTE(review): this class used to list "#" twice, which made the "unicode variant" half of
    // the comment a no-op. U+FF03 FULLWIDTH NUMBER SIGN is assumed to be the intended second
    // character - confirm against the upstream gist.
    '[#\x{FF03}]'.
    // capture the entire hashtag
    '('.
        // Note that twitter requires the first character to be a non-numeric one (so #2 would
        // not be a hashtag, for example). ADN DID allow #2 as a hashtag. So I'm supporting the
        // easier/more efficient version (#2 is a valid hashtag). That can be easily changed if
        // desired.
        // Followed by one or more valid hashtag characters...
        '(?:'.
            '\p{L}|'.           // Any language letter
            '\p{M}|'.           // Any language letter modifier
            $emojiRegexp.'|'.   // Any valid emoji
            '\d|'.              // Any number
            '_'.                // Underscore
        ')+'.
    // end capturing the hashtag
    ')'.
    // use unicode modifiers / unicode strings
    '/u';

// Load test data scraped from ADN
$csvPath = 'hashtag-test.csv';
$csvFile = fopen($csvPath,'rb');
if ($csvFile === false) {
    // Without a valid handle the read loop below cannot work, so stop here.
    file_put_contents('php://stderr',"Unable to open $csvPath\n");
    exit(1);
}

$passed = 0;
$failed = 0;
$testNumber = 0;

// fgetcsv() returns false at end of file (or on a read error), which ends the loop.
while (($hashtags = fgetcsv($csvFile)) !== false) {

    // Ignore blank lines. fgetcsv() reports a blank line as an array holding a single null,
    // which is truthy, so a plain "!$hashtags" check would count it as a (passing) test.
    if ($hashtags === array(null)) {
        continue;
    }

    // Post ID is first column, separate it from the hashtags.
    $postId = array_shift($hashtags);

    // Text of the post is the second column, separate it from the hashtags. A row with only
    // one column yields null here, so normalise to a string before running regexes on it.
    $text = (string) array_shift($hashtags);

    // This is a cheat for now - remove URLs that contain # from the text so we don't get hung up
    // on something that is apparently already handled in the pnut code.
    $textBefore = $text;
    $text = preg_replace('%\b(https?://|ftp://|mailto:)?[a-z0-9\-\.]+\.[a-z]{2,}/[^\s]*#[^\s]+%i','',$text);

    // Convert test hashtags to lower case
    $hashtags = array_map('mb_strtolower',$hashtags);

    // Run our own hashtag detection on the text to see if it matches what ADN said it should be.
    $pnutHashtags = array();
    if (preg_match_all($hashtagRegexp,$text,$matches)) {
        $pnutHashtags = array_map('mb_strtolower',$matches[1]);
    }

    // Sort them so we can reliably compare them both. SORT_STRING plus the strict comparison
    // below keeps numeric-looking tags (e.g. "1e3" and "1000") from being treated as equal.
    sort($pnutHashtags,SORT_STRING);
    sort($hashtags,SORT_STRING);

    // If we failed, dump to output (should perhaps be stderr).
    if ($hashtags !== $pnutHashtags) {
        print "Failed test number $testNumber\n";
        print " ADN Post: ".number_format((float) $postId)."\n";
        print " Text: ".$text."\n";
        if ($text != $textBefore) {
            print " Orig Text: ".$textBefore."\n";
        }
        print " ADN Hashtags: ".implode(', ',$hashtags)."\n";
        print "PNUT Hashtags: ".implode(', ',$pnutHashtags)."\n\n";
        $failed++;
    }
    else {
        $passed++;
    }

    // Increment, rinse, wipe, repeat.
    $testNumber++;

}

// Clean up
fclose($csvFile);

// Report. Guard the percentages so an empty test file does not divide by zero.
$passedPercent = $testNumber > 0 ? ($passed/$testNumber)*100 : 0;
$failedPercent = $testNumber > 0 ? ($failed/$testNumber)*100 : 0;
print "Ran ".number_format($testNumber)." tests.\n";
print "Passed ".number_format($passed)." (".number_format($passedPercent,2)."%)\n";
print "Failed ".number_format($failed)." (".number_format($failedPercent,2)."%)\n";