Skip to content

Instantly share code, notes, and snippets.

@clintongormley
Created April 23, 2014 10:48
Show Gist options
  • Select an option

  • Save clintongormley/11210513 to your computer and use it in GitHub Desktop.

Select an option

Save clintongormley/11210513 to your computer and use it in GitHub Desktop.

Revisions

  1. clintongormley created this gist Apr 23, 2014.
    77 changes: 77 additions & 0 deletions emoticons.pl
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,77 @@
    #!/usr/bin/env perl

    use strict;
    use warnings;
    use utf8;

    # Filter names, in the order their lines appear in the DATA section; the
    # analyzer lists its char filters in this same order.
    my @order;
    # Filter name => "pattern_replace" char filter definition built for it.
    my %char_filters;

    use Regexp::Trie;
    use JSON::XS;
    use Data::Dump qw(pp);

    # Build one "pattern_replace" char filter per line of the DATA section.
    # Each line has the form "<name> <emoticon> <emoticon> ..."; the filter
    # built for it replaces any of those emoticons with " <name> ".
    while ( my $line = <DATA> ) {
        chomp $line;

        # split ' ' (awk mode) discards leading whitespace, so an indented
        # line cannot produce an empty filter name.
        my ( $name, @emoticons ) = split ' ', $line;
        next unless defined $name;    # blank or whitespace-only line

        # A line with a name but no emoticons has nothing to match; skip it
        # instead of emitting a filter for it.
        if ( !@emoticons ) {
            warn "No emoticons listed for '$name'; skipping\n";
            next;
        }

        # Merge all alternatives for this name into a single regex.
        my $trie = Regexp::Trie->new;
        $trie->add($_) for @emoticons;

        # Stringifying the compiled regex wraps the pattern in Perl-only flag
        # syntax: "(?^u:...)" or "(?^:...)" on perl >= 5.14, "(?-xism:...)" on
        # older perls.  Strip the wrapper and its closing paren together, and
        # only when the wrapper is actually present, so the emitted pattern
        # always keeps balanced parentheses.
        my $pattern = '' . $trie->regexp;
        $pattern =~ s{ \A \( \? \^? [a-z]* (?: - [a-z]+ )? : (.*) \) \z }{$1}xs;

        $char_filters{$name} = {
            type        => 'pattern_replace',
            pattern     => $pattern,
            replacement => " $name ",
        };
        push @order, $name;
    }

    # Settings for the "emoticons" analyzer: the char filters run in DATA
    # order, then the standard tokenizer and the lowercase token filter.
    my %emoticon_analyzer = (
        tokenizer   => 'standard',
        filter      => ['lowercase'],
        char_filter => [@order],
    );

    # Every generated char filter definition, keyed by filter name.
    my %filter_definitions;
    @filter_definitions{@order} = @char_filters{@order};

    # Top-level analysis structure that gets serialised to JSON.
    my %analysis = (
        analyzer    => { emoticons => \%emoticon_analyzer },
        char_filter => \%filter_definitions,
    );

    # Serialise the settings as pretty-printed JSON.  The encoder returns a
    # character string, so it is converted to UTF-8 bytes before printing.
    my $encoder = JSON::XS->new->pretty;
    my $json    = $encoder->encode( \%analysis );
    utf8::encode($json);
    print $json, "\n";




    __DATA__
    e_smiley :-) :) :o) :] :3 :c) :> =] 8) =) :} :^) :っ) :-)) :-))) :-)))) :)) :))) :))))
    e_laugh :-D :D 8-D 8D x-D xD X-D XD =-D =D =-3 =3 B^D
    e_sad >:[ :-( :( :-c :c :-< :っC :< :-[ :[ :{
    e_wink_frown ;(
    e_angry :-|| :@ >:(
    e_cry :'-( :'(
    e_happy_tears :'-) :')
    e_disgust D:< D: D8 D; D= DX v.v D-':
    e_surprise >:O :-O :O :-o :o 8-0 O_O o-o O_o o_O o_o O-O
    e_kiss :* :^* '}{'
    e_wink ;-) ;) *-) *) ;-] ;] ;D ;^) :-,
    e_tongue >:P :-P :P X-P x-p xp XP :-p :p =p :-Þ :Þ :þ :-þ :-b :b d:
    e_skeptical >:\ >:/ :-/ :-. :/ :\ =/ =\ :L =L :S >.<
    e_neutral :| :-|
    e_embarrassed :$ :S
    e_silent :-X :X :-# :#
    e_halo O:-) 0:-3 0:3 0:-) 0:) 0;^)
    e_evil >:) >;) >:-)
    e_devil }:-) }:) 3:-) 3:)
    e_highfive o/\o ^5 >_>^ ^<_<
    e_disapprove ಠ_ಠ
    e_cheer \o/
    e_heart <3
    e_broken_heart </3