-
-
Save nickfnblum/c6b5b7661f013a5128033bb71a8979ac to your computer and use it in GitHub Desktop.
Revisions
-
juster created this gist
Jul 12, 2011 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,44 @@ #!/usr/bin/perl use warnings; use strict; my $WORDCOUNT = 5; my $LANGCOUNT = 50; my @FEATS = ('a'..'d'); # create a random comma separated string of features sub genfeats { my (@feats, %seen); my $count = int rand @FEATS; for my $i (0 .. $count) { my $feat = $FEATS[int rand @FEATS]; push @feats, $feat unless $seen{$feat}++; } return join q{,}, sort @feats; } printf "%s\n", join q{}, map { "\tword$_" } 1 .. $WORDCOUNT; my ($repcount, @repme) = (0); for my $i (1 .. $LANGCOUNT) { my @feats; if ($repcount > 0) { @feats = @repme; --$repcount; } else { @feats = map genfeats, 1 .. $WORDCOUNT; } printf "%s\n", join qq{\t}, "lang$i", @feats; # repeat words/features every once and awhile if ((int rand 5) == 0) { $repcount = 1 + int rand 3; @repme = @feats; } } This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,69 @@ #!/usr/bin/perl use warnings; use strict; sub group { my ($langs_ref) = @_; my @grouped; insertlang(shift @$langs_ref, \@grouped) while @$langs_ref; return \@grouped; } sub insertlang { my ($lang, $langs_ref) = @_; my $i = findaffine($lang, $langs_ref); if ($i+1 >= @$langs_ref) { push @$langs_ref, $lang; } else { splice @$langs_ref, $i+1, 0, $lang; } return; } sub findaffine { my ($me, $them) = @_; my ($idx, $score) = (0, 0); for my $i (0 .. $#$them) { my $tmp = affinity($me, $them->[$i]); if ($tmp >= $score) { $idx = $i; $score = $tmp; } # >= so we append at the end of grouped languages } return $idx; } sub affinity { my ($llang, $rlang) = @_; my ($lwords, $rwords) = ($llang->[1], $rlang->[1]); my $score = 0; for my $i (0 .. $#$rwords) { for my $feat (keys %{$rwords->[$i]}) { ++$score if exists $lwords->[$i]{$feat}; } } return $score; } my @langs; my $words = <>; while (my $line = <>) { my (undef, @feats) = split /\s+/, $line; my $words; for my $featlist (@feats) { my $wordfeats; $wordfeats->{$_} = 1 for split /,/, $featlist; push @$words, $wordfeats; } push @langs, [ $line, $words ]; } my $grouped = group(\@langs); print "$words", map { $_->[0] } @$grouped;