Skip to content

Instantly share code, notes, and snippets.

@nickfnblum
Forked from juster/gen.pl
Created July 18, 2020 06:07
Show Gist options
  • Save nickfnblum/c6b5b7661f013a5128033bb71a8979ac to your computer and use it in GitHub Desktop.
Save nickfnblum/c6b5b7661f013a5128033bb71a8979ac to your computer and use it in GitHub Desktop.

Revisions

  1. @juster juster created this gist Jul 12, 2011.
    44 changes: 44 additions & 0 deletions gen.pl
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,44 @@
    #!/usr/bin/perl

    use warnings;
    use strict;

    my $WORDCOUNT = 5;
    my $LANGCOUNT = 50;
    my @FEATS = ('a'..'d');

    # create a random comma separated string of features
    sub genfeats
    {
    my (@feats, %seen);

    my $count = int rand @FEATS;
    for my $i (0 .. $count) {
    my $feat = $FEATS[int rand @FEATS];
    push @feats, $feat unless $seen{$feat}++;
    }

    return join q{,}, sort @feats;
    }

    printf "%s\n", join q{}, map { "\tword$_" } 1 .. $WORDCOUNT;

    my ($repcount, @repme) = (0);
    for my $i (1 .. $LANGCOUNT) {
    my @feats;
    if ($repcount > 0) {
    @feats = @repme;
    --$repcount;
    }
    else {
    @feats = map genfeats, 1 .. $WORDCOUNT;
    }

    printf "%s\n", join qq{\t}, "lang$i", @feats;

    # repeat words/features every once and awhile
    if ((int rand 5) == 0) {
    $repcount = 1 + int rand 3;
    @repme = @feats;
    }
    }
    69 changes: 69 additions & 0 deletions sort.pl
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,69 @@
    #!/usr/bin/perl

    use warnings;
    use strict;

    sub group
    {
    my ($langs_ref) = @_;

    my @grouped;
    insertlang(shift @$langs_ref, \@grouped) while @$langs_ref;
    return \@grouped;
    }

    sub insertlang
    {
    my ($lang, $langs_ref) = @_;

    my $i = findaffine($lang, $langs_ref);
    if ($i+1 >= @$langs_ref) { push @$langs_ref, $lang; }
    else { splice @$langs_ref, $i+1, 0, $lang; }

    return;
    }

    sub findaffine
    {
    my ($me, $them) = @_;

    my ($idx, $score) = (0, 0);
    for my $i (0 .. $#$them) {
    my $tmp = affinity($me, $them->[$i]);
    if ($tmp >= $score) { $idx = $i; $score = $tmp; }
    # >= so we append at the end of grouped languages
    }

    return $idx;
    }

    sub affinity
    {
    my ($llang, $rlang) = @_;
    my ($lwords, $rwords) = ($llang->[1], $rlang->[1]);

    my $score = 0;
    for my $i (0 .. $#$rwords) {
    for my $feat (keys %{$rwords->[$i]}) {
    ++$score if exists $lwords->[$i]{$feat};
    }
    }

    return $score;
    }

    my @langs;
    my $words = <>;
    while (my $line = <>) {
    my (undef, @feats) = split /\s+/, $line;
    my $words;
    for my $featlist (@feats) {
    my $wordfeats;
    $wordfeats->{$_} = 1 for split /,/, $featlist;
    push @$words, $wordfeats;
    }
    push @langs, [ $line, $words ];
    }

    my $grouped = group(\@langs);
    print "$words", map { $_->[0] } @$grouped;