Skip to content

Instantly share code, notes, and snippets.

@wchristian
Forked from vti/books.pl
Created July 18, 2012 13:36
Show Gist options
  • Save wchristian/3136256 to your computer and use it in GitHub Desktop.
Save wchristian/3136256 to your computer and use it in GitHub Desktop.

Revisions

  1. Viacheslav Tykhanovskyi created this gist Jul 4, 2012.
    140 changes: 140 additions & 0 deletions books.pl
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,140 @@
    #!/usr/bin/env perl

    use strict;
    use warnings;

    use URI;
    use URI::Escape;
    use Digest::SHA qw(hmac_sha256_base64);
    use LWP::UserAgent;
    use XML::LibXML;

    # Credentials sent with every request; they are left blank in this file.
    my $ASSOCIATE_TAG = q{};
    my $TOKEN         = q{};
    my $SECRET_KEY    = q{};

    # The API version is sent as a request parameter and is also the final
    # segment of the XML namespace used when querying the response.
    my $VERSION  = '2011-08-01';
    my $NS       = 'http://webservices.amazon.com/AWSECommerceService/' . $VERSION;
    my $BASE_URI = URI->new(
        'http://webservices.amazon.com/onca/xml?Service=AWSECommerceService'
    );

    # The subject to search for is the first command-line argument.
    die "Usage: $0 <subject>" unless @ARGV;
    my ($subject) = @ARGV;

    # Match the subject literally. It is a plain search term, and unquoted
    # regex metacharacters (for example the "++" in "c++") would otherwise
    # abort the script with a pattern compilation error.
    my $subject_re = qr/\Q$subject\E/i;

    # Run one search per publication year and page through each result set,
    # printing one ';'-separated line per book that passes the filters.
    for my $year (2002 .. 2012) {
        my $page      = 1;
        my $last_page = 10;    # starting value; replaced by TotalPages below

        while ($page <= $last_page) {
            my $uri = build_uri(
                $BASE_URI, $SECRET_KEY,
                ResponseGroup => 'Medium',
                Operation     => 'ItemSearch',
                SearchIndex   => 'Books',
                Sort          => 'daterank',
                Power         => "subject:$subject and subject:programming "
                  . "and keyword:$subject and pubdate: $year "
                  . "and language: English and not fiction",
                ItemPage => $page
            );

            my $result = fetch_uri($uri);

            my $dom = XML::LibXML->load_xml(string => $result);

            my $xc = XML::LibXML::XPathContext->new($dom->documentElement());
            $xc->registerNs('ns', $NS);

            # Stop with a readable message when the response has no page
            # count, instead of failing on a method call on undef.
            my ($pages_node) = $xc->findnodes('//ns:TotalPages');
            die "$year: page $page: response has no TotalPages element"
              unless defined $pages_node;

            # Abort when the result set spans more than 10 pages; otherwise
            # log progress and shrink the loop bound to the real page count.
            my $total_pages = $pages_node->textContent;
            if ($total_pages > 10) {
                die "TotalPage = $total_pages";
            }
            else {
                warn "$year: $page/$total_pages";
                $last_page = $total_pages;
            }

            for my $node ($xc->findnodes('//ns:ItemAttributes')) {

                # Flatten the attribute children into name => text pairs.
                my %book;
                for my $child ($node->childNodes) {
                    $book{ $child->nodeName } = $child->textContent || '';
                }

                # Absent fields count as empty strings so the filters and
                # the output below do not warn about undefined values.
                my $title   = $book{Title}   || '';
                my $binding = $book{Binding} || '';

                next unless $title =~ $subject_re;
                next unless $binding =~ m/(?:paperback|hardcover)/i;

                next if $title =~ m/catalyst|rails|sinatra|django/i;

                # The newline is itself a joined field, so every record ends
                # with ';' before the line break.
                print join ';',
                  ($book{PublicationDate} || ''), $title,
                  ($book{Edition} || ''), "\n";
            }

            $page++;
            sleep 1;
        }
    }

    # Issue an HTTP GET for $uri and return the decoded response body.
    # Dies with the HTTP status line when the request is not successful.
    sub fetch_uri {
        my ($uri) = @_;

        my $response = LWP::UserAgent->new()->get($uri);

        die $response->status_line unless $response->is_success;

        return $response->decoded_content;
    }

    # Replace the query of $uri with the fixed service parameters followed by
    # the caller's %params (ordered by name), then hand the URI to
    # sign_request() together with $secret and return its result.
    sub build_uri {
        my ($uri, $secret, %params) = @_;

        # Parameters that accompany every request.
        my @query = (
            'Service'        => 'AWSECommerceService',
            'AWSAccessKeyId' => $TOKEN,
            'Version'        => $VERSION,
            'AssociateTag'   => $ASSOCIATE_TAG,
        );

        # Caller-supplied parameters, appended in sorted key order.
        for my $name (sort keys %params) {
            push @query, $name, $params{$name};
        }

        $uri->query_form(@query);

        return sign_request($uri, $secret);
    }

    # Taken from Net::Amazon
    #
    # Add a Timestamp (unless the query already carries a true one) and an
    # HMAC-SHA256 Signature to the query string of $uri.
    #
    #   $uri    - URI object; its query string is rewritten in place.
    #   $secret - key used for the HMAC signature.
    #
    # Returns the same, modified $uri object.
    sub sign_request {
        my ($uri, $secret) = @_;

        # This assumes no duplicated keys. Safe assumption?
        my %query = $uri->query_form;
        my @now = gmtime;
        $query{Timestamp} ||= sprintf(
            '%04d-%02d-%02dT%02d:%02d:%02dZ',
            $now[5] + 1900,
            $now[4] + 1,
            @now[3, 2, 1, 0]
        );

        # Query string to sign: pairs sorted by key, values percent-encoded
        # except for letters, digits, '-', '_', '.' and '~'.
        my $qstring = join '&',
          map { "$_=" . uri_escape($query{$_}, "^A-Za-z0-9\-_.~") }
          sort keys %query;

        # Use chr(10), not "\n" which varies by platform
        my $signme = join chr(10), "GET", $uri->host, $uri->path, $qstring;

        # Sign with the key the caller passed in rather than a file-level
        # variable, so the $secret argument is actually honoured.
        my $sig = hmac_sha256_base64($signme, $secret);

        # Digest does not properly pad b64 strings
        $sig .= '=' while length($sig) % 4;
        $sig = uri_escape($sig, "^A-Za-z0-9\-_.~");
        $qstring .= "&Signature=$sig";
        $uri->query($qstring);
        return $uri;
    }