#!/usr/bin/env perl
#
# Search the Amazon Product Advertising API (AWSECommerceService, ItemSearch
# on the Books index) for programming books about a given subject, one
# publication year at a time, and print one line per matching book:
#
#     PublicationDate;Title;Edition;
#
# Usage: script <subject>
#
# The three credential variables below must be filled in before the service
# will accept the signed requests.
use strict;
use warnings;

use URI;
use URI::Escape;
use Digest::SHA qw(hmac_sha256_base64);
use LWP::UserAgent;
use XML::LibXML;

# Credentials sent with / used to sign every request.
my $ASSOCIATE_TAG = '';
my $TOKEN         = '';
my $SECRET_KEY    = '';

# API version; it is also part of the XML namespace of the responses.
my $VERSION  = '2011-08-01';
my $NS       = "http://webservices.amazon.com/AWSECommerceService/$VERSION";
my $BASE_URI = URI->new(
    'http://webservices.amazon.com/onca/xml?Service=AWSECommerceService');

# Negated character class handed to uri_escape(): everything except these
# characters is percent-encoded.  Single-quoted so the backslash reaches the
# character class and the hyphen is unambiguously literal.
my $UNRESERVED = '^A-Za-z0-9\-_.~';

my ($subject) = @ARGV or die "Usage: $0 <subject>\n";

# The subject comes from the command line: match it literally so that
# regex metacharacters (e.g. "c++") cannot break or alter the title filter.
my $subject_re = qr/\Q$subject\E/i;

# Titles are character strings that may contain non-ASCII characters.
binmode STDOUT, ':encoding(UTF-8)';

for my $year (2002 .. 2012) {
    my $page      = 1;
    my $last_page = 10;    # provisional; replaced by TotalPages below

    while ($page <= $last_page) {
        my $uri = build_uri(
            $BASE_URI, $SECRET_KEY,
            ResponseGroup => 'Medium',
            Operation     => 'ItemSearch',
            SearchIndex   => 'Books',
            Sort          => 'daterank',
            Power         => "subject:$subject and subject:programming "
                           . "and keyword:$subject and pubdate: $year "
                           . "and language: English and not fiction",
            ItemPage      => $page,
        );

        my $result = fetch_uri($uri);
        my $dom    = XML::LibXML->load_xml(string => $result);
        my $xc     = XML::LibXML::XPathContext->new($dom->documentElement());
        $xc->registerNs('ns', $NS);

        # Fail with a readable message instead of "Can't call method on an
        # undefined value" when the response carries no TotalPages element.
        my ($total_node) = $xc->findnodes('//ns:TotalPages');
        die "$year: no TotalPages element in response for page $page\n"
            unless $total_node;

        my $total_pages = $total_node->textContent;
        if ($total_pages > 10) {
            die "TotalPage = $total_pages";
        }
        else {
            warn "$year: $page/$total_pages";
            $last_page = $total_pages;
        }

        for my $node ($xc->findnodes('//ns:ItemAttributes')) {
            # Flatten the direct children into name => text; when a name
            # occurs more than once the last occurrence wins.
            my %book;
            for my $child ($node->childNodes) {
                $book{ $child->nodeName } = $child->textContent;
            }

            # Missing fields are treated as empty strings.
            my ($pubdate, $title, $binding, $edition) =
                map { defined $book{$_} ? $book{$_} : '' }
                qw(PublicationDate Title Binding Edition);

            next unless $title   =~ $subject_re;
            next unless $binding =~ m/(?:paperback|hardcover)/i;
            next if     $title   =~ m/catalyst|rails|sinatra|django/i;

            # The trailing "\n" list element yields "...;Edition;\n".
            print join ';', $pubdate, $title, $edition, "\n";
        }

        $page++;
        sleep 1;    # pause between consecutive requests
    }
}

# fetch_uri($uri)
#
# Perform an HTTP GET on $uri and return the decoded response body.
# Dies with the HTTP status line when the request is not successful.
sub fetch_uri {
    my ($uri) = @_;

    my $ua       = LWP::UserAgent->new();
    my $response = $ua->get($uri);

    die $response->status_line unless $response->is_success;

    return $response->decoded_content;
}

# build_uri($uri, $secret, %params)
#
# Return a signed request URI: the fixed service parameters plus %params
# (added in sorted key order) are set as the query of a copy of $uri, which
# is then signed with $secret.  $uri itself is left untouched.
sub build_uri {
    my ($uri, $secret, %params) = @_;

    # Work on a clone so the caller's base URI is not rewritten on each call.
    my $request = $uri->clone;

    $request->query_form(
        'Service'        => 'AWSECommerceService',
        'AWSAccessKeyId' => $TOKEN,
        'Version'        => $VERSION,
        'AssociateTag'   => $ASSOCIATE_TAG,
        map { ($_ => $params{$_}) } sort keys %params
    );

    return sign_request($request, $secret);
}

# sign_request($uri, $secret)
#
# Taken from Net::Amazon.
#
# Add a Timestamp (current UTC time, unless the query already has one) and
# a Signature parameter to $uri.  The signature is the base64 HMAC-SHA256,
# keyed with $secret, of "GET", host, path and the sorted, percent-encoded
# query string joined by line feeds.  Modifies $uri in place and returns it.
# When $secret is not given the file-level $SECRET_KEY is used.
sub sign_request {
    my ($uri, $secret) = @_;
    $secret = $SECRET_KEY unless defined $secret;

    # This assumes no duplicated keys. Safe assumption?
    my %query = $uri->query_form;

    my @now = gmtime;
    $query{Timestamp} ||= sprintf(
        '%04d-%02d-%02dT%02d:%02d:%02dZ',
        $now[5] + 1900, $now[4] + 1, @now[3, 2, 1, 0]
    );

    my $qstring = join '&',
        map { "$_=" . uri_escape($query{$_}, $UNRESERVED) }
        sort keys %query;

    # Use chr(10), not "\n" which varies by platform
    my $signme = join chr(10), 'GET', $uri->host, $uri->path, $qstring;

    my $sig = hmac_sha256_base64($signme, $secret);

    # Digest does not properly pad b64 strings
    $sig .= '=' while length($sig) % 4;

    $sig = uri_escape($sig, $UNRESERVED);
    $qstring .= "&Signature=$sig";
    $uri->query($qstring);

    return $uri;
}