Skip to content

Instantly share code, notes, and snippets.

@ftrain
Created April 27, 2015 02:54
Show Gist options
  • Save ftrain/dd84a97fd4063c9a3bf7 to your computer and use it in GitHub Desktop.
Save ftrain/dd84a97fd4063c9a3bf7 to your computer and use it in GitHub Desktop.

Revisions

  1. ftrain created this gist Apr 27, 2015.
    79 changes: 79 additions & 0 deletions bing-gobble.pl
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,79 @@
    #!/usr/bin/perl

    # Copyright (c) 2015, Paul Ford, [email protected]
    # All rights reserved.
    #
    # Redistribution and use in source and binary forms, with or without
    # modification, are permitted provided that the following conditions
    # are met:
    #
    # 1. Redistributions of source code must retain the above copyright
    # notice, this list of conditions and the following disclaimer.
    #
    # 2. Redistributions in binary form must reproduce the above
    # copyright notice, this list of conditions and the following
    # disclaimer in the documentation and/or other materials provided
    # with the distribution.
    #
    # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
    # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
    # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
    # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
    # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
    # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
    # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
    # TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
    # ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
    # TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
    # THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    # SUCH DAMAGE.

    # ----------------------------------------------------------------------
    # HELLO!

    # I take a list of sites and a list of words and search Bing for
    # those words on those sites, and then I put the resulting result
    # counts, one line per site, into a tab-separated values (TSV) file,
    # and what a good boy am I.
    #
    # I require curl and nothing else.
    #
    # I don't know if Bing likes me or not.
    #

    # Sites to restrict each Bing query to (used as the "site:" operand).
    # Order matters: it is the row order of the output file.
    my @sites = qw(
        ask.metafilter.com bbc.co.uk/news businessweek.com bustle.com
        buzzfeed.com cbsnews.com chronicle.com cnn.com deadspin.com
        drudgereport.com ebony.com elle.com en.wikipedia.org esquire.com
        facebook.com fivethirtyeight.com forbes.com gawker.com
        huffingtonpost.com io9.com jezebel.com kotaku.com latimes.com
        medium.com www.nbcnews.com news.yahoo.com news.ycombinator.com
        newyorker.com nydailynews.com nypost.com nytimes.com quora.com
        reddit.com returnofkings.com reuters.com sbnation.com slashdot.org
        somethingawful.com splitsider.com theatlantic.com theawl.com
        thehairpin.com thehindu.com thenewrepublic.com theroot.com
        theverge.com time.com tumblr.com twitter.com vogue.com vox.com
        washingtonpost.com weather.com wired.com metafilter.com
    );

    # Search terms queried against every site. Order matters: it is the
    # column order within each output row.
    my @words = qw(
        cat dog democrat republican conservative liberal
        pro-life pro-choice penis vagina fellatio cunnilingus
        women men woman man
    );

    # Strictures are lexically scoped, so enabling them here covers the
    # whole query loop below without affecting the declarations above.
    use strict;
    use warnings;

    # For every (site, word) pair, ask Bing for "site:<site> <word>" and
    # record the reported result count. Output is one line per site in
    # site_query_results.tsv:
    #
    #     <site> TAB <word> TAB <count> TAB <word> TAB <count> ...
    #
    # A progress line "<site> <word> <count>" is echoed to STDOUT for each
    # query. When no count can be extracted, the count field is left empty
    # and a warning goes to STDERR.
    my $out_path = "site_query_results.tsv";
    open(my $out, '>', $out_path)
        or die "Cannot open $out_path for writing: $!\n";

    for my $site (@sites) {
        print {$out} $site;

        for my $word (@words) {
            my $url = "http://www.bing.com/search?q=site%3A$site%20$word";

            # List-form pipe open runs curl directly, with no shell in
            # between, so nothing in the URL is interpreted as shell
            # syntax. -s drops the progress meter (previously mixed into
            # the parsed text via 2>&1); -S still reports real errors.
            my $result = '';
            if (open(my $curl, '-|', 'curl', '-sS', $url)) {
                local $/;    # slurp the whole response at once
                my $body = <$curl>;
                $result = $body if defined $body;
                close $curl
                    or warn "curl failed for $url (status $?)\n";
            }
            else {
                warn "Cannot run curl: $!\n";
            }

            # Take the capture from the match's own return value instead
            # of reading $1 afterwards: after a failed match $1 may still
            # hold the count from an earlier successful query.
            my ($count) =
                $result =~ /<span class="sb_count">([0-9,]+) results/;
            if (!defined $count) {
                warn "No result count found for '$word' on $site\n";
                $count = '';
            }

            print {$out} "\t$word\t$count";
            print "$site $word $count\n";
        }

        print {$out} "\n";
    }

    # Buffered write errors only surface at close, so check it.
    close $out or die "Error closing $out_path: $!\n";