#!/usr/bin/env bash
#
# Retrieves MX and A records for 'Alexa Top 1 Million' hosts
# and prints them as pretty formatted JSON objects to stdout.
#
# *Optional* parallelism support with GNU Parallel (recommended):
#   $ sudo apt-get install parallel
#
# Usage:
#   <script>                 process the whole host list
#   <script> get_mx <host>   worker mode: print the object for one host
#                            (this is how GNU Parallel re-invokes the script)
#
# Authors: Aaron Zauner
# License: CC0 1.0 (https://creativecommons.org/publicdomain/zero/1.0)
#
set -eo pipefail

readonly top1m_s3l="https://s3.amazonaws.com/alexa-static/top-1m.csv.zip"
readonly top1m_zip=${top1m_s3l##*/}
readonly top1m_csv=${top1m_zip%.*}

#######################################
# Print a message to stderr and abort the script.
# Arguments: $* - message text
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Print the host column of the CSV list, each host followed by one space.
# Globals:   top1m_csv (read)
# Outputs:   space separated host names on stdout
#######################################
hosts() {
  local line host
  # '|| [[ -n ... ]]' keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [[ -n ${line} ]]; do
    host=${line##*,} # everything after the last comma
    [[ -n ${host} ]] || continue
    printf '%s ' "${host}"
  done < "${top1m_csv}"
}

#######################################
# Build a JSON object for one host that maps each of its MX records to
# the first IPv4 address `getent ahostsv4` reports for that MX host.
# Lookups are best effort: an MX host that does not resolve is emitted
# with an empty address, a host without MX records yields an empty object.
# Arguments: $1 - host name to query
# Outputs:   one JSON object on stdout
# Returns:   0 on success, 1 if no host name was given
#######################################
get_mx() {
  local host=${1}
  local -a mx_records=()
  local mx ip sep=""

  if [[ -z ${host} ]]; then
    printf 'get_mx: missing host name\n' >&2
    return 1
  fi

  # dig prints "<priority> <mx-host>"; keep only the host. Lines starting
  # with ';' are dig diagnostics (e.g. timeouts), not records.
  mapfile -t mx_records < <(
    dig +short +nosearch +keepopen +time=2 mx "${host}" \
      | sed -e '/^;/d' -e 's/.* //'
  )

  printf '{\n\t"%s": {\n\t\t"mx_records": {' "${host}"
  for mx in "${mx_records[@]}"; do
    [[ -n ${mx} ]] || continue
    # in our case, v4 suffices. The first field of the first output line
    # is the address; a failed lookup leaves it empty.
    read -r ip _ < <(getent ahostsv4 "${mx}") || ip=""
    # The separator is written *before* every entry but the first, so the
    # object never ends in a trailing comma.
    printf '%s\n\t\t\t"%s": \t"%s"' "${sep}" "${mx}" "${ip}"
    sep=","
  done
  printf '\n\t\t}\n\t}\n}\n'
}

#######################################
# Download and unpack the host list unless the CSV is already present.
# Globals:   top1m_s3l, top1m_zip, top1m_csv (read)
#######################################
fetch_list() {
  if [[ -e ${top1m_csv} ]]; then
    return 0
  fi
  # Tool chatter is discarded (stdout carries the JSON stream), but a
  # failure is reported instead of aborting silently.
  wget -O "${top1m_zip}" "${top1m_s3l}" > /dev/null 2>&1 \
    || die "failed to download ${top1m_s3l}"
  unzip -o "${top1m_zip}" > /dev/null 2>&1 \
    || die "failed to unpack ${top1m_zip}"
}

#######################################
# EXIT handler of a full run: remove the list files and report on stderr
# whether the run finished or was aborted.
# Globals:   top1m_zip, top1m_csv (read)
#######################################
cleanup() {
  local status=$?
  # -f: the zip does not exist when the CSV was already present.
  rm -f -- "${top1m_zip}" "${top1m_csv}"
  if ((status == 0)); then
    printf '\n\n<< finished run. >> %s\n' "$(date --rfc-3339=ns)" >&2
  else
    printf '\n\n<< aborted run (exit status %d). >> %s\n' \
      "${status}" "$(date --rfc-3339=ns)" >&2
  fi
}

#######################################
# Entry point: worker mode for a single host, otherwise a full run over
# the list (via GNU Parallel when installed, sequentially if not).
# Arguments: $1 - optional literal "get_mx"
#            $2 - host name (worker mode only)
#######################################
main() {
  local host

  # Worker mode always ends here; a failed lookup must never fall through
  # into a full run (which would re-download and re-scan the whole list).
  if [[ ${1} == "get_mx" ]]; then
    get_mx "${2}"
    return
  fi

  # Registered before any work so the files are removed on every exit path.
  trap cleanup EXIT
  fetch_list

  if command -v parallel > /dev/null 2>&1; then
    printf '<< parallel mode >>\n\n' >&2
    parallel --progress --colsep ',' "${0} get_mx {2}" :::: "${top1m_csv}"
  else
    printf '<< sequential mode (slow! install `parallel`.) >>\n\n' >&2
    # Read on fd 3 so commands inside the loop cannot consume the host list.
    while read -r -d ' ' -u 3 host; do
      get_mx "${host}"
    done 3< <(hosts)
  fi
}

main "$@"