#!/usr/bin/python
# This program is free software. It comes without any warranty, to
# the extent permitted by applicable law. You can redistribute it
# and/or modify it under the terms of the Do What The Fuck You Want
# To Public License, Version 2, a copy of which is provided in the
# file LICENSE.txt.
"""Scrape e-mail addresses from a single web page into ``emails.csv``.

Usage: pass the page URL as the first command-line argument.

The script currently scrapes one page. To scrape multiple pages of a
site, call ``fetch_page`` and ``extract_emails`` in a loop over the URLs
and pass the combined result to ``write_emails``.
"""

import io
import re
import sys
import urllib

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    # Python 2 exposes urlopen directly on the urllib module.
    urlopen = urllib.urlopen

# NOTE: deliberately loose pattern, kept exactly as it always was. It needs
# at least two characters before the '@' and does not require a dot in the
# domain part, so it can yield false positives.
EMAIL_PATTERN = re.compile(r'[\w\-][\w\-\.]+@[\w\-][\w\-\.]+[a-zA-Z]{1,4}')

OUTPUT_PATH = 'emails.csv'


def fetch_page(url):
    """Download *url* and return its body as text.

    The handle is always closed, even if reading fails. The body is decoded
    as UTF-8 with undecodable bytes replaced, so a page in another encoding
    cannot abort the scrape.
    """
    handle = urlopen(url)
    try:
        raw = handle.read()
    finally:
        handle.close()
    return raw.decode('utf-8', 'replace')


def extract_emails(text):
    """Return the unique e-mail-like strings in *text*, sorted.

    Sorting makes the output reproducible between runs; iterating a plain
    set would give an arbitrary order.
    """
    return sorted(set(EMAIL_PATTERN.findall(text)))


def write_emails(emails, path=OUTPUT_PATH):
    """Write *emails* to *path*, one per line, without a trailing newline.

    Any existing file at *path* is overwritten.
    """
    # io.open gives the same text/encoding behaviour on Python 2 and 3;
    # the u'' literals keep the payload unicode on Python 2 even when
    # *emails* is empty.
    with io.open(path, 'w', encoding='utf-8') as email_file:
        email_file.write(u'\n'.join(u'%s' % email for email in emails))


def main(argv=None):
    """Scrape the URL given in ``argv[1]`` and write ``emails.csv``.

    Returns a process exit status: 0 on success, 2 when the URL argument is
    missing. Arguments after the URL are ignored.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        program = argv[0] if argv else 'email_scraper'
        sys.stderr.write('usage: %s URL\n' % program)
        return 2
    write_emails(extract_emails(fetch_page(argv[1])))
    return 0


if __name__ == '__main__':
    sys.exit(main())