-
-
Save mjseeley/5e182c0c29dde014cfac to your computer and use it in GitHub Desktop.
Revisions
-
mjseeley revised this gist
Mar 27, 2015 . 1 changed file with 104 additions and 79 deletions.There are no files selected for viewing
# Download ALL attachments from GMail
# 1. Script needs to be run via console not in an IDE, getpass.getpass() will fail otherwise.
#    https://docs.python.org/2/library/getpass.html
# 2. Make sure you have IMAP enabled in your GMail settings.
#    https://support.google.com/mail/troubleshooter/1668960?hl=en
# 3. If you are using 2 step verification you may need an APP Password.
#    https://support.google.com/accounts/answer/185833
# 4. Reference information for GMail IMAP extension can be found here.
#    https://developers.google.com/gmail/imap_extensions
import email
import hashlib
import getpass
import imaplib
import os
from collections import defaultdict, Counter
import platform

# Per attachment name: how many distinct payloads have been seen under it.
fileNameCounter = Counter()
# Per attachment name: MD5 digests of the payloads already seen in this run.
fileNameHashes = defaultdict(set)
# Every message id returned by the current search.
NewMsgIDs = set()
# Message ids already handled (loaded from the resume file, then extended).
ProcessedMsgIDs = set()


def recover(resumeFile):
    """Load already-processed message ids from *resumeFile* into ProcessedMsgIDs.

    The resume file is a comma-separated list of ids, written with a trailing
    comma after every id. If the file does not exist yet, an empty one is
    created so later appends have a target.
    """
    if os.path.exists(resumeFile):
        print('Recovery file found resuming...')
        with open(resumeFile) as f:
            processedIds = f.read()
        # Drop empty fields: the trailing comma (or an empty file) would
        # otherwise register '' as a processed id.
        ProcessedMsgIDs.update(
            entry.strip() for entry in processedIds.split(',') if entry.strip())
    else:
        print('No Recovery file found.')
        open(resumeFile, 'a').close()


def _msg_key(msgId):
    """Return *msgId* as text so it compares equal to ids read from the resume file."""
    # On Python 3 imaplib hands back bytes; on Python 2 bytes is str already.
    if isinstance(msgId, bytes) and not isinstance(msgId, str):
        return msgId.decode('ascii')
    return msgId


def _parse_message(raw):
    """Parse a raw RFC822 message given either as text or as bytes."""
    if isinstance(raw, str):
        return email.message_from_string(raw)
    return email.message_from_bytes(raw)


def _close_session(imapSession):
    """Best-effort shutdown: close the mailbox (if one is selected) and log out."""
    for shutdown in (imapSession.close, imapSession.logout):
        try:
            shutdown()
        except (imaplib.IMAP4.error, EnvironmentError):
            # close() is illegal before select(), and a dropped connection
            # makes both calls fail; neither should mask the real error.
            pass


def GenerateMailMessages(userName, password, resumeFile):
    """Yield every not-yet-processed GMail message that has an attachment.

    Logs in to imap.gmail.com, searches '[Gmail]/All Mail' with the GMail
    X-GM-RAW extension, and yields each matching message as an
    email.message.Message. After the consumer has finished with a message
    (i.e. when the generator is resumed) its id is added to ProcessedMsgIDs
    and appended to *resumeFile*.

    Raises:
        RuntimeError: if login, mailbox selection, search or fetch does not
            answer 'OK'.
        imaplib.IMAP4.error: propagated from imaplib (e.g. bad credentials).
    """
    # NOTE(review): the ids stored in the resume file are IMAP message
    # sequence numbers, which can shift if mail is deleted between runs;
    # switching to UIDs would make resuming robust -- confirm before changing.
    imapSession = imaplib.IMAP4_SSL('imap.gmail.com')
    try:
        typ, accountDetails = imapSession.login(userName, password)
        print(typ)
        print(accountDetails)
        if typ != 'OK':
            print('Not able to sign in!')
            raise RuntimeError('Not able to sign in!')

        typ, _ = imapSession.select('[Gmail]/All Mail')
        if typ != 'OK':
            raise RuntimeError('Not able to select [Gmail]/All Mail.')

        # Search with 'ALL' instead to visit every message, not only those
        # GMail reports as having attachments.
        typ, data = imapSession.search(None, '(X-GM-RAW "has:attachment")')
        if typ != 'OK':
            print('Error searching Inbox.')
            raise RuntimeError('Error searching Inbox.')

        # Iterating over all emails
        for msgId in data[0].split():
            NewMsgIDs.add(msgId)
            msgKey = _msg_key(msgId)
            if msgKey in ProcessedMsgIDs:
                # Skip before fetching: downloading the full message only to
                # discard it defeats the purpose of resuming.
                continue

            typ, messageParts = imapSession.fetch(msgId, '(RFC822)')
            if typ != 'OK':
                print('Error fetching mail.')
                raise RuntimeError('Error fetching mail.')
            if not messageParts or not isinstance(messageParts[0], tuple):
                # No body came back (e.g. the message vanished); leave it
                # unmarked so a later run can retry.
                print('Error fetching mail.')
                continue

            yield _parse_message(messageParts[0][1])

            ProcessedMsgIDs.add(msgKey)
            with open(resumeFile, "a") as resume:
                resume.write('{id},'.format(id=msgKey))
    finally:
        # Also runs when the consumer raises or abandons the generator.
        _close_session(imapSession)


def _safe_file_name(fileName):
    """Return *fileName* reduced to a single-line bare file name, or None."""
    if fileName is None:
        return None
    fileName = ''.join(fileName.splitlines())
    # Attachment names are untrusted input: strip any directory components so
    # a name like '../../x' cannot be written outside the target directory.
    return os.path.basename(fileName.replace('\\', '/'))


def SaveAttachmentsFromMailMessage(message, directory):
    """Write every attachment of *message* into *directory*.

    Parts without a Content-Disposition header or without a file name are
    ignored. Within one run, a payload whose MD5 digest was already seen
    under the same file name is skipped; a different payload under an
    already-used name is stored as 'name(N).ext'. Files that already exist
    in *directory* are never overwritten.
    """
    for part in message.walk():
        if part.get_content_maintype() == 'multipart':
            continue
        if part.get('Content-Disposition') is None:
            continue

        fileName = _safe_file_name(part.get_filename())
        if not fileName:
            continue

        payload = part.get_payload(decode=True)
        if not payload:
            print('Attachment {file} was returned as type: {ftype} skipping...'.format(file=fileName,
                                                                                       ftype=type(payload)))
            continue

        x_hash = hashlib.md5(payload).hexdigest()
        if x_hash in fileNameHashes[fileName]:
            print('\tSkipping duplicate file: {file}'.format(file=fileName))
            continue

        fileNameCounter[fileName] += 1
        fileStr, fileExtension = os.path.splitext(fileName)
        if fileNameCounter[fileName] > 1:
            new_fileName = '{file}({suffix}){ext}'.format(suffix=fileNameCounter[fileName],
                                                          file=fileStr,
                                                          ext=fileExtension)
            print('\tRenaming and storing: {file} to {new_file}'.format(file=fileName,
                                                                        new_file=new_fileName))
        else:
            new_fileName = fileName
            print('\tStoring: {file}'.format(file=fileName))
        fileNameHashes[fileName].add(x_hash)

        file_path = os.path.join(directory, new_fileName)
        if os.path.exists(file_path):
            print('\tExists in destination: {file}'.format(file=new_fileName))
            continue

        try:
            with open(file_path, 'wb') as fp:
                fp.write(payload)
        except (IOError, OSError):
            # Typically a name the local file system rejects; keep going with
            # the remaining attachments.
            print('Could not store: {file} it has a shitty file name or path under {op_sys}.'.format(
                file=file_path,
                op_sys=platform.system()))


def main():
    """Prompt for credentials and download all attachments into ./attachments."""
    resumeFile = 'resume.txt'
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3
    userName = read_line('Enter your GMail username: ')
    password = getpass.getpass('Enter your password: ')
    recover(resumeFile)
    if not os.path.isdir('attachments'):
        os.mkdir('attachments')
    for msg in GenerateMailMessages(userName, password, resumeFile):
        SaveAttachmentsFromMailMessage(msg, 'attachments')
    # Everything was processed, so the next run should start from scratch.
    os.remove(resumeFile)


if __name__ == '__main__':
    main()
mjseeley revised this gist
Dec 22, 2014 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,6 +1,6 @@ # Something in lines of http://stackoverflow.com/questions/348630/how-can-i-download-all-emails-with-attachments-from-gmail # Make sure you have IMAP enabled in your gmail settings. # Right now it does download same file multiple times if their contents are different. Uses MD5 hash of each file to skip identical files # If you are using 2-step verification you may need an APP Password. # https://support.google.com/accounts/answer/185833 -
mjseeley revised this gist
Dec 22, 2014 . 1 changed file with 28 additions and 10 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -34,6 +34,7 @@ def get_hash(file_to_hash): passwd = getpass.getpass('Enter your password: ') imapSession = imaplib.IMAP4_SSL('imap.gmail.com') imapSession = imaplib.IMAP4_SSL('imap.gmail.com') typ, accountDetails = imapSession.login(userName, passwd) @@ -44,6 +45,7 @@ def get_hash(file_to_hash): raise imapSession.select('[Gmail]/All Mail') typ, data = imapSession.search(None, 'ALL') if typ != 'OK': print 'Error searching Inbox.' @@ -67,20 +69,36 @@ def get_hash(file_to_hash): fileName = part.get_filename() if bool(fileName): filePath = os.path.join(detach_dir, 'attachments', 'temp.attachment') if not os.path.isfile(filePath): # print 'Processing: {file}'.format(file=fileName) fp = open(filePath, 'wb') fp.write(part.get_payload(decode=True)) fp.close() x_hash = get_hash(filePath) if x_hash in fileNameList_dict[fileName]: print '\tSkipping duplicate file: {file}'.format(file=fileName) if os.path.isfile(filePath): os.remove(filePath) pass else: fileNameCount_dict[fileName] += 1 fileStr, fileExtension = os.path.splitext(fileName) if fileNameCount_dict[fileName] > 1: new_fileName = '{file}({suffix}){ext}'.format(suffix=fileNameCount_dict[fileName], file=fileStr, ext=fileExtension) else: new_fileName = fileName fileNameList_dict[fileName].append(x_hash) hash_path = os.path.join(detach_dir, 'attachments', new_fileName) if not os.path.isfile(hash_path): if new_fileName == fileName: print '\tStoring: {file}'.format(file=fileName) else: print('\tRenaming and storing: {file} to {new_file}'.format(file=fileName, new_file=new_fileName)) os.rename(filePath, hash_path) if os.path.isfile(filePath): os.remove(filePath) imapSession.close() imapSession.logout() -
mjseeley revised this gist
Dec 22, 2014 . 1 changed file with 3 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,6 +1,9 @@ # Something in lines of http://stackoverflow.com/questions/348630/how-can-i-download-all-emails-with-attachments-from-gmail # Make sure you have IMAP enabled in your gmail settings. # Right now it does download same file multiple times if their contents are different. # If you are using 2-step verification you may need an APP Password. # https://support.google.com/accounts/answer/185833 import email import hashlib import getpass -
mjseeley revised this gist
Dec 22, 2014 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,6 +1,6 @@ # Something in lines of http://stackoverflow.com/questions/348630/how-can-i-download-all-emails-with-attachments-from-gmail # Make sure you have IMAP enabled in your gmail settings. # Right now it does download same file multiple times if their contents are different. import email import hashlib import getpass -
mjseeley revised this gist
Dec 22, 2014 . 1 changed file with 69 additions and 44 deletions.There are no files selected for viewing
# Something in lines of http://stackoverflow.com/questions/348630/how-can-i-download-all-emails-with-attachments-from-gmail
# Make sure you have IMAP enabled in your gmail settings.
# Attachments are stored as 'name(#md5#).ext', so files that share a name but
# differ in content are all kept, and identical files are stored once.

import email
import hashlib
import getpass
import imaplib
import os


def get_hash(file_to_hash):
    """Return the hex MD5 digest of the file at *file_to_hash*.

    The file is read in 64 KiB chunks. If it cannot be opened or read, the
    error is printed and the digest of whatever was read so far (the digest
    of empty input when the file could not be opened) is returned.
    """
    blocksize = 65536
    hasher = hashlib.md5()
    try:
        with open(file_to_hash, 'rb') as afile:
            for buf in iter(lambda: afile.read(blocksize), b''):
                hasher.update(buf)
    except IOError as err:
        print(err)
    return hasher.hexdigest()


def _store_attachment(part, attachments_dir):
    """Save one message part into *attachments_dir* under a hash-suffixed name."""
    if part.get_content_maintype() == 'multipart':
        # print part.as_string()
        return
    if part.get('Content-Disposition') is None:
        # print part.as_string()
        return

    fileName = part.get_filename()
    if not fileName:
        return
    # Attachment names are untrusted: keep only the final path component so
    # nothing can be written outside the attachments directory.
    fileName = os.path.basename(''.join(fileName.splitlines()).replace('\\', '/'))
    payload = part.get_payload(decode=True)
    if not fileName or payload is None:
        return

    print('Processing: {file}'.format(file=fileName))
    filePath = os.path.join(attachments_dir, 'temp.attachment')
    # Opening with 'wb' replaces a temp file left behind by an interrupted
    # run, so a stale file can neither block nor be mistaken for this payload.
    with open(filePath, 'wb') as fp:
        fp.write(payload)

    x_hash = get_hash(filePath)
    fileStr, fileExtension = os.path.splitext(fileName)
    new_fileName = '{file}(#{suffix}#){ext}'.format(suffix=x_hash,
                                                    file=fileStr,
                                                    ext=fileExtension)
    hash_path = os.path.join(attachments_dir, new_fileName)
    if os.path.isfile(hash_path):
        # Same name and same content already stored: discard the temp copy.
        os.remove(filePath)
    else:
        print('Renaming {file} to {new_file}'.format(file=fileName, new_file=new_fileName))
        os.rename(filePath, hash_path)


def main():
    """Prompt for credentials and download every attachment in '[Gmail]/All Mail'.

    Raises:
        RuntimeError: if login, search or fetch does not answer 'OK'.
    """
    detach_dir = '.'
    attachments_dir = os.path.join(detach_dir, 'attachments')
    if not os.path.isdir(attachments_dir):
        os.mkdir(attachments_dir)

    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3
    userName = read_line('Enter your GMail username: ')
    passwd = getpass.getpass('Enter your password: ')

    imapSession = imaplib.IMAP4_SSL('imap.gmail.com')
    try:
        typ, accountDetails = imapSession.login(userName, passwd)
        print(typ)
        print(accountDetails)
        if typ != 'OK':
            print('Not able to sign in!')
            raise RuntimeError('Not able to sign in!')

        imapSession.select('[Gmail]/All Mail')
        typ, data = imapSession.search(None, 'ALL')
        if typ != 'OK':
            print('Error searching Inbox.')
            raise RuntimeError('Error searching Inbox.')

        # Iterating over all emails
        for msgId in data[0].split():
            typ, messageParts = imapSession.fetch(msgId, '(RFC822)')
            if typ != 'OK':
                print('Error fetching mail.')
                raise RuntimeError('Error fetching mail.')

            emailBody = messageParts[0][1]
            # imaplib returns text on Python 2 and bytes on Python 3.
            if isinstance(emailBody, str):
                mail = email.message_from_string(emailBody)
            else:
                mail = email.message_from_bytes(emailBody)
            for part in mail.walk():
                _store_attachment(part, attachments_dir)
        imapSession.close()
    finally:
        # Always end the session, even when an error aborts the download.
        imapSession.logout()


if __name__ == '__main__':
    main()
baali revised this gist
May 8, 2012 . 1 changed file with 1 addition and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +1,6 @@ # Something in lines of http://stackoverflow.com/questions/348630/how-can-i-download-all-emails-with-attachments-from-gmail # Make sure you have IMAP enabled in your gmail settings. # Right now it won't download same file name twice even if their contents are different. import email import getpass, imaplib -
baali created this gist
May 8, 2012 .There are no files selected for viewing
# Something in lines of http://stackoverflow.com/questions/348630/how-can-i-download-all-emails-with-attachments-from-gmail
# Make sure you have IMAP enabled in your gmail settings.

import email
import getpass
import imaplib
import os
import sys


def _save_attachments(mail, attachments_dir):
    """Write each named attachment of *mail* into *attachments_dir*.

    A file whose name already exists in the directory is left untouched, so
    only the first attachment seen under a given name is kept.
    """
    for part in mail.walk():
        if part.get_content_maintype() == 'multipart':
            # print part.as_string()
            continue
        if part.get('Content-Disposition') is None:
            # print part.as_string()
            continue

        fileName = part.get_filename()
        if not fileName:
            continue
        # Attachment names are untrusted: keep only the final path component
        # so nothing can be written outside the attachments directory.
        fileName = os.path.basename(''.join(fileName.splitlines()).replace('\\', '/'))
        payload = part.get_payload(decode=True)
        if not fileName or payload is None:
            continue

        filePath = os.path.join(attachments_dir, fileName)
        if not os.path.isfile(filePath):
            print(fileName)
            with open(filePath, 'wb') as fp:
                fp.write(payload)


def _download_all(userName, passwd, attachments_dir):
    """Log in to GMail over IMAP and save the attachments of every message.

    Raises:
        RuntimeError: if login, search or fetch does not answer 'OK'.
    """
    imapSession = imaplib.IMAP4_SSL('imap.gmail.com')
    try:
        typ, accountDetails = imapSession.login(userName, passwd)
        if typ != 'OK':
            print('Not able to sign in!')
            raise RuntimeError('Not able to sign in!')

        imapSession.select('[Gmail]/All Mail')
        typ, data = imapSession.search(None, 'ALL')
        if typ != 'OK':
            print('Error searching Inbox.')
            raise RuntimeError('Error searching Inbox.')

        # Iterating over all emails
        for msgId in data[0].split():
            typ, messageParts = imapSession.fetch(msgId, '(RFC822)')
            if typ != 'OK':
                print('Error fetching mail.')
                raise RuntimeError('Error fetching mail.')

            emailBody = messageParts[0][1]
            # imaplib returns text on Python 2 and bytes on Python 3.
            if isinstance(emailBody, str):
                mail = email.message_from_string(emailBody)
            else:
                mail = email.message_from_bytes(emailBody)
            _save_attachments(mail, attachments_dir)
        imapSession.close()
    finally:
        # Always end the session, even when an error aborts the download.
        imapSession.logout()


def main():
    """Prompt for credentials and download all attachments into ./attachments."""
    detach_dir = '.'
    attachments_dir = os.path.join(detach_dir, 'attachments')
    if not os.path.isdir(attachments_dir):
        os.mkdir(attachments_dir)

    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3
    userName = read_line('Enter your GMail username:')
    passwd = getpass.getpass('Enter your password: ')

    try:
        _download_all(userName, passwd, attachments_dir)
    except Exception as err:
        # Top-level boundary: report and stop. Catching Exception (not a bare
        # except) lets Ctrl-C through, and printing the cause keeps the
        # failure diagnosable.
        print('Not able to download all attachments.')
        print(err)


if __name__ == '__main__':
    main()