Last active
November 16, 2023 15:55
-
-
Save exogen/ea5f3afbb8db888b6690546ab06d24e4 to your computer and use it in GitHub Desktop.
Revisions
-
exogen revised this gist
Mar 3, 2017 . 1 changed file with 14 additions and 10 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -2,26 +2,27 @@ # # Usage: python3 uber.py ~/Downloads/Takeout/Mail/Uber.mbox # # Dependencies: Python 3.4+ # # How to get the .mbox export: # # In Gmail, create a filter that applies the label "Uber" to emails matching: # # from:(uber.com) subject:("uber ride receipt" OR "trip with uber" OR "ride with uber") # # Apply the filter to your past emails. Then, go to: # # https://takeout.google.com/settings/takeout # # Export your Mail data, selecting just the label "Uber". # Google will send you an email containing an archive. # Point this script at its contents. # import sys import re import mailbox import email.utils from datetime import datetime from collections import defaultdict def parse_amount(body): @@ -53,13 +54,14 @@ def extract_trips(messages): if 'you have earned $10 in Uber credit' in body: continue date = email.utils.parsedate_to_datetime(msg['Date']) amount = parse_amount(body) if amount is None: skipped += 1 else: amount = float(amount[1:]) yield (date, amount) if skipped: print("Skipped {} message(s) where I couldn't parse a trip amount.".format(skipped)) @@ -75,14 +77,16 @@ def summarize_trips(trips): for date, amount in trips: total_trips += 1 total_amount += amount year = date.astimezone().year trips_by_year[year] += 1 amount_by_year[year] += amount print("You've spent a total of ${:.2f} on {} trip(s) with Uber.\n" .format(total_amount, total_trips)) start_year = min(trips_by_year) end_year = max(max(trips_by_year), email.utils.localtime().year) for year in range(start_year, end_year + 1): count = trips_by_year[year] amount = amount_by_year[year] print("{:7} ${:8.2f} {:3} trip(s)".format(year, amount, count)) -
exogen revised this gist
Mar 2, 2017 . 1 changed file with 2 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -58,8 +58,8 @@ def extract_trips(messages): if amount is None: skipped += 1 else: yield (date, float(amount[1:])) if skipped: print("Skipped {} message(s) where I couldn't parse a trip amount.".format(skipped)) -
exogen revised this gist
Mar 2, 2017 . 1 changed file with 3 additions and 4 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -58,15 +58,14 @@ def extract_trips(messages): if amount is None: skipped += 1 yield (date, float(amount[1:])) if skipped: print("Skipped {} message(s) where I couldn't parse a trip amount.".format(skipped)) # If you want to debug this: # print(body) # ...and add a new regex to `parse_amount` above. def summarize_trips(trips): trips_by_year = defaultdict(int) -
exogen revised this gist
Mar 2, 2017 . 1 changed file with 5 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -44,6 +44,7 @@ def parse_amount(body): return amount def extract_trips(messages): skipped = 0 for msg in messages: if 'uber' not in msg['From'] or ':' in msg['Subject']: continue @@ -56,13 +57,17 @@ def extract_trips(messages): amount = parse_amount(body) if amount is None: skipped += 1 raise RuntimeError("Trip amount not found in message body.") # If you get this error and want to fix it, run: # print(body) # and add a new regular expression to `parse_amount` above. yield (date, float(amount[1:])) if skipped: print("Skipped {} message(s) where I couldn't parse a trip amount.".format(skipped)) def summarize_trips(trips): trips_by_year = defaultdict(int) amount_by_year = defaultdict(float) -
exogen revised this gist
Mar 2, 2017 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -40,7 +40,7 @@ def parse_amount(body): for attrs, amount in re.findall(r'<td\s([^>]+)>\s*([$]\d+(?:[.]\d+)?)\s*<', body): # If you're debugging a newer table-based email format, it may help to uncomment: # print(attrs, amount) if 'final-charge' in attrs or 'totalPrice' in attrs: return amount def extract_trips(messages): -
exogen revised this gist
Mar 2, 2017 . 1 changed file with 2 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -38,6 +38,8 @@ def parse_amount(body): for amount in re.findall(r'^\s*Total: +([$]\d+(?:[.]\d+)?)', body): return amount for attrs, amount in re.findall(r'<td\s([^>]+)>\s*([$]\d+(?:[.]\d+)?)\s*<', body): # If you're debugging a newer table-based email format, it may help to uncomment: # print(attrs, amount) if 'final-charge' in attrs: return amount -
exogen revised this gist
Mar 2, 2017 . 1 changed file with 3 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -55,6 +55,9 @@ def extract_trips(messages): if amount is None: raise RuntimeError("Trip amount not found in message body.") # If you get this error and want to fix it, run: # print(body) # and add a new regular expression to `parse_amount` above. yield (date, float(amount[1:])) -
exogen revised this gist
Mar 2, 2017 . 1 changed file with 14 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -4,6 +4,20 @@ # # Install dependencies: pip3 install python-dateutil # # How to get the .mbox export: # # In Gmail, create a filter that applies the label "Uber" to emails matching: # # from:(uber.com) subject:("uber ride receipt" OR "trip with uber" OR "ride with uber") # # (and apply it to your past emails). Then, go to: # # https://takeout.google.com/settings/takeout # # And export your Mail data, selecting just the label "Uber". # Google will send you an email containing an archive. # Point this script at its contents. # import sys import re import mailbox -
exogen created this gist
Mar 2, 2017 .There are no files selected for viewing
#!/usr/bin/env python3
#
# Usage: python3 uber.py ~/Downloads/Takeout/Mail/Uber.mbox
#
# Dependencies: Python 3 standard library only.
#
"""Summarize Uber spending per year from an .mbox export of receipt emails."""
import sys
import re
import mailbox
import email.utils
from collections import defaultdict

# A dollar amount such as "$12" or "$12.50", captured as one group.
_MONEY = r'([$]\d+(?:[.]\d+)?)'

# Uber has changed their email format over the years.
# These should find the amount charged for emails from 2011-2016 at least.
# They are tried in order; the first pattern that matches wins.
_SIMPLE_AMOUNT_PATTERNS = tuple(
    re.compile(pattern, flags)
    for pattern, flags in (
        (_MONEY + r' has been charged', 0),
        (r'(?:Billed to Card:?|Amount Charged:) +\(([^)]+)\)', 0),
        (r'Billed [Tt]o Card: ' + _MONEY, 0),
        (r'Total Fare: +' + _MONEY, 0),
        # MULTILINE is required so that '^' anchors at the start of any line;
        # without it the pattern only matches when the body begins with
        # "Total:".
        (r'^\s*Total: +' + _MONEY, re.MULTILINE),
    )
)

# Newer table-based HTML receipts: a <td ...> cell whose text is an amount.
_TABLE_CELL_PATTERN = re.compile(r'<td\s([^>]+)>\s*' + _MONEY + r'\s*<')

# Attribute substrings that mark the table cell holding the final charge.
_FINAL_CHARGE_MARKERS = ('final-charge', 'totalPrice')


def parse_amount(body):
    """Return the charged amount found in a receipt body, or None.

    Args:
        body: Decoded text (plain text or HTML) of a receipt email.

    Returns:
        The matched amount string (e.g. ``"$12.50"``), or ``None`` when no
        known receipt format matches.
    """
    for pattern in _SIMPLE_AMOUNT_PATTERNS:
        match = pattern.search(body)
        if match:
            return match.group(1)
    for match in _TABLE_CELL_PATTERN.finditer(body):
        attrs, amount = match.groups()
        # If you're debugging a newer table-based email format, it may help
        # to print (attrs, amount) here.
        if any(marker in attrs for marker in _FINAL_CHARGE_MARKERS):
            return amount
    return None


def _first_part_text(msg):
    """Return the decoded text of the first leaf MIME part of ``msg``."""
    part = msg
    # Descend through (possibly nested) multipart containers; a plain
    # single-part message is used as-is.
    while part.is_multipart() and part.get_payload():
        part = part.get_payload()[0]
    payload = part.get_payload(decode=True)
    if payload is None:
        return ''
    charset = part.get_content_charset() or 'utf-8'
    try:
        return payload.decode(charset, errors='replace')
    except LookupError:
        # Unknown charset label in the message; fall back to UTF-8.
        return payload.decode('utf-8', errors='replace')


def _amount_to_float(amount):
    """Convert an amount string such as ``"$1,234.50"`` to a float."""
    return float(amount.strip().lstrip('$').replace(',', ''))


def extract_trips(messages):
    """Yield ``(date, amount)`` for each Uber receipt in ``messages``.

    Messages are skipped when the sender does not contain "uber", when the
    subject contains a colon (e.g. "Re:" / "Fwd:"), or when the body is the
    "$10 in Uber credit" promotion.

    Args:
        messages: Iterable of ``email.message.Message``-like objects.

    Yields:
        Tuples of (``datetime`` parsed from the Date header, amount as float).

    Raises:
        RuntimeError: If a receipt's amount cannot be found in its body.
            If you get this error and want to fix it, print the body and add
            a new regular expression to the patterns used by `parse_amount`.
    """
    for msg in messages:
        # Headers may be missing (None) or Header objects; normalize to str.
        sender = str(msg['From'] or '')
        subject = str(msg['Subject'] or '')
        if 'uber' not in sender or ':' in subject:
            continue
        body = _first_part_text(msg)
        if 'you have earned $10 in Uber credit' in body:
            continue
        date = email.utils.parsedate_to_datetime(msg['Date'])
        amount = parse_amount(body)
        if amount is None:
            raise RuntimeError("Trip amount not found in message body.")
        yield (date, _amount_to_float(amount))


def summarize_trips(trips):
    """Print the total spent and a per-year breakdown.

    Args:
        trips: Iterable of ``(datetime, float)`` pairs as produced by
            `extract_trips`. Years are taken in the local timezone.
    """
    trips_by_year = defaultdict(int)
    amount_by_year = defaultdict(float)
    total_trips = 0
    total_amount = 0
    for date, amount in trips:
        total_trips += 1
        total_amount += amount
        year = date.astimezone().year
        trips_by_year[year] += 1
        amount_by_year[year] += amount
    print("You've spent a total of ${:.2f} on {} trip(s) with Uber.\n"
          .format(total_amount, total_trips))
    for year in sorted(trips_by_year):
        count = trips_by_year[year]
        amount = amount_by_year[year]
        print("{:7} ${:8.2f} {:3} trip(s)".format(year, amount, count))


def main(argv=None):
    """Run the summary for the mbox path given on the command line.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``.

    Returns:
        Process exit status: 0 on success, 2 on a usage error.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 1:
        print("Usage: python3 uber.py ~/Downloads/Takeout/Mail/Uber.mbox",
              file=sys.stderr)
        return 2
    mbox = mailbox.mbox(args[0], create=False)
    try:
        trips = list(extract_trips(mbox))
    finally:
        mbox.close()
    summarize_trips(trips)
    return 0


if __name__ == '__main__':
    sys.exit(main())