Skip to content

Instantly share code, notes, and snippets.

@exogen
Last active November 16, 2023 15:55
Show Gist options
  • Select an option

  • Save exogen/ea5f3afbb8db888b6690546ab06d24e4 to your computer and use it in GitHub Desktop.

Select an option

Save exogen/ea5f3afbb8db888b6690546ab06d24e4 to your computer and use it in GitHub Desktop.

Revisions

  1. exogen revised this gist Mar 3, 2017. 1 changed file with 14 additions and 10 deletions.
    24 changes: 14 additions & 10 deletions uber.py
    Original file line number Diff line number Diff line change
    @@ -2,26 +2,27 @@
    #
    # Usage: python3 uber.py ~/Downloads/Takeout/Mail/Uber.mbox
    #
    # Install dependencies: pip3 install python-dateutil
    # Dependencies: Python 3.4+
    #
    # How to get the .mbox export:
    #
    # In Gmail, create a filter that applies the label "Uber" to emails matching:
    #
    #
    # from:(uber.com) subject:("uber ride receipt" OR "trip with uber" OR "ride with uber")
    #
    # (and apply it to your past emails). Then, go to:
    # Apply the filter to your past emails. Then, go to:
    #
    # https://takeout.google.com/settings/takeout
    #
    # And export your Mail data, selecting just the label "Uber".
    # Export your Mail data, selecting just the label "Uber".
    # Google will send you an email containing an archive.
    # Point this script at its contents.
    #
    #
    import sys
    import re
    import mailbox
    import dateutil.parser
    import email.utils
    from datetime import datetime
    from collections import defaultdict

    def parse_amount(body):
    @@ -53,13 +54,14 @@ def extract_trips(messages):
    if 'you have earned $10 in Uber credit' in body:
    continue

    date = dateutil.parser.parse(msg['Date'])
    date = email.utils.parsedate_to_datetime(msg['Date'])
    amount = parse_amount(body)

    if amount is None:
    skipped += 1
    else:
    yield (date, float(amount[1:]))
    amount = float(amount[1:])
    yield (date, amount)

    if skipped:
    print("Skipped {} message(s) where I couldn't parse a trip amount.".format(skipped))
    @@ -75,14 +77,16 @@ def summarize_trips(trips):
    for date, amount in trips:
    total_trips += 1
    total_amount += amount
    year = dateutil.parser.parse(str(date)).astimezone().year
    year = date.astimezone().year
    trips_by_year[year] += 1
    amount_by_year[year] += amount

    print("You've spent a total of ${:.2f} on {} trip(s) with Uber.\n"
    .format(total_amount, total_trips))

    for year in sorted(trips_by_year):
    start_year = min(trips_by_year)
    end_year = max(max(trips_by_year), email.utils.localtime().year)
    for year in range(start_year, end_year + 1):
    count = trips_by_year[year]
    amount = amount_by_year[year]
    print("{:7} ${:8.2f} {:3} trip(s)".format(year, amount, count))
  2. exogen revised this gist Mar 2, 2017. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions uber.py
    Original file line number Diff line number Diff line change
    @@ -58,8 +58,8 @@ def extract_trips(messages):

    if amount is None:
    skipped += 1

    yield (date, float(amount[1:]))
    else:
    yield (date, float(amount[1:]))

    if skipped:
    print("Skipped {} message(s) where I couldn't parse a trip amount.".format(skipped))
  3. exogen revised this gist Mar 2, 2017. 1 changed file with 3 additions and 4 deletions.
    7 changes: 3 additions & 4 deletions uber.py
    Original file line number Diff line number Diff line change
    @@ -58,15 +58,14 @@ def extract_trips(messages):

    if amount is None:
    skipped += 1
    raise RuntimeError("Trip amount not found in message body.")
    # If you get this error and want to fix it, run:
    # print(body)
    # and add a new regular expression to `parse_amount` above.

    yield (date, float(amount[1:]))

    if skipped:
    print("Skipped {} message(s) where I couldn't parse a trip amount.".format(skipped))
    # If you want to debug this:
    # print(body)
    # ...and add a new regex to `parse_amount` above.

    def summarize_trips(trips):
    trips_by_year = defaultdict(int)
  4. exogen revised this gist Mar 2, 2017. 1 changed file with 5 additions and 0 deletions.
    5 changes: 5 additions & 0 deletions uber.py
    Original file line number Diff line number Diff line change
    @@ -44,6 +44,7 @@ def parse_amount(body):
    return amount

    def extract_trips(messages):
    skipped = 0
    for msg in messages:
    if 'uber' not in msg['From'] or ':' in msg['Subject']:
    continue
    @@ -56,13 +57,17 @@ def extract_trips(messages):
    amount = parse_amount(body)

    if amount is None:
    skipped += 1
    raise RuntimeError("Trip amount not found in message body.")
    # If you get this error and want to fix it, run:
    # print(body)
    # and add a new regular expression to `parse_amount` above.

    yield (date, float(amount[1:]))

    if skipped:
    print("Skipped {} message(s) where I couldn't parse a trip amount.".format(skipped))

    def summarize_trips(trips):
    trips_by_year = defaultdict(int)
    amount_by_year = defaultdict(float)
  5. exogen revised this gist Mar 2, 2017. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion uber.py
    Original file line number Diff line number Diff line change
    @@ -40,7 +40,7 @@ def parse_amount(body):
    for attrs, amount in re.findall(r'<td\s([^>]+)>\s*([$]\d+(?:[.]\d+)?)\s*<', body):
    # If you're debugging a newer table-based email format, it may help to uncomment:
    # print(attrs, amount)
    if 'final-charge' in attrs:
    if 'final-charge' in attrs or 'totalPrice' in attrs:
    return amount

    def extract_trips(messages):
  6. exogen revised this gist Mar 2, 2017. 1 changed file with 2 additions and 0 deletions.
    2 changes: 2 additions & 0 deletions uber.py
    Original file line number Diff line number Diff line change
    @@ -38,6 +38,8 @@ def parse_amount(body):
    for amount in re.findall(r'^\s*Total: +([$]\d+(?:[.]\d+)?)', body):
    return amount
    for attrs, amount in re.findall(r'<td\s([^>]+)>\s*([$]\d+(?:[.]\d+)?)\s*<', body):
    # If you're debugging a newer table-based email format, it may help to uncomment:
    # print(attrs, amount)
    if 'final-charge' in attrs:
    return amount

  7. exogen revised this gist Mar 2, 2017. 1 changed file with 3 additions and 0 deletions.
    3 changes: 3 additions & 0 deletions uber.py
    Original file line number Diff line number Diff line change
    @@ -55,6 +55,9 @@ def extract_trips(messages):

    if amount is None:
    raise RuntimeError("Trip amount not found in message body.")
    # If you get this error and want to fix it, run:
    # print(body)
    # and add a new regular expression to `parse_amount` above.

    yield (date, float(amount[1:]))

  8. exogen revised this gist Mar 2, 2017. 1 changed file with 14 additions and 0 deletions.
    14 changes: 14 additions & 0 deletions uber.py
    Original file line number Diff line number Diff line change
    @@ -4,6 +4,20 @@
    #
    # Install dependencies: pip3 install python-dateutil
    #
    # How to get the .mbox export:
    #
    # In Gmail, create a filter that applies the label "Uber" to emails matching:
    #
    # from:(uber.com) subject:("uber ride receipt" OR "trip with uber" OR "ride with uber")
    #
    # (and apply it to your past emails). Then, go to:
    #
    # https://takeout.google.com/settings/takeout
    #
    # And export your Mail data, selecting just the label "Uber".
    # Google will send you an email containing an archive.
    # Point this script at its contents.
    #
    import sys
    import re
    import mailbox
  9. exogen created this gist Mar 2, 2017.
    70 changes: 70 additions & 0 deletions uber.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,70 @@
    #!/usr/bin/env python3
    #
    # Usage: python3 uber.py ~/Downloads/Takeout/Mail/Uber.mbox
    #
    # Install dependencies: pip3 install python-dateutil
    #
    import sys
    import re
    import mailbox
    import dateutil.parser
    from collections import defaultdict

    def parse_amount(body):
        """Return the charged amount found in an Uber receipt body, or None.

        Uber has changed their email format over the years, so several
        patterns are tried in order and the first one that matches wins.
        This should find the amount charged for emails from 2011-2016 at least.

        Args:
            body: Decoded text (plain or HTML) of the receipt email.

        Returns:
            The matched amount as a string (for the dollar patterns this
            includes the leading ``$``, e.g. ``"$12.50"``), or ``None`` when
            no known format matches.
        """
        amount_re = r'([$]\d+(?:[.]\d+)?)'
        text_patterns = (
            (amount_re + r' has been charged', 0),
            # Older format: the amount is whatever sits inside the parentheses.
            (r'(?:Billed to Card:?|Amount Charged:) +\(([^)]+)\)', 0),
            (r'Billed [Tt]o Card: ' + amount_re, 0),
            (r'Total Fare: +' + amount_re, 0),
            # MULTILINE is required here: without it `^` only matches at the
            # very start of the body, so a "Total:" line further down the
            # receipt would never be found.
            (r'^\s*Total: +' + amount_re, re.MULTILINE),
        )
        for pattern, flags in text_patterns:
            match = re.search(pattern, body, flags)
            if match:
                return match.group(1)

        # Table-based HTML receipts: pick the cell whose attributes mark it as
        # the final charge ('totalPrice' is used by newer emails).
        for attrs, amount in re.findall(r'<td\s([^>]+)>\s*' + amount_re + r'\s*<', body):
            # If you're debugging a newer table-based email format, it may
            # help to print(attrs, amount) here.
            if 'final-charge' in attrs or 'totalPrice' in attrs:
                return amount

        return None

    def extract_trips(messages):
        """Yield a ``(date, amount)`` pair for every Uber trip receipt.

        Args:
            messages: Iterable of email message objects (e.g. a
                ``mailbox.mbox``).

        Yields:
            Tuples of the parsed ``Date`` header and the charged amount as a
            float.

        Raises:
            RuntimeError: If a receipt's body matches none of the formats
                known to ``parse_amount``.
        """
        for msg in messages:
            # Headers may be missing (None) or non-str header objects;
            # normalise to str so the substring tests below cannot raise.
            sender = str(msg['From'] or '')
            subject = str(msg['Subject'] or '')
            if 'uber' not in sender or ':' in subject:
                continue

            body = _first_part_text(msg)
            if 'you have earned $10 in Uber credit' in body:
                continue

            date = dateutil.parser.parse(msg['Date'])
            amount = parse_amount(body)

            if amount is None:
                raise RuntimeError("Trip amount not found in message body.")

            # Drop the leading currency symbol before converting.
            yield (date, float(amount[1:]))

    def _first_part_text(msg):
        """Return the decoded text of the first leaf part of *msg* ('' if none)."""
        part = msg
        # Multipart messages keep a list of sub-messages; follow the first
        # child down to a leaf. A non-multipart message is its own leaf.
        while part.is_multipart():
            children = part.get_payload()
            if not children:
                return ''
            part = children[0]

        raw = part.get_payload(decode=True)
        if raw is None:
            return ''

        # Honour the charset declared on the part instead of assuming UTF-8,
        # and never let one undecodable byte abort the whole run.
        charset = part.get_content_charset() or 'utf-8'
        try:
            return raw.decode(charset, errors='replace')
        except LookupError:
            return raw.decode('utf-8', errors='replace')

    def summarize_trips(trips):
        """Print the overall total and a per-year breakdown of Uber spending.

        Args:
            trips: Iterable of ``(date, amount)`` pairs, where ``date`` is a
                ``datetime`` and ``amount`` is a number.

        Returns:
            None. The summary is written to standard output.
        """
        trips_by_year = defaultdict(int)
        amount_by_year = defaultdict(float)
        total_trips = 0
        total_amount = 0
        for date, amount in trips:
            total_trips += 1
            total_amount += amount
            # `date` is already a datetime, so convert it to local time
            # directly rather than re-parsing its string form.
            year = date.astimezone().year
            trips_by_year[year] += 1
            amount_by_year[year] += amount

        print("You've spent a total of ${:.2f} on {} trip(s) with Uber.\n"
              .format(total_amount, total_trips))

        for year in sorted(trips_by_year):
            count = trips_by_year[year]
            amount = amount_by_year[year]
            print("{:7} ${:8.2f} {:3} trip(s)".format(year, amount, count))

    if __name__ == '__main__':
        # Script entry point: read the .mbox path from the command line,
        # extract every trip and print the summary.
        if len(sys.argv) < 2:
            # Exit with the usage line instead of an IndexError traceback.
            sys.exit("Usage: python3 uber.py ~/Downloads/Takeout/Mail/Uber.mbox")

        # create=False so a mistyped path is not silently turned into a new,
        # empty mailbox file.
        mbox = mailbox.mbox(sys.argv[1], create=False)
        try:
            # Materialise the generator while the mailbox is still open.
            trips = list(extract_trips(mbox))
        finally:
            mbox.close()
        summarize_trips(trips)