exogen · November 16, 2023 15:55 · Mar 3, 2017 · Mar 2, 2017 · Mar 2, 2017 · Mar 2, 2017
diff --git a/uber.py b/uber.py
@@ -2,26 +2,27 @@
 #
 # Usage: python3 uber.py ~/Downloads/Takeout/Mail/Uber.mbox
 #
-# Install dependencies: pip3 install python-dateutil
+# Dependencies: Python 3.4+
 #
 # How to get the .mbox export:
 #
 # In Gmail, create a filter that applies the label "Uber" to emails matching:
-# 
+#
 #   from:(uber.com) subject:("uber ride receipt" OR "trip with uber" OR "ride with uber")
 #
-# (and apply it to your past emails). Then, go to:
+# Apply the filter to your past emails. Then, go to:
 #
 #   https://takeout.google.com/settings/takeout
 #
-# And export your Mail data, selecting just the label "Uber".
+# Export your Mail data, selecting just the label "Uber".
 # Google will send you an email containing an archive.
 # Point this script at its contents.
-# 
+#
 import sys
 import re
 import mailbox
-import dateutil.parser
+import email.utils
+from datetime import datetime
 from collections import defaultdict
 
 def parse_amount(body):
@@ -53,13 +54,14 @@ def extract_trips(messages):
         if 'you have earned $10 in Uber credit' in body:
             continue
 
-        date = dateutil.parser.parse(msg['Date'])
+        date = email.utils.parsedate_to_datetime(msg['Date'])
         amount = parse_amount(body)
 
         if amount is None:
             skipped += 1
         else:
-            yield (date, float(amount[1:]))
+            amount = float(amount[1:])
+            yield (date, amount)
 
     if skipped:
         print("Skipped {} message(s) where I couldn't parse a trip amount.".format(skipped))
@@ -75,14 +77,16 @@ def summarize_trips(trips):
     for date, amount in trips:
         total_trips += 1
         total_amount += amount
-        year = dateutil.parser.parse(str(date)).astimezone().year
+        year = date.astimezone().year
         trips_by_year[year] += 1
         amount_by_year[year] += amount
 
     print("You've spent a total of ${:.2f} on {} trip(s) with Uber.\n"
         .format(total_amount, total_trips))
 
-    for year in sorted(trips_by_year):
+    start_year = min(trips_by_year)
+    end_year = max(max(trips_by_year), email.utils.localtime().year)
+    for year in range(start_year, end_year + 1):
         count = trips_by_year[year]
         amount = amount_by_year[year]
         print("{:7}   ${:8.2f}   {:3} trip(s)".format(year, amount, count))

diff --git a/uber.py b/uber.py
@@ -58,8 +58,8 @@ def extract_trips(messages):
 
         if amount is None:
             skipped += 1
-
-        yield (date, float(amount[1:]))
+        else:
+            yield (date, float(amount[1:]))
 
     if skipped:
         print("Skipped {} message(s) where I couldn't parse a trip amount.".format(skipped))

diff --git a/uber.py b/uber.py
@@ -58,15 +58,14 @@ def extract_trips(messages):
 
         if amount is None:
             skipped += 1
-            raise RuntimeError("Trip amount not found in message body.")
-            # If you get this error and want to fix it, run:
-            #   print(body)
-            # and add a new regular expression to `parse_amount` above.
 
         yield (date, float(amount[1:]))
 
     if skipped:
         print("Skipped {} message(s) where I couldn't parse a trip amount.".format(skipped))
+        # If you want to debug this:
+        # print(body)
+        # ...and add a new regex to `parse_amount` above.
 
 def summarize_trips(trips):
     trips_by_year = defaultdict(int)

diff --git a/uber.py b/uber.py
@@ -44,6 +44,7 @@ def parse_amount(body):
             return amount
 
 def extract_trips(messages):
+    skipped = 0
     for msg in messages:
         if 'uber' not in msg['From'] or ':' in msg['Subject']:
             continue
@@ -56,13 +57,17 @@ def extract_trips(messages):
         amount = parse_amount(body)
 
         if amount is None:
+            skipped += 1
             raise RuntimeError("Trip amount not found in message body.")
             # If you get this error and want to fix it, run:
             #   print(body)
             # and add a new regular expression to `parse_amount` above.
 
         yield (date, float(amount[1:]))
 
+    if skipped:
+        print("Skipped {} message(s) where I couldn't parse a trip amount.".format(skipped))
+
 def summarize_trips(trips):
     trips_by_year = defaultdict(int)
     amount_by_year = defaultdict(float)

diff --git a/uber.py b/uber.py
@@ -40,7 +40,7 @@ def parse_amount(body):
     for attrs, amount in re.findall(r'<td\s([^>]+)>\s*([$]\d+(?:[.]\d+)?)\s*<', body):
         # If you're debugging a newer table-based email format, it may help to uncomment:
         # print(attrs, amount)
-        if 'final-charge' in attrs:
+        if 'final-charge' in attrs or 'totalPrice' in attrs:
             return amount
 
 def extract_trips(messages):

diff --git a/uber.py b/uber.py
@@ -38,6 +38,8 @@ def parse_amount(body):
     for amount in re.findall(r'^\s*Total: +([$]\d+(?:[.]\d+)?)', body):
         return amount
     for attrs, amount in re.findall(r'<td\s([^>]+)>\s*([$]\d+(?:[.]\d+)?)\s*<', body):
+        # If you're debugging a newer table-based email format, it may help to uncomment:
+        # print(attrs, amount)
         if 'final-charge' in attrs:
             return amount
 

diff --git a/uber.py b/uber.py
@@ -55,6 +55,9 @@ def extract_trips(messages):
 
         if amount is None:
             raise RuntimeError("Trip amount not found in message body.")
+            # If you get this error and want to fix it, run:
+            #   print(body)
+            # and add a new regular expression to `parse_amount` above.
 
         yield (date, float(amount[1:]))
 

diff --git a/uber.py b/uber.py
@@ -4,6 +4,20 @@
 #
 # Install dependencies: pip3 install python-dateutil
 #
+# How to get the .mbox export:
+#
+# In Gmail, create a filter that applies the label "Uber" to emails matching:
+# 
+#   from:(uber.com) subject:("uber ride receipt" OR "trip with uber" OR "ride with uber")
+#
+# (and apply it to your past emails). Then, go to:
+#
+#   https://takeout.google.com/settings/takeout
+#
+# And export your Mail data, selecting just the label "Uber".
+# Google will send you an email containing an archive.
+# Point this script at its contents.
+# 
 import sys
 import re
 import mailbox

diff --git a/uber.py b/uber.py
@@ -0,0 +1,70 @@
+#!/usr/bin/env python3
+#
+# Usage: python3 uber.py ~/Downloads/Takeout/Mail/Uber.mbox
+#
+# Install dependencies: pip3 install python-dateutil
+#
+import sys
+import re
+import mailbox
+import dateutil.parser
+from collections import defaultdict
+
+def parse_amount(body):
+    # Uber has changed their email format over the years.
+    # This should find the amount charged for emails from 2011-2016 at least.
+    for amount in re.findall(r'([$]\d+(?:[.]\d+)?) has been charged', body):
+        return amount
+    for amount in re.findall(r'(?:Billed to Card:?|Amount Charged:) +\(([^)]+)\)', body):
+        return amount
+    for amount in re.findall(r'Billed [Tt]o Card: ([$]\d+(?:[.]\d+)?)', body):
+        return amount
+    for amount in re.findall(r'Total Fare: +([$]\d+(?:[.]\d+)?)', body):
+        return amount
+    for amount in re.findall(r'^\s*Total: +([$]\d+(?:[.]\d+)?)', body):
+        return amount
+    for attrs, amount in re.findall(r'<td\s([^>]+)>\s*([$]\d+(?:[.]\d+)?)\s*<', body):
+        if 'final-charge' in attrs:
+            return amount
+
+def extract_trips(messages):
+    for msg in messages:
+        if 'uber' not in msg['From'] or ':' in msg['Subject']:
+            continue
+
+        body = msg.get_payload()[0].get_payload(decode=True).decode()
+        if 'you have earned $10 in Uber credit' in body:
+            continue
+
+        date = dateutil.parser.parse(msg['Date'])
+        amount = parse_amount(body)
+
+        if amount is None:
+            raise RuntimeError("Trip amount not found in message body.")
+
+        yield (date, float(amount[1:]))
+
+def summarize_trips(trips):
+    trips_by_year = defaultdict(int)
+    amount_by_year = defaultdict(float)
+    total_trips = 0
+    total_amount = 0
+    for date, amount in trips:
+        total_trips += 1
+        total_amount += amount
+        year = dateutil.parser.parse(str(date)).astimezone().year
+        trips_by_year[year] += 1
+        amount_by_year[year] += amount
+
+    print("You've spent a total of ${:.2f} on {} trip(s) with Uber.\n"
+        .format(total_amount, total_trips))
+
+    for year in sorted(trips_by_year):
+        count = trips_by_year[year]
+        amount = amount_by_year[year]
+        print("{:7}   ${:8.2f}   {:3} trip(s)".format(year, amount, count))
+
+if __name__ == '__main__':
+    mbox = mailbox.mbox(sys.argv[1], create=False)
+    trips = list(extract_trips(mbox))
+    summarize_trips(trips)
No results found