"""Decode Mandrill click-tracking links and recover the original URL."""

import sys
import json
import base64
import argparse
from binascii import Error as BinasciiError
from urllib.parse import urlparse, parse_qs, urlencode, urlunparse, parse_qsl

version = "0.1"


def urlsafe_base64_decode(s):
    """
    Decode a URL-safe base64 encoded string.

    Adds back any trailing equal signs that might have been stripped.
    (via https://docs.djangoproject.com/en/4.2/_modules/django/utils/http/#urlsafe_base64_decode)

    Args:
        s: The base64 text (str), with or without its ``=`` padding.

    Returns:
        The decoded bytes.

    Raises:
        ValueError: If the input is not valid base64.
    """
    s = s.encode()
    # Pad up to the next multiple of four; ``-len(s) % 4`` is exactly the
    # number of "=" characters that were stripped (0, 1 or 2 for valid input).
    padding = b"=" * (-len(s) % 4)
    try:
        return base64.urlsafe_b64decode(s + padding)
    except (LookupError, BinasciiError) as e:
        raise ValueError(e) from e


def remove_utm(s):
    """
    Remove URL parameters starting with utm_, as these are usually only
    used for tracking.
    (see https://en.wikipedia.org/wiki/UTM_parameters)

    Args:
        s: The URL to clean.

    Returns:
        The URL rebuilt with every query parameter whose name starts with
        ``utm_`` removed. Remaining parameters keep their order and are
        re-encoded with ``urlencode``.
    """
    parsed_url = urlparse(s)

    # keep_blank_values=True so that non-tracking parameters with an empty
    # value (e.g. "?ref=") are not silently dropped along with the utm_ ones.
    query_parameters = parse_qsl(parsed_url.query, keep_blank_values=True)
    filtered_parameters = [
        (key, value)
        for key, value in query_parameters
        if not key.startswith('utm_')
    ]
    new_query = urlencode(filtered_parameters)

    # Reconstruct the URL with the modified query parameters
    return urlunparse(parsed_url._replace(query=new_query))


def mandrill_extract(tracking_url):
    """
    Extract the original URL from a mandrill link.
    (via https://gist.github.com/medmunds/1b696ee88ccb0480d71f)

    The last two path segments are read as the account ID and the base URL.
    The ``p`` query parameter is base64-decoded into a JSON object whose own
    ``p`` entry is a JSON string containing the target under ``url``.

    Args:
        tracking_url: The tracking link to decode.

    Returns:
        A tuple ``(account_id, base_url, data_field, original_url,
        filtered_url)`` where ``filtered_url`` is ``original_url`` with the
        utm_ parameters removed.

    Raises:
        IndexError: If the path has fewer than two segments.
        KeyError: If the ``p`` query parameter or an expected JSON key is
            missing.
        ValueError: If the data field is not valid base64 or JSON.
    """
    parsed_url = urlparse(tracking_url)

    path_segments = parsed_url.path.split('/')
    account_id = path_segments[-2]
    base_url = path_segments[-1]
    data_field = parse_qs(parsed_url.query)['p'][0]

    # The payload is JSON wrapped in JSON: the outer object's "p" value is
    # itself a JSON-encoded string.
    payload = json.loads(urlsafe_base64_decode(data_field))
    params = json.loads(payload['p'])
    original_url = params['url']
    filtered_url = remove_utm(original_url)

    return account_id, base_url, data_field, original_url, filtered_url


def main():
    """
    Command-line entry point.

    Parses the arguments, decodes the tracking URL and prints the result in
    the requested format. Exits with status 1 if the link cannot be decoded.
    """
    parser = argparse.ArgumentParser(description='Decode a mandrill tracking link and reconstruct the original URL without tracking')
    # The positional is optional at parse time so that "--version" works on
    # its own; its presence is enforced below once the version check is done.
    parser.add_argument('tracking_url', nargs='?', help='The tracking URL to process')
    parser.add_argument('--version', action='store_true', help='Show version info')
    parser.add_argument('-j', '--json', action='store_true', help='Output all fields as JSON')
    parser.add_argument('-v', '--verbose', action='store_true', help='Output all variants individually')
    parser.add_argument('-u', '--unfiltered', action='store_true', help='Output decoded link with tracking parameters')

    args = parser.parse_args()

    if args.version:
        print(f"Mandrill link decoder\n Version {version}")
        sys.exit(0)

    tracking_url = args.tracking_url
    if tracking_url is None:
        # Same message and exit status (2) argparse uses for a missing
        # required positional.
        parser.error('the following arguments are required: tracking_url')

    try:
        account_id, base_url, data_field, original_url, filtered_url = mandrill_extract(tracking_url)
    except Exception as e:
        # Top-level boundary: any decoding failure is reported and turned
        # into a non-zero exit status.
        print("Unable to parse link; exiting.", file=sys.stderr)
        print(f"Error: {str(e)}", file=sys.stderr)
        sys.exit(1)

    if args.json:
        result = {
            "AccountID": account_id,
            "BaseURL": base_url,
            "DataField": data_field,
            "OriginalURL": original_url,
            "FilteredURL": filtered_url
        }
        print(json.dumps(result, indent=4))
    elif args.verbose:
        print("Account ID:", account_id)
        print("Base URL:", base_url)
        print("Original URL:", original_url)
        print("Filtered URL:", filtered_url)
    elif args.unfiltered:
        print(original_url)
    else:
        print(filtered_url)


if __name__ == "__main__":
    main()