Created
September 30, 2023 11:18
-
-
Save adlerweb/2be1a440bfadff1a521bfbb841c1d5ec to your computer and use it in GitHub Desktop.
Revisions
-
adlerweb created this gist
Sep 30, 2023. There are no files selected for viewing.
"""Decode a Mandrill tracking link and reconstruct the original URL."""

import sys
import json
import base64
import argparse
from binascii import Error as BinasciiError
from urllib.parse import urlparse, parse_qs, urlencode, urlunparse, parse_qsl

version = "0.1"


def urlsafe_base64_decode(s):
    """
    Decode a URL-safe base64 encoded string.

    Adds back any trailing equal signs that might have been stripped.
    (adapted from https://docs.djangoproject.com/en/4.2/_modules/django/utils/http/#urlsafe_base64_decode)

    Args:
        s: The base64 text (str), with or without its ``=`` padding.

    Returns:
        The decoded bytes.

    Raises:
        ValueError: If the input cannot be decoded.
    """
    s = s.encode()
    # Pad up to the next multiple of four. ``-len(s) % 4`` yields 0..3, so an
    # already aligned input gets no padding and a remainder of 3 gets exactly
    # one ``=`` instead of relying on the decoder ignoring surplus padding.
    padded = s + b"=" * (-len(s) % 4)
    try:
        return base64.urlsafe_b64decode(padded)
    except (LookupError, BinasciiError) as e:
        raise ValueError(e) from e


def remove_utm(s):
    """
    Remove URL parameters starting with utm_, as these are usually only used
    for tracking.
    (see https://en.wikipedia.org/wiki/UTM_parameters)

    Args:
        s: The URL to clean.

    Returns:
        The URL with every query parameter whose name starts with ``utm_``
        removed. The remaining parameters keep their order and are
        re-encoded with ``urlencode``.
    """
    parsed_url = urlparse(s)

    # keep_blank_values=True: without it parse_qsl silently drops parameters
    # with an empty value (e.g. "a=" in "?a=&b=1"), which would strip
    # non-tracking parameters from the result as well.
    query_parameters = parse_qsl(parsed_url.query, keep_blank_values=True)
    filtered_parameters = [
        (key, value)
        for key, value in query_parameters
        if not key.startswith('utm_')
    ]

    # Rebuild the URL around the filtered query string.
    new_query = urlencode(filtered_parameters)
    return urlunparse(parsed_url._replace(query=new_query))


def mandrill_extract(tracking_url):
    """
    Extract original URL from a mandrill link.
    (via https://gist.github.com/medmunds/1b696ee88ccb0480d71f)

    The last two path segments are read as account ID and base URL. The ``p``
    query parameter is base64-decoded into a JSON object whose own ``p`` entry
    is a JSON string holding the target under the ``url`` key.

    Args:
        tracking_url: The tracking link to decode.

    Returns:
        A tuple ``(account_id, base_url, data_field, original_url,
        filtered_url)`` where ``filtered_url`` is ``original_url`` with its
        ``utm_`` parameters removed.

    Raises:
        IndexError: If the URL path has fewer than two ``/``-separated parts.
        KeyError: If the ``p`` query parameter or an expected JSON key is
            missing.
        ValueError: If the data field is not valid base64 or not valid JSON.
    """
    parsed_url = urlparse(tracking_url)

    # Path components: .../<account id>/<base url>
    segments = parsed_url.path.split('/')
    account_id = segments[-2]
    base_url = segments[-1]

    # First value of the "p" query parameter carries the encoded payload.
    data_field = parse_qs(parsed_url.query)['p'][0]

    # The payload is JSON whose "p" entry is itself a JSON-encoded string.
    payload = json.loads(urlsafe_base64_decode(data_field))
    params = json.loads(payload['p'])
    original_url = params['url']

    filtered_url = remove_utm(original_url)

    return account_id, base_url, data_field, original_url, filtered_url


def main():
    """
    Command line entry point.

    Parses the arguments, decodes the given tracking link and prints the
    result in the requested format. When several output switches are given,
    ``--json`` wins over ``--verbose``, which wins over ``--unfiltered``.
    Exits with status 1 and a message on stderr if the link cannot be parsed.
    """
    parser = argparse.ArgumentParser(
        description='Decode a mandrill tracking link and reconstruct the original URL without tracking',
        # The raw formatter keeps the line break inside the version text.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument('tracking_url', help='The tracking URL to process')
    # action='version' prints and exits before required arguments are
    # checked, so "--version" works without supplying a tracking URL.
    parser.add_argument(
        '--version',
        action='version',
        version=f"Mandrill link decoder\n Version {version}",
        help='Show version info',
    )
    parser.add_argument('-j', '--json', action='store_true', help='Output all fields as JSON')
    parser.add_argument('-v', '--verbose', action='store_true', help='Output all variants individually')
    parser.add_argument('-u', '--unfiltered', action='store_true', help='Output decoded link with tracking parameters')

    args = parser.parse_args()

    tracking_url = args.tracking_url

    # Top-level boundary: any failure while decoding is reported to the user
    # and turned into a non-zero exit status.
    try:
        account_id, base_url, data_field, original_url, filtered_url = mandrill_extract(tracking_url)
    except Exception as e:
        print("Unable to parse link; exiting.", file=sys.stderr)
        print(f"Error: {str(e)}", file=sys.stderr)
        sys.exit(1)

    if args.json:
        result = {
            "AccountID": account_id,
            "BaseURL": base_url,
            "DataField": data_field,
            "OriginalURL": original_url,
            "FilteredURL": filtered_url,
        }
        print(json.dumps(result, indent=4))
    elif args.verbose:
        print("Account ID:", account_id)
        print("Base URL:", base_url)
        print("Original URL:", original_url)
        print("Filtered URL:", filtered_url)
    elif args.unfiltered:
        print(original_url)
    else:
        print(filtered_url)


if __name__ == "__main__":
    main()