Skip to content

Instantly share code, notes, and snippets.

@adlerweb
Created September 30, 2023 11:18
Show Gist options
  • Save adlerweb/2be1a440bfadff1a521bfbb841c1d5ec to your computer and use it in GitHub Desktop.
Save adlerweb/2be1a440bfadff1a521bfbb841c1d5ec to your computer and use it in GitHub Desktop.

Revisions

  1. adlerweb created this gist Sep 30, 2023.
    103 changes: 103 additions & 0 deletions mandrill_extract.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,103 @@
    import sys
    import json
    import base64
    import argparse
    from binascii import Error as BinasciiError
    from urllib.parse import urlparse, parse_qs, urlencode, urlunparse, parse_qsl

    # Program version string; main() prints it when --version is given.
    version = "0.1"

    def urlsafe_base64_decode(s):
        """
        Decode a URL-safe base64 encoded string. Add back any trailing equal
        signs that might have been stripped.
        (via https://docs.djangoproject.com/en/4.2/_modules/django/utils/http/#urlsafe_base64_decode)

        Args:
            s: The base64 text as ``str``, with or without ``=`` padding.

        Returns:
            The decoded data as ``bytes``.

        Raises:
            ValueError: If the input cannot be decoded; the underlying
                error is attached as ``__cause__``.
        """
        data = s.encode()
        # Appends len % 4 "=" characters. For a length of 3 mod 4 this is more
        # padding than strictly required; the approach (taken from Django)
        # relies on the decoder accepting surplus trailing "=".
        padded = data.ljust(len(data) + len(data) % 4, b"=")
        try:
            return base64.urlsafe_b64decode(padded)
        except (LookupError, BinasciiError) as e:
            # Chain explicitly so the original decoding error stays visible.
            raise ValueError(e) from e

    def remove_utm(s):
        """
        Remove URL parameters starting with utm_, as these are usually only
        used for tracking.
        (see https://en.wikipedia.org/wiki/UTM_parameters)

        Args:
            s: The URL to clean.

        Returns:
            The URL rebuilt without any query parameter whose name starts
            with ``utm_``. The remaining query string is re-encoded with
            ``urlencode``, so its escaping may differ from the input.
        """
        parsed_url = urlparse(s)

        # keep_blank_values=True: by default parse_qsl drops parameters with
        # an empty value (e.g. "flag="), which would silently remove
        # non-tracking parameters as well.
        query_parameters = parse_qsl(parsed_url.query, keep_blank_values=True)
        filtered_parameters = [
            (key, value)
            for key, value in query_parameters
            if not key.startswith('utm_')
        ]
        new_query = urlencode(filtered_parameters)

        # Reassemble the URL with only the query component replaced.
        return urlunparse(parsed_url._replace(query=new_query))

    def mandrill_extract(tracking_url):
        """
        Extract original URL from a mandrill link.
        (via https://gist.github.com/medmunds/1b696ee88ccb0480d71f)

        Args:
            tracking_url: The tracking link. Its query string must contain a
                ``p`` parameter holding the base64 encoded payload.

        Returns:
            A tuple ``(account_id, base_url, data_field, original_url,
            filtered_url)`` where ``account_id`` and ``base_url`` are the
            second-to-last and last ``/``-separated path segments,
            ``data_field`` is the raw ``p`` query value, ``original_url`` is
            the ``url`` entry of the decoded payload and ``filtered_url`` is
            that URL with ``utm_`` parameters removed.

        Raises:
            IndexError: If the path has fewer than two ``/``-separated parts.
            KeyError: If the ``p`` query parameter, the payload's ``p`` entry
                or the ``url`` entry is missing.
            ValueError: If the payload is not valid base64 or JSON.
        """
        parsed_url = urlparse(tracking_url)

        # Split the path once and pick the two trailing segments.
        path_segments = parsed_url.path.split('/')
        account_id = path_segments[-2]
        base_url = path_segments[-1]
        data_field = parse_qs(parsed_url.query)['p'][0]

        # The decoded payload is JSON whose 'p' entry is itself a JSON string.
        payload = json.loads(urlsafe_base64_decode(data_field))
        params = json.loads(payload['p'])

        original_url = params['url']
        filtered_url = remove_utm(original_url)

        return account_id, base_url, data_field, original_url, filtered_url

    def main():
        """
        Command line entry point.

        Parses the arguments, decodes the given mandrill tracking link and
        prints the result to stdout: all fields as JSON (``--json``), the
        individual fields (``--verbose``), the decoded URL including tracking
        parameters (``--unfiltered``), or by default the decoded URL with
        ``utm_`` parameters removed.

        Exits with status 0 after printing version info for ``--version``,
        with status 1 (details on stderr) when the link cannot be parsed, and
        via ``parser.error`` when no tracking URL is supplied.
        """
        parser = argparse.ArgumentParser(description='Decode a mandrill tracking link and reconstruct the original URL without tracking')

        # nargs='?' lets "--version" be used on its own; the URL is still
        # enforced below for every other invocation.
        parser.add_argument('tracking_url', nargs='?', help='The tracking URL to process')
        parser.add_argument('--version', action='store_true', help='Show version info')
        parser.add_argument('-j', '--json', action='store_true', help='Output all fields as JSON')
        parser.add_argument('-v', '--verbose', action='store_true', help='Output all variants individually')
        parser.add_argument('-u', '--unfiltered', action='store_true', help='Output decoded link with tracking parameters')

        args = parser.parse_args()

        if args.version:
            print(f"Mandrill link decoder\n Version {version}")
            sys.exit(0)

        tracking_url = args.tracking_url
        if tracking_url is None:
            # Same message argparse emits for a missing required positional.
            parser.error('the following arguments are required: tracking_url')

        try:
            account_id, base_url, data_field, original_url, filtered_url = mandrill_extract(tracking_url)
        except Exception as e:
            # Top-level boundary: report any decoding failure and exit non-zero.
            print("Unable to parse link; exiting.", file=sys.stderr)
            print(f"Error: {e}", file=sys.stderr)
            sys.exit(1)

        if args.json:
            result = {
                "AccountID": account_id,
                "BaseURL": base_url,
                "DataField": data_field,
                "OriginalURL": original_url,
                "FilteredURL": filtered_url,
            }
            print(json.dumps(result, indent=4))
        elif args.verbose:
            print("Account ID:", account_id)
            print("Base URL:", base_url)
            print("Original URL:", original_url)
            print("Filtered URL:", filtered_url)
        elif args.unfiltered:
            print(original_url)
        else:
            print(filtered_url)

    # Run the command line interface only when executed as a script.
    if __name__ == "__main__":
        main()