/**
 * Steps to use:
 * 1. Create CF Worker, copy and paste this in
 * 2. (Optional) Update configuration defaults
 *    - If you want to manage in code, do so below under "Static Configuration"
 *    - If you want dynamic custom config: Create CFW KV namespace, link them, and add reference below
 *
 *    - You can overwrite default path prefix for loading analytics.js (/ajs)
 *      (corresponding KV entry: `script_path_prefix`)
 *    - You can overwrite default path prefix for handling first-party data collection (/data)
 *      (corresponding KV entry: `collection_api_path_prefix`)
 *    - You can overwrite default cookie name for the edge-side anonymous ID
 *      (corresponding KV entry: `cookie_name`)
 *    - You can overwrite default integration list path prefix (/ilist)
 *      (corresponding KV entry: `integration_list_path_prefix`)
 *    - You can overwrite the default refresh trigger if you want to more regularly update the anonymousId
 *      (corresponding KV entry: `refresh_threshold`, a number of days)
 *    - You can set a path for echoing the session ID
 *      (corresponding KV entry: `anonymous_id_echo_path`)
 *    - You can set a default write key if you just want to use one globally and want to omit it from your site code
 *      (corresponding KV entry: `default_write_key`)
 *    - You can set an error collection endpoint if you have a logging service that accepts webhooks
 *      (corresponding KV entry: `error_endpoint`)
 *
 * 3. (If needed) If you use it for Consent Management, update any conditional destination loading logic
 *    to pull the integration list from your host + integration list path prefix
 *    eg. If using Segment Consent Manager or https://gist.github.com/sperand-io/4725e248a35d5005d68d810d8a8f7b29
 *    ...instead of fetch(`https://cdn.segment.com/v1/projects/${writeKey}/integrations`)
 *    ...replace with fetch(`${location.origin}/ilist/${writeKey}`) or fetch(`${location.origin}/ilist`)
 * 4. (REQUIRED) Deploy and configure the worker to serve for your desired domain/subdomain and at your desired path
 * 5. (REQUIRED) Update your segment snippet to load from your host + script path prefix
 *    (eg find n.src="https://cdn.segment.com/analytics.js/v1/"+t+"/analytics.min.js" in snippet and ...)
 *    (replace with n.src=`${location.origin}/ajs` if you have a default write key set)
 *    (or with n.src=`${location.origin}/ajs/${t}` if not)
 */

let KV_NAMESPACE

// START STATIC CONFIGURATION
const STATIC_CONFIG = {
  COOKIE_NAME: '__anonymous_session_id',
  SCRIPT_PATH_PREFIX: 'ajs',
  COLLECTION_API_PATH_PREFIX: 'data',
  INTEGRATION_LIST_PATH_PREFIX: 'ilist',
  ANONYMOUS_ID_ECHO_PATH: '',
  REFRESH_THRESHOLD: 45,
  DEFAULT_WRITE_KEY: '3K4xZlUgQFAa3MRdnRRKvbvDEukDCWeu',
  ERROR_ENDPOINT: 'https://enj0zt42hq1y.x.pipedream.net'
}
// END STATIC CONFIGURATION. Editing below this line is discouraged.

/**
 * Attach top-level responder.
 *
 * The `typeof` guard only matters outside the Workers runtime (e.g. when the
 * helpers below are loaded by a test runner that has no global
 * `addEventListener`); inside a Worker the listener is always registered.
 */
if (typeof addEventListener === 'function') {
  addEventListener('fetch', event => {
    event.respondWith(handleErr(event))
  })
}

/**
 * Top level event handler.
 *
 * Wraps our request handler in an error handler,
 * optionally forwarding errors to a logging service.
 *
 * @param {Event} event
 * @returns {Promise<Response>} the handler's response, or an error response
 *   carrying `err.statusCode` (default 500)
 */
async function handleErr(event) {
  try {
    return await handleEvent(event)
  } catch (err) {
    let endpoint
    try {
      endpoint = KV_NAMESPACE && (await KV_NAMESPACE.get('error_endpoint'))
    } catch (kvErr) {
      // A failing KV lookup must not mask the original error; use the static endpoint.
      endpoint = undefined
    }
    if (!endpoint) endpoint = STATIC_CONFIG['ERROR_ENDPOINT']
    if (endpoint) event.waitUntil(log(endpoint, err, event.request))

    return new Response((err && err.message) || 'An error occurred!', {
      status: (err && err.statusCode) || 500
    })
  }
}

/**
 * Respond to the request
 *
 * Provides special handling for Segment requests against the configured || default paths.
 *
 * @param {Event} event
 * @returns {Promise<Response>}
 */
async function handleEvent(event) {
  const config = KV_NAMESPACE
    ? await hydrateConfig(KV_NAMESPACE)
    : STATIC_CONFIG

  const {
    COOKIE_NAME,
    SCRIPT_PATH_PREFIX,
    COLLECTION_API_PATH_PREFIX,
    INTEGRATION_LIST_PATH_PREFIX,
    ANONYMOUS_ID_ECHO_PATH
  } = config

  const { request } = event
  const url = new URL(request.url)

  // extract cookie information
  const cookieData = getCookieData(request, COOKIE_NAME)

  // serve analytics.js
  if (startsWith(url, SCRIPT_PATH_PREFIX))
    return handleScript(event, caches.default, cookieData, config)

  // serve first party data collection pings
  if (startsWith(url, COLLECTION_API_PATH_PREFIX))
    return handleDataCollection(request, cookieData, config)

  // serve first party integration list
  if (startsWith(url, INTEGRATION_LIST_PATH_PREFIX))
    return handleIntegrationListing(request, config)

  // serve anonymousId echo
  if (ANONYMOUS_ID_ECHO_PATH && startsWith(url, ANONYMOUS_ID_ECHO_PATH))
    return handleEcho(request, cookieData)

  // passthrough everything else
  return fetch(request)
}

/**
 * Serve analytics.js
 *
 * Serves a modified analytics.js for (default || passed) writeKey at (default || configured) (path || path prefix)
 * Mods:
 *  If writeKey is omitted, get the default script
 *  Updates data collection api host in the script itself
 *  If needed, sets an HTTPOnly anonymous session cookie (and corresponding set-at cookie)
 *
 * @param {Event} event
 * @param {Cache} cache
 * @param {Object} cookieData
 * @param {String} cookieData.anonymousId
 * @param {Date} cookieData.expires
 * @param {Object} config
 * @returns {Promise<Response>}
 */
async function handleScript(
  event,
  cache,
  { anonymousId, expires },
  {
    SCRIPT_PATH_PREFIX,
    DEFAULT_WRITE_KEY,
    COLLECTION_API_PATH_PREFIX,
    COOKIE_NAME,
    REFRESH_THRESHOLD
  }
) {
  const { request } = event
  const { pathname, hostname } = new URL(request.url)
  const writeKey =
    pathname.split(`/${SCRIPT_PATH_PREFIX}/`)[1] || DEFAULT_WRITE_KEY

  let response
  const cached = await cache.match(request)
  if (cached) {
    // Re-wrap the cached response: its headers cannot be modified in place,
    // and we may need to append Set-Cookie below.
    response = new Response(cached.body, {
      status: cached.status,
      statusText: cached.statusText,
      headers: cached.headers
    })
  } else {
    const endpoint = `https://cdn.segment.com/analytics.js/v1/${writeKey}/analytics.min.js`
    const originalResponse = await fetch(new Request(endpoint, request))
    const analyticsjs = await originalResponse.text()
    // Point the library at our first-party collection path instead of Segment's API host.
    const modifiedAnalyticsjs = analyticsjs.replace(
      /api\.segment\.io\/v1/g,
      `${hostname}/${COLLECTION_API_PATH_PREFIX}`
    )
    response = new Response(modifiedAnalyticsjs, {
      status: originalResponse.status,
      statusText: originalResponse.statusText,
      headers: originalResponse.headers
    })
    // Cache before cookies are attached, and never cache an upstream error.
    if (originalResponse.ok) {
      event.waitUntil(cache.put(request, response.clone()))
    }
  }

  // NOTE(review): a fresh id is issued both when the cookie is missing and when it
  // is close to expiry (the existing id is not reused) — confirm this is intended.
  if (!anonymousId || expiresSoon(expires, REFRESH_THRESHOLD)) {
    const oneYearFromNow = new Date()
    oneYearFromNow.setFullYear(oneYearFromNow.getFullYear() + 1)
    response.headers.append(
      'Set-Cookie',
      createCookie(COOKIE_NAME, uuid(), oneYearFromNow)
    )
    response.headers.append(
      'Set-Cookie',
      createCookie(
        `${COOKIE_NAME}_set`,
        oneYearFromNow.toUTCString(),
        oneYearFromNow
      )
    )
  }

  return response
}

/**
 * Serve first party data collection API
 *
 * Serves a handler to modify and forward events to Segment at the default || configured path prefix
 * Mods:
 *  If present in the request cookie, overwrites anonymousId with edge-side cookie value
 *
 * @param {Request} request
 * @param {Object} cookieData
 * @param {String} cookieData.anonymousId
 * @param {Object} config
 * @returns {Promise<Response>}
 */
async function handleDataCollection(
  request,
  { anonymousId },
  { COLLECTION_API_PATH_PREFIX }
) {
  // Keep an unread copy: reading the JSON body consumes `request`.
  const originalRequest = request.clone()
  const body = JSON.stringify({
    ...(await request.json()),
    ...(anonymousId ? { anonymousId } : {})
  })

  const { pathname, hostname } = new URL(request.url)
  // Only the leading prefix segment is swapped for Segment's `/v1`.
  const correctPath = pathname.replace(`/${COLLECTION_API_PATH_PREFIX}`, '/v1')

  const newRequest = new Request(
    `https://api.segment.io${correctPath}`,
    new Request(originalRequest, { body })
  )
  // `set` rather than `append`: an incoming Origin header must be replaced, not duplicated.
  newRequest.headers.set('origin', `https://${hostname}`)

  return fetch(newRequest)
}

/**
 * Serve first party integration list API
 *
 * Serves a handler to passthrough list requests for default || passed writeKey at the default || configured path prefix
 *
 * @param {Request} request
 * @param {Object} config
 * @returns {Promise<Response>}
 */
async function handleIntegrationListing(
  request,
  { INTEGRATION_LIST_PATH_PREFIX, DEFAULT_WRITE_KEY }
) {
  const { pathname } = new URL(request.url)
  const writeKey =
    pathname.split(`/${INTEGRATION_LIST_PATH_PREFIX}/`)[1] || DEFAULT_WRITE_KEY

  const endpoint = `https://cdn.segment.com/v1/projects/${writeKey}/integrations`
  return fetch(new Request(endpoint, request))
}

/**
 * Serve first party anonymousID echo API
 *
 * @param {Request} request incoming Request (unused)
 * @param {Object} cookieData
 * @param {String} cookieData.anonymousId
 * @returns {Promise<Response>} JSON `{ anonymousId }`, or a 404 when no id is known
 */
async function handleEcho(request, { anonymousId }) {
  if (anonymousId) {
    return new Response(JSON.stringify({ anonymousId }), {
      headers: new Headers({ 'Content-Type': 'application/json' })
    })
  }
  return new Response('No AnonymousId', { status: 404 })
}

/**
 * HELPERS
 */

/**
 * Build the effective configuration: static defaults overlaid with any
 * non-empty values stored in the KV namespace.
 *
 * STATIC_CONFIG itself is not modified. `refresh_threshold` is converted to a
 * number because KV values are read back as strings.
 *
 * @param {NAMESPACE} KV
 * @returns {Promise<Object>} config object keyed like STATIC_CONFIG
 */
async function hydrateConfig(KV) {
  const keys = [
    'cookie_name',
    'script_path_prefix',
    'collection_api_path_prefix',
    'integration_list_path_prefix',
    'anonymous_id_echo_path',
    'refresh_threshold',
    'default_write_key'
  ]

  const entries = await Promise.all(
    keys.map(async key => [key.toUpperCase(), await KV.get(key)])
  )

  const config = { ...STATIC_CONFIG }
  for (const [name, stored] of entries) {
    if (!stored) continue
    if (name === 'REFRESH_THRESHOLD') {
      const days = Number(stored)
      if (Number.isFinite(days)) config[name] = days
    } else {
      config[name] = stored
    }
  }
  return config
}

/**
 * Check if url path is the specified prefix or lives underneath it.
 *
 * Matches whole path segments, so prefix `data` matches `/data` and
 * `/data/v1/t` but not `/dataset`.
 *
 * @param {URL} url
 * @param {String} prefix path prefix without the leading slash
 * @returns {Boolean}
 */
function startsWith(url, prefix) {
  const base = `/${prefix}`
  return url.pathname === base || url.pathname.startsWith(`${base}/`)
}

/**
 * Check if the anonymousId is due to be refreshed
 * (ie. is our expiration closer than our threshold window allows?)
 *
 * A missing or unparseable expiration counts as "due".
 *
 * @param {Date} when recorded expiration of the cookie
 * @param {Number|String} REFRESH_THRESHOLD window in days
 * @returns {Boolean}
 */
function expiresSoon(when, REFRESH_THRESHOLD) {
  if (when == null) return true
  const expiry = when instanceof Date ? when.getTime() : Number(when)
  if (Number.isNaN(expiry)) return true

  // Coerce: a threshold read from KV arrives as a string, and `+` would concatenate.
  const parsedDays = Number(REFRESH_THRESHOLD)
  const days = Number.isFinite(parsedDays)
    ? parsedDays
    : STATIC_CONFIG.REFRESH_THRESHOLD

  // eg. 45 days from now
  const threshold = new Date()
  threshold.setDate(threshold.getDate() + days)

  // is expiration in less than eg. 45 days?
  return expiry < threshold.getTime()
}

/**
 * Encode a cookie string suited for our use case
 *
 * `Path=/` is explicit so that a cookie set while serving `/<script prefix>/<key>`
 * is also sent with requests to the data collection path.
 *
 * @param {String} name
 * @param {String} value
 * @param {Date} expires
 * @returns {String} value for a Set-Cookie header
 */
function createCookie(name, value, expires) {
  return `${encodeURIComponent(name)}=${encodeURIComponent(
    value
  )}; Expires=${expires.toUTCString()}; Path=/; SameSite=Strict; Secure; HttpOnly`
}

/**
 * Generate a spec-compliant uuid-v4
 * adapted from: https://gist.github.com/bentranter/ed524091170137a72c1d54d641493c1f
 *
 * @returns {String} lowercase 8-4-4-4-12 hex string
 */
function uuid() {
  const bytes = crypto.getRandomValues(new Uint8Array(16))
  bytes[6] = (bytes[6] & 0x0f) | 0x40 // version 4
  bytes[8] = (bytes[8] & 0x3f) | 0x80 // variant 10xx

  // Every byte must contribute exactly two hex digits.
  const hex = [...bytes].map(byte => byte.toString(16).padStart(2, '0'))

  return [
    hex.slice(0, 4),
    hex.slice(4, 6),
    hex.slice(6, 8),
    hex.slice(8, 10),
    hex.slice(10)
  ]
    .map(group => group.join(''))
    .join('-')
}

/**
 * URL-decode one cookie name or value, returning the raw text when it is not
 * validly percent-encoded.
 *
 * @param {String} text
 * @returns {String}
 */
function decodeCookiePart(text) {
  try {
    return decodeURIComponent(text)
  } catch (err) {
    return text
  }
}

/**
 * Grabs the anonymousId and expiration time from the cookies in the request header
 *
 * Adapted from: https://developers.cloudflare.com/workers/templates/pages/cookie_extract/
 *
 * @param {Request} request incoming Request
 * @param {string} name of the edge-side cookie
 * @returns {{anonymousId: (String|null), expires: (Date|null)}}
 */
function getCookieData(request, name) {
  let anonymousId = null
  let expires = null

  const cookieString = request.headers.get('Cookie')
  if (!cookieString) return { anonymousId, expires }

  for (const cookie of cookieString.split(';')) {
    // Split on the first '=' only; the value itself may contain '='.
    const separator = cookie.indexOf('=')
    if (separator === -1) continue

    const cookieName = decodeCookiePart(cookie.slice(0, separator).trim())
    const cookieValue = decodeCookiePart(cookie.slice(separator + 1).trim())

    if (cookieName === name) {
      anonymousId = cookieValue
    } else if (cookieName === `${name}_set`) {
      expires = new Date(cookieValue)
    }
  }

  return { anonymousId, expires }
}

/**
 * Ship the error with some helpful request context as JSON to the specified endpoint
 *
 * Never rejects: failures to deliver are written to the console instead.
 *
 * ADAPTED from https://github.com/bustle/cf-sentry/
 *
 * @param {String} endpoint
 * @param {Error} err the error
 * @param {Request} request incoming Request
 * @returns {Promise<void>}
 */
async function log(endpoint, err, request) {
  try {
    const body = JSON.stringify(errToJson(err, request))
    const res = await fetch(endpoint, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json'
      },
      body
    })
    if (res.ok) return

    // We couldn't send to error endpoint, try to log the response at least
    console.error({ httpStatus: res.status, body: await res.text() }) // eslint-disable-line no-console
  } catch (logErr) {
    console.error(logErr) // eslint-disable-line no-console
  }
}

/**
 * Encode the parsed and formatted error as JSON
 *
 * ADAPTED from https://github.com/bustle/cf-sentry/
 *
 * @param {Error} err the error
 * @param {Request} request incoming Request
 * @returns {Object} JSON-serializable error report
 */
function errToJson(err, request) {
  const errType = err.name || (err.constructor || {}).name
  const frames = parse(err)
  const extraKeys = Object.keys(err).filter(
    key => !['name', 'message', 'stack'].includes(key)
  )

  let requestInfo
  if (request && request.url) {
    requestInfo = {
      method: request.method,
      url: request.url,
      query_string: new URL(request.url).search.slice(1),
      // Copy header pairs into a plain object; a Headers instance stringifies to {}.
      headers: request.headers
        ? [...request.headers].reduce(
            (all, [key, value]) => ({ ...all, [key]: value }),
            {}
          )
        : undefined
      // The request body is a stream and cannot be serialized here, so it is omitted.
    }
  }

  return {
    message: errType + ': ' + (err.message || ''),
    exception: {
      values: [
        {
          type: errType,
          value: err.message,
          stacktrace: frames.length ? { frames: frames.reverse() } : undefined
        }
      ]
    },
    extra: extraKeys.length
      ? {
          [errType]: extraKeys.reduce(
            (obj, key) => ({ ...obj, [key]: err[key] }),
            {}
          )
        }
      : undefined,
    platform: 'worker',
    timestamp: Date.now() / 1000,
    request: requestInfo
  }
}

/**
 * Parse errors.
 *
 * Turns `err.stack` into a list of frame objects, skipping the first line
 * (the error message) and any line that does not look like a stack frame.
 *
 * ADAPTED from https://github.com/bustle/cf-sentry/
 *
 * @param {Error} err the error
 * @returns {Array<Object>} frames with function/filename/lineno/colno/in_app
 */
function parse(err) {
  return (err.stack || '')
    .split('\n')
    .slice(1)
    .map(line => {
      if (line.match(/^\s*[-]{4,}$/)) {
        return { filename: line }
      }

      // From https://github.com/felixge/node-stack-trace/blob/1ec9ba43eece124526c273c917104b4226898932/lib/stack-trace.js#L42
      const lineMatch = line.match(
        /at (?:(.+)\s+\()?(?:(.+?):(\d+)(?::(\d+))?|([^)]+))\)?/
      )
      if (!lineMatch) {
        return undefined
      }

      return {
        function: lineMatch[1] || undefined,
        filename: lineMatch[2] || undefined,
        lineno: +lineMatch[3] || undefined,
        colno: +lineMatch[4] || undefined,
        in_app: lineMatch[5] !== 'native' || undefined
      }
    })
    .filter(Boolean)
}