Skip to content

Instantly share code, notes, and snippets.

@vunb
Forked from sperand-io/worker.js
Created June 20, 2023 01:24
Show Gist options
  • Select an option

  • Save vunb/51b76988aa554adf6283ecf7c60aeaa3 to your computer and use it in GitHub Desktop.

Select an option

Save vunb/51b76988aa554adf6283ecf7c60aeaa3 to your computer and use it in GitHub Desktop.
Cloudflare Workers / Segment Smart Proxy — serve data collection assets and endpoints from your own domain
/**
* Steps to use:
* 1. Create CF Worker & paste this in
* 2. (Optional) Update configuration defaults
* - If you want to manage in code, do so below under "Static Configuration"
* - If you want dynamic custom config: Create CFW KV namespace, link them, and add reference below
*
* - You can overwrite default path prefix for loading analytics.js (<yourdomain>/ajs)
* (corresponding KV entry: `script_path_prefix`)
* - You can overwrite default path prefix for handling first-party data collection (<yourdomain>/data)
* (corresponding KV entry: `collection_api_path_prefix`)
* - You can overwrite default cookie name for the edge-side anonymous ID
* (corresponding KV entry: `cookie_name`)
* - You can overwrite default integration list path prefix (<yourdomain>/ilist)
* (corresponding KV entry: `integration_list_path_prefix`)
* - You can overwrite the default refresh trigger if you want to more regularly update the anonymousId
* (corresponding KV entry: `refresh_threshold`)
* - You can set a default write key if you just want to use one globally and want to omit it from your site code
* (corresponding KV entry: `default_write_key`)
* - You can set an error collection endpoint if you have a logging service that accepts webhooks
* (corresponding KV entry: `error_endpoint`)
* 3. (If needed) If you use it for Consent Management, update any conditional destination loading logic to pull the integration list from your host + integration list path prefix
* eg. If using Segment Consent Manager or https://gist.github.com/sperand-io/4725e248a35d5005d68d810d8a8f7b29
* ...instead of fetch(`https://cdn.segment.com/v1/projects/${writeKey}/integrations`)
* ...replace with fetch(`${location.origin}/ilist/${writeKey}`) or, if you have a default write key set, fetch(`${location.origin}/ilist`)
* 4. (REQUIRED) Deploy and configure the worker to serve for your desired domain/subdomain and at your desired path
* 5. (REQUIRED) Update your segment snippet to load from your host + script path prefix
* (eg find n.src="https://cdn.segment.com/analytics.js/v1/"+t+"/analytics.min.js" in snippet and ...)
* (replace with n.src=`${location.origin}/ajs` if you have a default write key set)
* (or with n.src=`${location.origin}/ajs/${t}` if not)
*/
// Optional Workers KV binding used for dynamic configuration. It is declared without a
// value, so the truthiness checks further down fall back to STATIC_CONFIG until you
// assign your KV namespace here.
let KV_NAMESPACE;
// START STATIC CONFIGURATION
// Defaults used whenever no KV namespace is linked (or a KV entry is missing).
// NOTE(review): the write key and error endpoint below look like sample values —
// replace them with your own before deploying.
const STATIC_CONFIG = {
// name of the edge-side anonymous ID cookie; a companion "<name>_set" cookie stores its expiry
COOKIE_NAME: "__anonymous_session_id",
// first path segment under which analytics.js is served (<yourdomain>/ajs)
SCRIPT_PATH_PREFIX: "ajs",
// first path segment under which tracking calls are accepted and forwarded to api.segment.io
COLLECTION_API_PATH_PREFIX: "data",
// first path segment under which the integration list is proxied
INTEGRATION_LIST_PATH_PREFIX: "ilist",
// number of days before expiry at which the anonymous ID cookie is re-issued
REFRESH_THRESHOLD: 45,
// write key used when the request path does not carry one
DEFAULT_WRITE_KEY: "3K4xZlUgQFAa3MRdnRRKvbvDEukDCWeu",
// webhook that receives JSON error reports; a falsy value disables error forwarding
ERROR_ENDPOINT: "https://enj0zt42hq1y.x.pipedream.net"
};
// END STATIC CONFIGURATION. Editing below this line is discouraged.
/**
 * Register the worker's fetch listener.
 *
 * Every incoming request is answered with whatever handleErr resolves to.
 */
addEventListener("fetch", function onFetch(fetchEvent) {
  const pendingResponse = handleErr(fetchEvent);
  fetchEvent.respondWith(pendingResponse);
});
/**
 * Top level event handler.
 *
 * Wraps our request handler in an error handler. When an error endpoint is
 * configured (KV entry `error_endpoint`, otherwise STATIC_CONFIG.ERROR_ENDPOINT)
 * the error is forwarded to it in the background via event.waitUntil.
 *
 * The error path itself never throws: a failing KV lookup or a failing log call
 * is reported to the console instead of rejecting the response promise.
 *
 * @param {Event} event
 * @returns {Promise<Response>} the handler's response, or an error response whose
 * body is the error message and whose status is `err.statusCode` (500 when the
 * code is missing or not a valid HTTP status)
 */
async function handleErr(event) {
  try {
    return await handleEvent(event);
  } catch (err) {
    // `throw undefined` / `throw "text"` are legal; never dereference blindly
    const failure = err || {};

    let endpoint;
    try {
      endpoint = KV_NAMESPACE && (await KV_NAMESPACE.get("error_endpoint"));
    } catch (lookupErr) {
      // a broken KV lookup must not mask the original failure
      console.error(lookupErr);
    }
    if (!endpoint) endpoint = STATIC_CONFIG["ERROR_ENDPOINT"];

    if (endpoint) {
      event.waitUntil(
        log(endpoint, err, event.request).catch(logErr => console.error(logErr))
      );
    }

    // Response throws a RangeError for statuses outside 200-599
    const code = failure.statusCode;
    const status = Number.isInteger(code) && code >= 200 && code <= 599 ? code : 500;
    return new Response(failure.message || "An error occurred!", { status });
  }
}
/**
 * Route an incoming request.
 *
 * Requests whose path begins with one of the configured (or default) Segment
 * prefixes are given to the matching handler; everything else is fetched as-is.
 * Configuration comes from the KV namespace when one is linked, otherwise from
 * STATIC_CONFIG.
 *
 * @param {Event} event
 */
async function handleEvent(event) {
  let config = STATIC_CONFIG;
  if (KV_NAMESPACE) {
    config = await hydrateConfig(KV_NAMESPACE);
  }
  const {
    COOKIE_NAME,
    SCRIPT_PATH_PREFIX,
    COLLECTION_API_PATH_PREFIX,
    INTEGRATION_LIST_PATH_PREFIX
  } = config;
  const cache = caches.default;
  const request = event.request;
  const url = new URL(request.url);

  // anonymous id + expiry carried by the request's cookies
  const cookieData = getCookieData(request, COOKIE_NAME);

  // analytics.js
  if (startsWith(url, SCRIPT_PATH_PREFIX)) {
    const scriptResponse = await handleScript(event, cache, cookieData, config);
    return scriptResponse;
  }

  // first party data collection pings
  if (startsWith(url, COLLECTION_API_PATH_PREFIX)) {
    const collectionResponse = await handleDataCollection(request, cookieData, config);
    return collectionResponse;
  }

  // first party integration list
  if (startsWith(url, INTEGRATION_LIST_PATH_PREFIX)) {
    const listResponse = await handleIntegrationListing(request, config);
    return listResponse;
  }

  // anything else is passed through
  const passthrough = await fetch(request);
  return passthrough;
}
/**
 * Serve analytics.js
 *
 * Serves a modified analytics.js for the (default || passed) writeKey at the
 * (default || configured) path prefix.
 * Mods:
 * If writeKey is omitted from the path, the default write key's script is served
 * The data collection api host inside the script is rewritten to this host + collection prefix
 * If needed, an HTTPOnly anonymous session cookie (and a companion "<name>_set"
 * cookie holding its expiry) is appended to the response
 *
 * The rewritten script is stored in the cache before any cookie is attached, and
 * only when the upstream fetch succeeded.
 *
 * @param {Event} event
 * @param {Cache} cache
 * @param {Object} cookieData
 * @param {String} cookieData.anonymousId
 * @param {Date} cookieData.expires
 * @param {Object} config
 * @returns {Promise<Response>}
 */
async function handleScript(
  event,
  cache,
  { anonymousId, expires },
  {
    SCRIPT_PATH_PREFIX,
    DEFAULT_WRITE_KEY,
    COLLECTION_API_PATH_PREFIX,
    COOKIE_NAME,
    REFRESH_THRESHOLD
  }
) {
  const { request } = event;
  const { pathname, hostname } = new URL(request.url);
  const writeKey = pathname.split(`/${SCRIPT_PATH_PREFIX}/`)[1] || DEFAULT_WRITE_KEY;

  let response;
  const cached = await cache.match(request);
  if (cached) {
    // Responses coming out of the cache have immutable headers; re-wrap the
    // body so the Set-Cookie headers below can be appended.
    response = new Response(cached.body, cached);
  } else {
    const endpoint = `https://cdn.segment.com/analytics.js/v1/${writeKey}/analytics.min.js`;
    const originalResponse = await fetch(new Request(endpoint, request));
    const analyticsjs = await originalResponse.text();
    const modifiedAnalyticsjs = analyticsjs.replace(
      /api\.segment\.io\/v1/g,
      `${hostname}/${COLLECTION_API_PATH_PREFIX}`
    );
    // status / statusText / headers stay readable after the body has been consumed
    response = new Response(modifiedAnalyticsjs, originalResponse);
    if (originalResponse.ok) {
      // never cache an upstream error page under the script URL
      event.waitUntil(cache.put(request, response.clone()));
    }
  }

  if (!anonymousId || expiresSoon(expires, REFRESH_THRESHOLD)) {
    const oneYearFromNow = new Date();
    oneYearFromNow.setFullYear(oneYearFromNow.getFullYear() + 1);
    response.headers.append(
      "Set-Cookie",
      createCookie(COOKIE_NAME, uuid(), oneYearFromNow)
    );
    response.headers.append(
      "Set-Cookie",
      createCookie(`${COOKIE_NAME}_set`, oneYearFromNow.toUTCString(), oneYearFromNow)
    );
  }
  return response;
}
/**
 * Serve first party data collection API
 *
 * Forwards a tracking call received under the collection path prefix to
 * https://api.segment.io, with the prefix replaced by "v1".
 * Mods:
 * If present in the request cookie, overwrites the top-level anonymousId with the edge-side cookie value
 * The origin header is set to this host
 *
 * @param {Request} request
 * @param {Object} cookieData
 * @param {String} cookieData.anonymousId
 * @param {Object} config
 * @returns {Promise<Response>} Segment's response, unmodified
 * @throws {Error} with `statusCode` 400 when the request body is not valid JSON
 */
async function handleDataCollection(
  request,
  { anonymousId },
  { COLLECTION_API_PATH_PREFIX }
) {
  let payload;
  try {
    payload = await request.json();
  } catch (parseErr) {
    // surfaced by the top-level handler as a 400 instead of a generic 500
    const badRequest = new Error("Request body must be valid JSON");
    badRequest.statusCode = 400;
    throw badRequest;
  }
  const body = JSON.stringify({
    ...payload,
    ...(anonymousId ? { anonymousId } : {})
  });

  const { pathname, hostname } = new URL(request.url);
  const correctPath = pathname.replace(COLLECTION_API_PATH_PREFIX, "v1");
  const newRequest = new Request(`https://api.segment.io${correctPath}`, {
    method: request.method,
    headers: request.headers,
    redirect: request.redirect,
    body
  });
  // the body was rewritten, so the incoming length no longer applies
  newRequest.headers.delete("content-length");
  // set (not append): the incoming request may already carry an Origin header
  newRequest.headers.set("origin", `https://${hostname}`);
  return fetch(newRequest);
}
/**
 * Serve first party integration list API
 *
 * Passes list requests through to Segment's CDN for the write key found in the
 * path after the (default || configured) integration list prefix, or for the
 * default write key when the path carries none.
 *
 * @param {Request} request
 * @param {Object} config
 * @returns {Promise<Response>} the CDN's response, unmodified
 */
async function handleIntegrationListing(
  request,
  { INTEGRATION_LIST_PATH_PREFIX, DEFAULT_WRITE_KEY }
) {
  const { pathname } = new URL(request.url);
  const writeKey =
    pathname.split(`/${INTEGRATION_LIST_PATH_PREFIX}/`)[1] || DEFAULT_WRITE_KEY;
  const endpoint = `https://cdn.segment.com/v1/projects/${writeKey}/integrations`;
  // the incoming request doubles as the init, carrying method and headers along
  return fetch(new Request(endpoint, request));
}
/**
* HELPERS
*/
/**
 * Build the effective configuration from a KV namespace.
 *
 * Reads every supported entry from KV and overlays the non-empty ones on a copy
 * of STATIC_CONFIG (which itself is left untouched). KV stores strings, so
 * `refresh_threshold` is converted to a number and ignored when it is not numeric.
 *
 * @param {NAMESPACE} KV
 * @returns {Promise<Object>} configuration keyed like STATIC_CONFIG
 */
async function hydrateConfig(KV) {
  const keys = [
    "cookie_name",
    "script_path_prefix",
    "collection_api_path_prefix",
    "integration_list_path_prefix",
    "refresh_threshold",
    "default_write_key"
  ];
  // lookups are independent, so run them in parallel
  const entries = await Promise.all(
    keys.map(async k => [k.toUpperCase(), await KV.get(k)])
  );
  const config = { ...STATIC_CONFIG };
  for (const [name, stored] of entries) {
    if (!stored) continue;
    if (name === "REFRESH_THRESHOLD") {
      const days = Number(stored);
      if (Number.isFinite(days)) config[name] = days;
      continue;
    }
    config[name] = stored;
  }
  return config;
}
/**
 * Check if url path begins with a specified prefix as a whole path segment.
 *
 * `/data` and `/data/t` match prefix `data`; `/database` does not, so unrelated
 * site paths that merely share leading characters are not routed to a handler.
 *
 * @param {URL} url
 * @param {String} prefix first path segment, without slashes
 * @returns {Boolean}
 */
function startsWith(url, prefix) {
  const base = `/${prefix}`;
  return url.pathname === base || url.pathname.startsWith(`${base}/`);
}
/**
 * Check if the anonymousId is due to be refreshed
 * (ie. is our expiration closer than our threshold window allows?)
 *
 * A missing or unparseable expiration counts as due, so a damaged cookie gets
 * replaced instead of lingering forever.
 *
 * @param {Date|null} when expiration of the current anonymousId
 * @param {Number|String} REFRESH_THRESHOLD window in days; numeric strings are
 * accepted, anything non-numeric is treated as 0
 * @returns {Boolean}
 */
function expiresSoon(when, REFRESH_THRESHOLD) {
  if (when == null) return true;
  const expiry = new Date(when).getTime();
  if (Number.isNaN(expiry)) return true;

  // coerce first: adding a string to getDate() would concatenate ("15" + "45")
  const days = Number(REFRESH_THRESHOLD);
  // eg. 45 days from now
  const threshold = new Date();
  threshold.setDate(threshold.getDate() + (Number.isFinite(days) ? days : 0));
  // is expiration in less than eg. 45 days?
  return expiry < threshold.getTime();
}
/**
 * Encode a cookie string suited for our use case
 *
 * The cookie is HttpOnly, Secure and SameSite=Strict. Path=/ is explicit because
 * the cookie is issued from the script path; without it a cookie set from
 * `/<script prefix>/<writeKey>` is scoped to the script prefix and is never sent
 * along with data collection requests.
 *
 * @param {String} name cookie name (URI-encoded on output)
 * @param {String} value cookie value (URI-encoded on output)
 * @param {Date} expires expiration date
 * @returns {String} value for a Set-Cookie header
 */
function createCookie(name, value, expires) {
  const pair = `${encodeURIComponent(name)}=${encodeURIComponent(value)}`;
  return `${pair}; Expires=${expires.toUTCString()}; Path=/; SameSite=Strict; Secure; HttpOnly`;
}
/**
 * Generate a spec-compliant uuid-v4
 * adapted from: https://gist.github.com/bentranter/ed524091170137a72c1d54d641493c1f
 *
 * @returns {String} 36-character lowercase id shaped 8-4-4-4-12
 */
function uuid() {
  const bytes = crypto.getRandomValues(new Uint8Array(16));
  bytes[6] = (bytes[6] & 0x0f) | 0x40; // version 4
  bytes[8] = (bytes[8] & 0x3f) | 0x80; // variant 10xx
  // every byte must render as exactly two hex digits, or the groups come out short
  const hex = Array.from(bytes, byte => byte.toString(16).padStart(2, "0"));
  return [
    hex.slice(0, 4),
    hex.slice(4, 6),
    hex.slice(6, 8),
    hex.slice(8, 10),
    hex.slice(10)
  ]
    .map(group => group.join(""))
    .join("-");
}
/**
 * Grabs the anonymousId and expiration time from the cookies in the request header
 *
 * Adapted from: https://developers.cloudflare.com/workers/templates/pages/cookie_extract/
 *
 * Names and values are URI-decoded to mirror how createCookie writes them.
 * Malformed encodings are kept verbatim rather than thrown, and an unparseable
 * expiration is reported as null, so a bad client cookie cannot fail the request.
 *
 * @param {Request} request incoming Request
 * @param {string} name of the edge-side cookie
 * @returns {{anonymousId: (string|null), expires: (Date|null)}}
 */
function getCookieData(request, name) {
  const decode = text => {
    try {
      return decodeURIComponent(text);
    } catch (decodeErr) {
      return text;
    }
  };

  let anonymousId = null;
  let expires = null;
  const cookieString = request.headers.get("Cookie");
  if (!cookieString) return { anonymousId, expires };

  for (const cookie of cookieString.split(";")) {
    // split on the first "=" only; the value itself may contain "="
    const separator = cookie.indexOf("=");
    if (separator === -1) continue;
    const cookieName = decode(cookie.slice(0, separator).trim());
    const cookieValue = decode(cookie.slice(separator + 1).trim());
    if (cookieName === name) {
      anonymousId = cookieValue;
    } else if (cookieName === `${name}_set`) {
      const parsed = new Date(cookieValue);
      expires = Number.isNaN(parsed.getTime()) ? null : parsed;
    }
  }
  return { anonymousId, expires };
}
/**
 * Ship the error with some helpful request context as JSON to the specified endpoint
 *
 * ADAPTED from https://github.com/bustle/cf-sentry/
 *
 * Best effort: any 2xx answer counts as delivered, and nothing in here rejects.
 * A non-2xx answer or a failure to send is written to the console instead.
 *
 * @param {String} endpoint
 * @param {Error} err the error
 * @param {Request} request incoming Request
 * @returns {Promise<void>}
 */
async function log(endpoint, err, request) {
  try {
    const body = JSON.stringify(errToJson(err, request));
    const res = await fetch(endpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json"
      },
      body
    });
    if (res.ok) {
      return;
    }
    // We couldn't send to error endpoint, try to log the response at least.
    // The reply is not guaranteed to be JSON, so parse defensively.
    const text = await res.text();
    let details;
    try {
      const parsed = JSON.parse(text);
      details = parsed && typeof parsed === "object" ? parsed : { response: text };
    } catch (parseErr) {
      details = { response: text };
    }
    console.error({ httpStatus: res.status, ...details }); // eslint-disable-line no-console
  } catch (logErr) {
    console.error(logErr); // eslint-disable-line no-console
  }
}
/**
 * Encode the parsed and formatted error as a JSON-serializable object
 *
 * ADAPTED from https://github.com/bustle/cf-sentry/
 *
 * Request context is flattened into plain values: the query string is taken from
 * the URL, and headers are copied into a plain object (a Headers instance would
 * serialize as `{}`). Cookie and authorization headers are redacted so
 * credentials are not shipped to the logging endpoint. The request body is a
 * stream that cannot be serialized, so it is not included.
 *
 * @param {Error} err the error
 * @param {Request} request incoming Request
 * @returns {Object} error report
 */
function errToJson(err, request) {
  const errType = err.name || (err.constructor || {}).name;
  const frames = parse(err);
  const extraKeys = Object.keys(err).filter(
    key => !["name", "message", "stack"].includes(key)
  );

  let requestContext;
  if (request && request.url) {
    let queryString;
    try {
      queryString = new URL(request.url).search.replace(/^\?/, "") || undefined;
    } catch (urlErr) {
      // an unparseable URL must not break error reporting
      queryString = undefined;
    }
    const headers = {};
    if (request.headers && typeof request.headers.forEach === "function") {
      request.headers.forEach((value, name) => {
        const lower = String(name).toLowerCase();
        headers[name] =
          lower === "cookie" || lower === "authorization" ? "[redacted]" : value;
      });
    }
    requestContext = {
      method: request.method,
      url: request.url,
      query_string: queryString,
      headers
    };
  }

  return {
    message: errType + ": " + (err.message || "<no message>"),
    exception: {
      values: [
        {
          type: errType,
          value: err.message,
          stacktrace: frames.length ? { frames: frames.reverse() } : undefined
        }
      ]
    },
    extra: extraKeys.length
      ? {
          [errType]: extraKeys.reduce((obj, key) => ({ ...obj, [key]: err[key] }), {})
        }
      : undefined,
    platform: "worker",
    timestamp: Date.now() / 1000,
    request: requestContext
  };
}
/**
 * Turn an error's stack string into a list of frame objects.
 *
 * ADAPTED from https://github.com/bustle/cf-sentry/
 *
 * The first stack line (the error headline) is skipped. A line made only of
 * four or more dashes becomes `{ filename: <line> }`; lines that do not look
 * like a stack frame are dropped.
 *
 * @param {Error} err the error
 */
function parse(err) {
  const separatorPattern = /^\s*[-]{4,}$/;
  // From https://github.com/felixge/node-stack-trace/blob/1ec9ba43eece124526c273c917104b4226898932/lib/stack-trace.js#L42
  const framePattern = /at (?:(.+)\s+\()?(?:(.+?):(\d+)(?::(\d+))?|([^)]+))\)?/;

  const stackLines = (err.stack || "").split("\n").slice(1);
  const frames = [];
  for (const line of stackLines) {
    if (separatorPattern.test(line)) {
      frames.push({ filename: line });
      continue;
    }
    const found = framePattern.exec(line);
    if (found === null) {
      continue;
    }
    const [, fnName, fileName, lineText, columnText, location] = found;
    frames.push({
      function: fnName || undefined,
      filename: fileName || undefined,
      lineno: +lineText || undefined,
      colno: +columnText || undefined,
      in_app: location !== "native" || undefined
    });
  }
  return frames;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment