Last active
September 29, 2021 13:46
-
-
Save philfreo/181b92528c352cdccf43f57a5908c47f to your computer and use it in GitHub Desktop.
Revisions
-
philfreo revised this gist
Jan 19, 2021 . 1 changed file with 1 addition and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -10,6 +10,7 @@ def hash(s): anonymous_id = '(This needs to be passed to your backend from the browser/JS)' # Then on any call to Segment where integrations['Google Analytics'] is not False... analytics.track(user_id, 'Test', {}, anonymous_id=anonymous_id, integrations={ 'Google Analytics': { # Segment ignores our custom clientId if it's not a string -
philfreo revised this gist
Jan 19, 2021 . 1 changed file with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -8,7 +8,7 @@ def hash(s): hash = (hash * 33) ^ c[0] return c_uint32(hash).value anonymous_id = '(This needs to be passed to your backend from the browser/JS)' analytics.track(user_id, 'Test', {}, anonymous_id=anonymous_id, integrations={ 'Google Analytics': { -
philfreo revised this gist
Jan 19, 2021 . 2 changed files with 11 additions and 4 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -16,8 +16,8 @@ function hash(str) { // https://segment.com/docs/connections/sources/catalog/libraries/website/javascript/middleware/ const segmentSourceMiddleware = function({ payload, next, integrations }) { payload.obj.integrations['Google Analytics'] = { // Segment ignores our custom clientId if it's not a string clientId: String(hash(analytics.user().anonymousId())) }; next(payload); }; This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -7,5 +7,12 @@ def hash(s): for c in reversed(list(struct.iter_unpack('H', s.encode('utf-16le')))): hash = (hash * 33) ^ c[0] return c_uint32(hash).value anonymous_id = '(This needs to be passed to your backend)' analytics.track(user_id, 'Test', {}, anonymous_id=anonymous_id, integrations={ 'Google Analytics': { # Segment ignores our custom clientId if it's not a string 'clientId': str(hash(anonymous_id || user_id)) } } -
philfreo revised this gist
Jan 14, 2021 . 1 changed file with 3 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -11,6 +11,9 @@ function hash(str) { return hash >>> 0; } // Segment Source Middleware helps us here so that we don't have to // customize each and every track(), page() etc. // https://segment.com/docs/connections/sources/catalog/libraries/website/javascript/middleware/ const segmentSourceMiddleware = function({ payload, next, integrations }) { payload.obj.integrations['Google Analytics'] = { // Segment ignores our custom clientId if it's not a string... -
philfreo revised this gist
Jan 13, 2021 . 1 changed file with 3 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,10 +1,11 @@ # This matches the same hash function that Segment uses on their server-side GA integration # Ported to Python from https://github.com/darkskyapp/string-hash/blob/master/index.js def hash(s): import struct from ctypes import c_uint32 hash = 5381 for c in reversed(list(struct.iter_unpack('H', s.encode('utf-16le')))): hash = (hash * 33) ^ c[0] return c_uint32(hash).value # TODO -
philfreo revised this gist
Jan 13, 2021 . 1 changed file with 2 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +1,5 @@ # This matches the same hash function that Segment uses on their server-side GA integration # Ported to Python from https://github.com/darkskyapp/string-hash/blob/master/index.js def hash(s): from ctypes import c_uint32 hash = 5381 -
philfreo revised this gist
Jan 13, 2021 . 2 changed files with 13 additions and 5 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,8 +1,7 @@ // In browser code with analytics.js when using a Google Analytics Destination in Cloud Mode // This is the same hash function that Segment uses on their server-side GA integration // from https://github.com/darkskyapp/string-hash/blob/master/index.js function hash(str) { var hash = 5381, i = str.length; This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1 +1,10 @@ # This is the same hash function that Segment uses on their server-side GA integration # Ported from https://github.com/darkskyapp/string-hash/blob/master/index.js def hash(s): from ctypes import c_uint32 hash = 5381 for i in range(len(s) - 1, -1, -1): hash = (hash * 33) ^ ord(s[i]) return c_uint32(hash).value # TODO -
philfreo revised this gist
Jan 13, 2021 . 1 changed file with 2 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -14,7 +14,8 @@ function hash(str) { const segmentSourceMiddleware = function({ payload, next, integrations }) { payload.obj.integrations['Google Analytics'] = { // Segment ignores our custom clientId if it's not a string... clientId: ""+hash(analytics.user().anonymousId()) }; next(payload); }; -
philfreo revised this gist
Jan 13, 2021 . 2 changed files with 6 additions and 3 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -12,13 +12,13 @@ This is a problem even if you carefully pass `anonymousId`, `userId`, web page U ## Cause In Segment's Google Analytics server-side integration they do this to compute the value passed to GA's `cid` ("Client ID") field: `let cid = hash(facade.userId() || facade.anonymousId())` The `hash` they use is the [string-hash npm package](https://www.npmjs.com/package/string-hash) But because they _change_ this value as soon as a visitor is given a `userId`, this makes it impossible to track users/sessions in GA from anonymous (marketing site) to identified (signed up in product) Segment incorrectly prefers sending a `clientId`/`cid` to Google Analytics based on the Segment `userId` (if present) rather than preferring their `anonymousId` which would fix that problem and more appropriately use the fields outlined by @@ -30,6 +30,8 @@ Segment needs to update their Google Analytics server-side integration to be lik `let cid = hash(facade.anonymousId() || facade.userId())` This would allow for consistent tracking in GA from anonymous to identified. There is already a _separate_ field/option to pass User ID directly to GA, so there's no reason to prefer `userId` here when both are passed. Segment has indicated that the more customers complain about this, the more likely they are to make this change. Currently they haven't committed to making any change :( This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. 
Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1 @@ # coming soon -
philfreo revised this gist
Jan 13, 2021 . 1 changed file with 7 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -2,7 +2,13 @@ ## Problem If you use Segment's Google Analytics server-side integrations, even if you follow **all** of their documentation / recommendation, it is impossible to properly track your visitors/sessions in GA from anonymous through identified. This means that if you use Segment like this, you cannot do very basic things in GA like understanding attribution of your product's sign ups. In GA, a brand new session is created for the identified users, which is not correct. This problem applies to using Segment on the web with analytics.js when the Google Analytics Destination is set to Cloud Mode, _or_ when using a true server-side Source such as Python/Ruby/Node. This is a problem even if you carefully pass `anonymousId`, `userId`, web page URL/title, and user's IP from your frontend to your backend and then to Segment following all their recommendations. ## Cause -
philfreo revised this gist
Jan 13, 2021 . 2 changed files with 6 additions and 6 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,10 +1,10 @@ # Overview ## Problem ** more details about use case coming soon ** ## Cause In Segment's Google Analytics server-side integration they do this: @@ -18,15 +18,15 @@ Segment incorrectly prefers sending a `clientId`/`cid` to Google Analytics based than preferring their `anonymousId` which would fix that problem and more appropriately use the fields outlined by [Google Analytics API documentation here](https://developers.google.com/analytics/devguides/collection/protocol/v1/parameters#user) ## Solution Segment needs to update their Google Analytics server-side integration to be like this: `let cid = hash(facade.anonymousId() || facade.userId())` Segment has indicated that the more customers complain about this, the more likely they are to make this change. Currently they haven't committed to making any change :( ## Workaround See code snippets below. File renamed without changes. -
philfreo revised this gist
Jan 13, 2021 . 1 changed file with 4 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -20,3 +20,7 @@ const segmentSourceMiddleware = function({ payload, next, integrations }) { }; analytics.addSourceMiddleware(segmentSourceMiddleware); // Then you can use analytics.js like normal: analytics.track('Test'); -
philfreo created this gist
Jan 13, 2021 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,32 @@ ## Overview # Problem ** more details about use case coming soon ** # Cause In Segment's Google Analytics server-side integration they do this: `let cid = hash(facade.userId() || facade.anonymousId())` Where `hash` uses the [string-hash npm package](https://www.npmjs.com/package/string-hash) But this makes it impossible to track users/sessions in GA from anonymous (marketing site) to identified (signed up in product) Segment incorrectly prefers sending a `clientId`/`cid` to Google Analytics based on the Segment `userId` (if present) rather than preferring their `anonymousId` which would fix that problem and more appropriately use the fields outlined by [Google Analytics API documentation here](https://developers.google.com/analytics/devguides/collection/protocol/v1/parameters#user) # Solution Segment needs to update their Google Analytics server-side integration to be like this: `let cid = hash(facade.anonymousId() || facade.userId())` Segment has indicated that the more customers complain about this, the more likely they are to make this change. Right now they haven't committed to making any change :( # Workaround See code snippets below. This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. 
Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,22 @@ // In browser code with analytics.js, even for Google Analytics in Cloud Mode // In Segment's Google Analytics server-side integration they do this: // This is the same hash that Segment uses on their server-side, from here: // From https://github.com/darkskyapp/string-hash/blob/master/index.js function hash(str) { var hash = 5381, i = str.length; while(i) { hash = (hash * 33) ^ str.charCodeAt(--i); } return hash >>> 0; } const segmentSourceMiddleware = function({ payload, next, integrations }) { payload.obj.integrations['Google Analytics'] = { clientId: hash(analytics.user().anonymousId()) }; next(payload); }; analytics.addSourceMiddleware(segmentSourceMiddleware);