Skip to content

Commit

Permalink
Remove pixel cookie sharing detection for now
Browse files Browse the repository at this point in the history
  • Loading branch information
ghostwords committed Sep 12, 2023
1 parent 5e1dd94 commit d962537
Show file tree
Hide file tree
Showing 6 changed files with 0 additions and 369 deletions.
26 changes: 0 additions & 26 deletions src/js/background.js
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,6 @@ function Badger(from_qunit) {
manifestJson.background.persistent === false);
}());

self.firstPartyDomainPotentiallyRequired = testCookiesFirstPartyDomain();

self.widgetList = [];
let widgetListPromise = widgetLoader.loadWidgetsFromFile(
"data/socialwidgets.json").catch(console.error);
Expand Down Expand Up @@ -133,30 +131,6 @@ function Badger(from_qunit) {
}
}

/**
 * Probes whether the chrome.cookies API accepts the firstPartyDomain
 * parameter.
 * https://developer.mozilla.org/en-US/docs/Mozilla/Add-ons/WebExtensions/API/cookies/getAll#Parameters
 *
 * Firefox requires firstPartyDomain once privacy.websites.firstPartyIsolate
 * is turned on. The parameter exists since Firefox 59 (firstPartyIsolate
 * itself since Firefox 58).
 *
 * What matters here is not whether firstPartyIsolate is on, but whether
 * firstPartyDomain is understood at all. When it is, passing null to
 * chrome.cookies.getAll() yields the same result whatever the state of
 * firstPartyIsolate.
 *
 * Chrome does not currently support firstPartyDomain.
 *
 * @returns {Boolean} false when the probe call throws synchronously,
 * true otherwise
 */
function testCookiesFirstPartyDomain() {
  let supported = true;

  try {
    // the lookup result is irrelevant; only whether the call throws matters
    chrome.cookies.getAll({ firstPartyDomain: null }, () => {});
  } catch (ex) {
    supported = false;
  }

  return supported;
}
} /* end of Badger constructor */

Badger.prototype = {
Expand Down
220 changes: 0 additions & 220 deletions src/js/heuristicblocking.js
Original file line number Diff line number Diff line change
Expand Up @@ -192,178 +192,6 @@ HeuristicBlocker.prototype = {
}
},

/**
* Calls the pixel cookie sharing checking function
* iff the request is for an image in the top-level frame,
* and the request URL has querystring parameters.
*
* @param {Object} details webRequest onResponseStarted details object
*/
checkForPixelCookieSharing: function (details) {
if (!badger.isLearningEnabled(details.tabId)) {
return;
}

if (details.type != 'image' || details.frameId !== 0 || details.url.indexOf('?') == -1) {
return;
}

let self = this,
tab_base = self.tabOrigins[details.tabId];
if (!tab_base) {
return;
}
let tab_url = self.tabUrls[details.tabId];

let request_host = (new URI(details.url)).host;
// CNAME uncloaking
if (utils.hasOwn(badger.cnameDomains, request_host)) {
request_host = badger.cnameDomains[request_host];
}
let request_base = getBaseDomain(request_host);

let initiator_url = getInitiatorUrl(tab_url, details);
if (initiator_url) {
tab_url = initiator_url;
tab_base = getBaseDomain(extractHostFromURL(initiator_url));
}

// ignore first-party requests
if (!utils.isThirdPartyDomain(request_base, tab_base)) {
return;
}

// short-circuit if we already observed this eTLD+1 tracking on this site
let firstParties = self.storage.getStore('snitch_map').getItem(request_base);
if (firstParties && firstParties.includes(tab_base)) {
return;
}

// short-circuit if we already made a decision for this FQDN
let action = self.storage.getBestAction(request_host);
if (action != constants.NO_TRACKING && action != constants.ALLOW) {
return;
}

// get all non-HttpOnly cookies for the top-level frame
// and pass those to the pixel cookie-share accounting function
let config = {
url: tab_url
};
if (badger.firstPartyDomainPotentiallyRequired) {
config.firstPartyDomain = null;
}
chrome.cookies.getAll(config, function (cookies) {
cookies = cookies.filter(cookie => !cookie.httpOnly);
if (cookies.length < 1) {
return;
}

// TODO refactor with new URI() above?
let searchParams = (new URL(details.url)).searchParams;

self.pixelCookieShareAccounting(tab_url, tab_base, searchParams, request_host, request_base, cookies);
});
},

/**
* Checks for cookie sharing: requests to third-party domains
* that include high entropy data from first-party cookies.
*
* Only catches plain-text verbatim sharing (b64 encoding etc. defeat it).
*
* Assumes any long string that doesn't contain URL fragments
* or stopwords is an identifier.
*
* Doesn't catch cookie syncing (3rd party -> 3rd party),
* but most of those tracking cookies should be blocked anyway.
*/
pixelCookieShareAccounting: function (tab_url, tab_base, searchParams, request_host, request_base, cookies) {
const TRACKER_ENTROPY_THRESHOLD = 33,
MIN_STR_LEN = 8;

let self = this;

for (let p of searchParams) {
let key = p[0],
value = p[1];

// the argument must be sufficiently long
if (!value || value.length < MIN_STR_LEN) {
continue;
}

// check if this argument is derived from a high-entropy first-party cookie
for (let cookie of cookies) {
// the cookie value must be sufficiently long
if (!cookie.value || cookie.value.length < MIN_STR_LEN) {
continue;
}

// find the longest common substring between this arg and the cookies
// associated with the document
let substrings = utils.findCommonSubstrings(cookie.value, value) || [];
for (let s of substrings) {
// ignore the substring if it's part of the first-party URL. sometimes
// content servers take the url of the page they're hosting content
// for as an argument. e.g.
// https://example-cdn.com/content?u=http://example.com/index.html
if (tab_url.indexOf(s) != -1) {
continue;
}

// elements of the user agent string are also commonly included in
// both cookies and arguments; e.g. "Mozilla/5.0" might be in both.
// This is not a special tracking risk since third parties can see
// this info anyway.
if (navigator.userAgent.indexOf(s) != -1) {
continue;
}

// Sometimes the entire url and then some is included in the
// substring -- the common string might be "https://example.com/:true"
// In that case, we only care about the information around the URL.
if (s.indexOf(tab_url) != -1) {
s = s.replace(tab_url, "");
}

// During testing we found lots of common values like "homepage",
// "referrer", etc. were being flagged as high entropy. This searches
// for a few of those and removes them before we go further.
let lower = s.toLowerCase();
lowEntropyQueryValues.forEach(function (qv) {
let start = lower.indexOf(qv);
if (start != -1) {
s = s.replace(s.substring(start, start + qv.length), "");
}
});

// at this point, since we might have removed things, make sure the
// string is still long enough to bother with
if (s.length < MIN_STR_LEN) {
continue;
}

// compute the entropy of this common substring. if it's greater than
// our threshold, record the tracking action and exit the function.
let entropy = utils.estimateMaxEntropy(s);
if (entropy > TRACKER_ENTROPY_THRESHOLD) {
log("Found high-entropy cookie share from", tab_base, "to", request_host,
":", entropy, "bits\n cookie:", cookie.name, '=', cookie.value,
"\n arg:", key, "=", value, "\n substring:", s);
self._recordPrevalence(request_host, request_base, tab_base);

// record pixel cookie sharing
badger.storage.recordTrackingDetails(
request_base, tab_base, 'pixelcookieshare');

return;
}
}
}
}
},

/**
* Wraps _recordPrevalence for use outside of webRequest listeners.
*
Expand Down Expand Up @@ -660,51 +488,6 @@ var lowEntropyCookieValues = {
"zu":8
};

/**
 * Stopwords that show up in both cookies and querystring arguments
 * without identifying anyone. They get stripped from candidate strings
 * before entropy is estimated.
 *
 * Entries are applied in array order, so the order (including the
 * repeated final entry) is kept exactly as is.
 */
const lowEntropyQueryValues = [
  // URL fragments
  "https", "http", "://", "%3A%2F%2F", "www", "url",

  // common words
  "undefined", "impression", "session", "homepage", "client", "version",
  "business", "title", "get", "site", "name", "category", "account_id",
  "smartadserver", "front", "page", "view", "first", "visit", "platform",
  "language", "automatic", "disabled", "landing", "entertainment",
  "amazon", "official", "webvisor", "anonymous", "across", "narrative",

  // JSON punctuation
  "\":null", "\":false", "\":\"", "\",\"", "\",\"",
];

/**
* Extract cookies from onBeforeSendHeaders
*
Expand Down Expand Up @@ -820,9 +603,6 @@ function startListeners() {
badger.heuristicBlocking.checkForTrackingCookies(details);
}

// check for pixel cookie sharing if the response appears to be for an image pixel
badger.heuristicBlocking.checkForPixelCookieSharing(details);

}, {urls: ["<all_urls>"]}, extraInfoSpec);
}

Expand Down
43 changes: 0 additions & 43 deletions src/js/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -173,48 +173,6 @@ function estimateMaxEntropy(str) {
return max_bits;
}

// Adapted from https://gist.github.com/jaewook77/cd1e3aa9449d7ea4fb4f
/**
 * Finds the common substrings of two strings that are at least 8
 * characters long, using dynamic programming.
 *
 * Let D[i,j] be the length of the longest common suffix of
 * str1[0..i] and str2[0..j]. If str1[i] and str2[j] differ, D[i,j] is
 * zero. A run of matches along one diagonal (i - j constant) is one
 * common substring, and it is reported once, at its full length.
 *
 * Only the previous row of D is needed to compute the current one, so
 * two rolling rows are kept instead of the whole matrix. Alongside each
 * length we remember which slot of the result array the diagonal owns, so
 * that extending a match always replaces that diagonal's own entry
 * (several diagonals can be growing at the same time).
 *
 * Runs in O(M x N) time and O(N) extra space.
 *
 * @param {String} str1
 * @param {String} str2
 * @returns {Array} substrings of str1, ordered by when each one first
 * reached the minimum length
 */
function findCommonSubstrings(str1, str2) {
  // we only care about strings >= 8 chars
  const LCS_MIN = 8;

  let LCS = [],
    prevLen = [],
    prevSlot = [];

  for (let i = 0; i < str1.length; i++) {
    let curLen = new Array(str2.length).fill(0),
      curSlot = new Array(str2.length).fill(-1);

    for (let j = 0; j < str2.length; j++) {
      if (str1[i] !== str2[j]) {
        continue;
      }

      let len = (i === 0 || j === 0) ? 1 : prevLen[j - 1] + 1;
      curLen[j] = len;

      if (len < LCS_MIN) {
        continue;
      }

      // a match that just reached the minimum length gets a new slot;
      // a longer one keeps overwriting the slot its diagonal already owns
      let slot = (len === LCS_MIN) ? LCS.length : prevSlot[j - 1];
      curSlot[j] = slot;
      LCS[slot] = str1.substring(i - len + 1, i + 1);
    }

    prevLen = curLen;
    prevSlot = curSlot;
  }

  return LCS;
}

/**
 * @returns {Number} the constant 1000 (presumably one second expressed
 * in milliseconds)
 */
function oneSecond() {
  const ONE_SECOND = 1000;
  return ONE_SECOND;
}
Expand Down Expand Up @@ -563,7 +521,6 @@ let utils = {
explodeSubdomains,
fetchResource,
filter,
findCommonSubstrings,
firstPartyProtectionsEnabled,
getHostFromDomainInput,
hasOwn,
Expand Down
1 change: 0 additions & 1 deletion src/manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
"webRequest",
"webRequestBlocking",
"storage",
"cookies",
"privacy"
],
"background": {
Expand Down
24 changes: 0 additions & 24 deletions src/tests/tests/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -808,30 +808,6 @@ QUnit.module("Utils", function (/*hooks*/) {
);
});

// Exercises the substring matcher used by the pixel tracking heuristic:
// given two values, it should report the substrings they have in common
QUnit.test("findCommonSubstrings", function (assert) {

  // nothing shared is long enough here
  let too_short = utils.findCommonSubstrings('www.foo.bar', 'www.foob.ar');
  assert.deepEqual(
    too_short,
    [],
    "substrings under the length threshold of 8 are ignored"
  );

  // the URLs diverge after a shared prefix
  let shared_prefix = utils.findCommonSubstrings(
    'foobar.com/foo/fizz/buzz/bar', 'foobar.com/foo/bizz/fuzz/bar');
  assert.equal(
    shared_prefix[0],
    'foobar.com/foo/',
    "returns longest matching value from the pair of URLs"
  );

  // two separate shared runs
  let two_runs = utils.findCommonSubstrings(
    'foobar.com/fizz/buzz/bar/foo', 'foobar.com/fizzbuzz/buzz/bar/foo');
  assert.deepEqual(
    two_runs,
    ['foobar.com/fizz', "zz/buzz/bar/foo"],
    "returns multiple substrings if multiple are present in comparison"
  );

});

// used in pixel tracking heuristic, given a string the estimateMaxEntropy function
// will return the estimated entropy value from it, based on logic parsing the string's length,
// and classes of character complication included in the string
Expand Down
Loading

0 comments on commit d962537

Please sign in to comment.