
Merge pull request #570 from privacy-tech-lab/ppcrawl
Synchronize crawl and main branch
dadak-dom authored Apr 4, 2024
2 parents dbf5847 + 06e6225 commit 34a22de
Showing 5 changed files with 171 additions and 47 deletions.
49 changes: 35 additions & 14 deletions src/background/analysis/analyze.js
@@ -10,13 +10,15 @@ analyze.js
*/

import { Request } from "./classModels.js";
import { evidenceQ } from "../background.js";
import { evidenceQ, hostnameHold } from "../background.js";
import { tagParent } from "./requestAnalysis/tagRequests.js";
import { addToEvidenceStore } from "./interactDB/addEvidence.js";
import { getAllEvidenceForRequest } from "./requestAnalysis/scanHTTP.js";
import { MAX_BYTE_LEN, MINUTE_MILLISECONDS } from "./constants.js";
import { getAllEvidenceForCookies } from "./requestAnalysis/scanCookies.js";
import { getHostname } from "./utility/util.js";
import axios from "axios";
import { IS_CRAWLING, IS_CRAWLING_TESTING } from "../background.js";
// Temporary container to hold network requests while properties are being added from listener callbacks
const buffer = {};

@@ -67,13 +69,15 @@ export async function onBeforeRequest(details, data) {
id: details.requestId,
rootUrl: null,
reqUrl: details.url !== undefined ? details.url : null,
requestBody: details.requestBody !== undefined ? details.requestBody : null,
requestBody:
details.requestBody !== undefined ? details.requestBody : null,
type: details.type !== undefined ? details.type : null,
urlClassification: details.urlClassification !== undefined
? details.urlClassification
: [],
urlClassification:
details.urlClassification !== undefined
? details.urlClassification
: [],
responseData: undefined,
error: undefined
error: undefined,
});

if (details.tabId == -1) {
@@ -95,7 +99,9 @@ export async function onBeforeRequest(details, data) {
//@ts-ignore
const filter = browser.webRequest.filterResponseData(details.requestId);

var responseByteLength = 0, abort = false, httpResponseStrArr = [];
var responseByteLength = 0,
abort = false,
httpResponseStrArr = [];

filter.ondata = (event) => {
if (!abort) {
@@ -171,8 +177,13 @@ const cookieUrlObject = {};
* @returns {Promise<void>} calls a number of functions
*/
async function analyze(request, userData) {
const allEvidence = getAllEvidenceForRequest(request, userData);
const rootUrl = request.rootUrl;
const currentTime = Date.now();
const data = {
host: rootUrl,
request: JSON.stringify(request),
};
const allEvidence = getAllEvidenceForRequest(request, userData);
var allCookieEvidence = [];

const reqUrl = getHostname(request.reqUrl);
@@ -206,13 +217,23 @@ async function analyze(request, userData) {
//@ts-ignore
cb(
undefined,
await addToEvidenceStore(
allEvidence,
parent,
rootUrl,
reqUrl,
)
await addToEvidenceStore(allEvidence, parent, rootUrl, reqUrl)
);
if (
rootUrl.indexOf("moz-extension") === -1 &&
(currentTime - hostnameHold[getHostname(rootUrl)] < 30000 ||
hostnameHold[getHostname(rootUrl)] === undefined) &&
IS_CRAWLING_TESTING
) {
await axios.post("http://localhost:8080/allEv", data, {
headers: {
"Content-Type": "application/json",
},
});
// console.log("would send, associated with " + rootUrl)
} else {
// console.log("NOPE, associated with " + rootUrl + " and ", currentTime - hostnameHold[getHostname(rootUrl)])
}
});
}
}
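
The body of filter.ondata is collapsed in this diff. For orientation, here is a minimal sketch of how a browser.webRequest.filterResponseData stream is typically buffered with a size cap — the variable names mirror the hunk above, but the body is a reconstruction under stated assumptions, not the commit's exact code:

// Sketch only: assumes UTF-8 text responses and that MAX_BYTE_LEN caps
// how much of the body is kept for analysis.
const filter = browser.webRequest.filterResponseData(details.requestId);
const decoder = new TextDecoder("utf-8");

var responseByteLength = 0,
  abort = false,
  httpResponseStrArr = [];

filter.ondata = (event) => {
  if (!abort) {
    responseByteLength += event.data.byteLength;
    if (responseByteLength > MAX_BYTE_LEN) {
      abort = true; // body too large to analyze; stop buffering
    } else {
      httpResponseStrArr.push(decoder.decode(event.data, { stream: true }));
    }
  }
  filter.write(event.data); // always pass the data through so the page loads
};

filter.onstop = () => {
  filter.disconnect();
  request.responseData = abort ? undefined : httpResponseStrArr.join("");
};

The other substantive change in this file is the gate after addToEvidenceStore: a request is forwarded to the local collector only when IS_CRAWLING_TESTING is enabled and the root host's 30-second window (started via hostnameHold in background.js) is still open or has not yet started.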
50 changes: 35 additions & 15 deletions src/background/analysis/buildUserData/importSearchData.js
@@ -70,10 +70,36 @@ export async function importData() {
networkKeywords[permissionEnum.personal] = {};

// first let's build up the location info
// these 'manual' values are exclusive to the crawler version of the extension.
const manual_lat = await settingsKeyval
.get("TARGET_LAT")
.then((value) => value);
const manual_long = await settingsKeyval
.get("TARGET_LONG")
.then((value) => value);
const manual_zip = await settingsKeyval
.get("TARGET_ZIP")
.then((value) => value);

var locCoords = await getLocationData();
var ret = await fetch("http://ipinfo.io/json?token=" + apiIPToken);
var retJson = await ret.json();
var currIpInfo = await getIpInfo(retJson);

/*
We need to check that the crawl values have actually been set BEFORE we override the defaults. The
location data gets imported before the crawler has had a chance to input anything, so overriding
unconditionally would break the import. Once the values are set, we can safely substitute them.
*/
if (
manual_lat != undefined &&
manual_long != undefined &&
manual_zip != undefined
) {
locCoords = [manual_lat, manual_long];
retJson.postal = manual_zip;
}

let locKey = {
[typeEnum.streetAddress]: null,
[typeEnum.zipCode]: retJson.postal,
@@ -82,18 +108,17 @@ export async function importData() {
display: `${""} ${retJson.city}, ${retJson.region} ${retJson.postal}`,
};

let man_edits = await settingsKeyval.get("watchlistmanual")
let man_edits = await settingsKeyval.get("watchlistmanual");

//This should only execute on the first download
//It adds IP Address and Street Address keywords generated by ipinfo to Watchlist
if(!man_edits) {
if (!man_edits) {
let keywordObject = await watchlistKeyval.get(IPINFO_IPKEY);
let locObject = await watchlistKeyval.get(IPINFO_ADDRESSKEY);
if (keywordObject == undefined || locObject == undefined) {
await saveKeyword(retJson.ip, typeEnum.ipAddress, "ip", false);
await saveKeyword(locKey, permissionEnum.location, "loc", false);
}
else {
} else {
for (let [t, val] of Object.entries(keywordObject)) {
if (t == "keyword") {
//If the user's watchlist is not current, then update it
@@ -103,7 +128,7 @@ export async function importData() {
}
}
}

if (locObject != locKey) {
await saveKeyword(retJson.ip, typeEnum.ipAddress, "ip", true);
await saveKeyword(locKey, permissionEnum.location, "loc", true);
@@ -121,15 +146,15 @@
let phone_arr = user_store_dict[typeEnum.phoneNumber];
phone_arr.forEach((phone) => {
const origHash = watchlistHashGen(typeEnum.phoneNumber, phone);
const phoneRegex = buildGeneralRegex(phone)
const phoneRegex = buildGeneralRegex(phone);
const phoneObj = createKeywordObj(
phoneRegex,
typeEnum.phoneNumber,
origHash
);
userPhone.push(phoneObj);
})
};
});
}
// if we have a phone we put it in the network keywords dict
if (typeof userPhone !== "undefined") {
networkKeywords[permissionEnum.personal][typeEnum.phoneNumber] = userPhone;
@@ -286,10 +311,5 @@ export async function importData() {
// performance, the user's current location and IP address as provided by
// ipinfo.io]

return [
locCoords,
networkKeywords,
currIpInfo,
analytic,
];
}
return [locCoords, networkKeywords, currIpInfo, analytic];
}
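
For context, the three TARGET_* keys read above are written by the home view later in this commit. A minimal sketch of seeding them by hand — the import path and the coordinate values are illustrative, not from the commit:

// Hypothetical seeding snippet; the path and values are illustrative only.
import { settingsKeyval } from "../../libs/indexed-db/openDB";

async function seedCrawlTarget() {
  await settingsKeyval.set("TARGET_LAT", 41.5556); // numbers, per parseFloat in the home view
  await settingsKeyval.set("TARGET_LONG", -72.6557);
  await settingsKeyval.set("TARGET_ZIP", "06459"); // zip stays a string
}

On the next importData() run the override takes effect: locCoords becomes [TARGET_LAT, TARGET_LONG] and retJson.postal is replaced by TARGET_ZIP before the location keyword object is built.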
20 changes: 13 additions & 7 deletions src/background/analysis/interactDB/ml/jsrun.js
@@ -23,14 +23,20 @@ export async function loadModel() {
// Load the TensorFlow SavedModel through tfjs-node API. You can find more
// details in the API documentation:
// https://js.tensorflow.org/api_node/1.3.1/#node.loadSavedModel

try {
model = await tf.loadGraphModel(path);
} catch (e) {
console.log("error");
var error = true;
while (error) {
error = false;
try {
model = await tf.loadGraphModel(path);
} catch (e) {
error = true;
}
try {
var out = await model.save("indexeddb://my-model");
} catch(e) {
error = true;
}
}

const out = await model.save("indexeddb://my-model");
}

/**
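The retry loop above keeps spinning until both the model load and the IndexedDB save succeed, with no delay and no attempt cap, so a persistently failing load keeps the loop hot. A bounded variant with a pause between attempts might look like this — a sketch reusing the module's tf and path, not the committed code:

// Sketch: same load-then-cache flow, but bounded and paced.
async function loadModelWithRetry(maxAttempts = 5, delayMs = 1000) {
  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      const model = await tf.loadGraphModel(path);
      await model.save("indexeddb://my-model"); // cache for later sessions
      return model;
    } catch (e) {
      if (attempt === maxAttempts) throw e; // surface the final failure
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }
}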
86 changes: 75 additions & 11 deletions src/background/background.js
@@ -11,7 +11,10 @@ background.js
- https://developer.mozilla.org/en-US/docs/Mozilla/Add-ons/WebExtensions/API/webRequest
*/

import { evidenceKeyval as evidenceIDB, evidenceKeyval } from "./analysis/interactDB/openDB";
import {
evidenceKeyval as evidenceIDB,
evidenceKeyval,
} from "./analysis/interactDB/openDB";
import { onBeforeRequest } from "./analysis/analyze.js";
import {
getExtensionStatus,
@@ -22,6 +25,68 @@ import { runNotifications } from "../libs/indexed-db/notifications";
import Queue from "queue";
import { getHostname } from "./analysis/utility/util.js";
import { EVIDENCE_THRESHOLD, FIVE_SEC_IN_MILLIS } from "./analysis/constants";
import axios from "axios";

/**
* Holds each hostname as a key and the time (epoch milliseconds) it was first loaded as the value.
* @type {Object}
*/
export var hostnameHold = {};

// Set the extension into crawl mode.
export const IS_CRAWLING = false;

// Set the extension into crawl mode, with the additional option of capturing all HTTP requests that could be analyzed.
// Ideal when testing the functionality of the crawler.
// If you want to crawl AND test, make sure BOTH values are set to true.
export const IS_CRAWLING_TESTING = false;

async function apiSend() {
//@ts-ignore
const currentWindow = await browser.tabs.query({
active: true,
currentWindow: true,
});
const currentUrl = currentWindow[0].url;
const currentHostName = getHostname(currentUrl);
async function sender() {
// posting data to sql db
// since index is either an array or an int, stringify it
const evidence = await evidenceKeyval.get(currentHostName);
// console.log(evidence, currentHostName)
for (const [label, value] of Object.entries(evidence)) {
if (label != "lastSeen") {
for (const [type, requests] of Object.entries(value)) {
for (const [url, e] of Object.entries(requests)) {
e.index = JSON.stringify(e.index);
}
}
}
}
const allSend = {
host: currentHostName,
evidence: evidence,
};
// console.log("sending " + currentHostName)
axios
.post("http://localhost:8080/entries", allSend, {
headers: {
"Content-Type": "application/json",
},
})
.then((res) => console.log(res.data))
.catch((err) => console.log(err));
}
if (!Object.keys(hostnameHold).includes(currentHostName)) {
// console.log("loaded " + currentHostName)
hostnameHold[currentHostName] = Date.now();
setTimeout(sender, 30000);
}
}
//@ts-ignore
if (IS_CRAWLING) {
browser.webNavigation.onDOMContentLoaded.addListener(apiSend);
}

// A filter that restricts the events that will be sent to a listener.
// You can play around with the urls and types.
@@ -42,7 +107,7 @@ browser.runtime.onMessage.addListener((request, sender, sendResponse) => {
if (request.msg == "background.currentTab") {
// send current, open tab to the runtime (our extension)
const send = (tabs) =>
//@ts-ignore
//@ts-ignore
browser.runtime.sendMessage({
msg: "popup.currentTab",
data: tabs[0].url,
@@ -139,17 +204,16 @@ importData().then((data) => {
});

//@ts-ignore
browser.webNavigation.onBeforeNavigate.addListener(async (details) => {
if (details.parentFrameId == -1) {
const host = getHostname(details.url)
let evidence = await evidenceKeyval.get(host)
if (evidence == undefined) {
evidence = {}
browser.webNavigation.onBeforeNavigate.addListener(async (details) => {
if (details.parentFrameId == -1) {
const host = getHostname(details.url);
let evidence = await evidenceKeyval.get(host);
if (evidence == undefined) {
evidence = {};
}
evidence.lastSeen = new Date()
await evidenceKeyval.set(host, evidence)
evidence.lastSeen = new Date();
await evidenceKeyval.set(host, evidence);
}

});

setDefaultSettings();
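Both crawl endpoints introduced in this commit — /allEv (per-request payloads from analyze.js) and /entries (the per-site evidence snapshot that apiSend schedules 30 seconds after a host is first loaded) — assume a collector listening on localhost:8080 that is not part of the extension. A minimal sketch of such a collector, assuming Node with Express; everything beyond the endpoint names and payload shapes is hypothetical:

// collector.js — hypothetical local sink for crawl output; not in this commit.
// Payload shapes mirror the objects built in analyze.js ({ host, request })
// and background.js ({ host, evidence }).
import express from "express";

const app = express();
app.use(express.json({ limit: "50mb" })); // serialized requests can be large

// Receives every analyzable request while IS_CRAWLING_TESTING is true.
app.post("/allEv", (req, res) => {
  const { host, request } = req.body;
  console.log(`request evidence for ${host} (${request.length} chars)`);
  res.sendStatus(200);
});

// Receives the per-host evidence snapshot while IS_CRAWLING is true.
app.post("/entries", (req, res) => {
  const { host, evidence } = req.body;
  console.log(`evidence snapshot for ${host}`);
  res.sendStatus(200);
});

app.listen(8080);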
13 changes: 13 additions & 0 deletions src/options/views/home-view/index.js
@@ -25,6 +25,10 @@ import { getAnalyticsStatus } from "../../../libs/indexed-db/settings";
import { handleClick } from "../../../libs/indexed-db/getAnalytics";
import { settingsModelsEnum } from "../../../background/analysis/classModels";
import { settingsKeyval } from "../../../libs/indexed-db/openDB";
import {
IS_CRAWLING,
IS_CRAWLING_TESTING,
} from "../../../background/background.js";

/**
* Home page view containing overview and recently identified labels
@@ -52,6 +56,15 @@ const HomeView = () => {
alert(
"Privacy Pioneer does not collect any data from you. However, your IP address is shared with ipinfo.io to identify geographical locations in web requests. You can find ipinfo.io's privacy policy here https://ipinfo.io/privacy-policy."
);
if (IS_CRAWLING) {
const lat = prompt("Enter target lat", "");
const long = prompt("Enter target long", "");
const zip = prompt("Enter target zip", "");
settingsKeyval.set("TARGET_LAT", parseFloat(lat));
settingsKeyval.set("TARGET_LONG", parseFloat(long));
settingsKeyval.set("TARGET_ZIP", zip);
browser.runtime.sendMessage({ msg: "dataUpdated" }); // force the extension to reset the target values with the ones specified
}
settingsKeyval.set("firstHomeVisit", false);
}
});
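The dataUpdated message sent above implies a background listener that re-runs the import so the new TARGET_* values take effect. That listener is outside this diff; a hypothetical sketch of what it would look like:

// Hypothetical — the actual handler is not shown in this diff.
browser.runtime.onMessage.addListener((request) => {
  if (request.msg === "dataUpdated") {
    // Re-import so locCoords and the zip keyword pick up the TARGET_* overrides.
    importData().then((data) => {
      // refresh whatever cached analysis state depends on the location data
    });
  }
});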
