Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 53 additions & 8 deletions openwpm_utils/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,59 @@ def get_script_urls_from_call_stack_as_set(call_stack):
return script_urls


def get_ordered_script_urls_from_call_stack(call_stack):
    """Join the script URLs found in a call stack into a single string.

    The URLs come from get_script_urls_from_call_stack_as_list, keep the
    order in which they occur in the stack and are separated by ", ".
    A falsy call stack (None or "") yields the empty string.
    """
    if call_stack:
        ordered_urls = get_script_urls_from_call_stack_as_list(call_stack)
        return ", ".join(ordered_urls)
    return ""


def get_script_urls_from_call_stack_as_list(call_stack):
    """Return the urls of the scripts involved in the call stack as a list.

    Each non-blank line of ``call_stack`` is treated as one frame shaped
    like ``<function>@<script url>:<line>:<column>...``. URLs are returned
    in stack order. A run of consecutive frames from the same script
    contributes a single entry; a script that shows up again later in the
    stack is listed again.

    Args:
        call_stack: Newline-separated stack frames, or a falsy value.

    Returns:
        A list of script URL strings; empty when ``call_stack`` is falsy.
    """
    script_urls = []
    if not call_stack:
        return script_urls
    last_script_url = ""
    for stack_frame in call_stack.strip().split("\n"):
        if not stack_frame.strip():
            # A blank line carries no frame; parsing it would add a bogus
            # empty URL to the result.
            continue
        # Drop the trailing ":<line>:<column>..." part of the frame.
        location = stack_frame.rsplit(":", 2)[0]
        # Only the first "@" separates the function name from the URL, so
        # split once: URLs may contain "@" themselves ("/npm/lib@1.2.3/").
        script_url = location.split("@", 1)[-1]
        # Cut everything from " line" onward -- presumably the
        # " line N > eval" suffix of dynamically evaluated scripts
        # (TODO confirm against the stack producer).
        script_url = script_url.split(" line")[0]
        # Collapse only *consecutive* repeats so the order of appearance
        # is preserved.
        if script_url != last_script_url:
            script_urls.append(script_url)
            last_script_url = script_url
    return script_urls


def get_set_of_script_ps1s_from_call_stack(script_urls):
    """Return the distinct PS+1s of the given script URLs as one string.

    Args:
        script_urls: Script URLs joined with ", " (a single string), or a
            falsy value.

    Returns:
        The unique ``get_ps_plus_1`` results for the URLs, joined with
        ", ". A URL for which ``get_ps_plus_1`` gives a falsy result
        contributes "". Returns "" when ``script_urls`` is falsy.
    """
    if not script_urls:
        return ""
    unique_ps1s = {
        get_ps_plus_1(script_url) or ""
        for script_url in script_urls.split(", ")
    }
    # Set iteration order is arbitrary; sort so the same input always
    # produces the same string.
    return ", ".join(sorted(unique_ps1s))


def get_ordered_script_ps1s_from_call_stack(call_stack):
    """Return the script PS1s of a call stack, in stack order, as a string.

    The call stack is first reduced to its ordered script URL string, which
    is then converted by get_ordered_script_ps1s_from_stack_script_urls.
    """
    ordered_script_urls = get_ordered_script_urls_from_call_stack(call_stack)
    return get_ordered_script_ps1s_from_stack_script_urls(ordered_script_urls)


def get_ordered_script_ps1s_from_stack_script_urls(script_urls):
    """Return ordered script PS1s as a string given a string of script URLs.

    Args:
        script_urls: Script URLs joined with ", " (a single string, not a
            list), or a falsy value.

    Returns:
        The ``get_ps_plus_1`` result of every URL, joined with ", " in
        input order. A URL for which ``get_ps_plus_1`` gives a falsy
        result contributes "". Consecutive identical PS1s are collapsed
        into one entry; a PS1 that reappears later is listed again.
        Returns "" when ``script_urls`` is falsy.
    """
    if not script_urls:
        # Nothing to look up: avoids failing on None and calling
        # get_ps_plus_1 with an empty URL.
        return ""
    script_ps1s = []
    for script_url in script_urls.split(", "):
        ps1 = get_ps_plus_1(script_url) or ""
        # Compare with the previous entry only, so order of appearance is
        # kept and only back-to-back repeats are dropped.
        if not script_ps1s or ps1 != script_ps1s[-1]:
            script_ps1s.append(ps1)
    return ", ".join(script_ps1s)


def add_col_bare_script_url(js_df):
"""Add a col for script URL without scheme, www and query."""
js_df['bare_script_url'] =\
Expand Down Expand Up @@ -167,14 +220,6 @@ def get_requests_from_visits(con, visit_ids):
return read_sql_query(qry, con)


def get_set_of_script_ps1s_from_call_stack(script_urls):
    """Return the distinct PS+1s of the given script URLs as one string.

    Args:
        script_urls: Script URLs joined with ", " (a single string), or a
            falsy value.

    Returns:
        The unique ``get_ps_plus_1`` results for the URLs, joined with
        ", ". A URL for which ``get_ps_plus_1`` gives a falsy result
        contributes "". Returns "" when ``script_urls`` is falsy.
    """
    if not script_urls:
        return ""
    unique_ps1s = {
        get_ps_plus_1(script_url) or ""
        for script_url in script_urls.split(", ")
    }
    # Set iteration order is arbitrary; sort so the same input always
    # produces the same string.
    return ", ".join(sorted(unique_ps1s))


def add_col_set_of_script_ps1s_from_call_stack(js_df):
js_df['stack_script_ps1s'] =\
js_df['stack_scripts'].map(get_set_of_script_ps1s_from_call_stack)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
name='openwpm-utils',
license='MPL 2.0',
url='https://github.com/mozilla/openwpm-utils',
version='0.1.2',
version='0.1.3',
packages=['openwpm_utils'],

# Dependencies
Expand Down
73 changes: 73 additions & 0 deletions tests/test_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
from openwpm_utils.analysis import (
get_ordered_script_urls_from_call_stack,
get_ordered_script_ps1s_from_call_stack
)

# Scheme shared by every sample script URL.
HTTPS_SCHEME = "https://"

# Domains of the three scripts that appear in the sample stack traces.
STACK_JS_DOMAIN_1, STACK_JS_DOMAIN_2, STACK_JS_DOMAIN_3 = (
    "example-1.com", "example-2.com", "example-3.com")

# Full script URLs: scheme + domain.
STACK_JS_URL_1, STACK_JS_URL_2, STACK_JS_URL_3 = (
    HTTPS_SCHEME + domain
    for domain in (STACK_JS_DOMAIN_1, STACK_JS_DOMAIN_2, STACK_JS_DOMAIN_3))

# One stack frame per script, shaped "func@<url>:<line>:<col>;null".
_FRAME_1 = "func@" + STACK_JS_URL_1 + ":1:2;null"
_FRAME_2 = "func@" + STACK_JS_URL_2 + ":3:4;null"
_FRAME_3 = "func@" + STACK_JS_URL_3 + ":5:6;null"

# Three different scripts, one frame each.
SAMPLE_STACK_TRACE_1 = "\n".join([_FRAME_1, _FRAME_2, _FRAME_3])

# Same scripts, with the second one repeated in consecutive frames.
SAMPLE_STACK_TRACE_2 = "\n".join(
    [_FRAME_1, _FRAME_2, _FRAME_2, _FRAME_2, _FRAME_3])

# Script 1 reappears after script 3; this trace ends with a newline.
SAMPLE_STACK_TRACE_3 = "\n".join(
    [_FRAME_1, _FRAME_1, _FRAME_3, _FRAME_1,
     _FRAME_2, _FRAME_2, _FRAME_2]) + "\n"

# Expected results for traces 1 and 2.
_URLS_IN_ORDER = [STACK_JS_URL_1, STACK_JS_URL_2, STACK_JS_URL_3]
_PS1S_IN_ORDER = [STACK_JS_DOMAIN_1, STACK_JS_DOMAIN_2, STACK_JS_DOMAIN_3]
EXPECTED_STACK_JS_URLS = ", ".join(_URLS_IN_ORDER)
EXPECTED_STACK_JS_PS1S = ", ".join(_PS1S_IN_ORDER)

# Expected results for trace 3, where script 1 is listed twice.
_URLS_MIXED = [STACK_JS_URL_1, STACK_JS_URL_3, STACK_JS_URL_1, STACK_JS_URL_2]
_PS1S_MIXED = [STACK_JS_DOMAIN_1, STACK_JS_DOMAIN_3,
               STACK_JS_DOMAIN_1, STACK_JS_DOMAIN_2]
EXPECTED_STACK_JS_URLS_MIXED = ", ".join(_URLS_MIXED)
EXPECTED_STACK_JS_PS1S_MIXED = ", ".join(_PS1S_MIXED)


def test_get_ordered_script_urls_from_call_stack():
    """Each sample trace must map to its expected ordered URL string."""
    cases = (
        (SAMPLE_STACK_TRACE_1, EXPECTED_STACK_JS_URLS),
        (SAMPLE_STACK_TRACE_2, EXPECTED_STACK_JS_URLS),
        (SAMPLE_STACK_TRACE_3, EXPECTED_STACK_JS_URLS_MIXED),
    )
    for stack_trace, expected_urls in cases:
        actual_urls = get_ordered_script_urls_from_call_stack(stack_trace)
        assert actual_urls == expected_urls


def test_get_ordered_script_ps1s_from_call_stack():
    """Each sample trace must map to its expected ordered PS1 string."""
    cases = (
        (SAMPLE_STACK_TRACE_1, EXPECTED_STACK_JS_PS1S),
        (SAMPLE_STACK_TRACE_2, EXPECTED_STACK_JS_PS1S),
        (SAMPLE_STACK_TRACE_3, EXPECTED_STACK_JS_PS1S_MIXED),
    )
    for stack_trace, expected_ps1s in cases:
        actual_ps1s = get_ordered_script_ps1s_from_call_stack(stack_trace)
        assert actual_ps1s == expected_ps1s
2 changes: 1 addition & 1 deletion tests/test_domain.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pytest
from crawl_utils.domain import (
from openwpm_utils.domain import (
get_ps_plus_1,
)

Expand Down