From 2102fc50d84349b54408d185caf4480a1dcb69c9 Mon Sep 17 00:00:00 2001
From: Jannis Rautenstrauch <33023300+JannisBush@users.noreply.github.com>
Date: Thu, 21 Nov 2024 13:29:03 +0100
Subject: [PATCH 01/14] Update iframe_attributes_usage description
---
sql/2024/security/iframe_attributes_usage.sql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sql/2024/security/iframe_attributes_usage.sql b/sql/2024/security/iframe_attributes_usage.sql
index 9b64593ba90..c8894845af8 100644
--- a/sql/2024/security/iframe_attributes_usage.sql
+++ b/sql/2024/security/iframe_attributes_usage.sql
@@ -1,6 +1,6 @@
#standardSQL
# Section: Content Inclusion - Iframe Sandbox/Permissions Policy
-# Question: How often are the allow and sandbox attributes used on iframes? Both per page and over all iframe elements
+# Question: How often are the allow and sandbox attributes used on iframes? Both per page (used in at least one iframe on a page) and over all iframe elements
SELECT
client,
date,
From e14a9f0961fcab904d71d13219832c42494b9e97 Mon Sep 17 00:00:00 2001
From: JannisBush <33023300+JannisBush@users.noreply.github.com>
Date: Thu, 21 Nov 2024 16:02:25 +0100
Subject: [PATCH 02/14] Fix total_iframes in iframe_attributes_usage.sql
---
sql/2024/security/iframe_attributes_usage.sql | 24 +++++++++++++++----
1 file changed, 19 insertions(+), 5 deletions(-)
diff --git a/sql/2024/security/iframe_attributes_usage.sql b/sql/2024/security/iframe_attributes_usage.sql
index c8894845af8..7cd9d7bf54e 100644
--- a/sql/2024/security/iframe_attributes_usage.sql
+++ b/sql/2024/security/iframe_attributes_usage.sql
@@ -1,16 +1,29 @@
#standardSQL
# Section: Content Inclusion - Iframe Sandbox/Permissions Policy
# Question: How often are the allow and sandbox attributes used on iframes? Both per page (used in at least one iframe on a page) and over all iframe elements
+WITH total_iframe_count AS (
SELECT
client,
date,
- COUNT(0) AS total_iframes,
+ SUM(SAFE_CAST(JSON_EXTRACT(custom_metrics, '$.num_iframes') AS INT64)) AS total_iframes
+ FROM
+ `httparchive.all.pages`
+ WHERE
+ (date = '2022-06-01' OR date = '2023-06-01' OR date = '2023-12-01' OR date = '2024-03-01' OR date = '2024-04-01' OR date = '2024-05-01' OR date = '2024-06-01') AND
+ is_root_page
+GROUP BY client, date
+)
+
+SELECT
+ client,
+ date,
+ total_iframes,
COUNTIF(allow IS NOT NULL) AS freq_allow,
- COUNTIF(allow IS NOT NULL) / COUNT(0) AS pct_allow_frames,
+ COUNTIF(allow IS NOT NULL) / total_iframes AS pct_allow_frames,
COUNTIF(sandbox IS NOT NULL) AS freq_sandbox,
- COUNTIF(sandbox IS NOT NULL) / COUNT(0) AS pct_sandbox_frames,
+ COUNTIF(sandbox IS NOT NULL) / total_iframes AS pct_sandbox_frames,
COUNTIF(allow IS NOT NULL AND sandbox IS NOT NULL) AS freq_both_frames,
- COUNTIF(allow IS NOT NULL AND sandbox IS NOT NULL) / COUNT(0) AS pct_both_frames,
+ COUNTIF(allow IS NOT NULL AND sandbox IS NOT NULL) / total_iframes AS pct_both_frames,
COUNT(DISTINCT url) AS total_urls,
COUNT(DISTINCT IF(allow IS NOT NULL, url, NULL)) AS allow_freq_urls,
COUNT(DISTINCT IF(allow IS NOT NULL, url, NULL)) / COUNT(DISTINCT url) AS allow_pct_urls,
@@ -36,8 +49,9 @@ FROM (
is_root_page
)
LEFT JOIN UNNEST(iframeAttrs) AS iframeAttr
- )
+ ) JOIN total_iframe_count USING (client, date)
GROUP BY
+ total_iframes,
client,
date
ORDER BY
From 9886bc46adf8bf07f0f0f525736be4bd437257bb Mon Sep 17 00:00:00 2001
From: JannisBush <33023300+JannisBush@users.noreply.github.com>
Date: Thu, 21 Nov 2024 16:03:12 +0100
Subject: [PATCH 03/14] Fix total pages in meta_csp_disallowed_directives.sql
---
.../meta_csp_disallowed_directives.sql | 21 +++++++++++++++++--
1 file changed, 19 insertions(+), 2 deletions(-)
diff --git a/sql/2024/security/meta_csp_disallowed_directives.sql b/sql/2024/security/meta_csp_disallowed_directives.sql
index 7816a584cd9..9aea88eeab9 100644
--- a/sql/2024/security/meta_csp_disallowed_directives.sql
+++ b/sql/2024/security/meta_csp_disallowed_directives.sql
@@ -2,9 +2,24 @@
# Section: Security misconfigurations - CSP directives that are ignored in <meta>
# Question: How many pages use invalid CSP directives in <meta>?
# Note: uses the old payload._almanac metric location instead of custom_metrics.almanac (also the meta-nodes metric is in the generic almanac.js custom metric)
+WITH totals AS ( # Total number of root pages per client in the 2024-06-01 crawl
+  SELECT
+    client,
+    COUNT(0) AS total_pages
+  FROM
+    `httparchive.all.pages` # Count page rows; counting rows of the requests table would yield the number of requests, not pages
+  WHERE
+    date = '2024-06-01' AND
+    is_root_page
+  GROUP BY
+    client
+)
+
+
SELECT
client,
- COUNT(DISTINCT page) AS total_pages,
+ total_pages,
+ COUNT(DISTINCT page) AS total_pages_with_csp_meta,
COUNT(CASE WHEN REGEXP_CONTAINS(LOWER(JSON_VALUE(meta_node, '$.content')), r'(?i)frame-ancestors') THEN page END) AS count_frame_ancestors,
COUNT(CASE WHEN REGEXP_CONTAINS(LOWER(JSON_VALUE(meta_node, '$.content')), r'(?i)frame-ancestors') THEN page END) / COUNT(DISTINCT page) AS pct_frame_ancestors,
COUNT(CASE WHEN REGEXP_CONTAINS(LOWER(JSON_VALUE(meta_node, '$.content')), r'(?i)sandbox( allow-[a-z]+)*;') THEN page END) AS count_sandbox,
@@ -22,7 +37,9 @@ FROM (
),
UNNEST(JSON_QUERY_ARRAY(metrics, '$.meta-nodes.nodes')) meta_node,
UNNEST(['Content-Security-Policy']) AS policy
+JOIN totals using (client)
WHERE
LOWER(JSON_VALUE(meta_node, '$.http-equiv')) = 'content-security-policy' OR LOWER(JSON_VALUE(meta_node, '$.name')) = 'content-security-policy'
GROUP BY
- client
+ client,
+ total_pages
From f9ac5de7476af716c7f9b40a99e67ed69bdc2244 Mon Sep 17 00:00:00 2001
From: JannisBush <33023300+JannisBush@users.noreply.github.com>
Date: Thu, 21 Nov 2024 16:03:40 +0100
Subject: [PATCH 04/14] Clarify in 3 queries that the total is not global
---
sql/2024/security/csp_number_of_allowed_hosts.sql | 5 +++--
sql/2024/security/csp_script_source_list_keywords.sql | 4 ++--
sql/2024/security/iframe_attribute_popular_hosts.sql | 8 ++++----
3 files changed, 9 insertions(+), 8 deletions(-)
diff --git a/sql/2024/security/csp_number_of_allowed_hosts.sql b/sql/2024/security/csp_number_of_allowed_hosts.sql
index 3ca4f68b5e9..8ead4c5cfb1 100644
--- a/sql/2024/security/csp_number_of_allowed_hosts.sql
+++ b/sql/2024/security/csp_number_of_allowed_hosts.sql
@@ -1,6 +1,7 @@
#standardSQL
# Section: Attack Preventions - Preventing attacks using CSP
# Question: CSP on home pages: number of unique headers, header length and number of allowed HTTP(S) hosts in all directives
+# Note: this query checks whether the CSP header value is NULL (possibly meaning empty); 99.65% of CSP headers on desktop are not NULL. It appears that no equivalent NULL check was done for other headers (to be confirmed).
CREATE TEMP FUNCTION getNumUniqueHosts(str STRING) AS (
(SELECT COUNT(DISTINCT x) FROM UNNEST(REGEXP_EXTRACT_ALL(str, r'(?i)(https*://[^\s;]+)[\s;]')) AS x)
);
@@ -8,8 +9,8 @@ CREATE TEMP FUNCTION getNumUniqueHosts(str STRING) AS (
SELECT
client,
percentile,
- COUNT(0) AS total_requests,
- COUNTIF(csp_header IS NOT NULL) AS total_csp_headers,
+ COUNT(0) AS total_csp_headers,
+ COUNTIF(csp_header IS NOT NULL) AS total_non_null_csp_headers,
COUNTIF(csp_header IS NOT NULL) / COUNT(0) AS pct_csp_headers,
COUNT(DISTINCT csp_header) AS num_unique_csp_headers,
APPROX_QUANTILES(LENGTH(csp_header), 1000 IGNORE NULLS)[OFFSET(percentile * 10)] AS csp_header_length,
diff --git a/sql/2024/security/csp_script_source_list_keywords.sql b/sql/2024/security/csp_script_source_list_keywords.sql
index c1bf47b19ee..14eca43395c 100644
--- a/sql/2024/security/csp_script_source_list_keywords.sql
+++ b/sql/2024/security/csp_script_source_list_keywords.sql
@@ -3,7 +3,7 @@
# Question: usage of default/script-src, and within the directive usage of strict-dynamic, nonce values, unsafe-inline and unsafe-eval
SELECT
client,
- total_pages,
+ total_pages_with_csp,
freq_csp,
freq_default_script_src,
SAFE_DIVIDE(freq_default_script_src, freq_csp) AS pct_default_script_src_over_csp,
@@ -22,7 +22,7 @@ SELECT
FROM (
SELECT
client,
- COUNT(0) AS total_pages,
+ COUNT(0) AS total_pages_with_csp,
COUNTIF(csp_header IS NOT NULL) AS freq_csp,
COUNTIF(REGEXP_CONTAINS(csp_header, '(?i)(default|script)-src')) AS freq_default_script_src,
COUNTIF(REGEXP_CONTAINS(csp_header, '(?i)(default|script)-src[^;]+strict-dynamic')) AS freq_strict_dynamic,
diff --git a/sql/2024/security/iframe_attribute_popular_hosts.sql b/sql/2024/security/iframe_attribute_popular_hosts.sql
index 68d416bf98d..d2a64e39fa0 100644
--- a/sql/2024/security/iframe_attribute_popular_hosts.sql
+++ b/sql/2024/security/iframe_attribute_popular_hosts.sql
@@ -12,9 +12,9 @@ SELECT
client,
policy_type,
hostname,
- total_iframes,
+ total_iframes_with_allow_or_sandbox,
COUNTIF(has_policy) AS freq,
- COUNTIF(has_policy) / total_iframes AS pct
+ COUNTIF(has_policy) / total_iframes_with_allow_or_sandbox AS pct
FROM (
SELECT
client,
@@ -37,7 +37,7 @@ FROM (
JOIN (
SELECT
client,
- SUM(ARRAY_LENGTH(JSON_EXTRACT_ARRAY(JSON_EXTRACT_SCALAR(payload, '$._security'), '$.iframe-allow-sandbox'))) AS total_iframes
+ SUM(ARRAY_LENGTH(JSON_EXTRACT_ARRAY(JSON_EXTRACT_SCALAR(payload, '$._security'), '$.iframe-allow-sandbox'))) AS total_iframes_with_allow_or_sandbox
FROM
`httparchive.all.pages`
WHERE
@@ -49,7 +49,7 @@ USING
(client)
GROUP BY
client,
- total_iframes,
+ total_iframes_with_allow_or_sandbox,
policy_type,
hostname
HAVING
From ea124f5c3b8a8795f32b0c6083201ef0a0debc02 Mon Sep 17 00:00:00 2001
From: JannisBush <33023300+JannisBush@users.noreply.github.com>
Date: Thu, 21 Nov 2024 16:03:53 +0100
Subject: [PATCH 05/14] Note clarification
---
sql/2024/security/coep_header_prevalence.sql | 2 +-
sql/2024/security/coop_header_prevalence.sql | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/sql/2024/security/coep_header_prevalence.sql b/sql/2024/security/coep_header_prevalence.sql
index bf07e619a70..e3de007010b 100644
--- a/sql/2024/security/coep_header_prevalence.sql
+++ b/sql/2024/security/coep_header_prevalence.sql
@@ -1,7 +1,7 @@
#standardSQL
# Section: Attack Preventions - Preventing attacks using Cross-Origin policies
# Question: Which are the most common COEP values?
-# Note: Considers headers of main document responses
+# Note: Considers headers of main document responses only
SELECT
client,
coep_header,
diff --git a/sql/2024/security/coop_header_prevalence.sql b/sql/2024/security/coop_header_prevalence.sql
index 33f76fdd793..33bd048cded 100644
--- a/sql/2024/security/coop_header_prevalence.sql
+++ b/sql/2024/security/coop_header_prevalence.sql
@@ -1,7 +1,7 @@
#standardSQL
# Section: Attack Preventions - Preventing attacks using Cross-Origin policies
# Question: Which are the most common COOP values?
-# Note: Considers headers of main document responses
+# Note: Considers headers of main document responses only
SELECT
client,
coop_header,
From 164af5170954b8ee87080520d6f1dd571d95434b Mon Sep 17 00:00:00 2001
From: JannisBush <33023300+JannisBush@users.noreply.github.com>
Date: Thu, 21 Nov 2024 16:04:14 +0100
Subject: [PATCH 06/14] Update contributor details
---
src/config/contributors.json | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/config/contributors.json b/src/config/contributors.json
index f460ef9edaf..e0554e66e42 100644
--- a/src/config/contributors.json
+++ b/src/config/contributors.json
@@ -2071,6 +2071,7 @@
"JannisBush": {
"avatar_url": "33023300",
"github": "JannisBush",
+ "linkedin": "jannis-rautenstrauch",
"name": "Jannis Rautenstrauch",
"teams": {
"2024": [
@@ -2078,7 +2079,7 @@
]
},
"twitter": "jannis_r",
- "website": "https://cispa.de/en/people/c01jara"
+ "website": "https://jannisbush.github.io/"
},
"jaredcwhite": {
"avatar_url": "658496",
From 7953be559d347029c689a369cf12364f36d37fc6 Mon Sep 17 00:00:00 2001
From: JannisBush <33023300+JannisBush@users.noreply.github.com>
Date: Thu, 21 Nov 2024 16:26:00 +0100
Subject: [PATCH 07/14] Add comments to 2022, 2021, 2020 queries
---
sql/2020/security/iframe_attributes_usage.sql | 2 +-
sql/2021/security/iframe_attributes_usage.sql | 2 +-
sql/2022/security/iframe_attributes_usage.sql | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/sql/2020/security/iframe_attributes_usage.sql b/sql/2020/security/iframe_attributes_usage.sql
index f90354e3b8f..9903e3be37e 100644
--- a/sql/2020/security/iframe_attributes_usage.sql
+++ b/sql/2020/security/iframe_attributes_usage.sql
@@ -2,7 +2,7 @@
# usage of allow and sandbox attribute of iframe elements, per page and over all iframe elements
SELECT
client,
- COUNT(0) AS total_iframes,
+  COUNT(0) AS total_iframes, # Note: This is not the total number of iframes; it is the number of iframes with an allow or sandbox attribute, plus 1 for each website without such iframes
COUNTIF(allow IS NOT NULL) AS freq_allow,
COUNTIF(allow IS NOT NULL) / COUNT(0) AS pct_allow_frames,
COUNTIF(sandbox IS NOT NULL) AS freq_sandbox,
diff --git a/sql/2021/security/iframe_attributes_usage.sql b/sql/2021/security/iframe_attributes_usage.sql
index 0e381cab432..eef600da564 100644
--- a/sql/2021/security/iframe_attributes_usage.sql
+++ b/sql/2021/security/iframe_attributes_usage.sql
@@ -2,7 +2,7 @@
# usage of allow and sandbox attribute of iframe elements, per page and over all iframe elements
SELECT
client,
- COUNT(0) AS total_iframes,
+  COUNT(0) AS total_iframes, # Note: This is not the total number of iframes; it is the number of iframes with an allow or sandbox attribute, plus 1 for each website without such iframes
COUNTIF(allow IS NOT NULL) AS freq_allow,
COUNTIF(allow IS NOT NULL) / COUNT(0) AS pct_allow_frames,
COUNTIF(sandbox IS NOT NULL) AS freq_sandbox,
diff --git a/sql/2022/security/iframe_attributes_usage.sql b/sql/2022/security/iframe_attributes_usage.sql
index 2ded2721810..d1ef27993f1 100644
--- a/sql/2022/security/iframe_attributes_usage.sql
+++ b/sql/2022/security/iframe_attributes_usage.sql
@@ -2,7 +2,7 @@
# usage of allow and sandbox attribute of iframe elements, per page and over all iframe elements
SELECT
client,
- COUNT(0) AS total_iframes,
+  COUNT(0) AS total_iframes, # Note: This is not the total number of iframes; it is the number of iframes with an allow or sandbox attribute, plus 1 for each website without such iframes
COUNTIF(allow IS NOT NULL) AS freq_allow,
COUNTIF(allow IS NOT NULL) / COUNT(0) AS pct_allow_frames,
COUNTIF(sandbox IS NOT NULL) AS freq_sandbox,
From c0a707cb61e0347811d3ca9938ca76f30b8b7257 Mon Sep 17 00:00:00 2001
From: JannisBush <33023300+JannisBush@users.noreply.github.com>
Date: Thu, 21 Nov 2024 16:29:06 +0100
Subject: [PATCH 08/14] Fix linting issues
---
sql/2024/security/iframe_attributes_usage.sql | 10 +++++-----
sql/2024/security/meta_csp_disallowed_directives.sql | 2 +-
2 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/sql/2024/security/iframe_attributes_usage.sql b/sql/2024/security/iframe_attributes_usage.sql
index 7cd9d7bf54e..50161995500 100644
--- a/sql/2024/security/iframe_attributes_usage.sql
+++ b/sql/2024/security/iframe_attributes_usage.sql
@@ -2,16 +2,16 @@
# Section: Content Inclusion - Iframe Sandbox/Permissions Policy
# Question: How often are the allow and sandbox attributes used on iframes? Both per page (used in at least one iframe on a page) and over all iframe elements
WITH total_iframe_count AS (
-SELECT
- client,
- date,
- SUM(SAFE_CAST(JSON_EXTRACT(custom_metrics, '$.num_iframes') AS INT64)) AS total_iframes
+ SELECT
+ client,
+ date,
+ SUM(SAFE_CAST(JSON_EXTRACT(custom_metrics, '$.num_iframes') AS INT64)) AS total_iframes
FROM
`httparchive.all.pages`
WHERE
(date = '2022-06-01' OR date = '2023-06-01' OR date = '2023-12-01' OR date = '2024-03-01' OR date = '2024-04-01' OR date = '2024-05-01' OR date = '2024-06-01') AND
is_root_page
-GROUP BY client, date
+ GROUP BY client, date
)
SELECT
diff --git a/sql/2024/security/meta_csp_disallowed_directives.sql b/sql/2024/security/meta_csp_disallowed_directives.sql
index 9aea88eeab9..aadf17c7f74 100644
--- a/sql/2024/security/meta_csp_disallowed_directives.sql
+++ b/sql/2024/security/meta_csp_disallowed_directives.sql
@@ -37,7 +37,7 @@ FROM (
),
UNNEST(JSON_QUERY_ARRAY(metrics, '$.meta-nodes.nodes')) meta_node,
UNNEST(['Content-Security-Policy']) AS policy
-JOIN totals using (client)
+JOIN totals USING (client)
WHERE
LOWER(JSON_VALUE(meta_node, '$.http-equiv')) = 'content-security-policy' OR LOWER(JSON_VALUE(meta_node, '$.name')) = 'content-security-policy'
GROUP BY
From c9a2e7ce0bf3a900f1f5b14ef68c1a9006a284bc Mon Sep 17 00:00:00 2001
From: Gertjan Franken
Date: Fri, 6 Dec 2024 12:54:55 +0100
Subject: [PATCH 09/14] Adapt text with updated query results
---
src/content/en/2024/security.md | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/content/en/2024/security.md b/src/content/en/2024/security.md
index 7cc5a4f09a1..bb30f7d5bae 100644
--- a/src/content/en/2024/security.md
+++ b/src/content/en/2024/security.md
@@ -816,7 +816,7 @@ The Permissions Policy can also be defined individually for each embedded `
```
-Out of the 21.4 million `