diff --git a/sql/2020/security/iframe_attributes_usage.sql b/sql/2020/security/iframe_attributes_usage.sql
index f90354e3b8f..9903e3be37e 100644
--- a/sql/2020/security/iframe_attributes_usage.sql
+++ b/sql/2020/security/iframe_attributes_usage.sql
@@ -2,7 +2,7 @@
# usage of allow and sandbox attribute of iframe elements, per page and over all iframe elements
SELECT
client,
- COUNT(0) AS total_iframes,
+ COUNT(0) AS total_iframes, # Note: These are not the total number of iframes but only the number of iframes with allow/sandbox + 1 for each website without such iframes
COUNTIF(allow IS NOT NULL) AS freq_allow,
COUNTIF(allow IS NOT NULL) / COUNT(0) AS pct_allow_frames,
COUNTIF(sandbox IS NOT NULL) AS freq_sandbox,
diff --git a/sql/2021/security/iframe_attributes_usage.sql b/sql/2021/security/iframe_attributes_usage.sql
index 0e381cab432..eef600da564 100644
--- a/sql/2021/security/iframe_attributes_usage.sql
+++ b/sql/2021/security/iframe_attributes_usage.sql
@@ -2,7 +2,7 @@
# usage of allow and sandbox attribute of iframe elements, per page and over all iframe elements
SELECT
client,
- COUNT(0) AS total_iframes,
+ COUNT(0) AS total_iframes, # Note: These are not the total number of iframes but only the number of iframes with allow/sandbox + 1 for each website without such iframes
COUNTIF(allow IS NOT NULL) AS freq_allow,
COUNTIF(allow IS NOT NULL) / COUNT(0) AS pct_allow_frames,
COUNTIF(sandbox IS NOT NULL) AS freq_sandbox,
diff --git a/sql/2022/security/iframe_attributes_usage.sql b/sql/2022/security/iframe_attributes_usage.sql
index 2ded2721810..d1ef27993f1 100644
--- a/sql/2022/security/iframe_attributes_usage.sql
+++ b/sql/2022/security/iframe_attributes_usage.sql
@@ -2,7 +2,7 @@
# usage of allow and sandbox attribute of iframe elements, per page and over all iframe elements
SELECT
client,
- COUNT(0) AS total_iframes,
+ COUNT(0) AS total_iframes, # Note: These are not the total number of iframes but only the number of iframes with allow/sandbox + 1 for each website without such iframes
COUNTIF(allow IS NOT NULL) AS freq_allow,
COUNTIF(allow IS NOT NULL) / COUNT(0) AS pct_allow_frames,
COUNTIF(sandbox IS NOT NULL) AS freq_sandbox,
diff --git a/sql/2024/security/coep_header_prevalence.sql b/sql/2024/security/coep_header_prevalence.sql
index bf07e619a70..e3de007010b 100644
--- a/sql/2024/security/coep_header_prevalence.sql
+++ b/sql/2024/security/coep_header_prevalence.sql
@@ -1,7 +1,7 @@
#standardSQL
# Section: Attack Preventions - Preventing attacks using Cross-Origin policies
# Question: Which are the most common COEP values?
-# Note: Considers headers of main document responses
+# Note: Considers headers of main document responses only
SELECT
client,
coep_header,
diff --git a/sql/2024/security/coop_header_prevalence.sql b/sql/2024/security/coop_header_prevalence.sql
index 33f76fdd793..33bd048cded 100644
--- a/sql/2024/security/coop_header_prevalence.sql
+++ b/sql/2024/security/coop_header_prevalence.sql
@@ -1,7 +1,7 @@
#standardSQL
# Section: Attack Preventions - Preventing attacks using Cross-Origin policies
# Question: Which are the most common COOP values?
-# Note: Considers headers of main document responses
+# Note: Considers headers of main document responses only
SELECT
client,
coop_header,
diff --git a/sql/2024/security/csp_number_of_allowed_hosts.sql b/sql/2024/security/csp_number_of_allowed_hosts.sql
index 3ca4f68b5e9..8ead4c5cfb1 100644
--- a/sql/2024/security/csp_number_of_allowed_hosts.sql
+++ b/sql/2024/security/csp_number_of_allowed_hosts.sql
@@ -1,6 +1,7 @@
#standardSQL
# Section: Attack Preventions - Preventing attacks using CSP
# Question: CSP on home pages: number of unique headers, header length and number of allowed HTTP(S) hosts in all directives
+# Note: this query checks whether the CSP header value is NULL (possibly meaning empty?); 99.65% of CSP headers are not NULL on desktop. Open question: the queries for other headers do not seem to perform this check - should they?
CREATE TEMP FUNCTION getNumUniqueHosts(str STRING) AS (
(SELECT COUNT(DISTINCT x) FROM UNNEST(REGEXP_EXTRACT_ALL(str, r'(?i)(https*://[^\s;]+)[\s;]')) AS x)
);
@@ -8,8 +9,8 @@ CREATE TEMP FUNCTION getNumUniqueHosts(str STRING) AS (
SELECT
client,
percentile,
- COUNT(0) AS total_requests,
- COUNTIF(csp_header IS NOT NULL) AS total_csp_headers,
+ COUNT(0) AS total_csp_headers,
+ COUNTIF(csp_header IS NOT NULL) AS total_non_null_csp_headers,
COUNTIF(csp_header IS NOT NULL) / COUNT(0) AS pct_csp_headers,
COUNT(DISTINCT csp_header) AS num_unique_csp_headers,
APPROX_QUANTILES(LENGTH(csp_header), 1000 IGNORE NULLS)[OFFSET(percentile * 10)] AS csp_header_length,
diff --git a/sql/2024/security/csp_script_source_list_keywords.sql b/sql/2024/security/csp_script_source_list_keywords.sql
index c1bf47b19ee..14eca43395c 100644
--- a/sql/2024/security/csp_script_source_list_keywords.sql
+++ b/sql/2024/security/csp_script_source_list_keywords.sql
@@ -3,7 +3,7 @@
# Question: usage of default/script-src, and within the directive usage of strict-dynamic, nonce values, unsafe-inline and unsafe-eval
SELECT
client,
- total_pages,
+ total_pages_with_csp,
freq_csp,
freq_default_script_src,
SAFE_DIVIDE(freq_default_script_src, freq_csp) AS pct_default_script_src_over_csp,
@@ -22,7 +22,7 @@ SELECT
FROM (
SELECT
client,
- COUNT(0) AS total_pages,
+ COUNT(0) AS total_pages_with_csp,
COUNTIF(csp_header IS NOT NULL) AS freq_csp,
COUNTIF(REGEXP_CONTAINS(csp_header, '(?i)(default|script)-src')) AS freq_default_script_src,
COUNTIF(REGEXP_CONTAINS(csp_header, '(?i)(default|script)-src[^;]+strict-dynamic')) AS freq_strict_dynamic,
diff --git a/sql/2024/security/iframe_attribute_popular_hosts.sql b/sql/2024/security/iframe_attribute_popular_hosts.sql
index 68d416bf98d..d2a64e39fa0 100644
--- a/sql/2024/security/iframe_attribute_popular_hosts.sql
+++ b/sql/2024/security/iframe_attribute_popular_hosts.sql
@@ -12,9 +12,9 @@ SELECT
client,
policy_type,
hostname,
- total_iframes,
+ total_iframes_with_allow_or_sandbox,
COUNTIF(has_policy) AS freq,
- COUNTIF(has_policy) / total_iframes AS pct
+ COUNTIF(has_policy) / total_iframes_with_allow_or_sandbox AS pct
FROM (
SELECT
client,
@@ -37,7 +37,7 @@ FROM (
JOIN (
SELECT
client,
- SUM(ARRAY_LENGTH(JSON_EXTRACT_ARRAY(JSON_EXTRACT_SCALAR(payload, '$._security'), '$.iframe-allow-sandbox'))) AS total_iframes
+ SUM(ARRAY_LENGTH(JSON_EXTRACT_ARRAY(JSON_EXTRACT_SCALAR(payload, '$._security'), '$.iframe-allow-sandbox'))) AS total_iframes_with_allow_or_sandbox
FROM
`httparchive.all.pages`
WHERE
@@ -49,7 +49,7 @@ USING
(client)
GROUP BY
client,
- total_iframes,
+ total_iframes_with_allow_or_sandbox,
policy_type,
hostname
HAVING
diff --git a/sql/2024/security/iframe_attributes_usage.sql b/sql/2024/security/iframe_attributes_usage.sql
index 9b64593ba90..50161995500 100644
--- a/sql/2024/security/iframe_attributes_usage.sql
+++ b/sql/2024/security/iframe_attributes_usage.sql
@@ -1,16 +1,29 @@
#standardSQL
# Section: Content Inclusion - Iframe Sandbox/Permissions Policy
-# Question: How often are the allow and sandbox attributes used on iframes? Both per page and over all iframe elements
+# Question: How often are the allow and sandbox attributes used on iframes? Both per page (used in at least one iframe on a page) and over all iframe elements
+WITH total_iframe_count AS ( # total number of iframes per client and crawl date; used as the denominator of the pct_*_frames columns
+ SELECT
+ client,
+ date,
+ SUM(SAFE_CAST(JSON_EXTRACT(custom_metrics, '$.num_iframes') AS INT64)) AS total_iframes # SAFE_CAST yields NULL (ignored by SUM) when num_iframes is missing or not an integer
+ FROM
+ `httparchive.all.pages`
+ WHERE
+ (date = '2022-06-01' OR date = '2023-06-01' OR date = '2023-12-01' OR date = '2024-03-01' OR date = '2024-04-01' OR date = '2024-05-01' OR date = '2024-06-01') AND
+ is_root_page
+ GROUP BY client, date
+)
+
SELECT
client,
date,
- COUNT(0) AS total_iframes,
+ total_iframes,
COUNTIF(allow IS NOT NULL) AS freq_allow,
- COUNTIF(allow IS NOT NULL) / COUNT(0) AS pct_allow_frames,
+ COUNTIF(allow IS NOT NULL) / total_iframes AS pct_allow_frames,
COUNTIF(sandbox IS NOT NULL) AS freq_sandbox,
- COUNTIF(sandbox IS NOT NULL) / COUNT(0) AS pct_sandbox_frames,
+ COUNTIF(sandbox IS NOT NULL) / total_iframes AS pct_sandbox_frames,
COUNTIF(allow IS NOT NULL AND sandbox IS NOT NULL) AS freq_both_frames,
- COUNTIF(allow IS NOT NULL AND sandbox IS NOT NULL) / COUNT(0) AS pct_both_frames,
+ COUNTIF(allow IS NOT NULL AND sandbox IS NOT NULL) / total_iframes AS pct_both_frames,
COUNT(DISTINCT url) AS total_urls,
COUNT(DISTINCT IF(allow IS NOT NULL, url, NULL)) AS allow_freq_urls,
COUNT(DISTINCT IF(allow IS NOT NULL, url, NULL)) / COUNT(DISTINCT url) AS allow_pct_urls,
@@ -36,8 +49,9 @@ FROM (
is_root_page
)
LEFT JOIN UNNEST(iframeAttrs) AS iframeAttr
- )
+ ) JOIN total_iframe_count USING (client, date)
GROUP BY
+ total_iframes,
client,
date
ORDER BY
diff --git a/sql/2024/security/iframe_attributes_usage_fix.sql b/sql/2024/security/iframe_attributes_usage_fix.sql
new file mode 100644
index 00000000000..16f52a47657
--- /dev/null
+++ b/sql/2024/security/iframe_attributes_usage_fix.sql
@@ -0,0 +1,58 @@
+#standardSQL
+# Section: Content Inclusion - Iframe Sandbox/Permissions Policy
+# Question: How often are the allow and sandbox attributes used on iframes? Both per page (used in at least one iframe on a page) and over all iframe elements
+WITH total_iframe_count AS ( # total number of iframes per client and crawl date; denominator of the pct_*_frames columns
+ SELECT
+ client,
+ date,
+ SUM(SAFE.INT64(custom_metrics.other.num_iframes)) AS total_iframes # SAFE. prefix returns NULL (ignored by SUM) instead of raising on a non-integer value
+ FROM
+ `httparchive.crawl.pages`
+ WHERE
+ (date = '2020-08-01' OR date = '2021-07-01' OR date = '2022-06-01') AND # keep in sync with the date filter of the main query below
+ is_root_page
+ GROUP BY client, date
+)
+
+SELECT
+ client,
+ date,
+ total_iframes,
+ COUNTIF(allow IS NOT NULL) AS freq_allow,
+ SAFE_DIVIDE(COUNTIF(allow IS NOT NULL), total_iframes) AS pct_allow_frames, # SAFE_DIVIDE returns NULL instead of failing the query when total_iframes is 0
+ COUNTIF(sandbox IS NOT NULL) AS freq_sandbox,
+ SAFE_DIVIDE(COUNTIF(sandbox IS NOT NULL), total_iframes) AS pct_sandbox_frames,
+ COUNTIF(allow IS NOT NULL AND sandbox IS NOT NULL) AS freq_both_frames,
+ SAFE_DIVIDE(COUNTIF(allow IS NOT NULL AND sandbox IS NOT NULL), total_iframes) AS pct_both_frames,
+ COUNT(DISTINCT url) AS total_urls, # every page appears at least once because of the LEFT JOIN below
+ COUNT(DISTINCT IF(allow IS NOT NULL, url, NULL)) AS allow_freq_urls,
+ COUNT(DISTINCT IF(allow IS NOT NULL, url, NULL)) / COUNT(DISTINCT url) AS allow_pct_urls,
+ COUNT(DISTINCT IF(sandbox IS NOT NULL, url, NULL)) AS sandbox_freq_urls,
+ COUNT(DISTINCT IF(sandbox IS NOT NULL, url, NULL)) / COUNT(DISTINCT url) AS sandbox_pct_urls
+FROM (
+ SELECT
+ client,
+ date,
+ url,
+ SAFE.STRING(iframeAttr.allow) AS allow,
+ SAFE.STRING(iframeAttr.sandbox) AS sandbox
+ FROM (
+ SELECT
+ client,
+ date,
+ page AS url,
+ JSON_EXTRACT_ARRAY(custom_metrics.security.`iframe-allow-sandbox`) AS iframeAttrs
+ FROM
+ `httparchive.crawl.pages`
+ WHERE
+ (date = '2020-08-01' OR date = '2021-07-01' OR date = '2022-06-01') AND # keep in sync with the date filter in total_iframe_count
+ is_root_page
+ ) LEFT JOIN UNNEST(iframeAttrs) AS iframeAttr # LEFT JOIN keeps one row (NULL allow/sandbox) for pages without entries in the array
+ ) JOIN total_iframe_count USING (client, date)
+GROUP BY
+ total_iframes,
+ client,
+ date
+ORDER BY
+ date,
+ client
diff --git a/sql/2024/security/meta_csp_disallowed_directives.sql b/sql/2024/security/meta_csp_disallowed_directives.sql
index 7816a584cd9..aadf17c7f74 100644
--- a/sql/2024/security/meta_csp_disallowed_directives.sql
+++ b/sql/2024/security/meta_csp_disallowed_directives.sql
@@ -2,9 +2,24 @@
# Section: Security misconfigurations - CSP directives that are ignored in <meta>
# Question: How many pages use invalid CSP directives in <meta>?
# Note: uses the old payload._almanac metric location instead of custom_metrics.almanac (also the meta-nodes metric is in the generic almanac.js custom metric)
+WITH totals AS ( # number of root pages per client; reported as total_pages next to the pages that carry a CSP <meta> tag
+ SELECT
+ client,
+ COUNT(0) AS total_pages
+ FROM
+ `httparchive.all.pages` # one row per page; all.requests has one row per request and would count requests instead of pages
+ WHERE
+ date = '2024-06-01' AND
+ is_root_page
+ GROUP BY
+ client
+)
+
+
SELECT
client,
- COUNT(DISTINCT page) AS total_pages,
+ total_pages,
+ COUNT(DISTINCT page) AS total_pages_with_csp_meta,
COUNT(CASE WHEN REGEXP_CONTAINS(LOWER(JSON_VALUE(meta_node, '$.content')), r'(?i)frame-ancestors') THEN page END) AS count_frame_ancestors,
COUNT(CASE WHEN REGEXP_CONTAINS(LOWER(JSON_VALUE(meta_node, '$.content')), r'(?i)frame-ancestors') THEN page END) / COUNT(DISTINCT page) AS pct_frame_ancestors,
COUNT(CASE WHEN REGEXP_CONTAINS(LOWER(JSON_VALUE(meta_node, '$.content')), r'(?i)sandbox( allow-[a-z]+)*;') THEN page END) AS count_sandbox,
@@ -22,7 +37,9 @@ FROM (
),
UNNEST(JSON_QUERY_ARRAY(metrics, '$.meta-nodes.nodes')) meta_node,
UNNEST(['Content-Security-Policy']) AS policy
+JOIN totals USING (client)
WHERE
LOWER(JSON_VALUE(meta_node, '$.http-equiv')) = 'content-security-policy' OR LOWER(JSON_VALUE(meta_node, '$.name')) = 'content-security-policy'
GROUP BY
- client
+ client,
+ total_pages
diff --git a/src/config/contributors.json b/src/config/contributors.json
index 6330ad22a57..7d7e8edb428 100644
--- a/src/config/contributors.json
+++ b/src/config/contributors.json
@@ -2124,6 +2124,7 @@
"JannisBush": {
"avatar_url": "33023300",
"github": "JannisBush",
+ "linkedin": "jannis-rautenstrauch",
"name": "Jannis Rautenstrauch",
"teams": {
"2024": [
@@ -2131,7 +2132,7 @@
]
},
"twitter": "jannis_r",
- "website": "https://cispa.de/en/people/c01jara"
+ "website": "https://jannisbush.github.io/"
},
"jaredcwhite": {
"avatar_url": "658496",
diff --git a/src/content/en/2020/security.md b/src/content/en/2020/security.md
index 7eb34dcc75d..c3aae40ba9f 100644
--- a/src/content/en/2020/security.md
+++ b/src/content/en/2020/security.md
@@ -621,7 +621,9 @@ In a similar fashion, by defining the `allow` attribute on `