diff --git a/sql/2020/security/iframe_attributes_usage.sql b/sql/2020/security/iframe_attributes_usage.sql index f90354e3b8f..9903e3be37e 100644 --- a/sql/2020/security/iframe_attributes_usage.sql +++ b/sql/2020/security/iframe_attributes_usage.sql @@ -2,7 +2,7 @@ # usage of allow and sandbox attribute of iframe elements, per page and over all iframe elements SELECT client, - COUNT(0) AS total_iframes, + COUNT(0) AS total_iframes, # Note: These are not the total number of iframes but only the number of iframes with allow/sandbox + 1 for each website without such iframes COUNTIF(allow IS NOT NULL) AS freq_allow, COUNTIF(allow IS NOT NULL) / COUNT(0) AS pct_allow_frames, COUNTIF(sandbox IS NOT NULL) AS freq_sandbox, diff --git a/sql/2021/security/iframe_attributes_usage.sql b/sql/2021/security/iframe_attributes_usage.sql index 0e381cab432..eef600da564 100644 --- a/sql/2021/security/iframe_attributes_usage.sql +++ b/sql/2021/security/iframe_attributes_usage.sql @@ -2,7 +2,7 @@ # usage of allow and sandbox attribute of iframe elements, per page and over all iframe elements SELECT client, - COUNT(0) AS total_iframes, + COUNT(0) AS total_iframes, # Note: These are not the total number of iframes but only the number of iframes with allow/sandbox + 1 for each website without such iframes COUNTIF(allow IS NOT NULL) AS freq_allow, COUNTIF(allow IS NOT NULL) / COUNT(0) AS pct_allow_frames, COUNTIF(sandbox IS NOT NULL) AS freq_sandbox, diff --git a/sql/2022/security/iframe_attributes_usage.sql b/sql/2022/security/iframe_attributes_usage.sql index 2ded2721810..d1ef27993f1 100644 --- a/sql/2022/security/iframe_attributes_usage.sql +++ b/sql/2022/security/iframe_attributes_usage.sql @@ -2,7 +2,7 @@ # usage of allow and sandbox attribute of iframe elements, per page and over all iframe elements SELECT client, - COUNT(0) AS total_iframes, + COUNT(0) AS total_iframes, # Note: These are not the total number of iframes but only the number of iframes with allow/sandbox + 1 for each 
website without such iframes COUNTIF(allow IS NOT NULL) AS freq_allow, COUNTIF(allow IS NOT NULL) / COUNT(0) AS pct_allow_frames, COUNTIF(sandbox IS NOT NULL) AS freq_sandbox, diff --git a/sql/2024/security/coep_header_prevalence.sql b/sql/2024/security/coep_header_prevalence.sql index bf07e619a70..e3de007010b 100644 --- a/sql/2024/security/coep_header_prevalence.sql +++ b/sql/2024/security/coep_header_prevalence.sql @@ -1,7 +1,7 @@ #standardSQL # Section: Attack Preventions - Preventing attacks using Cross-Origin policies # Question: Which are the most common COEP values? -# Note: Considers headers of main document responses +# Note: Considers headers of main document responses only SELECT client, coep_header, diff --git a/sql/2024/security/coop_header_prevalence.sql b/sql/2024/security/coop_header_prevalence.sql index 33f76fdd793..33bd048cded 100644 --- a/sql/2024/security/coop_header_prevalence.sql +++ b/sql/2024/security/coop_header_prevalence.sql @@ -1,7 +1,7 @@ #standardSQL # Section: Attack Preventions - Preventing attacks using Cross-Origin policies # Question: Which are the most common COOP values? -# Note: Considers headers of main document responses +# Note: Considers headers of main document responses only SELECT client, coop_header, diff --git a/sql/2024/security/csp_number_of_allowed_hosts.sql b/sql/2024/security/csp_number_of_allowed_hosts.sql index 3ca4f68b5e9..8ead4c5cfb1 100644 --- a/sql/2024/security/csp_number_of_allowed_hosts.sql +++ b/sql/2024/security/csp_number_of_allowed_hosts.sql @@ -1,6 +1,7 @@ #standardSQL # Section: Attack Preventions - Preventing attacks using CSP # Question: CSP on home pages: number of unique headers, header length and number of allowed HTTP(S) hosts in all directives +# Note: for CSP we checked whether the header value is NULL (empty?) (99.65% of CSP headers are not NULL on desktop), we did not do this for other headers? 
CREATE TEMP FUNCTION getNumUniqueHosts(str STRING) AS ( (SELECT COUNT(DISTINCT x) FROM UNNEST(REGEXP_EXTRACT_ALL(str, r'(?i)(https*://[^\s;]+)[\s;]')) AS x) ); @@ -8,8 +9,8 @@ CREATE TEMP FUNCTION getNumUniqueHosts(str STRING) AS ( SELECT client, percentile, - COUNT(0) AS total_requests, - COUNTIF(csp_header IS NOT NULL) AS total_csp_headers, + COUNT(0) AS total_csp_headers, + COUNTIF(csp_header IS NOT NULL) AS total_non_null_csp_headers, COUNTIF(csp_header IS NOT NULL) / COUNT(0) AS pct_csp_headers, COUNT(DISTINCT csp_header) AS num_unique_csp_headers, APPROX_QUANTILES(LENGTH(csp_header), 1000 IGNORE NULLS)[OFFSET(percentile * 10)] AS csp_header_length, diff --git a/sql/2024/security/csp_script_source_list_keywords.sql b/sql/2024/security/csp_script_source_list_keywords.sql index c1bf47b19ee..14eca43395c 100644 --- a/sql/2024/security/csp_script_source_list_keywords.sql +++ b/sql/2024/security/csp_script_source_list_keywords.sql @@ -3,7 +3,7 @@ # Question: usage of default/script-src, and within the directive usage of strict-dynamic, nonce values, unsafe-inline and unsafe-eval SELECT client, - total_pages, + total_pages_with_csp, freq_csp, freq_default_script_src, SAFE_DIVIDE(freq_default_script_src, freq_csp) AS pct_default_script_src_over_csp, @@ -22,7 +22,7 @@ SELECT FROM ( SELECT client, - COUNT(0) AS total_pages, + COUNT(0) AS total_pages_with_csp, COUNTIF(csp_header IS NOT NULL) AS freq_csp, COUNTIF(REGEXP_CONTAINS(csp_header, '(?i)(default|script)-src')) AS freq_default_script_src, COUNTIF(REGEXP_CONTAINS(csp_header, '(?i)(default|script)-src[^;]+strict-dynamic')) AS freq_strict_dynamic, diff --git a/sql/2024/security/iframe_attribute_popular_hosts.sql b/sql/2024/security/iframe_attribute_popular_hosts.sql index 68d416bf98d..d2a64e39fa0 100644 --- a/sql/2024/security/iframe_attribute_popular_hosts.sql +++ b/sql/2024/security/iframe_attribute_popular_hosts.sql @@ -12,9 +12,9 @@ SELECT client, policy_type, hostname, - total_iframes, + 
total_iframes_with_allow_or_sandbox, COUNTIF(has_policy) AS freq, - COUNTIF(has_policy) / total_iframes AS pct + COUNTIF(has_policy) / total_iframes_with_allow_or_sandbox AS pct FROM ( SELECT client, @@ -37,7 +37,7 @@ FROM ( JOIN ( SELECT client, - SUM(ARRAY_LENGTH(JSON_EXTRACT_ARRAY(JSON_EXTRACT_SCALAR(payload, '$._security'), '$.iframe-allow-sandbox'))) AS total_iframes + SUM(ARRAY_LENGTH(JSON_EXTRACT_ARRAY(JSON_EXTRACT_SCALAR(payload, '$._security'), '$.iframe-allow-sandbox'))) AS total_iframes_with_allow_or_sandbox FROM `httparchive.all.pages` WHERE @@ -49,7 +49,7 @@ USING (client) GROUP BY client, - total_iframes, + total_iframes_with_allow_or_sandbox, policy_type, hostname HAVING diff --git a/sql/2024/security/iframe_attributes_usage.sql b/sql/2024/security/iframe_attributes_usage.sql index 9b64593ba90..50161995500 100644 --- a/sql/2024/security/iframe_attributes_usage.sql +++ b/sql/2024/security/iframe_attributes_usage.sql @@ -1,16 +1,29 @@ #standardSQL # Section: Content Inclusion - Iframe Sandbox/Permissions Policy -# Question: How often are the allow and sandbox attributes used on iframes? Both per page and over all iframe elements +# Question: How often are the allow and sandbox attributes used on iframes? 
Both per page (used in at least one iframe on a page) and over all iframe elements +WITH total_iframe_count AS ( + SELECT + client, + date, + SUM(SAFE_CAST(JSON_EXTRACT(custom_metrics, '$.num_iframes') AS INT64)) AS total_iframes + FROM + `httparchive.all.pages` + WHERE + (date = '2022-06-01' OR date = '2023-06-01' OR date = '2023-12-01' OR date = '2024-03-01' OR date = '2024-04-01' OR date = '2024-05-01' OR date = '2024-06-01') AND + is_root_page + GROUP BY client, date +) + SELECT client, date, - COUNT(0) AS total_iframes, + total_iframes, COUNTIF(allow IS NOT NULL) AS freq_allow, - COUNTIF(allow IS NOT NULL) / COUNT(0) AS pct_allow_frames, + COUNTIF(allow IS NOT NULL) / total_iframes AS pct_allow_frames, COUNTIF(sandbox IS NOT NULL) AS freq_sandbox, - COUNTIF(sandbox IS NOT NULL) / COUNT(0) AS pct_sandbox_frames, + COUNTIF(sandbox IS NOT NULL) / total_iframes AS pct_sandbox_frames, COUNTIF(allow IS NOT NULL AND sandbox IS NOT NULL) AS freq_both_frames, - COUNTIF(allow IS NOT NULL AND sandbox IS NOT NULL) / COUNT(0) AS pct_both_frames, + COUNTIF(allow IS NOT NULL AND sandbox IS NOT NULL) / total_iframes AS pct_both_frames, COUNT(DISTINCT url) AS total_urls, COUNT(DISTINCT IF(allow IS NOT NULL, url, NULL)) AS allow_freq_urls, COUNT(DISTINCT IF(allow IS NOT NULL, url, NULL)) / COUNT(DISTINCT url) AS allow_pct_urls, @@ -36,8 +49,9 @@ FROM ( is_root_page ) LEFT JOIN UNNEST(iframeAttrs) AS iframeAttr - ) + ) JOIN total_iframe_count USING (client, date) GROUP BY + total_iframes, client, date ORDER BY diff --git a/sql/2024/security/iframe_attributes_usage_fix.sql b/sql/2024/security/iframe_attributes_usage_fix.sql new file mode 100644 index 00000000000..16f52a47657 --- /dev/null +++ b/sql/2024/security/iframe_attributes_usage_fix.sql @@ -0,0 +1,58 @@ +#standardSQL +# Section: Content Inclusion - Iframe Sandbox/Permissions Policy +# Question: How often are the allow and sandbox attributes used on iframes? 
Both per page (used in at least one iframe on a page) and over all iframe elements +WITH total_iframe_count AS ( + SELECT + client, + date, + SUM(SAFE.INT64(custom_metrics.other.num_iframes)) AS total_iframes + FROM + `httparchive.crawl.pages` + WHERE + (date = '2020-08-01' OR date = '2021-07-01' OR date = '2022-06-01') AND + is_root_page + GROUP BY client, date +) + +SELECT + client, + date, + total_iframes, + COUNTIF(allow IS NOT NULL) AS freq_allow, + COUNTIF(allow IS NOT NULL) / total_iframes AS pct_allow_frames, + COUNTIF(sandbox IS NOT NULL) AS freq_sandbox, + COUNTIF(sandbox IS NOT NULL) / total_iframes AS pct_sandbox_frames, + COUNTIF(allow IS NOT NULL AND sandbox IS NOT NULL) AS freq_both_frames, + COUNTIF(allow IS NOT NULL AND sandbox IS NOT NULL) / total_iframes AS pct_both_frames, + COUNT(DISTINCT url) AS total_urls, + COUNT(DISTINCT IF(allow IS NOT NULL, url, NULL)) AS allow_freq_urls, + COUNT(DISTINCT IF(allow IS NOT NULL, url, NULL)) / COUNT(DISTINCT url) AS allow_pct_urls, + COUNT(DISTINCT IF(sandbox IS NOT NULL, url, NULL)) AS sandbox_freq_urls, + COUNT(DISTINCT IF(sandbox IS NOT NULL, url, NULL)) / COUNT(DISTINCT url) AS sandbox_pct_urls +FROM ( + SELECT + client, + date, + url, + SAFE.STRING(iframeAttr.allow) AS allow, + SAFE.STRING(iframeAttr.sandbox) AS sandbox + FROM ( + SELECT + client, + date, + page AS url, + JSON_EXTRACT_ARRAY(custom_metrics.security.`iframe-allow-sandbox`) AS iframeAttrs + FROM + `httparchive.crawl.pages` + WHERE + (date = '2020-08-01' OR date = '2021-07-01' OR date = '2022-06-01') AND + is_root_page + ) LEFT JOIN UNNEST(iframeAttrs) AS iframeAttr + ) JOIN total_iframe_count USING (client, date) +GROUP BY + total_iframes, + client, + date +ORDER BY + date, + client diff --git a/sql/2024/security/meta_csp_disallowed_directives.sql b/sql/2024/security/meta_csp_disallowed_directives.sql index 7816a584cd9..aadf17c7f74 100644 --- a/sql/2024/security/meta_csp_disallowed_directives.sql +++ 
b/sql/2024/security/meta_csp_disallowed_directives.sql @@ -2,9 +2,24 @@ # Section: Security misconfigurations - CSP directives that are ignored in # Question: How many pages use invalid CSP directives in ? # Note: uses the old payload._almanac metric location instead of custom_metrics.almanac (also the meta-nodes metric is in the generic almanac.js custom metric) +WITH totals AS ( + SELECT + client, + COUNT(0) AS total_pages + FROM + `httparchive.all.requests` + WHERE + date = '2024-06-01' AND + is_root_page + GROUP BY + client +) + + SELECT client, - COUNT(DISTINCT page) AS total_pages, + total_pages, + COUNT(DISTINCT page) AS total_pages_with_csp_meta, COUNT(CASE WHEN REGEXP_CONTAINS(LOWER(JSON_VALUE(meta_node, '$.content')), r'(?i)frame-ancestors') THEN page END) AS count_frame_ancestors, COUNT(CASE WHEN REGEXP_CONTAINS(LOWER(JSON_VALUE(meta_node, '$.content')), r'(?i)frame-ancestors') THEN page END) / COUNT(DISTINCT page) AS pct_frame_ancestors, COUNT(CASE WHEN REGEXP_CONTAINS(LOWER(JSON_VALUE(meta_node, '$.content')), r'(?i)sandbox( allow-[a-z]+)*;') THEN page END) AS count_sandbox, @@ -22,7 +37,9 @@ FROM ( ), UNNEST(JSON_QUERY_ARRAY(metrics, '$.meta-nodes.nodes')) meta_node, UNNEST(['Content-Security-Policy']) AS policy +JOIN totals USING (client) WHERE LOWER(JSON_VALUE(meta_node, '$.http-equiv')) = 'content-security-policy' OR LOWER(JSON_VALUE(meta_node, '$.name')) = 'content-security-policy' GROUP BY - client + client, + total_pages diff --git a/src/config/contributors.json b/src/config/contributors.json index 6330ad22a57..7d7e8edb428 100644 --- a/src/config/contributors.json +++ b/src/config/contributors.json @@ -2124,6 +2124,7 @@ "JannisBush": { "avatar_url": "33023300", "github": "JannisBush", + "linkedin": "jannis-rautenstrauch", "name": "Jannis Rautenstrauch", "teams": { "2024": [ @@ -2131,7 +2132,7 @@ ] }, "twitter": "jannis_r", - "website": "https://cispa.de/en/people/c01jara" + "website": "https://jannisbush.github.io/" }, "jaredcwhite": { 
"avatar_url": "658496", diff --git a/src/content/en/2020/security.md b/src/content/en/2020/security.md index 7eb34dcc75d..c3aae40ba9f 100644 --- a/src/content/en/2020/security.md +++ b/src/content/en/2020/security.md @@ -621,7 +621,9 @@ In a similar fashion, by defining the `allow` attribute on ` ``` -18.9% of 11.5 million frames in mobile contained the `allow` attribute to enable permission or feature policies. +12.6% of 17.4 million frames in mobile contained the `allow` attribute to enable permission or feature policies. + +

An earlier version of this chapter reported incorrect values for the total number of frames and the percentage of frames with the `allow` attribute. More information can be found in this GitHub PR.

The following is a list of the top 10 `allow` directives that were detected in frames: @@ -737,7 +739,9 @@ To mitigate these concerns the HTML specification (version 5) introduced the `sa The above chart of the 2022 data shows that more than 99% of websites with a `sandbox` attribute enable the `allow-scripts` and `allow-same-origin` permissions. -Of desktop websites that embed an iframe, 35.2% also include the `sandbox` attribute. +For all iframes found on desktop websites, 21.08% include the `sandbox` attribute. + +

An earlier version of this chapter reported an incorrect percentage of frames with the `sandbox` attribute. More information can be found in this GitHub PR.

We find that `Content-Security-Policy` headers which include a `sandbox` directive are at a mere 0.3% usage for mobile (desktop is similar at 0.4%) which may speak to the fact that this attribute is only applied on a per-case basis for the practice of embedding iframe content within pages, rather than ahead-of-time planning through a content security policy definition. diff --git a/src/content/en/2024/security.md b/src/content/en/2024/security.md index f9cf5f53585..87131970991 100644 --- a/src/content/en/2024/security.md +++ b/src/content/en/2024/security.md @@ -816,7 +816,7 @@ The Permissions Policy can also be defined individually for each embedded ` ``` -Out of the 21.4 million ` ``` -The `sandbox` attribute was observed in 28.4% and 27.5% of ` ``` -モバイルの1,150万フレームのうち18.9%に`allow`属性が含まれており、許可または機能ポリシーを有効にしています。 +モバイルの1,740万フレームのうち12.6%に`allow`属性が含まれており、許可または機能ポリシーを有効にしています。 + +

この章の以前のバージョンでは、フレームの合計数と `allow` 属性を持つフレームの割合の値が間違っていました。詳細については、この GitHub PR を参照してください。

以下は、フレームで検出されたトップ10の`allow`ディレクティブのリストです: @@ -737,7 +739,9 @@ function clickToGo() { 2022年の上記のグラフは、`sandbox`属性を持つウェブサイトの99%以上が`allow-scripts`と`allow-same-origin`の権限を有効にしていることを示しています。 -iframeを埋め込むデスクトップウェブサイトの35.2%が`sandbox`属性も含んでいます。 +デスクトップ ウェブサイトにあるすべての iframe のうち、21.08% に `sandbox` 属性が含まれています。 + +

この章の以前のバージョンでは、`sandbox` 属性を持つフレームの割合が誤って報告されていました。詳細については、この GitHub PR を参照してください。

モバイルの`Content-Security-Policy`ヘッダーに`sandbox`指令を含むものはわずか0.3%(デスクトップも同様に0.4%)であり、この属性がページ内でiframeコンテンツを埋め込む際にケースバイケースで適用されることが多く、事前のコンテンツセキュリティポリシー定義を通じて計画することは少ないことを示しています。 diff --git a/src/content/nl/2020/security.md b/src/content/nl/2020/security.md index caec7063ef2..44222e3a022 100644 --- a/src/content/nl/2020/security.md +++ b/src/content/nl/2020/security.md @@ -621,7 +621,9 @@ Op een vergelijkbare manier, door het `allow`-attribuut op ` ``` -在移动端的 1150 万个框架中,有 18.9% 包含 `allow` 属性,以启用权限或特性策略。 +在移动端的 1740 万个框架中,有 12.6% 包含 `allow` 属性,以启用权限或特性策略。 + +

本章的早期版本报告的框架总数以及具有 `allow` 属性的框架百分比有误。更多信息可在此 GitHub PR 中找到。

以下是在框架中检测到的前 10 条 `allow` 指令列表: @@ -737,7 +739,9 @@ function clickToGo() { 上述 2022 年的数据图表显示,超过 99% 的具有 `sandbox` 属性的网站都启用了 `allow-scripts` 和 `allow-same-origin` 权限。 -在嵌入 iframe 的桌面端网站中,35.2% 的网站还包括 `sandbox` 属性。 +对于桌面网站上的所有 iframe,21.08% 具有 `sandbox` 属性。 + +

本章的早期版本报告的具有 `sandbox` 属性的框架百分比有误。更多信息可在此 GitHub PR 中找到。

我们发现包含 `sandbox` 指令的 `Content-Security-Policy` 标头在移动端仅占 0.3% 的使用率(桌面端为 0.4%),这可能说明了这样一个事实,即该属性仅适用于在页面中嵌入 iframe 内容的实践,而不是通过内容安全策略定义进行预先规划。